In [None]:
import pandas as pd
# Plotly Express: the simplified Plotly interface, used to build interactive
# visualizations quickly and concisely.
import plotly.express as px
import plotly.io as pio
# Plotly Graph Objects: unlike Plotly Express, gives full control over every
# aspect of a visualization.
import plotly.graph_objects as go

# Visual style applied by default to every Plotly figure created afterwards.
pio.templates.default = "plotly_white"

# Read the transaction records and show the first five rows as plain text.
rfm_data = pd.read_csv(filepath_or_buffer="rfm_data.csv")
print(rfm_data.head(n=5))

   CustomerID PurchaseDate  TransactionAmount ProductInformation  OrderID  \
0        8814   2023-04-11             943.31          Product C   890075   
1        2188   2023-04-11             463.70          Product A   176819   
2        4608   2023-04-11              80.28          Product A   340062   
3        2559   2023-04-11             221.29          Product A   239145   
4        9482   2023-04-11             739.56          Product A   194545   

   Location  
0     Tokyo  
1    London  
2  New York  
3    London  
4     Paris  


In [None]:

# Parse the purchase dates so that date arithmetic is possible below.
rfm_data['PurchaseDate'] = pd.to_datetime(rfm_data['PurchaseDate'])

# Reference date for recency: the day after the most recent recorded purchase.
current_date = rfm_data['PurchaseDate'].max() + pd.Timedelta(days=1)

# One row per customer:
#   Recency   - days between the reference date and the customer's last purchase
#   Frequency - number of OrderID entries for the customer
#   Monetary  - sum of the customer's TransactionAmount values
rfm_table = (
    rfm_data
    .groupby('CustomerID')
    .agg(
        Recency=('PurchaseDate', lambda dates: (current_date - dates.max()).days),
        Frequency=('OrderID', 'count'),
        Monetary=('TransactionAmount', 'sum'),
    )
    .reset_index()
)

rfm_table.head()


Unnamed: 0,CustomerID,Recency,Frequency,Monetary
0,1011,34,2,1129.02
1,1025,22,1,359.29
2,1029,1,1,704.99
3,1046,44,1,859.82
4,1049,14,1,225.72


In [None]:
# Number of customers having exactly 5, 4, 3, 2 and 1 orders (one line each).
for order_count in (5, 4, 3, 2, 1):
    print((rfm_table['Frequency'] == order_count).sum())


0
0
3
48
895


In [None]:
# Define quantile-based scoring functions
def r_score(x, quantiles):
    """Map a recency value to a 1-5 score where lower values score higher.

    Args:
        x: The value to score.
        quantiles: Mapping with keys 0.2, 0.4, 0.6 and 0.8 giving the cut points.

    Returns:
        5 if ``x`` is at or below the 0.2 cut point, 4 / 3 / 2 for the 0.4 /
        0.6 / 0.8 cut points respectively, and 1 when ``x`` is not at or below
        any of them.
    """
    # Cut points are checked in ascending order; the first one that bounds x wins.
    for level, score in ((0.2, 5), (0.4, 4), (0.6, 3), (0.8, 2)):
        if x <= quantiles[level]:
            return score
    return 1

def fm_score(x, quantiles):
    """Map a frequency or monetary value to a 1-5 score where higher is better.

    Args:
        x: The value to score.
        quantiles: Mapping with keys 0.2, 0.4, 0.6 and 0.8 giving the cut points.

    Returns:
        1 if ``x`` is at or below the 0.2 cut point, 2 / 3 / 4 for the 0.4 /
        0.6 / 0.8 cut points respectively, and 5 when ``x`` is not at or below
        any of them.
    """
    score = 1
    # Walk the cut points upwards; stop at the first one that bounds x,
    # otherwise move on to the next score.
    for level in (0.2, 0.4, 0.6, 0.8):
        if x <= quantiles[level]:
            break
        score += 1
    return score

# Calculate the 20/40/60/80th percentile cut points of the three RFM metrics.
# The columns are selected explicitly so that this cell can be re-run: once the
# string columns (RFM_Score, Segment) have been added to rfm_table, calling
# DataFrame.quantile on the whole frame raises on pandas >= 2.0, where
# numeric_only defaults to False.
# NOTE: cut points can coincide when many customers share the same value (the
# Frequency quantiles printed further down in this notebook are all 1.0); for
# such a metric fm_score can only return 1 or 5.
quantiles = (
    rfm_table[['Recency', 'Frequency', 'Monetary']]
    .quantile(q=[0.2, 0.4, 0.6, 0.8])
    .to_dict()
)

# Score each metric from 1 to 5 (a low Recency scores high; a high
# Frequency / Monetary value scores high).
rfm_table['R'] = rfm_table['Recency'].apply(r_score, args=(quantiles['Recency'],))
rfm_table['F'] = rfm_table['Frequency'].apply(fm_score, args=(quantiles['Frequency'],))
rfm_table['M'] = rfm_table['Monetary'].apply(fm_score, args=(quantiles['Monetary'],))

# Concatenate the three digits into a string RFM score such as '355'.
rfm_table['RFM_Score'] = rfm_table['R'].map(str) + rfm_table['F'].map(str) + rfm_table['M'].map(str)

rfm_table.head()


Unnamed: 0,CustomerID,Recency,Frequency,Monetary,R,F,M,RFM_Score
0,1011,34,2,1129.02,3,5,5,355
1,1025,22,1,359.29,4,1,2,412
2,1029,1,1,704.99,5,1,4,514
3,1046,44,1,859.82,2,1,5,215
4,1049,14,1,225.72,4,1,2,412


In [None]:
def segment_rfm(row):
    """Return the segment label for one row of the scored RFM table.

    Args:
        row: Mapping-like object providing 'RFM_Score' (string) and the
            numeric 'R' and 'F' scores.

    Returns:
        One of 'Champions', 'Loyal Customers', 'Potential Loyalists',
        'New Customers', 'Promising', 'Need Attention' or 'At Risk'.
        Rules are evaluated in that order and the first match wins;
        'At Risk' is the fallback when no rule matches.
    """
    if row['RFM_Score'] == '555':
        return 'Champions'

    recency_score = row['R']
    frequency_score = row['F']

    # Recent customers, split by how often they buy.
    if recency_score >= 4:
        if frequency_score >= 4:
            return 'Loyal Customers'
        if frequency_score >= 3:
            return 'Potential Loyalists'

    if recency_score == 5 and frequency_score == 1:
        return 'New Customers'

    # Customers with a frequency score of at least 2, split by recency.
    if frequency_score >= 2:
        if recency_score >= 3:
            return 'Promising'
        if recency_score <= 2:
            return 'Need Attention'

    return 'At Risk'

# Label every customer row with its segment (segment_rfm is called once per row).
rfm_table['Segment'] = rfm_table.apply(segment_rfm, axis='columns')

# Preview the first rows of the segmented RFM table.
rfm_table.head()


Unnamed: 0,CustomerID,Recency,Frequency,Monetary,R,F,M,RFM_Score,Segment
0,1011,34,2,1129.02,3,5,5,355,Promising
1,1025,22,1,359.29,4,1,2,412,At Risk
2,1029,1,1,704.99,5,1,4,514,New Customers
3,1046,44,1,859.82,2,1,5,215,At Risk
4,1049,14,1,225.72,4,1,2,412,At Risk


In [None]:
# Number of customers in each segment, largest segment first,
# as a two-column frame: 'Segment' and 'Count'.
segment_summary = (
    rfm_table['Segment']
    .value_counts()
    .rename_axis('Segment')
    .reset_index(name='Count')
)
segment_summary


Unnamed: 0,Segment,Count
0,At Risk,713
1,New Customers,182
2,Loyal Customers,15
3,Promising,13
4,Champions,12
5,Need Attention,11


In [None]:
# Show the cut points used for scoring, one metric per line.
for metric_name in ('Recency', 'Frequency', 'Monetary'):
    print(quantiles[metric_name])

{0.2: 13.0, 0.4: 25.0, 0.6: 38.0, 0.8: 48.0}
{0.2: 1.0, 0.4: 1.0, 0.6: 1.0, 0.8: 1.0}
{0.2: 218.64, 0.4: 440.15, 0.6: 644.07, 0.8: 821.22}


In [None]:
# Bar chart of the customer count per segment.
fig = px.bar(
    data_frame=segment_summary,
    x='Segment',
    y='Count',
    title='RFM Value Segment Distribution',
)
fig.show()

In [None]:
# Expects 'rfm_table' to be the RFM DataFrame carrying a 'Segment' column.
# Each tile is one segment, sized by the Monetary values of its customers.
fig = px.treemap(
    data_frame=rfm_table,
    path=['Segment'],
    values='Monetary',
    title='RFM Customer Segments by Value',
)
fig.show()


In [None]:
# Keep only the customers labelled as Champions.
champions = rfm_table.loc[rfm_table['Segment'] == 'Champions']

# One box per raw RFM metric, all drawn against the same y axis.
fig = px.box(
    data_frame=champions,
    y=['Recency', 'Frequency', 'Monetary'],
    title='Distribution of RFM Values within Champions Segment',
)
fig.show()



In [None]:
import plotly.graph_objects as go

# Compute the correlation matrix of the raw RFM metrics within Champions.
correlation_matrix = champions[['Recency', 'Frequency', 'Monetary']].corr()

# Render the matrix as a heatmap; both axes carry the metric names.
fig = go.Figure(
    data=go.Heatmap(
        z=correlation_matrix.to_numpy(),
        x=correlation_matrix.columns,
        y=correlation_matrix.columns,
        colorscale='Viridis',
    )
)
fig.update_layout(title='Correlation Matrix of RFM Values within Champions Segment')
fig.show()



In [None]:
# Count the customers in each segment before plotting. Passing the raw
# CustomerID column as y would stack (i.e. sum) the identifier values of every
# row, which is not a customer count even though the axis is labelled
# 'Number of Customers'.
segment_counts = (
    rfm_table
    .groupby('Segment')['CustomerID']
    .nunique()
    .reset_index()
)

# After the aggregation, the 'CustomerID' column holds the number of distinct
# customers per segment, so the axis label below is accurate.
fig = px.bar(segment_counts, x='Segment', y='CustomerID', color='Segment',
             title='Comparison of RFM Segments',
             labels={'CustomerID': 'Number of Customers'})
fig.show()



In [None]:
import pandas as pd
import plotly.graph_objects as go

# Expects 'rfm_table' to be the RFM DataFrame holding the scores and segments.
# Average the recency, frequency and monetary scores within each segment and
# give the columns the names used by the chart below.
segment_scores = (
    rfm_table
    .groupby('Segment')[['R', 'F', 'M']]
    .mean()
    .reset_index()
    .rename(columns={
        'Segment': 'RFM Customer Segments',
        'R': 'RecencyScore',
        'F': 'FrequencyScore',
        'M': 'MonetaryScore',
    })
)

# Grouped bar chart: one bar trace per score type (recency, frequency,
# monetary, in that order), each with its own legend entry and colour.
fig = go.Figure(
    data=[
        go.Bar(
            x=segment_scores['RFM Customer Segments'],
            y=segment_scores[score_column],
            name=legend_name,
            marker_color=bar_color,
        )
        for score_column, legend_name, bar_color in (
            ('RecencyScore', 'Recency Score', 'rgb(158,202,225)'),
            ('FrequencyScore', 'Frequency Score', 'rgb(94,158,217)'),
            ('MonetaryScore', 'Monetary Score', 'rgb(32,102,148)'),
        )
    ]
)

# Layout: title, axis titles, side-by-side bars and a visible legend.
fig.update_layout(
    barmode='group',
    showlegend=True,
    title='Comparison of RFM Segments based on Recency, Frequency, and Monetary Scores',
    xaxis_title='RFM Segments',
    yaxis_title='Score',
)

fig.show()
