In [5]:
## importing libraries
import pandas as pd 
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
pio.templates.default ="plotly_white"

In [6]:
## reading data
# Location of the raw RFM transactions export (absolute local path).
csv_path = r"D:\PROJECTS\RFM Analysis\rfm_data.csv"
data = pd.read_csv(csv_path)

# Preview the first rows to confirm the file was parsed as expected.
data.head()

Unnamed: 0,CustomerID,PurchaseDate,TransactionAmount,ProductInformation,OrderID,Location
0,8814,2023-04-11,943.31,Product C,890075,Tokyo
1,2188,2023-04-11,463.7,Product A,176819,London
2,4608,2023-04-11,80.28,Product A,340062,New York
3,2559,2023-04-11,221.29,Product A,239145,London
4,9482,2023-04-11,739.56,Product A,194545,Paris


## Calculating RFM Values

In [7]:
data.dtypes

CustomerID              int64
PurchaseDate           object
TransactionAmount     float64
ProductInformation     object
OrderID                 int64
Location               object
dtype: object

In [8]:
# Parse the PurchaseDate strings into pandas datetimes so the .dt accessor
# can be used on the column afterwards.
data['PurchaseDate'] = data['PurchaseDate'].pipe(pd.to_datetime)

In [9]:
data.dtypes

CustomerID                     int64
PurchaseDate          datetime64[ns]
TransactionAmount            float64
ProductInformation            object
OrderID                        int64
Location                      object
dtype: object

In [10]:
# Recency: time elapsed between today and each purchase date.

from datetime import datetime

# NOTE: the reference date is "today" at execution time, so the resulting
# values change every day the notebook is re-run.
today = datetime.now().date()
purchase_days = data['PurchaseDate'].dt.date

# Subtracting the two yields one datetime.timedelta per row (object dtype).
data['Recency'] = today - purchase_days

In [11]:
data.Recency

0      784 days, 0:00:00
1      784 days, 0:00:00
2      784 days, 0:00:00
3      784 days, 0:00:00
4      784 days, 0:00:00
             ...        
995    724 days, 0:00:00
996    724 days, 0:00:00
997    724 days, 0:00:00
998    724 days, 0:00:00
999    724 days, 0:00:00
Name: Recency, Length: 1000, dtype: object

In [12]:
# Stringify the timedeltas (e.g. "784 days, 0:00:00") so that the .str
# accessor can be applied to the column.
data['Recency'] = data['Recency'].astype('string')

In [13]:
# Keep only the leading whole-day count of each stringified timedelta
# (e.g. "784 days, 0:00:00" -> "784"). A fixed 3-character slice is only
# correct for exactly three-digit day counts: it truncates "1000 days" to
# "100" and turns "5 days" into the unparsable "5 d". Extracting the leading
# (optionally negative) integer works for any number of digits.
data['Recency'] = data['Recency'].str.extract(r'^(-?\d+)', expand=False)

In [14]:
data.head()

Unnamed: 0,CustomerID,PurchaseDate,TransactionAmount,ProductInformation,OrderID,Location,Recency
0,8814,2023-04-11,943.31,Product C,890075,Tokyo,784
1,2188,2023-04-11,463.7,Product A,176819,London,784
2,4608,2023-04-11,80.28,Product A,340062,New York,784
3,2559,2023-04-11,221.29,Product A,239145,London,784
4,9482,2023-04-11,739.56,Product A,194545,Paris,784


In [15]:
# calculating frequency: number of orders recorded for each customer

frequency_data = (
    data.groupby('CustomerID')['OrderID']
    .count()
    .rename('Frequency')
    .reset_index()
)

# Drop any 'Frequency' column left over from a previous run of this cell
# before merging; otherwise re-running would produce 'Frequency_x' and
# 'Frequency_y' instead of a single 'Frequency' column.
data = data.drop(columns=['Frequency'], errors='ignore').merge(
    frequency_data, on='CustomerID', how='left'
)

In [16]:
data.head()

Unnamed: 0,CustomerID,PurchaseDate,TransactionAmount,ProductInformation,OrderID,Location,Recency,Frequency
0,8814,2023-04-11,943.31,Product C,890075,Tokyo,784,1
1,2188,2023-04-11,463.7,Product A,176819,London,784,1
2,4608,2023-04-11,80.28,Product A,340062,New York,784,1
3,2559,2023-04-11,221.29,Product A,239145,London,784,1
4,9482,2023-04-11,739.56,Product A,194545,Paris,784,1


In [17]:
# calculate Monetary Value: total transaction amount spent by each customer

monetary_data = (
    data.groupby('CustomerID')['TransactionAmount']
    .sum()
    .rename('MonetaryValue')
    .reset_index()
)

# Drop any 'MonetaryValue' column left over from a previous run of this cell
# before merging; otherwise re-running would produce 'MonetaryValue_x' and
# 'MonetaryValue_y' instead of a single 'MonetaryValue' column.
data = data.drop(columns=['MonetaryValue'], errors='ignore').merge(
    monetary_data, on='CustomerID', how='left'
)

In [18]:
data.head()

Unnamed: 0,CustomerID,PurchaseDate,TransactionAmount,ProductInformation,OrderID,Location,Recency,Frequency,MonetaryValue
0,8814,2023-04-11,943.31,Product C,890075,Tokyo,784,1,943.31
1,2188,2023-04-11,463.7,Product A,176819,London,784,1,463.7
2,4608,2023-04-11,80.28,Product A,340062,New York,784,1,80.28
3,2559,2023-04-11,221.29,Product A,239145,London,784,1,221.29
4,9482,2023-04-11,739.56,Product A,194545,Paris,784,1,739.56


## Calculating RFM Scores

In [19]:
# Score labels for the five bins of each metric. Recency labels run in
# reverse so that a smaller number of days since purchase earns a higher score.
recency_scores = [5, 4, 3, 2, 1]
frequency_scores = [1, 2, 3, 4, 5]
monetary_scores = [1, 2, 3, 4, 5]

# Recency is still text at this point; make it numeric before binning.
data['Recency'] = data['Recency'].astype(int)

# (source column, score column, bin labels) for each RFM component.
score_specs = [
    ('Recency', 'RecencyScore', recency_scores),
    ('Frequency', 'FrequencyScore', frequency_scores),
    ('MonetaryValue', 'MonetaryScore', monetary_scores),
]

# Split each metric into 5 equal-width bins and store the bin label as an int.
for source_col, score_col, score_labels in score_specs:
    binned = pd.cut(data[source_col], bins=5, labels=score_labels)
    data[score_col] = binned.astype(int)

In [20]:
data.dtypes

CustomerID                     int64
PurchaseDate          datetime64[ns]
TransactionAmount            float64
ProductInformation            object
OrderID                        int64
Location                      object
Recency                        int32
Frequency                      int64
MonetaryValue                float64
RecencyScore                   int32
FrequencyScore                 int32
MonetaryScore                  int32
dtype: object

In [21]:
# Overall RFM score: the plain sum of the three component scores.

data['RFM_Score'] = (
    data['RecencyScore']
    .add(data['FrequencyScore'])
    .add(data['MonetaryScore'])
)

In [22]:
data.head(10)

Unnamed: 0,CustomerID,PurchaseDate,TransactionAmount,ProductInformation,OrderID,Location,Recency,Frequency,MonetaryValue,RecencyScore,FrequencyScore,MonetaryScore,RFM_Score
0,8814,2023-04-11,943.31,Product C,890075,Tokyo,784,1,943.31,1,1,2,4
1,2188,2023-04-11,463.7,Product A,176819,London,784,1,463.7,1,1,1,3
2,4608,2023-04-11,80.28,Product A,340062,New York,784,1,80.28,1,1,1,3
3,2559,2023-04-11,221.29,Product A,239145,London,784,1,221.29,1,1,1,3
4,9482,2023-04-11,739.56,Product A,194545,Paris,784,1,739.56,1,1,2,4
5,8483,2023-04-11,375.23,Product C,691194,Paris,784,1,375.23,1,1,1,3
6,8317,2023-04-11,272.56,Product B,826847,New York,784,2,974.88,1,3,3,7
7,6911,2023-04-11,433.33,Product C,963918,Tokyo,784,1,433.33,1,1,1,3
8,8993,2023-04-12,16.55,Product D,112426,New York,783,1,16.55,1,1,1,3
9,3519,2023-04-12,464.63,Product C,139726,New York,783,1,464.63,1,1,1,3


## Segmenting Customers

In [23]:
# Split customers into three quantile-based value tiers by total RFM score.

segment_labels = ['Low-Value', 'Mid-Value', 'High-Value']
rfm_totals = data['RFM_Score']
data['Value_Segment'] = pd.qcut(rfm_totals, q=len(segment_labels), labels=segment_labels)

In [24]:
data.head()

Unnamed: 0,CustomerID,PurchaseDate,TransactionAmount,ProductInformation,OrderID,Location,Recency,Frequency,MonetaryValue,RecencyScore,FrequencyScore,MonetaryScore,RFM_Score,Value_Segment
0,8814,2023-04-11,943.31,Product C,890075,Tokyo,784,1,943.31,1,1,2,4,Low-Value
1,2188,2023-04-11,463.7,Product A,176819,London,784,1,463.7,1,1,1,3,Low-Value
2,4608,2023-04-11,80.28,Product A,340062,New York,784,1,80.28,1,1,1,3,Low-Value
3,2559,2023-04-11,221.29,Product A,239145,London,784,1,221.29,1,1,1,3,Low-Value
4,9482,2023-04-11,739.56,Product A,194545,Paris,784,1,739.56,1,1,2,4,Low-Value


In [25]:
# Number of rows in each value segment, as a two-column frame for plotting.
segment_counts = (
    data['Value_Segment']
    .value_counts()
    .rename_axis('Value_Segment')
    .reset_index(name='Count')
)

In [26]:
segment_counts

Unnamed: 0,Value_Segment,Count
0,Low-Value,435
1,Mid-Value,386
2,High-Value,179


In [27]:
# Shared pastel palette used by the charts in this notebook.
pastel_colors = px.colors.qualitative.Pastel

# One bar per value segment, coloured by segment.
fig_segment_dist = px.bar(
    segment_counts,
    x='Value_Segment',
    y='Count',
    color='Value_Segment',
    color_discrete_sequence=pastel_colors,
    title='RFM Value Segment Distribution',
)

# Axis titles; the legend would only repeat the x-axis categories, so hide it.
axis_titles = dict(xaxis_title='RFM Value Segment', yaxis_title='Count')
fig_segment_dist.update_layout(showlegend=False, **axis_titles)

fig_segment_dist.show()

## RFM Customer Segments

In [28]:
# Label every row with a named customer segment derived from its RFM score.

data['RFM Customer Segments'] = ''

# (inclusive lower bound, exclusive upper bound, segment name)
segment_bounds = [
    (9, float('inf'), 'Champions'),
    (6, 9, 'Potential Loyalists'),
    (5, 6, 'At Risk Customers'),
    (4, 5, "Can't Lose"),
    (3, 4, "Lost"),
]

for lower, upper, segment_name in segment_bounds:
    in_band = (data['RFM_Score'] >= lower) & (data['RFM_Score'] < upper)
    data.loc[in_band, 'RFM Customer Segments'] = segment_name


In [29]:
data[['CustomerID','RFM Customer Segments']]

Unnamed: 0,CustomerID,RFM Customer Segments
0,8814,Can't Lose
1,2188,Lost
2,4608,Lost
3,2559,Lost
4,9482,Can't Lose
...,...,...
995,2970,Potential Loyalists
996,6669,Potential Loyalists
997,8836,Potential Loyalists
998,1440,Potential Loyalists


In [30]:
data.head()

Unnamed: 0,CustomerID,PurchaseDate,TransactionAmount,ProductInformation,OrderID,Location,Recency,Frequency,MonetaryValue,RecencyScore,FrequencyScore,MonetaryScore,RFM_Score,Value_Segment,RFM Customer Segments
0,8814,2023-04-11,943.31,Product C,890075,Tokyo,784,1,943.31,1,1,2,4,Low-Value,Can't Lose
1,2188,2023-04-11,463.7,Product A,176819,London,784,1,463.7,1,1,1,3,Low-Value,Lost
2,4608,2023-04-11,80.28,Product A,340062,New York,784,1,80.28,1,1,1,3,Low-Value,Lost
3,2559,2023-04-11,221.29,Product A,239145,London,784,1,221.29,1,1,1,3,Low-Value,Lost
4,9482,2023-04-11,739.56,Product A,194545,Paris,784,1,739.56,1,1,2,4,Low-Value,Can't Lose


## RFM Analysis 

In [31]:
# Row count for every (value segment, customer segment) pair.
# 'Value_Segment' is categorical (it comes from pd.qcut), so observed=True is
# passed explicitly: it keeps only pairs that actually occur in the data,
# avoids zero-count placeholder rows for impossible combinations, and silences
# the pandas FutureWarning about the changing default of `observed`.
segment_product_counts = (
    data.groupby(['Value_Segment', 'RFM Customer Segments'], observed=True)
    .size()
    .reset_index(name='Count')
)





In [32]:
# Largest segment combinations first.
segment_product_counts = segment_product_counts.sort_values('Count', ascending=False)

# Two-level treemap: value segment on the outside, customer segment inside,
# with tile area proportional to the row count.
treemap_options = dict(
    path=['Value_Segment', 'RFM Customer Segments'],
    values='Count',
    color='Value_Segment',
    color_discrete_sequence=px.colors.qualitative.Pastel,
    title='RFM Customer Segments by Value',
)
fig_treemap_segment_product = px.treemap(segment_product_counts, **treemap_options)

fig_treemap_segment_product.show()







In [33]:
## Distribution of the three RFM component scores within the Champions segment

champions_segment = data[data['RFM Customer Segments'] == 'Champions']

# (score column, trace name) for each box in the figure.
box_specs = [
    ('RecencyScore', 'Recency'),
    ('FrequencyScore', 'Frequency'),
    ('MonetaryScore', 'Monetary'),
]

fig = go.Figure()
for score_column, trace_name in box_specs:
    fig.add_trace(go.Box(y=champions_segment[score_column], name=trace_name))

fig.update_layout(
    title='Distribution of RFM Values within Champions Segment',
    yaxis_title='RFM Value',
    showlegend=True,
)

fig.show()

In [34]:
## Correlation between the recency, frequency and monetary scores within the Champions segment

correlation_matrix = champions_segment[['RecencyScore', 'FrequencyScore', 'MonetaryScore']].corr()

fig_heatmap = go.Figure(data=go.Heatmap(
    z=correlation_matrix.values,
    x=correlation_matrix.columns,
    y=correlation_matrix.columns,
    colorscale='RdBu',
    # Correlations live in [-1, 1]; pinning the colour range keeps the
    # midpoint of the diverging RdBu scale at zero correlation instead of
    # letting it drift with the min/max of this particular matrix.
    zmin=-1,
    zmax=1,
    colorbar=dict(title='Correlation'),
))

# Title spelling fixed ("CHampions" -> "Champions").
fig_heatmap.update_layout(title='Correlation Matrix of RFM Values within Champions Segment')

fig_heatmap.show()

In [35]:
## Number of rows in each RFM customer segment

import plotly.colors

pastel_colors = plotly.colors.qualitative.Pastel

# Note: this rebinds `segment_counts` (previously the value-segment frame)
# to a Series of counts indexed by customer segment name.
segment_counts = data['RFM Customer Segments'].value_counts()

# Champions get a dedicated colour; every other bar takes the palette entry
# at its own position.
champions_color = 'rgb(158,202,225)'
bar_colors = [
    champions_color if segment == 'Champions' else pastel_colors[i]
    for i, segment in enumerate(segment_counts.index)
]

# Bar chart comparing segment sizes, with outline and opacity set up front.
figure = go.Figure(data=[go.Bar(
    x=segment_counts.index,
    y=segment_counts.values,
    opacity=0.6,
    marker=dict(
        color=bar_colors,
        line=dict(color='rgb(8,48,107)', width=1.5),
    ),
)])

figure.update_layout(
    title='Comparison of RFM Segments',
    xaxis_title='RFM Segments',
    yaxis_title='Number of Customers',
    showlegend=False,
)

figure.show()

In [36]:
## Compare mean recency, frequency and monetary scores across all customer segments

score_columns = ['RecencyScore', 'FrequencyScore', 'MonetaryScore']
segment_scores = data.groupby('RFM Customer Segments')[score_columns].mean().reset_index()

# (score column, legend entry, bar colour) for each group of bars.
score_traces = [
    ('RecencyScore', 'Recency Score', 'rgb(158,202,225)'),
    ('FrequencyScore', 'Frequency Score', 'rgb(94,158,217)'),
    ('MonetaryScore', 'Monetary Score', 'rgb(32,102,148)'),
]

fig = go.Figure()
for score_column, legend_name, bar_color in score_traces:
    fig.add_trace(go.Bar(
        x=segment_scores['RFM Customer Segments'],
        y=segment_scores[score_column],
        name=legend_name,
        marker_color=bar_color,
    ))

# barmode='group' places the three score bars side by side per segment.
fig.update_layout(
    title='Comparison of RFM Segments based on Recency, Frequency, and Monetary Scores',
    xaxis_title='RFM Segments',
    yaxis_title='Score',
    barmode='group',
    showlegend=True,
)
fig.show()


In [37]:
data.head()

Unnamed: 0,CustomerID,PurchaseDate,TransactionAmount,ProductInformation,OrderID,Location,Recency,Frequency,MonetaryValue,RecencyScore,FrequencyScore,MonetaryScore,RFM_Score,Value_Segment,RFM Customer Segments
0,8814,2023-04-11,943.31,Product C,890075,Tokyo,784,1,943.31,1,1,2,4,Low-Value,Can't Lose
1,2188,2023-04-11,463.7,Product A,176819,London,784,1,463.7,1,1,1,3,Low-Value,Lost
2,4608,2023-04-11,80.28,Product A,340062,New York,784,1,80.28,1,1,1,3,Low-Value,Lost
3,2559,2023-04-11,221.29,Product A,239145,London,784,1,221.29,1,1,1,3,Low-Value,Lost
4,9482,2023-04-11,739.56,Product A,194545,Paris,784,1,739.56,1,1,2,4,Low-Value,Can't Lose


In [38]:
# Sum of the row-level RFM scores for each location.
Location_RFM_score = (
    data.groupby('Location')['RFM_Score']
    .sum()
    .reset_index(name='Total_RFM_score')
)

In [39]:
Location_RFM_score

Unnamed: 0,Location,Total_RFM_score
0,London,1459
1,New York,1487
2,Paris,1302
3,Tokyo,1664


In [40]:
# Total RFM score per location, one pastel-coloured bar each.
# Note: this rebinds the notebook-level name `fig`.
location_bars = go.Bar(
    x=Location_RFM_score['Location'],
    y=Location_RFM_score['Total_RFM_score'],
    marker=dict(color=pastel_colors),
)
fig = go.Figure(location_bars)
fig.update_layout(
    title='RFM Value by Location',
    xaxis_title='Location',
    yaxis_title='RFM Score',
    showlegend=False,
)
fig.show()

In [41]:
# Summed RFM score for every (value segment, location) pair.
# 'Value_Segment' is categorical, so `observed` is passed explicitly:
# observed=False keeps the current output (every category appears, with 0
# for combinations that have no rows) and silences the pandas FutureWarning
# about the changing default.
data.groupby(['Value_Segment', 'Location'], observed=False)['RFM_Score'].sum().reset_index()





Unnamed: 0,Value_Segment,Location,RFM_Score
0,Low-Value,London,452
1,Low-Value,New York,420
2,Low-Value,Paris,469
3,Low-Value,Tokyo,497
4,Mid-Value,London,588
5,Mid-Value,New York,655
6,Mid-Value,Paris,594
7,Mid-Value,Tokyo,675
8,High-Value,London,419
9,High-Value,New York,412


In [42]:
# Group by 'Value_Segment' and 'Location' and sum the 'RFM_Score'.
# 'Value_Segment' is categorical, so `observed` is passed explicitly:
# observed=False keeps the current output (a row for every segment/location
# pair, 0 where there is no data) so each location keeps the same set of bars,
# and it silences the pandas FutureWarning about the changing default.
grouped_data = (
    data.groupby(['Value_Segment', 'Location'], observed=False)['RFM_Score']
    .sum()
    .reset_index()
)

# Grouped bar chart: one cluster per location, one bar per value segment.
# Note: this rebinds the notebook-level name `fig`.
fig = px.bar(grouped_data, x='Location', y='RFM_Score', color='Value_Segment', barmode='group',
             title='RFM Score by Segment and Location',
             labels={'RFM_Score': 'Total RFM Score', 'Location': 'Location', 'Value_Segment': 'RFM Segment'})

# Show the figure
fig.show()





In [43]:
data.head()

Unnamed: 0,CustomerID,PurchaseDate,TransactionAmount,ProductInformation,OrderID,Location,Recency,Frequency,MonetaryValue,RecencyScore,FrequencyScore,MonetaryScore,RFM_Score,Value_Segment,RFM Customer Segments
0,8814,2023-04-11,943.31,Product C,890075,Tokyo,784,1,943.31,1,1,2,4,Low-Value,Can't Lose
1,2188,2023-04-11,463.7,Product A,176819,London,784,1,463.7,1,1,1,3,Low-Value,Lost
2,4608,2023-04-11,80.28,Product A,340062,New York,784,1,80.28,1,1,1,3,Low-Value,Lost
3,2559,2023-04-11,221.29,Product A,239145,London,784,1,221.29,1,1,1,3,Low-Value,Lost
4,9482,2023-04-11,739.56,Product A,194545,Paris,784,1,739.56,1,1,2,4,Low-Value,Can't Lose


In [44]:
data.ProductInformation.value_counts()

ProductInformation
Product C    277
Product D    253
Product B    245
Product A    225
Name: count, dtype: int64

## Making a Report using Dash framework

In [45]:
pip install dash


Note: you may need to restart the kernel to use updated packages.


In [46]:
# importing libraries
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.io as pio
import plotly.colors as pc

In [47]:
# Initialize the Dash app
app = dash.Dash(__name__)

# Page layout: a header, a chart selector and a single graph container.
# The className values are Bootstrap utility classes.
# NOTE(review): no Bootstrap stylesheet is configured on the app here, so
# these classes presumably have no visual effect unless CSS is supplied
# elsewhere (e.g. an assets folder) - confirm.
app.layout = html.Div([
    # Header
    html.H1("RFM Analysis Dashboard", className="text-center mb-4"),
    # "text-centre" is not a Bootstrap class; the utility class is "text-center".
    html.Div("Analyze customer segments based on RFM scores.", className="text-center mb-4"),

    # Dropdown: each option's `value` is the key the chart callback switches on.
    dcc.Dropdown(
        id='chart-type-dropdown',
        options=[
            {'label': 'RFM Value Segment Distribution', 'value': 'segment_distribution'},
            {'label': 'Distribution of RFM Values within Customer Segment', 'value': 'RFM_distribution'},
            {'label': 'Correlation Matrix of RFM Values within Champions Segment', 'value': 'correlation_matrix'},
            {'label': 'Comparison of RFM Segments', 'value': 'segment_comparison'},
            {'label': 'Comparison of RFM Segments based on Scores', 'value': 'segment_scores'},
        ],
        value='segment_distribution',  # default
        className="mb-4"),

    # Graph container filled in by the callback
    dcc.Graph(id='rfm-chart', className='mb-4')
])

In [48]:
def _build_segment_scores_figure():
    """Build the grouped bar chart of mean R/F/M scores per customer segment.

    The chart is rebuilt from the `segment_scores` frame instead of reusing
    the notebook-level name `fig`: `fig` is reassigned by the location charts
    created after the segment-score chart, so by the time the dashboard runs
    it no longer refers to the segment-score figure.
    """
    chart = go.Figure()
    score_traces = (
        ('RecencyScore', 'Recency Score', 'rgb(158,202,225)'),
        ('FrequencyScore', 'Frequency Score', 'rgb(94,158,217)'),
        ('MonetaryScore', 'Monetary Score', 'rgb(32,102,148)'),
    )
    for score_column, legend_name, bar_color in score_traces:
        chart.add_trace(go.Bar(
            x=segment_scores['RFM Customer Segments'],
            y=segment_scores[score_column],
            name=legend_name,
            marker_color=bar_color,
        ))
    chart.update_layout(
        title='Comparison of RFM Segments based on Recency, Frequency, and Monetary Scores',
        xaxis_title='RFM Segments',
        yaxis_title='Score',
        barmode='group',
        showlegend=True,
    )
    return chart


# Dedicated name for the segment-score chart served by the dashboard.
fig_segment_scores = _build_segment_scores_figure()


# Define callback to update the selected chart
@app.callback(
    Output('rfm-chart', 'figure'),
    [Input('chart-type-dropdown', 'value')]
)
def update_chart(selected_chart_type):
    """Return the figure that matches the dropdown selection.

    Args:
        selected_chart_type: The `value` of the chosen dropdown option
            (None when the selection is cleared).

    Returns:
        The pre-built Plotly figure for that option, or the value-segment
        distribution chart when the selection is not recognised.
    """
    charts = {
        'segment_distribution': fig_segment_dist,
        'RFM_distribution': fig_treemap_segment_product,
        'correlation_matrix': fig_heatmap,
        'segment_comparison': figure,
        'segment_scores': fig_segment_scores,
    }
    # Fall back to the default chart if no valid selection was made.
    return charts.get(selected_chart_type, fig_segment_dist)


if __name__ == '__main__':
    # `app.run` replaces `app.run_server`, which current Dash releases no
    # longer provide; the unpinned `pip install dash` above installs those.
    app.run(debug=True, port=8052)