In [33]:
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.io as pio
import plotly.express as px
import plotly.offline as pyo
import pandas as pd
import os

In [34]:
# Ensure the "plots" output directory exists. exist_ok=True makes this safe to
# re-run and avoids the race between a separate existence check and the mkdir.
os.makedirs("plots", exist_ok=True)

In [35]:
# Read RFM_data.csv from the "data" directory one level above the working directory.
rfm_csv_path = os.path.join("..", "data", "RFM_data.csv")
data = pd.read_csv(rfm_csv_path)

In [36]:
# Cluster labels that later cells compare against the 'cluster' column, and the
# marker colour used for each label. The two lists are paired by position.
customer_list = [
    "Occasional Engagers",
    "Rapid Repeaters",
    "High-Value Regulars",
    "Loyal Spenders",
    "Potential Lost",
]
colormap = [
    "darkblue",
    "purple",
    "pink",
    "orange",
    "yellow",
]

In [37]:
# Write one HTML scatter per entry of customer_list (five in this notebook):
# total spend on the x axis, average spend per product on the y axis.
for idx, segment in enumerate(customer_list):
    segment_rows = data[data['cluster'] == segment]
    fig = go.Figure(
        data=[
            go.Scatter(
                x=segment_rows['total_spent'],
                y=segment_rows['avg_spent_per_product'],
                mode='markers',
                # Colour is looked up by position, matching customer_list.
                marker=dict(color=colormap[idx], size=10),
                name=f'Cluster {idx}',
            )
        ],
        layout=go.Layout(
            xaxis=dict(title='Total Spent'),
            yaxis=dict(title='Average Spent per Product'),
        ),
    )
    pyo.plot(fig, filename=f"plots/plot_{idx}_avg_versus_total.html")

In [38]:
# Randomly select a subset of data points (fixed seed so the subset is
# repeatable). The size is capped at the number of rows because
# DataFrame.sample raises ValueError when asked for more rows than exist
# while sampling without replacement.
sample_size = min(1000, len(data))
df_sample = data.sample(n=sample_size, random_state=42)

# NOTE(review): only `x` is passed, so the y axis is left to Plotly Express's
# default behaviour. Confirm this is the intended plot.
fig = px.scatter(
    df_sample,
    x='days_passed_since_last_order',
    color='cluster',
    hover_data=['customer_id', 'qtt_order', 'total_spent', 'avg_spent_per_product'],
)
fig.update_traces(mode='markers', marker=dict(size=8))

plotly_file = "plots/plot_days_passed_since_last_order.html"
# Call plot through the `pyo` alias directly, as the other cells do, instead of
# reaching into the nested `offline` submodule.
pyo.plot(fig, filename=plotly_file)

'plots/plot_days_passed_since_last_order.html'

In [39]:
# Write one HTML scatter per entry of customer_list (five in this notebook):
# days since the last order on the x axis, average spend per product on the y axis.
for idx, segment in enumerate(customer_list):
    segment_rows = data[data['cluster'] == segment]
    marker_style = dict(color=colormap[idx], size=10)  # colour paired by position
    points = go.Scatter(
        x=segment_rows['days_passed_since_last_order'],
        y=segment_rows['avg_spent_per_product'],
        mode='markers',
        marker=marker_style,
        name=f'Cluster {idx}',
    )
    axes = go.Layout(
        xaxis=dict(title='Days Passed Since Last Order'),
        yaxis=dict(title='Average Spent per Product'),
    )
    fig = go.Figure(data=[points], layout=axes)
    pyo.plot(fig, filename=f"plots/plot_{idx}_avg_spent_per_product.html")

In [40]:
# Write one HTML scatter per entry of customer_list (five in this notebook):
# days since the last order on the x axis, total spend on the y axis.
for idx, segment in enumerate(customer_list):
    in_segment = data['cluster'] == segment
    segment_rows = data[in_segment]
    fig = go.Figure(
        data=[
            go.Scatter(
                x=segment_rows['days_passed_since_last_order'],
                y=segment_rows['total_spent'],
                mode='markers',
                # Colour is looked up by position, matching customer_list.
                marker=dict(color=colormap[idx], size=10),
                name=f'Cluster {idx}',
            )
        ],
        layout=go.Layout(
            xaxis=dict(title='Days Passed Since Last Order'),
            yaxis=dict(title='Total Spent'),
        ),
    )
    output_html = f"plots/plot_{idx}_total_spent.html"
    pyo.plot(fig, filename=output_html)

In [41]:
# Colour assigned to each cluster label in the box and pie charts below.
# NOTE(review): this rebinds `colormap` from the positional list defined near
# the top of the notebook to a dict keyed by label. The per-cluster scatter
# loops index `colormap[i]` with an integer, so they fail if re-run after this
# cell. The label-to-colour pairing here also differs from the list's.
colormap = dict([
    ('Loyal Spenders', 'darkblue'),
    ('Occasional Engagers', 'purple'),
    ('Rapid Repeaters', 'pink'),
    ('Potential Lost', 'orange'),
    ('High-Value Regulars', 'yellow'),
])

# Box plot of total spend per cluster, with every point drawn next to its box.
fig = px.box(
    data,
    x='cluster',
    y='total_spent',
    color='cluster',
    points='all',
    color_discrete_map=colormap,
)
plotly_file = "plots/boxplot_total_spent.html"
pio.write_html(fig, file=plotly_file, auto_open=False)

In [42]:
# Box plot of days since the last order per cluster, with every point drawn
# next to its box; colours come from the label-keyed `colormap` dict.
fig = px.box(
    data,
    x='cluster',
    y='days_passed_since_last_order',
    color='cluster',
    points='all',
    color_discrete_map=colormap,
)
plotly_file = "plots/boxplot_days_passed_since_last_order.html"
pyo.plot(fig, filename=plotly_file)

'plots/boxplot_days_passed_since_last_order.html'

In [43]:
# Count the rows in each cluster and plot the counts as a pie chart.
df_pie = data.groupby('cluster').size().reset_index(name='counts')

# `color` must name the column whose values are the keys of
# color_discrete_map. Without it Plotly Express does not apply the mapping and
# the slices fall back to the default colour sequence.
fig = px.pie(
    df_pie,
    values='counts',
    names='cluster',
    color='cluster',
    color_discrete_map=colormap,
)
plotly_file = "plots/pie_plot_cluster_numbers.html"
pio.write_html(fig, file=plotly_file, auto_open=False)