In [None]:
import os

import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objs as go
import plotly.io as pio
import plotly.offline as pyo

In [None]:
# Ensure the "plots" output directory exists before any figure is written.
# exist_ok=True makes the cell idempotent and avoids the race between a
# separate existence check and the directory creation.
os.makedirs("plots", exist_ok=True)

In [None]:
# One figure per cluster: total spent (x) against average spent per product (y).
# Each figure is handed to plotly offline and written under plots/.
for cluster_id in range(kmeans.n_clusters):
    # Rows belonging to the current cluster only.
    members = data.loc[data['cluster'] == cluster_id]

    fig = go.Figure(
        data=[
            go.Scatter(
                x=members['total_spent'],
                y=members['avg_spent_per_product'],
                mode='markers',
                # The colour array is the cluster label, which is constant within this subset.
                marker={'color': members['cluster'], 'size': 10},
                name=f'Cluster {cluster_id}',
            )
        ],
        layout=go.Layout(
            title=f'Scatter Plot of Cluster {cluster_id}',
            xaxis={'title': 'Total Spent'},
            yaxis={'title': 'Average Spent per Product'},
        ),
    )

    plotly_file = f"plots/plot_{cluster_id}_avg_versus_total.html"
    pyo.plot(fig, filename=plotly_file)

In [None]:
# Scatter plot of a reproducible random subset of customers, coloured by cluster.
# The sample is capped at the number of available rows: DataFrame.sample raises
# ValueError when n exceeds the population size (sampling without replacement).
sample_size = min(1000, len(data))
df_sample = data.sample(n=sample_size, random_state=42)

# NOTE(review): only `x` is passed to px.scatter; presumably Plotly Express then
# uses the DataFrame index for the y axis — confirm this is the intended view.
fig = px.scatter(df_sample, x='days_passed_since_last_order', color='cluster',
                 hover_data=['customer_id', 'qtt_order', 'total_spent', 'avg_spent_per_product'])
fig.update_traces(mode='markers', marker=dict(size=8))

plotly_file = "plots/plot_days_passed_since_last_order.html"
# Call pyo.plot directly, matching the other plotting cells in this notebook.
pyo.plot(fig, filename=plotly_file)

In [None]:
# One figure per cluster: days since the last order (x) against average spent
# per product (y). Each figure is written under plots/ via plotly offline.
for cluster_id in range(kmeans.n_clusters):
    # Rows belonging to the current cluster only.
    members = data.loc[data['cluster'] == cluster_id]

    fig = go.Figure(
        data=[
            go.Scatter(
                x=members['days_passed_since_last_order'],
                y=members['avg_spent_per_product'],
                mode='markers',
                # The colour array is the cluster label, which is constant within this subset.
                marker={'color': members['cluster'], 'size': 10},
                name=f'Cluster {cluster_id}',
            )
        ],
        layout=go.Layout(
            title=f'Scatter Plot of Days Passed Since Last Order for Cluster {cluster_id}',
            xaxis={'title': 'Days Passed Since Last Order'},
            yaxis={'title': 'Average Spent per Product'},
        ),
    )

    plotly_file = f"plots/plot_{cluster_id}_avg_spent_per_product.html"
    pyo.plot(fig, filename=plotly_file)

In [None]:
# One figure per cluster: days since the last order (x) against total spent (y).
# Each figure is written under plots/ via plotly offline.
for cluster_id in range(kmeans.n_clusters):
    # Rows belonging to the current cluster only.
    members = data.loc[data['cluster'] == cluster_id]

    fig = go.Figure(
        data=[
            go.Scatter(
                x=members['days_passed_since_last_order'],
                y=members['total_spent'],
                mode='markers',
                # The colour array is the cluster label, which is constant within this subset.
                marker={'color': members['cluster'], 'size': 10},
                name=f'Cluster {cluster_id}',
            )
        ],
        layout=go.Layout(
            title=f'Scatter Plot of Days Passed Since Last Order for Cluster {cluster_id}',
            xaxis={'title': 'Days Passed Since Last Order'},
            yaxis={'title': 'Total Spent'},
        ),
    )

    plotly_file = f"plots/plot_{cluster_id}_total_spent.html"
    pyo.plot(fig, filename=plotly_file)

In [None]:
# Box plot of total_spent grouped (and coloured) by cluster, with every
# individual point drawn alongside the boxes.
fig = px.box(
    data,
    x='cluster',
    y='total_spent',
    color='cluster',
    points='all',
    title='Box Plot of Total Spent per Cluster',
)
plotly_file = "plots/boxplot_total_spent.html"
pyo.plot(fig, filename=plotly_file)

In [None]:
# Box plot of days_passed_since_last_order grouped (and coloured) by cluster,
# with every individual point drawn alongside the boxes.
fig = px.box(
    data,
    x='cluster',
    y='days_passed_since_last_order',
    color='cluster',
    points='all',
    title='Box Plot of Days Passed Since Last Order per Cluster',
)
plotly_file = "plots/boxplot_days_passed_since_last_order.html"
pyo.plot(fig, filename=plotly_file)

In [None]:
# Pie chart showing how many records fall into each cluster.
# Count rows per cluster label, then expose the counts as a 'counts' column.
cluster_sizes = data.groupby('cluster').size()
df_pie = cluster_sizes.reset_index(name='counts')

fig = px.pie(
    df_pie,
    values='counts',
    names='cluster',
    title='Pie Chart of Record Distribution by Cluster',
)
plotly_file = "plots/pie_plot_cluster_numbers.html"
pyo.plot(fig, filename=plotly_file)