# Advanced Visuals - Prediction Analysis

### Importing Data

In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Read both prediction tables and flip the sign of `recency` as each one loads,
# so every later cell sees the same sign convention.
# NOTE(review): presumably the CSVs store recency as non-positive day offsets,
# which would make the flipped values "days since last purchase" - confirm upstream.
clv_df = (
    pd.read_csv('../data/processed_data/predicted_clv.csv')
    .assign(recency=lambda frame: frame['recency'].mul(-1))
)
churn_df = (
    pd.read_csv('../data/processed_data/predicted_churn.csv')
    .assign(recency=lambda frame: frame['recency'].mul(-1))
)

In [14]:
# Summary statistics for the numeric columns of the churn prediction table;
# left as the cell's last expression so the notebook renders it as a table.
churn_df.describe()


Unnamed: 0,customer_id,recency,frequency,price_sum,price_mean,churn_90_prob,spend_90_prob
count,23570.0,23570.0,23570.0,23570.0,23570.0,23570.0,23570.0
mean,11785.5,303.579635,2.706788,97.003873,32.597058,72.799889,27.200111
std,6804.217258,149.626944,4.058153,208.757392,28.441418,10.935263,10.935263
min,1.0,0.0,1.0,0.0,0.0,36.32192,20.154734
25%,5893.25,171.0,1.0,18.99,14.99,69.27605,20.154734
50%,11785.5,384.0,1.0,41.77,24.98,79.63128,20.368715
75%,17677.75,416.0,3.0,98.855,39.824583,79.84527,30.723953
max,23570.0,455.0,180.0,11478.02,544.83,79.84527,63.67808


### Customer Activity Trend Line

In [5]:
# Recency vs. frequency scatter over clv_df: marker size follows
# `spend_90_total`, marker colour follows `price_mean`, and an OLS
# trendline is requested on top of the points.
fig = px.scatter(
    clv_df,
    title='Customer Activity: Recency vs Frequency',
    x='recency',
    y='frequency',
    color='price_mean',
    size='spend_90_total',
    trendline='ols',
    # Display names keyed by column name.
    labels={
        'recency': 'Recency (days since last purchase)',
        'frequency': 'Frequency (number of purchases)',
        'spend_90_total': 'Predicted Spend',
    },
)

# Save a static PNG copy first, then render the figure in the notebook.
fig.write_image('../visuals/customer_activity_trendline.png')
fig.show()

### Recency vs Spend Probability Correlation Heatmap

In [6]:
# 2-D density heatmap over churn_df: cells are binned by `recency` (x) and
# `spend_90_prob` (y), and each cell is coloured by the average `frequency`
# of the rows that fall into it (z='frequency', histfunc='avg').
fig = px.density_heatmap(
    churn_df,
    x='recency',
    y='spend_90_prob',
    z='frequency',
    histfunc='avg',
    title='Heatmap: Recency vs Spend Probability',
    # Label keys must match the plotted column names. The y label was
    # previously keyed on 'spend_90_flag', which is not the plotted column,
    # so the y axis fell back to the raw name 'spend_90_prob'.
    labels={
        'spend_90_prob': 'Probability of Purchase',
        'recency': 'Recency (days)',
        'frequency': 'Average Frequency',
    },
    color_continuous_scale='Viridis',
)

# Save a static PNG copy first, then render the figure in the notebook.
fig.write_image('../visuals/recency_vs_spend_probability_heatmap.png')
fig.show()

### Parallel Coordinates for All Customer Metrics

In [24]:
# Parallel-coordinates view of clv_df: one axis per listed column, with each
# line coloured by its `spend_90_total` value on the Inferno scale.
fig = px.parallel_coordinates(
    clv_df,
    title='Parallel Coordinates: Customer Metrics',
    dimensions=[
        'recency',
        'frequency',
        'price_sum',
        'price_mean',
        'spend_90_total',
    ],
    color='spend_90_total',
    color_continuous_scale=px.colors.sequential.Inferno,
)

# Save a static PNG copy first, then render the figure in the notebook.
fig.write_image('../visuals/customer_metrics_predicted_clv_parallelcoords.png')
fig.show()