# In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns

# Load processed data (path is relative to the current working directory)
df = pd.read_csv('data/processed/customer_features.csv')

# Set style
# The bare 'seaborn' style name was deprecated in matplotlib 3.6 and later
# removed in favour of 'seaborn-v0_8'. Use whichever name this matplotlib
# installation provides so the script runs on both old and new versions.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
sns.set_palette("husl")

# 1. Customer Distribution Analysis
# Draw a 2x2 grid of 30-bin histograms, one panel per customer feature.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Age distribution
axes[0,0].hist(df['age'], bins=30, alpha=0.7)
axes[0,0].set_title('Age Distribution')
axes[0,0].set_xlabel('Age')

# Income distribution
# The logarithm is only defined for strictly positive values: a zero income
# maps to -inf (leaving the histogram without a finite bin range) and a
# negative one to NaN, so only positive incomes are transformed and plotted.
positive_income = df.loc[df['income'] > 0, 'income']
axes[0,1].hist(np.log(positive_income), bins=30, alpha=0.7)
axes[0,1].set_title('Income Distribution (Log Scale)')
axes[0,1].set_xlabel('Log(Income)')

# Total spent distribution
axes[1,0].hist(df['total_spent'], bins=30, alpha=0.7)
axes[1,0].set_title('Total Spent Distribution')
axes[1,0].set_xlabel('Total Spent ($)')

# CLV distribution
axes[1,1].hist(df['clv'], bins=30, alpha=0.7)
axes[1,1].set_title('Customer Lifetime Value Distribution')
axes[1,1].set_xlabel('CLV ($)')

# Keep titles and axis labels of neighbouring panels from overlapping.
plt.tight_layout()
plt.show()

# 2. Correlation Analysis
# Pairwise correlations between the selected customer feature columns.
numeric_features = [
    'age', 'income', 'total_spent', 'avg_order_value',
    'order_count', 'monthly_sessions', 'clv',
]
correlation_matrix = df[numeric_features].corr()

# Annotated heatmap on a fresh figure, colour scale centred on zero.
plt.figure(figsize=(10, 8))
heatmap_ax = sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0)
heatmap_ax.set_title('Feature Correlation Matrix')
plt.show()

# 3. Interactive Plotly visualizations
# Each entry holds the keyword arguments for one scatter plot of `df`:
#   - Age vs CLV, coloured by gender
#   - Monthly sessions vs Total spent, sized by order count and coloured by CLV
scatter_specs = (
    dict(x='age', y='clv', color='gender',
         title='Customer Lifetime Value by Age and Gender'),
    dict(x='monthly_sessions', y='total_spent',
         size='order_count', color='clv',
         title='Web Activity vs Purchase Behavior'),
)

# Build and display the figures in order; `fig` stays bound to the last one.
for spec in scatter_specs:
    fig = px.scatter(df, **spec)
    fig.show()
