# 📊 Customer & Interaction Data Analysis
## Comprehensive Dataset Overview for Recommendation Engine

This notebook provides a detailed analysis of:
- **Customer demographics** and segmentation
- **Order patterns** and purchase behavior
- **Interaction types** (views, clicks, purchases)
- **Product popularity** and trends
- **Temporal patterns** in customer behavior

---

**Datasets Used:**
- `sample_customers.csv` - Customer profiles and demographics
- `sample_orders.csv` - Order transactions
- `sample_order_items.csv` - Product-level order details
- `sample_interactions.csv` - Customer browsing and interaction history

**Goal:** Understand customer behavior to build effective style-based recommendations.

In [None]:
# Standard library
from datetime import datetime, timedelta

# Data manipulation and analysis
import pandas as pd
import numpy as np
from scipy import stats

# Visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Styling
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")
%matplotlib inline

# Display settings
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

print("‚úì Libraries imported successfully")

In [None]:
# Load all datasets.
# DATA_DIR is the single place to edit when the CSV files live elsewhere;
# previously the same absolute directory was repeated in every read_csv call.
DATA_DIR = '/project/data'

customers = pd.read_csv(f'{DATA_DIR}/sample_customers.csv')
orders = pd.read_csv(f'{DATA_DIR}/sample_orders.csv')
order_items = pd.read_csv(f'{DATA_DIR}/sample_order_items.csv')
interactions = pd.read_csv(f'{DATA_DIR}/sample_interactions.csv')

# Convert date columns to datetime (later cells use the .dt accessor on
# orders['order_date'] and interactions['timestamp']).
for _frame, _date_columns in (
    (customers, ['registration_date', 'last_active_date']),
    (orders, ['order_date']),
    (interactions, ['timestamp']),
):
    for _column in _date_columns:
        _frame[_column] = pd.to_datetime(_frame[_column])

# Row counts per table
print("üìä Dataset Summary:")
print("=" * 60)
print(f"Customers:      {len(customers):,} records")
print(f"Orders:         {len(orders):,} records")
print(f"Order Items:    {len(order_items):,} records")
print(f"Interactions:   {len(interactions):,} records")
print("=" * 60)

# Display first few rows of the customer table (last expression -> rich display)
print("\nüîç Sample Data Preview:")
customers.head(3)

In [None]:
# Plain-text data-quality report for the customers, orders and interactions
# tables. Every section is framed by the same 60-character rule.
_rule = "=" * 60

# --- Customers: shape, dtypes, null counts, fully duplicated rows
print("üìã CUSTOMER DATA QUALITY")
print(_rule)
print(f"Shape: {customers.shape}")
_customer_dtypes = customers.dtypes
print(f"\nData Types:\n{_customer_dtypes}")
_customer_nulls = customers.isnull().sum()
print(f"\nMissing Values:\n{_customer_nulls}")
_customer_dupes = customers.duplicated().sum()
print(f"\nDuplicate Rows: {_customer_dupes}")

# --- Orders: shape, null counts, status frequencies
print("\n" + _rule)
print("üìã ORDERS DATA QUALITY")
print(_rule)
print(f"Shape: {orders.shape}")
_order_nulls = orders.isnull().sum()
print(f"Missing Values:\n{_order_nulls}")
_status_counts = orders['order_status'].value_counts()
print(f"Order Status Distribution:\n{_status_counts}")

# --- Interactions: shape and interaction-type frequencies
print("\n" + _rule)
print("üìã INTERACTIONS DATA QUALITY")
print(_rule)
print(f"Shape: {interactions.shape}")
_type_counts = interactions['interaction_type'].value_counts()
print(f"Interaction Types:\n{_type_counts}")

In [None]:
# Customer demographics dashboard: a 2x3 grid of panels followed by a short
# printed summary.
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
fig.suptitle('Customer Demographics Overview', fontsize=16, fontweight='bold', y=1.02)

# Name each panel once instead of indexing axes[row, col] everywhere
(ax_gender, ax_age_group, ax_segment), (ax_country, ax_income, ax_age) = axes

# 1. Gender Distribution (pie)
gender_counts = customers['gender'].value_counts()
ax_gender.pie(
    gender_counts.values,
    labels=gender_counts.index,
    autopct='%1.1f%%',
    startangle=90,
    colors=sns.color_palette('pastel'),
)
ax_gender.set_title('Gender Distribution')

# 2. Age Group Distribution (bars in a fixed order; absent groups count as 0)
age_order = ['18-25', '26-35', '36-45', '46+']
age_counts = customers['age_group'].value_counts().reindex(age_order, fill_value=0)
age_palette = sns.color_palette('viridis', len(age_counts))
ax_age_group.bar(age_counts.index, age_counts.values, color=age_palette)
ax_age_group.set_title('Age Group Distribution')
ax_age_group.set_xlabel('Age Group')
ax_age_group.set_ylabel('Count')
ax_age_group.tick_params(axis='x', rotation=45)

# 3. Customer Segment Distribution (horizontal bars)
segment_counts = customers['customer_segment'].value_counts()
ax_segment.barh(
    segment_counts.index,
    segment_counts.values,
    color=sns.color_palette('muted'),
)
ax_segment.set_title('Customer Segments')
ax_segment.set_xlabel('Count')

# 4. Location (Country) Distribution
location_counts = customers['location_country'].value_counts()
country_colors = ['#FF6B6B', '#4ECDC4', '#95E1D3']
ax_country.bar(location_counts.index, location_counts.values, color=country_colors)
ax_country.set_title('Customer Location (Country)')
ax_country.set_xlabel('Country')
ax_country.set_ylabel('Count')

# 5. Income Bracket Distribution (pie)
income_counts = customers['income_bracket'].value_counts()
ax_income.pie(
    income_counts.values,
    labels=income_counts.index,
    autopct='%1.1f%%',
    startangle=90,
    colors=sns.color_palette('Set2'),
)
ax_income.set_title('Income Bracket Distribution')

# 6. Age Distribution (histogram with a dashed marker at the mean)
customer_ages = customers['age']
mean_age = customer_ages.mean()
ax_age.hist(customer_ages, bins=15, color='skyblue', edgecolor='black', alpha=0.7)
ax_age.set_title('Age Distribution (Detailed)')
ax_age.set_xlabel('Age (years)')
ax_age.set_ylabel('Frequency')
ax_age.axvline(mean_age, color='red', linestyle='--', label=f'Mean: {mean_age:.1f}')
ax_age.legend()

plt.tight_layout()
plt.show()

# Headline numbers behind the panels above
youngest, oldest = customer_ages.min(), customer_ages.max()
top_age_group = customers['age_group'].mode()[0]
top_segment = customers['customer_segment'].mode()[0]

print(f"\nüìä Key Statistics:")
print(f"  Average Age: {mean_age:.1f} years")
print(f"  Age Range: {youngest} - {oldest} years")
print(f"  Most Common Age Group: {top_age_group}")
print(f"  Most Common Segment: {top_segment}")

In [None]:
# Customer value dashboard: 2x2 panels (LTV by segment, orders per customer,
# AOV by gender, LTV vs orders) followed by printed revenue metrics.
fig, axes = plt.subplots(2, 2, figsize=(16, 10))
fig.suptitle('Customer Value & Purchase Behavior', fontsize=16, fontweight='bold', y=1.00)

ax_ltv, ax_orders = axes[0]
ax_aov, ax_scatter = axes[1]

order_totals = customers['total_orders']
lifetime_values = customers['lifetime_value']

# 1. Lifetime Value Distribution by Segment (mean LTV, fixed segment order)
segment_order = ['first-time', 'regular', 'vip']
ltv_by_segment = (
    customers.groupby('customer_segment')['lifetime_value']
    .mean()
    .reindex(segment_order)
)
bars = ax_ltv.bar(
    ltv_by_segment.index,
    ltv_by_segment.values,
    color=['#FFB6C1', '#87CEEB', '#FFD700'],
    edgecolor='black',
)
ax_ltv.set_title('Average Lifetime Value by Segment', fontweight='bold')
ax_ltv.set_xlabel('Customer Segment')
ax_ltv.set_ylabel('Average LTV (‚Ç¨)')
# Write each bar's value just above its top edge
for rect in bars:
    bar_top = rect.get_height()
    label_x = rect.get_x() + rect.get_width()/2.
    ax_ltv.text(label_x, bar_top, f'‚Ç¨{bar_top:.0f}',
                ha='center', va='bottom', fontweight='bold')

# 2. Total Orders Distribution (histogram with a dashed marker at the median)
median_orders = order_totals.median()
ax_orders.hist(order_totals, bins=20, color='coral', edgecolor='black', alpha=0.7)
ax_orders.set_title('Distribution of Total Orders per Customer', fontweight='bold')
ax_orders.set_xlabel('Total Orders')
ax_orders.set_ylabel('Number of Customers')
ax_orders.axvline(median_orders, color='red', linestyle='--',
                  label=f'Median: {median_orders:.0f}')
ax_orders.legend()

# 3. Average Order Value by Gender (horizontal bars)
aov_by_gender = customers.groupby('gender')['avg_order_value'].mean()
aov_palette = sns.color_palette('coolwarm', len(aov_by_gender))
ax_aov.barh(aov_by_gender.index, aov_by_gender.values, color=aov_palette)
ax_aov.set_title('Average Order Value by Gender', fontweight='bold')
ax_aov.set_xlabel('Average Order Value (‚Ç¨)')
# Value label at the end of each bar
for row_pos, aov in enumerate(aov_by_gender.values):
    ax_aov.text(aov, row_pos, f' ‚Ç¨{aov:.2f}', va='center', fontweight='bold')

# 4. Lifetime Value vs Total Orders (scatter coloured by segment category code)
segment_codes = customers['customer_segment'].astype('category').cat.codes
scatter = ax_scatter.scatter(
    order_totals,
    lifetime_values,
    c=segment_codes,
    s=100,
    alpha=0.6,
    cmap='viridis',
    edgecolors='black',
)
ax_scatter.set_title('Lifetime Value vs Total Orders', fontweight='bold')
ax_scatter.set_xlabel('Total Orders')
ax_scatter.set_ylabel('Lifetime Value (‚Ç¨)')
# Degree-1 least-squares fit drawn as a dashed red trend line
z = np.polyfit(order_totals, lifetime_values, 1)
p = np.poly1d(z)
ax_scatter.plot(order_totals, p(order_totals), "r--", alpha=0.8, label='Trend')
ax_scatter.legend()
ax_scatter.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Printed summary of the customer-level value columns
total_ltv = lifetime_values.sum()
mean_ltv = lifetime_values.mean()
median_ltv = lifetime_values.median()
mean_orders = order_totals.mean()
mean_aov = customers['avg_order_value'].mean()

print(f"\nüí∞ Customer Value Metrics:")
print(f"  Total Revenue: ‚Ç¨{total_ltv:,.2f}")
print(f"  Average LTV: ‚Ç¨{mean_ltv:.2f}")
print(f"  Median LTV: ‚Ç¨{median_ltv:.2f}")
print(f"  Average Orders per Customer: {mean_orders:.1f}")
print(f"  Average Order Value: ‚Ç¨{mean_aov:.2f}")

In [None]:
# Interaction dashboard: 2x2 panels (type mix, interactions per customer,
# device mix, referrer mix) followed by printed interaction statistics.
fig, axes = plt.subplots(2, 2, figsize=(16, 10))
fig.suptitle('Customer Interaction Patterns', fontsize=16, fontweight='bold', y=1.00)

ax_types, ax_per_customer = axes[0]
ax_devices, ax_referrers = axes[1]

# 1. Interaction Type Distribution (pie)
interaction_counts = interactions['interaction_type'].value_counts()
colors = plt.cm.Set3(range(len(interaction_counts)))
ax_types.pie(
    interaction_counts.values,
    labels=interaction_counts.index,
    autopct='%1.1f%%',
    startangle=90,
    colors=colors,
)
ax_types.set_title('Interaction Type Distribution', fontweight='bold')

# 2. Interactions per Customer (histogram with a dashed marker at the mean)
interactions_per_customer = interactions.groupby('customer_id').size()
mean_per_customer = interactions_per_customer.mean()
ax_per_customer.hist(interactions_per_customer, bins=20, color='mediumpurple',
                     edgecolor='black', alpha=0.7)
ax_per_customer.set_title('Interactions per Customer', fontweight='bold')
ax_per_customer.set_xlabel('Number of Interactions')
ax_per_customer.set_ylabel('Number of Customers')
ax_per_customer.axvline(mean_per_customer, color='red', linestyle='--',
                        label=f'Mean: {mean_per_customer:.1f}')
ax_per_customer.legend()

# 3. Device Type Distribution (bars with the count written above each bar)
device_counts = interactions['device_type'].value_counts()
ax_devices.bar(device_counts.index, device_counts.values,
               color=['#FF6B6B', '#4ECDC4', '#45B7D1'])
ax_devices.set_title('Device Type Distribution', fontweight='bold')
ax_devices.set_xlabel('Device Type')
ax_devices.set_ylabel('Count')
for bar_pos, (_, device_total) in enumerate(device_counts.items()):
    ax_devices.text(bar_pos, device_total, str(device_total),
                    ha='center', va='bottom', fontweight='bold')

# 4. Referrer Source Distribution (horizontal bars with counts at bar ends)
referrer_counts = interactions['referrer_source'].value_counts()
referrer_palette = sns.color_palette('viridis', len(referrer_counts))
ax_referrers.barh(referrer_counts.index, referrer_counts.values, color=referrer_palette)
ax_referrers.set_title('Traffic Source Distribution', fontweight='bold')
ax_referrers.set_xlabel('Count')
for row_pos, referrer_total in enumerate(referrer_counts.values):
    ax_referrers.text(referrer_total, row_pos, f' {referrer_total}',
                      va='center', fontweight='bold')

plt.tight_layout()
plt.show()

# Printed summary; the per-customer average here is rows / distinct customer ids
total_interactions = len(interactions)
unique_customers = interactions['customer_id'].nunique()
unique_products = interactions['product_id'].nunique()

print(f"\nüì± Interaction Statistics:")
print(f"  Total Interactions: {total_interactions:,}")
print(f"  Unique Customers: {unique_customers}")
print(f"  Unique Products: {unique_products}")
print(f"  Avg Interactions per Customer: {total_interactions / unique_customers:.1f}")
print(f"\n  Interaction Type Breakdown:")
for itype, count in interaction_counts.items():
    share = count / total_interactions * 100
    print(f"    {itype}: {count} ({share:.1f}%)")

In [None]:
# Product & order dashboard: 2x2 panels (top products by interactions, top
# products by purchases, completed-order amounts, payment methods) followed by
# printed order insights. Defines `completed_orders` and
# `top_products_interactions`, which the final summary cell reads.
fig, axes = plt.subplots(2, 2, figsize=(16, 10))
fig.suptitle('Product & Order Analytics', fontsize=16, fontweight='bold', y=1.00)

# 1. Top Products by Interactions
top_products_interactions = interactions['product_id'].value_counts().head(10)
axes[0, 0].barh(range(len(top_products_interactions)), top_products_interactions.values,
                color=sns.color_palette('rocket', len(top_products_interactions)))
axes[0, 0].set_yticks(range(len(top_products_interactions)))
axes[0, 0].set_yticklabels([f'Product {pid}' for pid in top_products_interactions.index])
axes[0, 0].set_title('Top 10 Products by Interactions', fontweight='bold')
axes[0, 0].set_xlabel('Number of Interactions')
# Largest count at the top of the chart
axes[0, 0].invert_yaxis()

# 2. Top Products by Purchases (interaction rows of type 'purchase' only)
purchases = interactions[interactions['interaction_type'] == 'purchase']
top_products_purchases = purchases['product_id'].value_counts().head(10)
axes[0, 1].barh(range(len(top_products_purchases)), top_products_purchases.values,
                color=sns.color_palette('mako', len(top_products_purchases)))
axes[0, 1].set_yticks(range(len(top_products_purchases)))
axes[0, 1].set_yticklabels([f'Product {pid}' for pid in top_products_purchases.index])
axes[0, 1].set_title('Top 10 Products by Purchases', fontweight='bold')
axes[0, 1].set_xlabel('Number of Purchases')
axes[0, 1].invert_yaxis()

# 3. Order Amount Distribution (orders with status 'completed' only)
completed_orders = orders[orders['order_status'] == 'completed']
mean_order_amount = completed_orders['total_amount'].mean()
median_order_amount = completed_orders['total_amount'].median()
axes[1, 0].hist(completed_orders['total_amount'], bins=20, color='lightgreen',
                edgecolor='black', alpha=0.7)
axes[1, 0].set_title('Order Amount Distribution', fontweight='bold')
axes[1, 0].set_xlabel('Order Amount (‚Ç¨)')
axes[1, 0].set_ylabel('Frequency')
axes[1, 0].axvline(mean_order_amount, color='red',
                    linestyle='--', label=f'Mean: ‚Ç¨{mean_order_amount:.2f}')
axes[1, 0].axvline(median_order_amount, color='blue',
                    linestyle='--', label=f'Median: ‚Ç¨{median_order_amount:.2f}')
axes[1, 0].legend()

# 4. Payment Method Distribution (all orders, regardless of status)
payment_counts = orders['payment_method'].value_counts()
axes[1, 1].pie(payment_counts.values, labels=payment_counts.index,
               autopct='%1.1f%%', startangle=90,
               colors=sns.color_palette('pastel'))
axes[1, 1].set_title('Payment Method Distribution', fontweight='bold')

plt.tight_layout()
plt.show()

print(f"\nüõçÔ∏è Product & Order Insights:")
print(f"  Total Orders: {len(orders)}")
print(f"  Completed Orders: {len(completed_orders)}")
print(f"  Average Order Value: ‚Ç¨{mean_order_amount:.2f}")
print(f"  Total Revenue: ‚Ç¨{completed_orders['total_amount'].sum():,.2f}")
# Fixed: the original line ended with "interactions}" -- a lone '}' inside an
# f-string is a SyntaxError, which stopped this whole cell from running.
print(f"  Most Popular Product: {top_products_interactions.index[0]} ({top_products_interactions.values[0]} interactions)")

In [None]:
# Temporal patterns: daily order counts with a linear trend (top panel) and
# daily interaction counts stacked by type (bottom panel), followed by
# printed date ranges and peak-day figures.
# `stats` (scipy.stats) comes from the notebook's import cell.
fig, axes = plt.subplots(2, 1, figsize=(16, 10))
fig.suptitle('Temporal Patterns in Customer Behavior', fontsize=16, fontweight='bold', y=0.995)

# 1. Orders Over Time
# Only dates that have at least one order appear in this series.
orders_by_date = orders.groupby(orders['order_date'].dt.date).size()
axes[0].plot(orders_by_date.index, orders_by_date.values, marker='o',
             linewidth=2, markersize=6, color='#2E86AB', label='Orders')
axes[0].fill_between(orders_by_date.index, orders_by_date.values, alpha=0.3, color='#2E86AB')
axes[0].set_title('Daily Orders Over Time', fontweight='bold', fontsize=14)
axes[0].set_xlabel('Date')
axes[0].set_ylabel('Number of Orders')
axes[0].grid(True, alpha=0.3)
axes[0].tick_params(axis='x', rotation=45)

# Add trend line. The regressor is the position of each date within
# orders_by_date (0, 1, 2, ...), not the calendar date itself.
# A regression needs at least two points, so skip it for a single-day series.
if len(orders_by_date) > 1:
    x_numeric = np.arange(len(orders_by_date))
    slope, intercept, r_value, p_value, std_err = stats.linregress(x_numeric, orders_by_date.values)
    trend_line = slope * x_numeric + intercept
    axes[0].plot(orders_by_date.index, trend_line, '--', color='red',
                 label=f'Trend (R¬≤={r_value**2:.3f})', linewidth=2)

# Build the legend once, after every labelled line has been added
# (the original called legend() both before and after the trend line).
axes[0].legend()

# 2. Interactions Over Time by Type
# NOTE: this adds a 'date' column to the shared `interactions` frame.
interactions['date'] = interactions['timestamp'].dt.date
interactions_pivot = interactions.groupby(['date', 'interaction_type']).size().unstack(fill_value=0)

# Stack area chart: one layer per interaction type
axes[1].stackplot(interactions_pivot.index,
                  *[interactions_pivot[col] for col in interactions_pivot.columns],
                  labels=interactions_pivot.columns,
                  alpha=0.7,
                  colors=sns.color_palette('husl', len(interactions_pivot.columns)))
axes[1].set_title('Daily Interactions by Type (Stacked)', fontweight='bold', fontsize=14)
axes[1].set_xlabel('Date')
axes[1].set_ylabel('Number of Interactions')
axes[1].legend(loc='upper left', framealpha=0.9)
axes[1].grid(True, alpha=0.3)
axes[1].tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

# "Average Orders per Day" averages over the dates present in orders_by_date.
print(f"\nüìÖ Temporal Insights:")
print(f"  Date Range (Orders): {orders['order_date'].min().date()} to {orders['order_date'].max().date()}")
print(f"  Date Range (Interactions): {interactions['timestamp'].min().date()} to {interactions['timestamp'].max().date()}")
print(f"  Peak Order Day: {orders_by_date.idxmax()} ({orders_by_date.max()} orders)")
print(f"  Average Orders per Day: {orders_by_date.mean():.1f}")

In [None]:
# Conversion funnel: counts interaction rows per funnel stage, plots the
# stage sizes and three conversion rates, then prints the funnel metrics.
# Defines `overall_conversion` and `conversion_from_cart_to_purchase`, which
# the final summary cell reads.


def _pct(part, whole):
    """Return ``part / whole`` as a percentage, or 0.0 when ``whole`` is 0.

    Guards the funnel ratios against ZeroDivisionError when a stage has no
    rows (e.g. no 'view' or no 'add_to_cart' interactions).
    """
    return (part / whole) * 100 if whole else 0.0


# Calculate funnel stages with a single pass over interaction_type.
# Types absent from the data count as 0; int() keeps plain Python ints.
_type_totals = interactions['interaction_type'].value_counts()
funnel_stages = {
    stage_label: int(_type_totals.get(type_key, 0))
    for stage_label, type_key in (
        ('Views', 'view'),
        ('Add to Cart', 'add_to_cart'),
        ('Add to Wishlist', 'add_to_wishlist'),
        ('Purchases', 'purchase'),
    )
}

# Create funnel visualization
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
fig.suptitle('Customer Conversion Funnel', fontsize=16, fontweight='bold')

# 1. Funnel bar chart
stages = list(funnel_stages.keys())
counts = list(funnel_stages.values())
colors_funnel = ['#FF6B6B', '#FFA07A', '#FFD93D', '#6BCB77']

bars = ax1.barh(stages, counts, color=colors_funnel, edgecolor='black', linewidth=2)
ax1.set_xlabel('Number of Interactions', fontweight='bold')
ax1.set_title('Conversion Funnel Stages', fontweight='bold')

# Add percentage labels: every stage is expressed relative to the first
# stage (Views), which itself is labelled 100%.
for i, (bar, count) in enumerate(zip(bars, counts)):
    pct = _pct(count, counts[0]) if i > 0 else 100
    ax1.text(count, i, f' {count} ({pct:.1f}%)', va='center', fontweight='bold')

# First stage at the top
ax1.invert_yaxis()

# 2. Conversion rates
conversion_from_view_to_cart = _pct(funnel_stages['Add to Cart'], funnel_stages['Views'])
conversion_from_cart_to_purchase = _pct(funnel_stages['Purchases'], funnel_stages['Add to Cart'])
overall_conversion = _pct(funnel_stages['Purchases'], funnel_stages['Views'])

metrics = ['View ‚Üí Cart', 'Cart ‚Üí Purchase', 'Overall (View ‚Üí Purchase)']
rates = [conversion_from_view_to_cart, conversion_from_cart_to_purchase, overall_conversion]

bars2 = ax2.bar(metrics, rates, color=['#4ECDC4', '#95E1D3', '#FFE66D'],
                edgecolor='black', linewidth=2)
ax2.set_ylabel('Conversion Rate (%)', fontweight='bold')
ax2.set_title('Conversion Rates', fontweight='bold')
# 20% headroom above the tallest bar; fall back to 1 so the axis never
# collapses to (0, 0) when every rate is zero.
_y_top = max(rates) * 1.2
ax2.set_ylim(0, _y_top if _y_top > 0 else 1)

# Add value labels
for bar, rate in zip(bars2, rates):
    height = bar.get_height()
    ax2.text(bar.get_x() + bar.get_width()/2., height,
             f'{rate:.1f}%', ha='center', va='bottom', fontweight='bold', fontsize=12)

plt.tight_layout()
plt.show()

# Cart abandonment is reported as 100 minus the cart -> purchase rate.
print(f"\nüéØ Funnel Metrics:")
print(f"  Views: {funnel_stages['Views']:,}")
print(f"  Add to Cart: {funnel_stages['Add to Cart']:,} ({conversion_from_view_to_cart:.1f}% of views)")
print(f"  Purchases: {funnel_stages['Purchases']:,} ({overall_conversion:.1f}% of views)")
print(f"\n  Cart Abandonment Rate: {100 - conversion_from_cart_to_purchase:.1f}%")

In [None]:
# Segment deep dive: per-segment aggregate table, a 2x2 figure (segment size,
# scaled metric comparison, LTV histograms, summary table) and printed
# insights. Defines `segment_metrics`, which the final summary cell reads.
segment_analysis = (
    customers
    .groupby('customer_segment')
    .agg({
        'customer_id': 'count',
        'lifetime_value': 'mean',
        'total_orders': 'mean',
        'avg_order_value': 'mean',
        'age': 'mean',
    })
    .round(2)
)
# Human-readable column names, in the same order as the aggregation above
segment_analysis.columns = ['Count', 'Avg LTV (‚Ç¨)', 'Avg Orders', 'Avg Order Value (‚Ç¨)', 'Avg Age']

fig, axes = plt.subplots(2, 2, figsize=(16, 10))
fig.suptitle('Customer Segment Deep Dive', fontsize=16, fontweight='bold', y=1.00)

ax_size, ax_compare = axes[0]
ax_ltv_hist, ax_table = axes[1]

segment_order = ['first-time', 'regular', 'vip']

# 1. Segment size (count label above each bar)
segment_counts = customers['customer_segment'].value_counts().reindex(segment_order)
colors_seg = ['#FFB6C1', '#87CEEB', '#FFD700']
bars = ax_size.bar(segment_counts.index, segment_counts.values,
                   color=colors_seg, edgecolor='black', linewidth=2)
ax_size.set_title('Customer Count by Segment', fontweight='bold')
ax_size.set_ylabel('Number of Customers')
for rect, segment_size in zip(bars, segment_counts.values):
    label_x = rect.get_x() + rect.get_width()/2.
    ax_size.text(label_x, rect.get_height(), str(segment_size),
                 ha='center', va='bottom', fontweight='bold')

# 2. Average metrics by segment: three grouped bars per segment, with AOV
#    divided by 50 and LTV divided by 100 so they share one axis with orders
segment_metrics = segment_analysis.reindex(segment_order)
x = np.arange(len(segment_order))
width = 0.25

scaled_aov = segment_metrics['Avg Order Value (‚Ç¨)'] / 50
scaled_ltv = segment_metrics['Avg LTV (‚Ç¨)'] / 100

bars1 = ax_compare.bar(x - width, segment_metrics['Avg Orders'], width,
                       label='Avg Orders', color='#FF6B6B')
bars2 = ax_compare.bar(x, scaled_aov, width,
                       label='Avg Order Value (√∑50)', color='#4ECDC4')
bars3 = ax_compare.bar(x + width, scaled_ltv, width,
                       label='Avg LTV (√∑100)', color='#95E1D3')

ax_compare.set_title('Segment Metrics Comparison (Scaled)', fontweight='bold')
ax_compare.set_xticks(x)
ax_compare.set_xticklabels(segment_order)
ax_compare.legend()
ax_compare.set_ylabel('Value (scaled)')

# 3. LTV distribution by segment (overlaid semi-transparent histograms)
for segment in segment_order:
    in_segment = customers['customer_segment'] == segment
    segment_data = customers.loc[in_segment, 'lifetime_value']
    ax_ltv_hist.hist(segment_data, bins=15, alpha=0.5, label=segment, edgecolor='black')

ax_ltv_hist.set_title('Lifetime Value Distribution by Segment', fontweight='bold')
ax_ltv_hist.set_xlabel('Lifetime Value (‚Ç¨)')
ax_ltv_hist.set_ylabel('Frequency')
ax_ltv_hist.legend()
ax_ltv_hist.grid(True, alpha=0.3)

# 4. Segment metrics table visualization (one formatted row per segment)
table_data = [
    [
        segment_name.upper(),
        f"{row['Count']:.0f}",
        f"‚Ç¨{row['Avg LTV (‚Ç¨)']:.0f}",
        f"{row['Avg Orders']:.1f}",
        f"‚Ç¨{row['Avg Order Value (‚Ç¨)']:.0f}",
    ]
    for segment_name, row in segment_metrics.iterrows()
]

ax_table.axis('off')
table = ax_table.table(
    cellText=table_data,
    colLabels=['Segment', 'Customers', 'Avg LTV', 'Avg Orders', 'Avg AOV'],
    cellLoc='center',
    loc='center',
    colColours=['lightgray']*5,
)
table.auto_set_font_size(False)
table.set_fontsize(10)
table.scale(1, 2)
ax_table.set_title('Segment Summary Table', fontweight='bold', pad=20)

plt.tight_layout()
plt.show()

# Printed summary: full aggregate table plus two VIP headline figures
vip_share = (segment_counts['vip']/segment_counts.sum())*100
vip_ltv_ratio = segment_metrics.loc['vip', 'Avg LTV (‚Ç¨)'] / segment_metrics.loc['first-time', 'Avg LTV (‚Ç¨)']

print(f"\nüë• Segment Analysis:")
print(segment_analysis.to_string())
print(f"\nüí° Key Insights:")
print(f"  VIP customers represent {vip_share:.1f}% of customers")
print(f"  VIP LTV is {vip_ltv_ratio:.1f}x first-time customers")

In [None]:
# Final printed summary. Reads names defined by earlier cells:
# completed_orders, top_products_interactions, overall_conversion,
# conversion_from_cart_to_purchase, segment_metrics and order_items.


def _top_share(series):
    """Return ``(most frequent value, its share of all rows in percent)``.

    The share is computed against ``len(series)``, i.e. every row of the
    column, matching how the percentages were computed inline before.
    """
    top_value = series.mode()[0]
    return top_value, series.value_counts()[top_value] / len(series) * 100


_rule = "=" * 80

top_age_group, top_age_group_pct = _top_share(customers['age_group'])
top_country, top_country_pct = _top_share(customers['location_country'])
top_device, top_device_pct = _top_share(interactions['device_type'])
top_referrer, top_referrer_pct = _top_share(interactions['referrer_source'])

print(_rule)
print("üéØ KEY INSIGHTS & RECOMMENDATIONS FOR RECOMMENDATION ENGINE")
print(_rule)

print("\nüìä CUSTOMER BASE:")
print(f"  ‚Ä¢ Total Customers: {len(customers)}")
print(f"  ‚Ä¢ Gender Split: {customers['gender'].value_counts().to_dict()}")
print(f"  ‚Ä¢ Primary Age Group: {top_age_group} ({top_age_group_pct:.0f}%)")
print(f"  ‚Ä¢ Dominant Market: {top_country} ({top_country_pct:.0f}%)")

# Total revenue / LTV come from the customer table; average order value
# comes from completed orders.
vip_mask = customers['customer_segment'] == 'vip'
print("\nüí∞ REVENUE METRICS:")
print(f"  ‚Ä¢ Total Revenue: ‚Ç¨{customers['lifetime_value'].sum():,.2f}")
print(f"  ‚Ä¢ Average Customer LTV: ‚Ç¨{customers['lifetime_value'].mean():.2f}")
print(f"  ‚Ä¢ Average Order Value: ‚Ç¨{completed_orders['total_amount'].mean():.2f}")
print(f"  ‚Ä¢ VIP Customer LTV: ‚Ç¨{customers[vip_mask]['lifetime_value'].mean():.2f}")

print("\nüõçÔ∏è ENGAGEMENT METRICS:")
print(f"  ‚Ä¢ Total Interactions: {len(interactions):,}")
print(f"  ‚Ä¢ Avg Interactions per Customer: {len(interactions)/interactions['customer_id'].nunique():.1f}")
print(f"  ‚Ä¢ Conversion Rate: {overall_conversion:.1f}% (View ‚Üí Purchase)")
print(f"  ‚Ä¢ Cart Abandonment: {100 - conversion_from_cart_to_purchase:.1f}%")

print("\nüé® PRODUCT INSIGHTS:")
print(f"  ‚Ä¢ Unique Products Interacted: {interactions['product_id'].nunique()}")
print(f"  ‚Ä¢ Most Popular Product: {top_products_interactions.index[0]} ({top_products_interactions.values[0]} interactions)")
print(f"  ‚Ä¢ Avg Products per Order: {order_items.groupby('order_id').size().mean():.1f}")

print("\nüì± CHANNEL INSIGHTS:")
print(f"  ‚Ä¢ Top Device: {top_device} ({top_device_pct:.0f}%)")
print(f"  ‚Ä¢ Top Traffic Source: {top_referrer} ({top_referrer_pct:.0f}%)")

print("\n" + _rule)
print("üí° RECOMMENDATIONS FOR STYLE-BASED RECOMMENDATION ENGINE:")
print(_rule)

# Fixed: the "Nx more orders" figure is now the ratio of VIP to first-time
# average orders. Previously the raw VIP average was printed as if it were
# a multiplier.
vip_order_ratio = (
    segment_metrics.loc['vip', 'Avg Orders']
    / segment_metrics.loc['first-time', 'Avg Orders']
)
high_value_count = len(customers[customers['customer_segment'].isin(['vip', 'regular'])])
print("\n1Ô∏è‚É£  PRIORITIZE VIP & REGULAR CUSTOMERS:")
print(f"    ‚Ä¢ VIP customers have {vip_order_ratio:.1f}x more orders than first-time")
print(f"    ‚Ä¢ Focus style profiling on the {high_value_count} high-value customers")

# .get(..., 0) avoids a KeyError when the data has no 'mobile' rows.
mobile_pct = interactions['device_type'].value_counts().get('mobile', 0) / len(interactions) * 100
print("\n2Ô∏è‚É£  OPTIMIZE FOR MOBILE:")
print(f"    ‚Ä¢ {mobile_pct:.0f}% of interactions are mobile")
print(f"    ‚Ä¢ Ensure recommendation UI is mobile-optimized")

print("\n3Ô∏è‚É£  LEVERAGE TOP PRODUCTS:")
print(f"    ‚Ä¢ Products {list(top_products_interactions.head(3).index)} drive most engagement")
print(f"    ‚Ä¢ Use these as anchors for similar product recommendations")

print("\n4Ô∏è‚É£  REDUCE CART ABANDONMENT:")
print(f"    ‚Ä¢ {100 - conversion_from_cart_to_purchase:.0f}% cart abandonment rate")
print(f"    ‚Ä¢ Use style-based 'Complete Your Look' recommendations at checkout")

female_count = len(customers[customers['gender']=='female'])
print("\n5Ô∏è‚É£  GENDER-SPECIFIC RECOMMENDATIONS:")
print(f"    ‚Ä¢ Female customers: {female_count} ({female_count/len(customers)*100:.0f}%)")
print(f"    ‚Ä¢ Tailor visual style profiles by gender preferences")

first_time_count = len(customers[customers['customer_segment']=='first-time'])
print("\n6Ô∏è‚É£  IMPROVE FIRST-TIME CONVERSION:")
print(f"    ‚Ä¢ {first_time_count} first-time customers")
print(f"    ‚Ä¢ Use demographic + popular items until style profile builds")

print("\n" + _rule)
print("‚úÖ NEXT STEPS:")
print(_rule)
print("  1. Extract image embeddings for top 100 products")
print("  2. Build style profiles for VIP + regular customers")
print("  3. A/B test style recommendations vs random on product pages")
print("  4. Measure CTR and conversion lift")
print("  5. Iterate and expand to full catalog")
print(_rule)

## 11. Key Insights & Recommendations Summary

## 10. Customer Segmentation Analysis

## 9. Conversion Funnel Analysis

## 8. Temporal Analysis: Orders & Interactions Over Time

## 7. Product Popularity & Order Analysis

## 6. Interaction Analysis

## 5. Customer Value & Purchase Behavior

## 4. Customer Demographics Analysis

## 3. Dataset Information & Quality Check

## 2. Load All Datasets

## 1. Import Required Libraries