In [2]:
import os

import matplotlib.patheffects as path_effects
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns


# Location of the cleaned orders export. The original local path stays the
# default; set the GAMEZONE_ORDERS_CSV environment variable to run the notebook
# on another machine without editing this cell.
file_path = os.environ.get(
    "GAMEZONE_ORDERS_CSV",
    r"E:\Projects\Gamezone Orders Data\Data\Cleaned\gamezone_orders_data_cleaned.csv",
)

df = pd.read_csv(
    file_path,
    parse_dates=['purchase_ts_cleaned', 'ship_ts'],
    dtype={
        # Customer IDs are alphanumeric and may have leading zeros; read them
        # as text so pandas never infers a numeric type for them.
        'user_id': str,
        # Nullable integer dtype keeps missing values without casting to float.
        'purchase_year': 'Int64',
        'purchase_month': 'Int64',
        'time_to_ship': 'Int64',
        'revenue': 'float'
    },
    encoding='utf-8',
    keep_default_na=False,
    na_values=['']  # Only treat empty strings as NaN
)

In [3]:

# Create helper column: Order Count per Customer
# One row per customer; `count` tallies that customer's non-null order_id values.
order_count_per_customer = (
    df.groupby('user_id')['order_id']
    .count()
    .rename('order_count')
    .reset_index()
)

# Merge back to main dataframe
# Drop any `order_count` column left by an earlier run of this cell first;
# otherwise re-running makes merge() emit `order_count_x` / `order_count_y`
# instead of a single `order_count` column.
df = df.drop(columns='order_count', errors='ignore').merge(
    order_count_per_customer, on='user_id', how='left'
)

# Create pivot: Group customers by order frequency
def categorize_orders(count):
    """Map a customer's order count to its frequency-segment label.

    Parameters
    ----------
    count : int
        Number of orders attributed to one customer.

    Returns
    -------
    str or None
        '1 Order' for 1, '2 Orders' for 2, '3+ Orders' for anything above 2.
        Values that fit none of these (zero, negative, NaN) return None so
        they are not silently reported as '3+ Orders'.
    """
    if count == 1:
        return '1 Order'
    if count == 2:
        return '2 Orders'
    if count > 2:
        return '3+ Orders'
    # Comparisons against NaN are all False, so NaN also falls through to here.
    return None

# Label every customer with their order-frequency segment.
order_count_per_customer['order_category'] = order_count_per_customer['order_count'].map(categorize_orders)

# Pivot table: number of customers that fall into each segment
customer_behavior_pivot = (
    order_count_per_customer
    .groupby('order_category')['user_id']
    .count()
    .rename('customer_count')
    .reset_index()
)

# Give the segment column an explicit ordering so sorting follows
# 1 -> 2 -> 3+ rather than relying on string order.
category_order = ['1 Order', '2 Orders', '3+ Orders']
customer_behavior_pivot = (
    customer_behavior_pivot
    .astype({'order_category': pd.CategoricalDtype(categories=category_order, ordered=True)})
    .sort_values('order_category')
)

# Share of all customers per segment, as a percentage rounded to 2 decimals
total_customers = customer_behavior_pivot['customer_count'].sum()
customer_behavior_pivot['percentage'] = (
    customer_behavior_pivot['customer_count'].div(total_customers).mul(100).round(2)
)

# Calculate key metrics
# Sum the matching rows instead of indexing `.values[0]`: when no customer has
# exactly one order the '1 Order' row is absent, and indexing would raise
# IndexError, whereas the sum of an empty selection is simply 0.
one_time_customers = customer_behavior_pivot.loc[
    customer_behavior_pivot['order_category'] == '1 Order', 'customer_count'
].sum()
repeat_customers = customer_behavior_pivot.loc[
    customer_behavior_pivot['order_category'].isin(['2 Orders', '3+ Orders']), 'customer_count'
].sum()

# Both rates are percentages of all customers, rounded to 2 decimals.
one_time_pct = round((one_time_customers / total_customers * 100), 2)
repeat_rate = round((repeat_customers / total_customers * 100), 2)

# Compare to benchmark
# Repeat-rate reference band (in percent) used by the report below.
benchmark_min = 20
benchmark_max = 40
benchmark_avg = 30

# INSIGHTS & ANALYSIS
# Prints the repeat-purchase report: headline metrics, the segment breakdown,
# the benchmark verdict, recommendations and a log entry.
print("\n" + "="*70)
print("📊 CUSTOMER BEHAVIOR ANALYSIS: REPEAT PURCHASE RATE")
print("="*70)

print("\n📈 KEY METRICS:")
print(f"   Total Customers: {total_customers:,}")
print(f"   One-Time Customers: {one_time_customers:,} ({one_time_pct}%)")
print(f"   Repeat Customers: {repeat_customers:,} ({repeat_rate}%)")

print("\n🔍 CUSTOMER SEGMENTATION:")
for _, row in customer_behavior_pivot.iterrows():
    print(f"   {row['order_category']}: {row['customer_count']:,} customers ({row['percentage']}%)")

print("\n📊 BENCHMARK COMPARISON:")
print(f"   GameZone Repeat Rate: {repeat_rate}%")
print(f"   Industry Benchmark: {benchmark_min}-{benchmark_max}% (Avg: {benchmark_avg}%)")

# Classify the repeat rate once; everything printed after this point is driven
# by `performance`, so the verdict, recommendations, priority and action can
# never disagree with each other.
if benchmark_min <= repeat_rate <= benchmark_max:
    status = "✅ WITHIN BENCHMARK RANGE"
    performance = "GOOD"
elif repeat_rate > benchmark_max:
    status = "🌟 ABOVE BENCHMARK RANGE"
    performance = "EXCELLENT"
else:
    status = "⚠️ BELOW BENCHMARK RANGE"
    performance = "NEEDS IMPROVEMENT"

print(f"   Status: {status}")
print(f"   Performance: {performance}")

print("\n🎯 CUSTOMER LIFETIME VALUE (CLV) INDICATOR:")
avg_orders_per_customer = order_count_per_customer['order_count'].mean()
max_orders = order_count_per_customer['order_count'].max()
print(f"   Average Orders per Customer: {avg_orders_per_customer:.2f}")
print(f"   Maximum Orders by Single Customer: {max_orders}")

print("\n💡 STRATEGIC RECOMMENDATIONS:")
if performance == "NEEDS IMPROVEMENT":
    print("   1. LOYALTY PROGRAM: Implement rewards program to incentivize repeat purchases")
    print("   2. EMAIL CAMPAIGNS: Send personalized follow-up emails after first purchase")
    print("   3. RETARGETING ADS: Run retargeting campaigns for one-time customers")
    print("   4. CUSTOMER FEEDBACK: Survey one-time customers to understand barriers")
elif performance == "GOOD":
    print("   1. MAINTAIN MOMENTUM: Continue current customer retention strategies")
    print("   2. UPSELL/CROSS-SELL: Introduce product recommendations to increase order frequency")
    print("   3. VIP PROGRAM: Create exclusive benefits for 3+ order customers")
    print("   4. REFERRAL INCENTIVES: Encourage repeat customers to refer friends")
else:
    print("   1. SCALE SUCCESS: Document and replicate what's working")
    print("   2. SUBSCRIPTION MODEL: Consider introducing subscription options")
    print("   3. COMMUNITY BUILDING: Create brand community to strengthen loyalty")
    print("   4. CASE STUDY: Share success story with stakeholders")

# Only a below-benchmark result is escalated to HIGH priority.
needs_improvement = performance == "NEEDS IMPROVEMENT"

print("\n📝 LOG INSIGHT:")
print("   Metric: Repeat Purchase Rate")
print("   Dimension: Customer Behavior")
print(f"   Observation: {repeat_rate}% of customers are repeat buyers ({status})")
print("   Stakeholders: CMO, CRM Manager, Customer Success Team")
print(f"   Priority: {'HIGH' if needs_improvement else 'MEDIUM'}")
print(f"   Action: {'Implement retention strategies to increase repeat rate' if needs_improvement else 'Optimize existing retention programs'}")

print("\n" + "="*70)


📊 CUSTOMER BEHAVIOR ANALYSIS: REPEAT PURCHASE RATE

📈 KEY METRICS:
   Total Customers: 19,851
   One-Time Customers: 17,962 (90.48%)
   Repeat Customers: 1,889 (9.52%)

🔍 CUSTOMER SEGMENTATION:
   1 Order: 17,962 customers (90.48%)
   2 Orders: 1,772 customers (8.93%)
   3+ Orders: 117 customers (0.59%)

📊 BENCHMARK COMPARISON:
   GameZone Repeat Rate: 9.52%
   Industry Benchmark: 20-40% (Avg: 30%)
   Status: ⚠️ BELOW BENCHMARK RANGE
   Performance: NEEDS IMPROVEMENT

🎯 CUSTOMER LIFETIME VALUE (CLV) INDICATOR:
   Average Orders per Customer: 1.10
   Maximum Orders by Single Customer: 4

💡 STRATEGIC RECOMMENDATIONS:
   1. LOYALTY PROGRAM: Implement rewards program to incentivize repeat purchases
   2. EMAIL CAMPAIGNS: Send personalized follow-up emails after first purchase
   3. RETARGETING ADS: Run retargeting campaigns for one-time customers
   4. CUSTOMER FEEDBACK: Survey one-time customers to understand barriers

📝 LOG INSIGHT:
   Metric: Repeat Purchase Ra

In [4]:
# Display the segment table (order_category, customer_count, percentage).
customer_behavior_pivot

Unnamed: 0,order_category,customer_count,percentage
0,1 Order,17962,90.48
1,2 Orders,1772,8.93
2,3+ Orders,117,0.59


In [5]:
# Orders per customer: one row per user_id with the number of non-null order_ids
customer_orders = (
    df.groupby('user_id')['order_id']
    .count()
    .rename('order_count')
    .reset_index()
)

# Bucket customers by order frequency.
# Intervals are right-closed: (0, 1] -> '1 Order', (1, 2] -> '2 Orders',
# (2, inf] -> '3+ Orders'.
customer_orders['order_category'] = pd.cut(
    customer_orders['order_count'],
    bins=[0, 1, 2, float('inf')],
    labels=['1 Order', '2 Orders', '3+ Orders'],
)

# Customers per bucket; observed=True keeps only buckets that actually occur
customer_summary = (
    customer_orders
    .groupby('order_category', observed=True)['user_id']
    .count()
    .rename('customer_count')
    .reset_index()
)

# Calculate percentages
# Every customer row counts towards the denominator.
total_customers = customer_orders.shape[0]
customer_summary['percentage'] = (customer_summary['customer_count'] / total_customers * 100).round(2)

# Calculate metrics
# Sum the '1 Order' row instead of indexing `.values[0]`: the summary is built
# with observed=True, so that row is absent when no customer has exactly one
# order, and indexing would raise IndexError. An empty selection sums to 0.
one_time_customers = customer_summary.loc[
    customer_summary['order_category'] == '1 Order', 'customer_count'
].sum()
repeat_customers = total_customers - one_time_customers
repeat_rate = round((repeat_customers / total_customers * 100), 2)

# Benchmark comparison
# (low, high) bounds of the repeat-rate reference band, in percent.
benchmark_range = (20, 40)
benchmark_avg = 30

# Classify the repeat rate against the band. `performance` selects the
# recommendation list and `priority` is reported in the log entry.
if repeat_rate < benchmark_range[0]:
    status, performance, priority = "⚠️ BELOW BENCHMARK", "NEEDS IMPROVEMENT", "HIGH"
elif repeat_rate > benchmark_range[1]:
    status, performance, priority = "🌟 ABOVE BENCHMARK", "EXCELLENT", "MEDIUM"
else:
    status, performance, priority = "✅ WITHIN BENCHMARK", "GOOD", "MEDIUM"

# Recommended actions, keyed by the performance label assigned during the
# benchmark comparison. Each entry is an ordered list of four actions.
recommendations = {}

recommendations["NEEDS IMPROVEMENT"] = [
    "LOYALTY PROGRAM: Implement rewards program to incentivize repeat purchases",
    "EMAIL CAMPAIGNS: Send personalized follow-up emails after first purchase",
    "RETARGETING ADS: Run retargeting campaigns for one-time customers",
    "CUSTOMER FEEDBACK: Survey one-time customers to understand barriers",
]

recommendations["GOOD"] = [
    "MAINTAIN MOMENTUM: Continue current customer retention strategies",
    "UPSELL/CROSS-SELL: Introduce product recommendations to increase order frequency",
    "VIP PROGRAM: Create exclusive benefits for 3+ order customers",
    "REFERRAL INCENTIVES: Encourage repeat customers to refer friends",
]

recommendations["EXCELLENT"] = [
    "SCALE SUCCESS: Document and replicate what's working",
    "SUBSCRIPTION MODEL: Consider introducing subscription options",
    "COMMUNITY BUILDING: Create brand community to strengthen loyalty",
    "CASE STUDY: Share success story with stakeholders",
]

# Print results
# Report layout: headline metrics, segment breakdown, benchmark verdict,
# order-frequency indicators, recommendations and a log entry.
print("\n" + "="*70)
print("📊 CUSTOMER BEHAVIOR ANALYSIS: REPEAT PURCHASE RATE")
print("="*70)

print("\n📈 KEY METRICS:")
print(f"   Total Customers: {total_customers:,}")
print(f"   One-Time Customers: {one_time_customers:,} ({(one_time_customers/total_customers*100):.2f}%)")
print(f"   Repeat Customers: {repeat_customers:,} ({repeat_rate}%)")

print("\n🔍 CUSTOMER SEGMENTATION:")
for _, row in customer_summary.iterrows():
    print(f"   {row['order_category']}: {row['customer_count']:,} customers ({row['percentage']}%)")

print("\n📊 BENCHMARK COMPARISON:")
print(f"   GameZone Repeat Rate: {repeat_rate}%")
print(f"   Industry Benchmark: {benchmark_range[0]}-{benchmark_range[1]}% (Avg: {benchmark_avg}%)")
print(f"   Status: {status}")
print(f"   Performance: {performance}")

print("\n🎯 CUSTOMER LIFETIME VALUE INDICATOR:")
print(f"   Average Orders per Customer: {customer_orders['order_count'].mean():.2f}")
print(f"   Maximum Orders by Single Customer: {customer_orders['order_count'].max()}")

print("\n💡 STRATEGIC RECOMMENDATIONS:")
# The recommendation list is selected by the performance label; numbering
# starts at 1.
for i, rec in enumerate(recommendations[performance], 1):
    print(f"   {i}. {rec}")

print("\n📝 LOG INSIGHT:")
print("   Metric: Repeat Purchase Rate")
print("   Dimension: Customer Behavior")
print(f"   Observation: {repeat_rate}% of customers are repeat buyers ({status})")
print("   Stakeholders: CMO, CRM Manager, Customer Success Team")
print(f"   Priority: {priority}")
print(f"   Action: {'Implement retention strategies to increase repeat rate' if performance == 'NEEDS IMPROVEMENT' else 'Optimize existing retention programs'}")

print("\n" + "="*70)




📊 CUSTOMER BEHAVIOR ANALYSIS: REPEAT PURCHASE RATE

📈 KEY METRICS:
   Total Customers: 19,851
   One-Time Customers: 17,962 (90.48%)
   Repeat Customers: 1,889 (9.52%)

🔍 CUSTOMER SEGMENTATION:
   1 Order: 17,962 customers (90.48%)
   2 Orders: 1,772 customers (8.93%)
   3+ Orders: 117 customers (0.59%)

📊 BENCHMARK COMPARISON:
   GameZone Repeat Rate: 9.52%
   Industry Benchmark: 20-40% (Avg: 30%)
   Status: ⚠️ BELOW BENCHMARK
   Performance: NEEDS IMPROVEMENT

🎯 CUSTOMER LIFETIME VALUE INDICATOR:
   Average Orders per Customer: 1.10
   Maximum Orders by Single Customer: 4

💡 STRATEGIC RECOMMENDATIONS:
   1. LOYALTY PROGRAM: Implement rewards program to incentivize repeat purchases
   2. EMAIL CAMPAIGNS: Send personalized follow-up emails after first purchase
   3. RETARGETING ADS: Run retargeting campaigns for one-time customers
   4. CUSTOMER FEEDBACK: Survey one-time customers to understand barriers

📝 LOG INSIGHT:
   Metric: Repeat Purchase Rate
   Dimens

In [6]:
# A notebook cell renders only its final expression, so the summary table is
# shown explicitly with display(); the per-customer table is the cell's
# last expression and renders on its own.
display(customer_summary)

customer_orders

Unnamed: 0,user_id,order_count,order_category
0,.56E880,1,1 Order
1,.99E45,1,1 Order
2,0003cba8,1,1 Order
3,0005fdf2,1,1 Order
4,0007b840,1,1 Order
...,...,...,...
19846,fff8131d,2,2 Orders
19847,fff83841,1,1 Order
19848,fff9f549,1,1 Order
19849,fffb781c,1,1 Order


In [7]:
# Persist both result tables as CSV files in the working directory, without
# the index column.
for csv_name, frame in (
    ("customer_behavior_summary.csv", customer_summary),
    ("customer_order_frequency.csv", customer_orders),
):
    frame.to_csv(csv_name, index=False)