In [40]:
import numpy as np
import pandas as pd

In [41]:
import pickle

# Load the processed DataFrames from variables.pkl
with open('variables.pkl', 'rb') as f:
    vars_dict = pickle.load(f)

# Extract the DataFrames
customer_df = vars_dict['customer']
date_df = vars_dict['date']
facts_df = vars_dict['facts']
product_df = vars_dict['product']
device_df = vars_dict['device']

# Verify the DataFrames are loaded
print("DataFrames loaded successfully:")
print(f"customer_df shape: {customer_df.shape}")
print(f"date_df shape: {date_df.shape}")
print(f"facts_df shape: {facts_df.shape}")
print(f"product_df shape: {product_df.shape}")
print(f"device_df shape: {device_df.shape}")


DataFrames loaded successfully:
customer_df shape: (1000, 5)
date_df shape: (366, 2)
facts_df shape: (5000, 9)
product_df shape: (25, 4)
device_df shape: (5, 3)


In [42]:
facts_df.head()

Unnamed: 0,session_id,customer_id,product_id,device_id,date_id,quantity,abandon_date,abandon_time,is_abandoned
0,1,979,20,5,252,4,2023-07-13,2025-12-18 01:09:01,1
1,2,373,12,3,353,2,2023-11-07,2025-12-18 13:40:54,1
2,3,32,3,4,286,2,2023-03-03,2025-12-18 20:37:31,1
3,4,631,25,1,365,2,NaT,NaT,0
4,5,127,9,3,163,3,2023-07-22,2025-12-18 06:41:51,1


In [43]:
abandoned_sessions = facts_df[facts_df['is_abandoned']==0]
abandoned_sessions.count()

session_id      2476
customer_id     2476
product_id      2476
device_id       2476
date_id         2476
quantity        2476
abandon_date       0
abandon_time       0
is_abandoned    2476
dtype: int64

In [44]:
total_sessions = facts_df['session_id'].count() 
total_sessions

np.int64(5000)

In [45]:
abandoned_rate = abandoned_sessions.count() / total_sessions
abandoned_rate = abandoned_rate[['is_abandoned']] * 100 
print(f"Abandoned Rate:\n {abandoned_rate['is_abandoned']} %")

Abandoned Rate:
 49.519999999999996 %


In [46]:
successful_sessions = facts_df[facts_df['is_abandoned']==1]

In [47]:
successful_purchases = len(successful_sessions[successful_sessions['is_abandoned']==1]) / 100 * 2
print(f"Successful Purchases: \n {successful_purchases} %")

Successful Purchases: 
 50.48 %


In [48]:
devices = device_df.merge(facts_df, on='device_id', how='inner')
device_abandonment = devices.groupby('device_type')['is_abandoned'].mean() * 100
print(f"Device Abandonment Rates (%):\n{device_abandonment}")

Device Abandonment Rates (%):
device_type
Desktop    49.951028
Mobile     50.816327
Tablet     50.421000
Name: is_abandoned, dtype: float64


In [49]:
products = product_df.merge(facts_df, on='product_id', how='inner')
product_abandonment = products.groupby('category')['is_abandoned'].mean() * 100
print(f"Product Category Abandonment Rates (%):\n{product_abandonment}")

Product Category Abandonment Rates (%):
category
Apparel                   52.495010
Beauty & Personal Care    49.398907
Electronics               51.834431
Home & Kitchen            48.871443
Sports & Outdoors         49.650350
Name: is_abandoned, dtype: float64


In [59]:
city = customer_df.merge(facts_df, on='customer_id', how='inner')
city_abandonment = city.groupby('city')['is_abandoned'].mean() * 100
print(f"City-wise Abandonment Rates (%):\n{city_abandonment}")

City-wise Abandonment Rates (%):
city
Berlin      52.913085
London      52.145923
Mumbai      50.609081
New York    47.492904
Sydney      49.481621
Name: is_abandoned, dtype: float64


In [56]:
high_intent_customers = customer_df.merge(facts_df, on='customer_id', how='inner') \
                                   .merge(product_df, on='product_id', how='inner')
high_intent_customers = high_intent_customers[high_intent_customers['quantity'] >= 3]
high_intent_abandonment = high_intent_customers['is_abandoned'].mean() * 100
print(f"High Intent Customers Overall Abandonment Rate: {high_intent_abandonment:.2f}%")

High Intent Customers Overall Abandonment Rate: 50.30%
