In [37]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from warnings import filterwarnings

In [38]:
# Create order_period column based on order time
def get_order_period(order_time):
    """Classify a time of day into an order period.

    Parameters
    ----------
    order_time : datetime.time
        Time of day at which the order was placed.

    Returns
    -------
    str
        'Morning' for 06:00:00 <= t < 12:00:00, 'Afternoon' for
        12:00:00 <= t < 18:00:00, and 'Night' for every other time
        (18:00:00 onwards as well as midnight up to 06:00:00).
    """
    # Local import keeps the function self-contained within this cell.
    from datetime import time

    # Build the boundaries directly rather than parsing string literals with
    # pd.to_datetime on every call; this function runs once per row via .apply.
    morning_start, afternoon_start, night_start = time(6), time(12), time(18)

    if morning_start <= order_time < afternoon_start:
        return 'Morning'
    if afternoon_start <= order_time < night_start:
        return 'Afternoon'
    # Everything outside 06:00-18:00, including the hours after midnight,
    # is labelled 'Night'.
    return 'Night'
    
def get_week_of_month(date):
    """Return the 1-based week-of-month index for `date`.

    The day of the month is shifted by the weekday of the 1st of that month
    (Monday = 0), so week boundaries fall on Mondays and the first week may be
    a partial one. For a long month that starts late in the week the result
    can be as high as 6.

    `date` must provide `.replace(day=...)`, `.day` and `.weekday()`
    (e.g. datetime.date, datetime.datetime or pandas.Timestamp).
    """
    # Number of weekday slots that precede the 1st within its calendar week
    leading_offset = date.replace(day=1).weekday()
    zero_based_slot = date.day + leading_offset - 1
    return zero_based_slot // 7 + 1

In [39]:
# Load the raw order data. A forward slash is used as the separator because
# '\d' inside a regular string literal is an invalid escape sequence (Python
# emits a warning for it) and a backslash separator only resolves on Windows.
data = pd.read_csv('data/delhi_data.csv')
data.head()

  data = pd.read_csv('data\delhi_data.csv')


Unnamed: 0,Order ID,Customer ID,Restaurant ID,Order Date and Time,Delivery Date and Time,Order Value,Delivery Fee,Payment Method,Discounts and Offers,Commission Fee,Payment Processing Fee,Refunds/Chargebacks
0,1,C8270,R2924,2024-02-01 01:11:52,2024-02-01 02:39:52,1914,0,Credit Card,5% on App,150,47,0
1,2,C1860,R2054,2024-02-02 22:11:04,2024-02-02 22:46:04,986,40,Digital Wallet,10%,198,23,0
2,3,C6390,R2870,2024-01-31 05:54:35,2024-01-31 06:52:35,937,30,Cash on Delivery,15% New User,195,45,0
3,4,C6191,R2642,2024-01-16 22:52:49,2024-01-16 23:38:49,1463,50,Cash on Delivery,,146,27,0
4,5,C6734,R2799,2024-01-29 01:19:30,2024-01-29 02:48:30,1992,30,Cash on Delivery,50 off Promo,130,50,0


Feature Engineering

In [40]:
# Work on a copy so the raw frame stays untouched and this cell can be re-run
df = data.copy()

# Convert 'Order Date and Time' and 'Delivery Date and Time' to datetime format
df['Order Date and Time'] = pd.to_datetime(df['Order Date and Time'])
df['Delivery Date and Time'] = pd.to_datetime(df['Delivery Date and Time'])

# Extracting separate columns for order and delivery date and time
df['Order Date'] = df['Order Date and Time'].dt.date
df['Order Time'] = df['Order Date and Time'].dt.time
df['Delivery Date'] = df['Delivery Date and Time'].dt.date
df['Delivery Time'] = df['Delivery Date and Time'].dt.time

# Extract the first number found in 'Discounts and Offers'; rows without an
# offer become 0. expand=False makes str.extract return a Series rather than a
# one-column DataFrame.
# NOTE: the number is taken as-is, so percentages ("10%") and flat amounts
# ("50 off Promo") end up in the same column. Int8 holds values up to 127.
df['Discount'] = (
    df['Discounts and Offers']
    .str.extract(r"(\d+\.?\d*)", expand=False)
    .astype('Int8')
    .fillna(0)
)

df['Order Period'] = df['Order Time'].apply(get_order_period)

# Week of the month, computed once from the full timestamp
df['Order Week'] = df['Order Date and Time'].apply(get_week_of_month)

# Convert 'Order Week' to an ordered categorical type. get_week_of_month can
# return 6 for long months that start late in the week; include that category
# when it occurs so those rows are not silently turned into NaN.
week_categories = [1, 2, 3, 4, 5]
if (df['Order Week'] == 6).any():
    week_categories.append(6)
df['Order Week'] = pd.Categorical(df['Order Week'], categories=week_categories, ordered=True)

# Replace values in 'Payment Method' column with short codes
df['Payment Method'] = df['Payment Method'].replace({
    'Cash on Delivery': 'COD',
    'Credit Card': 'CC',
    'Digital Wallet': 'UPI'
})

df.sample(6)

Unnamed: 0,Order ID,Customer ID,Restaurant ID,Order Date and Time,Delivery Date and Time,Order Value,Delivery Fee,Payment Method,Discounts and Offers,Commission Fee,Payment Processing Fee,Refunds/Chargebacks,Order Date,Order Time,Delivery Date,Delivery Time,Discount,Order Period,Order Week
313,314,C3806,R2442,2024-02-02 05:57:47,2024-02-02 06:47:47,1594,50,UPI,50 off Promo,141,38,0,2024-02-02,05:57:47,2024-02-02,06:47:47,50,Night,1
195,196,C7546,R2309,2024-01-26 20:16:24,2024-01-26 22:15:24,1044,20,UPI,50 off Promo,178,44,0,2024-01-26,20:16:24,2024-01-26,22:15:24,50,Night,4
321,322,C8992,R2619,2024-02-03 11:03:25,2024-02-03 12:21:25,1313,0,CC,50 off Promo,71,34,150,2024-02-03,11:03:25,2024-02-03,12:21:25,50,Morning,1
139,140,C2306,R2040,2024-01-09 14:14:48,2024-01-09 15:48:48,793,20,COD,50 off Promo,106,19,100,2024-01-09,14:14:48,2024-01-09,15:48:48,50,Afternoon,2
634,635,C4419,R2119,2024-01-17 22:07:53,2024-01-17 22:43:53,895,20,UPI,10%,178,16,0,2024-01-17,22:07:53,2024-01-17,22:43:53,10,Night,3
376,377,C8404,R2238,2024-01-28 04:16:43,2024-01-28 05:11:43,1161,30,UPI,,195,34,0,2024-01-28,04:16:43,2024-01-28,05:11:43,0,Night,4


In [41]:

## Top 5 customers by total order value
# Sum 'Order Value' per customer and rank the totals from highest to lowest
cust_spend = (
    df.groupby('Customer ID', as_index=False)['Order Value']
    .sum()
    .sort_values('Order Value', ascending=False)
)
cust_spend.head()

Unnamed: 0,Customer ID,Order Value
730,C7949,3870
282,C3832,3768
770,C8266,3703
457,C5555,3664
406,C5146,3361


Customer-wise analysis

In [42]:
import datetime as dt

In [43]:
# Customer ID whose metrics are computed by the cells that follow
cust_id = 'C7938'

In [44]:
# Empty Metric/Value table; metric rows are appended to it later in the notebook
customer_metrics = pd.DataFrame(columns=['Metric', 'Value'])

In [45]:
# Assumes 'df' is the feature-engineered order frame, with
# 'Order Date and Time' already parsed as datetime.
# NOTE: recency is measured against the wall clock, so the value grows every
# day the notebook is re-run and is not reproducible across runs.
current_date = dt.datetime.now()

# 1. Recency: number of whole days since each customer's most recent order
recency = df.groupby('Customer ID')['Order Date and Time'].max().reset_index()
recency['Recency'] = (current_date - recency['Order Date and Time']).dt.days

# Keep only the selected customer's row, indexed by customer ID
customer_recency = recency[recency['Customer ID'] == cust_id][['Customer ID', 'Recency']].set_index(['Customer ID'])

# Add the Recency row through the public pd.concat API; DataFrame._append is a
# private method and may change or disappear between pandas versions.
recency_row = pd.DataFrame([{
    'Customer ID': cust_id,
    'Metric': 'Recency',
    'Value': customer_recency['Recency'].values[0],
}])
customer_metrics = pd.concat([customer_metrics, recency_row], ignore_index=True)
customer_metrics.head()


Unnamed: 0,Metric,Value,Customer ID
0,Recency,263,C7938


In [46]:
## Average Order Value (AOV) for a single customer
def calculate_average_order_value(customer_id):
    """Compute the average order value of one customer.

    Reads the notebook-level `df` frame and keeps the rows whose
    'Customer ID' equals `customer_id`.

    Returns a one-row DataFrame with columns 'Metric' and 'Value' (total
    'Order Value' divided by the number of orders, rounded to 2 decimals),
    or None when the customer has no rows in `df`.
    """
    orders = df[df['Customer ID'] == customer_id]

    # Guard clause: nothing to average when the customer has no orders
    if orders.empty:
        return None

    total_spent = orders['Order Value'].sum()
    order_count = len(orders)
    average = round(total_spent / order_count, 2)
    return pd.DataFrame({'Metric': ['Average Order Value'], 'Value': [average]})

# Compute the AOV table for the selected customer; the bare name on the last
# line renders it as the cell output
aov = calculate_average_order_value(cust_id)
aov

Unnamed: 0,Metric,Value
0,Average Order Value,760.33


In [49]:
def calculate_preferred_order_period(customer_id):
    """Find the order period a customer orders in most often.

    Reads the notebook-level `df` frame and keeps the rows whose
    'Customer ID' equals `customer_id`.

    Returns a one-row DataFrame with columns 'Metric' and 'Value', where the
    value is the first entry of the mode of 'Order Period', or None when the
    customer has no rows in `df`.
    """
    orders = df[df['Customer ID'] == customer_id]

    # Guard clause: an unknown customer has no preferred period
    if orders.empty:
        return None

    most_common_period = orders['Order Period'].mode()[0]
    return pd.DataFrame({'Metric': ['Preferred Order Period'], 'Value': [most_common_period]})

preferred_order_period_df = calculate_preferred_order_period(cust_id)
# End the cell with the frame so the result is actually rendered; the
# assignment alone produces no output.
preferred_order_period_df



                   Metric      Value
0  Preferred Order Period  Afternoon


In [51]:
def calculate_average_days_between_orders(customer_id):
    """Average number of days between a customer's consecutive orders.

    Reads the notebook-level `df` frame, keeps the rows whose 'Customer ID'
    equals `customer_id` and orders them by 'Order Date and Time'.

    Only the whole-day component of each gap is used (`.dt.days`), so a gap of
    7 days 4 hours counts as 7.

    Returns a one-row DataFrame with columns 'Metric' and 'Value', or None
    when the customer has fewer than two orders.
    """
    orders = df[df['Customer ID'] == customer_id].sort_values(by='Order Date and Time')

    # At least two orders are needed to form a gap
    if len(orders) <= 1:
        return None

    # The first diff is NaT (no predecessor); mean() skips the resulting NaN
    gaps_in_days = orders['Order Date and Time'].diff().dt.days
    return pd.DataFrame({'Metric': ['Average Time Between Orders'], 'Value': [gaps_in_days.mean()]})

# Example usage: end the cell with the frame itself so the notebook renders it
# as a table instead of print()'s plain-text dump
average_days_between_orders_df = calculate_average_days_between_orders(cust_id)
average_days_between_orders_df


                        Metric  Value
0  Average Time Between Orders    7.0


In [53]:
def calculate_discount_usage_rate(customer_id):
    """Percentage of a customer's orders that carried a discount.

    Reads the notebook-level `df` frame and keeps the rows whose
    'Customer ID' equals `customer_id`. An order counts as discounted when its
    'Discount' value is greater than 0.

    Returns a one-row DataFrame with columns 'Metric' and 'Value' (a
    percentage between 0 and 100), or None when the customer has no rows in
    `df`.
    """
    orders = df[df['Customer ID'] == customer_id]

    # Guard clause: avoids dividing by zero for an unknown customer
    if orders.empty:
        return None

    discounted_orders = orders[orders['Discount'] > 0]
    usage_rate = len(discounted_orders) / len(orders) * 100
    return pd.DataFrame({'Metric': ['Discount Usage Rate'], 'Value': [usage_rate]})

# End the cell with the frame itself so the notebook renders it as a table
# instead of print()'s plain-text dump
discount_usage_rate_df = calculate_discount_usage_rate(cust_id)
discount_usage_rate_df

                Metric  Value
0  Discount Usage Rate  100.0


In [54]:
# Preferred payment method for a single customer
def calculate_preferred_payment_method(customer_id):
    """Find the payment method a customer uses most often.

    Reads the notebook-level `df` frame and keeps the rows whose
    'Customer ID' equals `customer_id`.

    Returns a one-row DataFrame with columns 'Metric' and 'Value', where the
    value is the first entry of the mode of 'Payment Method', or None when the
    customer has no rows in `df`.
    """
    orders = df[df['Customer ID'] == customer_id]

    # Guard clause: an unknown customer has no preferred method
    if orders.empty:
        return None

    most_used_method = orders['Payment Method'].mode()[0]
    return pd.DataFrame({'Metric': ['Preferred Payment Method'], 'Value': [most_used_method]})

# End the cell with the frame itself so the notebook renders it as a table
# instead of print()'s plain-text dump
preferred_payment_method_df = calculate_preferred_payment_method(cust_id)
preferred_payment_method_df


                     Metric Value
0  Preferred Payment Method   COD
