Behavioural Analysis of Customer Segments

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
from behavioural_analysis import *

In [None]:
# Load the labelled RFM segments for the reference year (2020) into a DataFrame.
# The CSV is expected to be the output of an earlier segmentation step, i.e. it
# already carries a segment label per row.
# low_memory=False makes pandas parse the file in a single pass rather than in
# internal chunks, which avoids mixed-dtype inference on a column.
df = pd.read_csv(
    'rfm_segments_2020.csv',
    low_memory=False,
)

In [3]:
# Data cleaning
# Parse the purchase date so the calendar year can be derived via the .dt accessor.
df['purchase_date'] = pd.to_datetime(df['purchase_date'])
df['year'] = df['purchase_date'].dt.year  # Extract the year

# Characters 5-7 of the campaign key hold the campaign code
# (e.g. '2017-EST-APR' -> 'EST'); rows without a campaign key stay missing.
df['campaign_name'] = df['campaign_key'].str[5:8]  # Extract the campaign code

# Filter out rows from 2021 onwards.
# The explicit .copy() makes the filtered frame independent of the original, so
# the column assignments below write to a real DataFrame instead of a possible
# view (this is what triggers pandas' SettingWithCopyWarning otherwise).
df = df[df['year'] < 2021].copy()

# Map campaign codes to actual campaign names
campaign_mapping = {
    'EST': 'Easter Sale',
    'MID': 'Mid-Year Sale',
    'HLW': 'Halloween',
    'XMS': 'Christmas',
    'STD': "St. Patrick's Day",
    'CMO': 'Cinco de Mayo',
    'DRK': 'Drinks Bonanza',
    'FBD': 'Food and Beverage Day',
    'SMS': 'Super Mart Sale',
    'MMS': 'Markdown Mega Sale'
}
# Codes that are not in the mapping (and missing codes) become NaN.
df['campaign_name'] = df['campaign_name'].map(campaign_mapping)

# Derived features; the extract_* helpers are not defined in this notebook
# (presumably they come from the `behavioural_analysis` star import).
df['time_of_day'] = df['time_of_purchase'].apply(extract_time_of_day)
df['category'] = df['description'].apply(extract_category)
df['channel_type'] = df['mkt_chnl_key'].apply(extract_channel_type)

# NOTE(review): the same frame is passed for both positional arguments --
# confirm against add_marketing_indicator's signature that this is intended.
df = add_marketing_indicator(df, df, customer_id_col='customer_key', marketing_key_col='mkt_chnl_key')
df.head()

Unnamed: 0,customer_key,quantity_purchased,total_price,purchase_date,time_of_purchase,item_name,description,unit_price,manufacturing_country,supplier,...,revenue,campaign_key,mkt_chnl_key,Segment,year,campaign_name,time_of_day,category,channel_type,marketing_indicator
0,C000001,5,70,2019-10-05,10:15:00,Austin Toasty Crackers w/ PBtr,Food - Snacks,14,China,"CHERRY GROUP CO.,LTD",...,15,,,Low-Value,2019,,morning,Food,,1.0
1,C000001,10,290,2017-04-20,22:38:00,Foam Coffee Cups - 20 ounce,Dishware - Cups Hot,29,Netherlands,Bolsius Boxmeer,...,110,2017-EST-APR,MKT-RADIO-EASSAL-2017,Low-Value,2017,Easter Sale,midnight,Dishware,RADIO,1.0
2,C000001,6,96,2017-06-27,23:49:00,Sprite - 12 oz cans,a. Beverage - Soda,16,poland,CHROMADURLIN S.A.S,...,78,2017-MID-JUN,MKT-RADIO-MIDSAL-2017,Low-Value,2017,Mid-Year Sale,midnight,Beverage,RADIO,1.0
3,C000001,7,56,2017-09-13,14:44:00,Waterloo Sparkling Watermelon 12oz,a. Beverage Sparkling Water,8,poland,CHROMADURLIN S.A.S,...,42,2017-DRK-SEP,MKT-RADIO-DRIBON-2017,Low-Value,2017,Drinks Bonanza,afternoon,Beverage,RADIO,1.0
4,C000001,2,84,2014-04-03,20:46:00,K Cups - McCafe Premium Roast,Coffee K-Cups,42,India,Indo Count Industries Ltd,...,10,,,Low-Value,2014,,evening,Coffee products,,1.0


In [4]:
# Average revenue per product category within each customer segment.
# The call is the cell's last expression, so its result is rendered as the
# cell output.
analyze_popular_items_by_average_revenue(
    df,
    segment_col='Segment',
    category_col='category',
    revenue_col='revenue',
)

Unnamed: 0,Segment,category,avg_revenue
1,High-Value,Coffee products,79.431323
5,High-Value,Kitchen Supplies,63.442667
0,High-Value,Beverage,52.993266
2,High-Value,Dishware,52.800186
3,High-Value,Food,52.740203
4,High-Value,Gum,42.843817
6,High-Value,Medicine,26.892387
8,Low-Value,Coffee products,75.036993
12,Low-Value,Kitchen Supplies,61.393009
10,Low-Value,Food,52.019033


In [5]:
# Plot purchases by time of day for each segment, using the cleaned frame.
# The helper is not defined in this notebook (presumably provided by the
# `behavioural_analysis` star import); presumably it reads the `time_of_day`
# and `Segment` columns -- TODO confirm.
plot_time_of_day_purchases_by_segment(df)

In [6]:
# Plot campaign participation percentage per segment, using the cleaned frame.
# The helper is not defined in this notebook (presumably provided by the
# `behavioural_analysis` star import); how "participation" is computed is not
# visible here -- TODO confirm.
plot_campaign_participation_percentage_by_segment(df)

In [None]:

# Campaigns to compare; the labels match values of the campaign-code-to-name
# mapping used in the data-cleaning cell.
campaigns = [
    'Easter Sale',
    'Mid-Year Sale',
    'Christmas',
]

# The helper hands back a figure object, which is rendered explicitly here.
fig = plot_campaign_participation_distribution(df, campaigns)
fig.show()



In [None]:


# Marketing channels to compare; presumably these are matched against the
# `channel_type` column derived in the data-cleaning cell -- TODO confirm.
channels = [
    'RADIO',
    'TV',
    'INSTAGRAM',
    'YOUTUBE',
    'TIKTOK',
    'PHYSICAL',
]
plot_channel_participation_distribution(df, channels)


