In [16]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

In [17]:
# Load the fact table holding demographic, offer and transaction columns.
# NOTE(review): unpickling can execute arbitrary code; only load files
# produced by a trusted step of this project.
df = pd.read_pickle(
    filepath_or_buffer=r'data\04_fct\fct_demographic_offers_and_transactions.pkl',
)

# Preview the first five rows
df.head(n=5)

Unnamed: 0_level_0,age,income,days_as_member,gender_F,gender_M,offer_viewed,offer_completed,viewed_before_completion,difficulty,reward,duration_hrs,mobile,social,web,is_bogo,is_discount,total_transactions,total_transaction_amount
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0610b486422d4921ae7d2bf64640c50b,55,112000,376,1,0,0,1,0,5,5,168,1,0,1,1,0,1,23.22
0610b486422d4921ae7d2bf64640c50b,55,112000,376,1,0,0,0,0,0,0,96,1,0,1,0,0,0,0.0
78afa995795e4d85b5d9ceeca43f5fef,75,100000,443,1,0,1,1,1,5,5,168,1,0,1,1,0,1,19.89
78afa995795e4d85b5d9ceeca43f5fef,75,100000,443,1,0,1,0,0,0,0,72,1,1,0,0,0,0,0.0
78afa995795e4d85b5d9ceeca43f5fef,75,100000,443,1,0,1,1,1,10,10,168,1,1,0,1,0,1,21.72


In [18]:
### Segment Customers ###
# Demographic columns used as clustering inputs (defined once, reused below)
demographic_cols = ['age', 'income', 'days_as_member', 'gender_F', 'gender_M']

# Extract demographic features for clustering
demographic_features = df[demographic_cols]

# Standardize the features to zero mean / unit variance so that columns on
# large scales (e.g. income) do not dominate the distance computation
scaler = StandardScaler()
demographic_features_scaled = scaler.fit_transform(demographic_features)

# Apply K-means clustering.
# n_init is set explicitly: its default differs between scikit-learn releases
# (and omitting it raises a FutureWarning on some), so leaving it implicit
# makes the segment assignment depend on the installed version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
df['segment'] = kmeans.fit_predict(demographic_features_scaled)

# NOTE(review): df appears to hold one row per customer/offer (customer_id
# repeats in the preview), so customers with more offers carry more weight in
# both the scaler and the clustering -- confirm this is intended.

# Display the first few rows with the segment labels
df[demographic_cols + ['segment']].head()


Unnamed: 0_level_0,age,income,days_as_member,gender_F,gender_M,segment
customer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0610b486422d4921ae7d2bf64640c50b,55,112000,376,1,0,0
0610b486422d4921ae7d2bf64640c50b,55,112000,376,1,0,0
78afa995795e4d85b5d9ceeca43f5fef,75,100000,443,1,0,0
78afa995795e4d85b5d9ceeca43f5fef,75,100000,443,1,0,0
78afa995795e4d85b5d9ceeca43f5fef,75,100000,443,1,0,0


In [19]:
# Profile each segment: per-segment mean of every demographic feature,
# rounded to two decimals for display
cluster_characteristics = (
    df.groupby('segment')[['age', 'income', 'days_as_member', 'gender_F', 'gender_M']]
    .mean()
    .round(2)
)

# Show the resulting segment profile table
cluster_characteristics

Unnamed: 0_level_0,age,income,days_as_member,gender_F,gender_M
segment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,65.98,86377.77,527.98,0.98,0.0
1,52.19,61268.48,529.31,0.0,0.99
2,46.8,52418.94,488.88,0.97,0.0


In [20]:
# Offer attributes that together identify one offer configuration
offer_cols = ['is_bogo', 'is_discount', 'reward', 'difficulty', 'duration_hrs']

# Group data by segment and offer attributes, then calculate the response rates.
# Named aggregation produces flat, final column names directly, so no separate
# flatten/rename step is needed. (The previous rename map used the key
# 'offer_viewed_before_completion_mean', which never matched the flattened
# column 'viewed_before_completion_mean', so that column was never renamed.)
response_data = (
    df.groupby(['segment'] + offer_cols)
    .agg(
        viewed_rate=('offer_viewed', 'mean'),
        completion_rate=('offer_completed', 'mean'),
        offers_completed=('offer_completed', 'sum'),
        viewed_before_completion_rate=('viewed_before_completion', 'mean'),
    )
    .reset_index()
)

# Identify the top segment for each offer configuration: sort by completion
# rate (highest first) and keep the first row of every offer group
top_segments = (
    response_data
    .sort_values(by='completion_rate', ascending=False)
    .groupby(offer_cols)
    .head(1)
)

# Persist the result; the last column is now exported as
# 'viewed_before_completion_rate'
top_segments.to_csv(r'data\04_fct\fct_top_offer_response_segments.csv', index=False)
top_segments

Unnamed: 0,segment,is_bogo,is_discount,reward,difficulty,duration_hrs,viewed_rate,completion_rate,offers_completed,viewed_before_completion_mean
3,0,0,1,2,10,240,0.973333,0.861961,1099,0.752157
4,0,0,1,3,7,168,0.969817,0.837967,1055,0.708499
9,0,1,0,10,10,168,0.821319,0.776074,1012,0.516104
7,0,1,0,5,5,168,0.606583,0.774295,988,0.387931
6,0,1,0,5,5,120,0.973258,0.766613,946,0.641005
2,0,0,1,2,10,168,0.618267,0.753318,965,0.370023
8,0,1,0,10,10,120,0.968069,0.729751,937,0.61838
5,0,0,1,5,20,240,0.420155,0.720155,929,0.262016
21,2,0,0,0,0,96,0.562092,0.0,0,0.0
20,2,0,0,0,0,72,0.955869,0.0,0,0.0
