# Customer Segmentation

In [92]:
import numpy as np
import pandas as pd
import time
from datetime import datetime,timedelta
import warnings
from apyori import apriori
import ast
warnings.filterwarnings("ignore") 

In [93]:
# Load the transformed point-of-sale data.
# (An earlier revision read "../updated_pos_no2.csv" instead.)
df = pd.read_csv('Transformed_data.csv')

# Drop the leading column when its name contains 'Unnamed'
# (presumably a row index that was written out with the CSV - verify).
leading_column = df.columns[0]
if 'Unnamed' in leading_column:
    df = df.drop(columns=[leading_column])

In [94]:
# Parse the purchase dates so that date arithmetic operates on real timestamps.
df['Date'] = pd.to_datetime(df['Date'])

# Reference point for recency: one day after the most recent purchase in the data.
snapshot_day = df['Date'].max() + timedelta(days=1)

# Whole days between each row's purchase date and the snapshot day.
# `.dt.days` is used instead of `.astype('timedelta64[D]')`: pandas >= 2.0 only
# supports the 's', 'ms', 'us' and 'ns' timedelta resolutions and raises
# TypeError for the day unit.
days_ago_series = (snapshot_day - df['Date']).dt.days.astype(int)

In [95]:
# Per-customer RFM table, built in a single grouped pass:
#   Recency   - snapshot_day minus the customer's latest 'Date'
#   Frequency - count of non-null 'Bill No' values among the customer's rows
#   Monetary  - sum of the customer's 'Amount' values
# NOTE(review): 'count' counts rows; if a bill spans several rows this is the
# number of line items rather than the number of distinct bills - confirm intent.
RFM = df.groupby('Customer ID').agg(
    Recency=('Date', lambda purchase_dates: snapshot_day - purchase_dates.max()),
    Frequency=('Bill No', 'count'),
    Monetary=('Amount', 'sum'),
)

In [96]:
# Quartile labels. Recency is labelled 4 -> 1 so that the smallest recency
# (the most recent buyers) receives the highest score; frequency and monetary
# are labelled 1 -> 4 so that larger values score higher.
recency_labels = range(4, 0, -1)
Frequency_labels = range(1, 5)
Monetary_labels = range(1, 5)

# Bin each RFM dimension into quartiles and store the label as its score.
for score_column, source_column, quartile_labels in (
    ('R_score', 'Recency', recency_labels),
    ('F_score', 'Frequency', Frequency_labels),
    ('M_score', 'Monetary', Monetary_labels),
):
    RFM[score_column] = pd.qcut(RFM[source_column], q=4, labels=quartile_labels)

score_columns = ['R_score', 'F_score', 'M_score']

# Overall score: the sum of the three quartile scores.
RFM['rfm_total_score'] = RFM[score_columns].astype(int).sum(axis=1)

# Segment code in the form "R.F.M", e.g. "4.3.1".
r_text, f_text, m_text = (RFM[column].astype(str) for column in score_columns)
RFM_Segments = r_text + '.' + f_text + '.' + m_text

RFM['Segments'] = RFM_Segments

In [97]:
def group_function(df):
    """Map one RFM row to its customer group name.

    Parameters
    ----------
    df : mapping-like row
        Must provide 'rfm_total_score'; 'F_score' is read only when the
        total score alone does not decide the group.

    Returns
    -------
    str
        "Elite Customers"           if total score > 9
        'Frequent Customers'        if total score > 5 and F_score == 4
        'Rare Customer'             if total score <= 5 and F_score == 1
        "Cost-conscious Customers"  if total score <= 5 otherwise
        'Moderate Value Customers'  in every remaining case
    """
    total_score = df['rfm_total_score']

    if total_score > 9:
        return "Elite Customers"

    if total_score > 5 and df['F_score'] == 4:
        return 'Frequent Customers'

    if total_score <= 5:
        # Low overall score: the least frequent buyers are split out as rare.
        return 'Rare Customer' if df['F_score'] == 1 else "Cost-conscious Customers"

    return 'Moderate Value Customers'

In [98]:
# Label every customer with a group derived from its RFM scores.
RFM['Group'] = RFM.apply(group_function, axis=1)

# Move 'Customer ID' from the index into a regular column, once.
# Checking for the column itself (rather than for an exact column count) keeps
# this cell re-runnable and keeps it working if RFM gains or loses columns.
if 'Customer ID' not in RFM.columns:
    RFM = RFM.reset_index()

# Customer -> group lookup table, also persisted for downstream use.
customer_segments_df = RFM[['Customer ID', 'Group']].reset_index(drop=True)
customer_segments_df.to_csv('customer_segments.csv')

# Apriori

# Data grouping and transformation (Customer Segmentation)

In [99]:
# Attach each customer's group to every row of the source data.
merged_df = pd.merge(df, customer_segments_df, on='Customer ID', how='outer')


def _segment_baskets(segment_name):
    """Return (rows, baskets) for one customer group.

    rows    - the rows of merged_df whose 'Group' equals segment_name
    baskets - one row per 'Bill No' holding the list of that bill's
              'Sub Category Name' values; the 'Bill No' column is dropped
    """
    segment_rows = merged_df[merged_df['Group'] == segment_name]
    baskets = (
        segment_rows.groupby('Bill No')['Sub Category Name']
        .apply(list)
        .reset_index()
        .drop(columns='Bill No')
    )
    return segment_rows, baskets


elite, elite_transactions = _segment_baskets('Elite Customers')
costcon, costcon_transactions = _segment_baskets('Cost-conscious Customers')
freq, freq_transactions = _segment_baskets('Frequent Customers')

# Variables for global usage and finalized output

In [100]:
# Parallel lists shared across apriori_rg() calls: position k of each list
# describes the same rule (antecedent, consequent, confidence, lift).
all_consequents = []
all_antecedents = []
all_conf = []
all_lift = []

In [101]:
def _format_item_set(items):
    """Render an item set as "'A', 'B'": comma-separated reprs in the set's iteration order."""
    return ", ".join(repr(item) for item in items)


def _keep_best_rule(antecedents, consequents, confidences, lifts,
                    antecedent, consequent, confidence, lift):
    """Keep one rule per consequent in the parallel lists: the one with the highest confidence."""
    if consequent not in consequents:
        antecedents.append(antecedent)
        consequents.append(consequent)
        confidences.append(confidence)
        lifts.append(lift)
        return

    # The position is looked up in the very list being updated, so the four
    # parallel lists can never get out of step with each other.
    position = consequents.index(consequent)
    if confidence > confidences[position]:
        antecedents[position] = antecedent
        confidences[position] = confidence
        lifts[position] = lift


def apriori_rg(dataset, min_support_ = 0.01, min_confidence_ = 0.7, min_lift_ = 1.5):
    """Mine association rules from one segment's baskets and print the strongest ones.

    For every distinct consequent only the rule with the highest confidence is
    kept. The kept rules are printed, and the module-level lists
    all_antecedents / all_consequents / all_conf / all_lift are updated so that
    they hold the highest-confidence rule per consequent across all calls.

    Parameters
    ----------
    dataset : DataFrame-like
        Must have a 'Sub Category Name' column whose values are lists of items
        (one list per transaction).
    min_support_ : float, default 0.01
        Minimum support passed to apriori.
    min_confidence_ : float, default 0.7
        Minimum confidence passed to apriori.
    min_lift_ : float, default 1.5
        Minimum lift passed to apriori.

    Returns
    -------
    None
        Results are printed and accumulated in the module-level lists.
    """
    transactions = list(dataset['Sub Category Name'])
    rules = list(apriori(transactions, min_support = min_support_,
                         min_confidence = min_confidence_, min_lift = min_lift_))

    print(f"*** Total number of rules formulated: {len(rules)} ***")

    # Rules kept for this call only (parallel lists, one entry per consequent).
    antecedents, consequents, consequent_conf, consequent_lift = [], [], [], []

    for rule in rules:
        # NOTE(review): only the first entry of rule[2] is considered
        # (presumably the record's first ordered statistic); confirm that
        # further entries of the same record can safely be ignored.
        statistic = rule[2][0]
        base_item_set = _format_item_set(statistic.items_base)
        add_item_set = _format_item_set(statistic.items_add)
        confidence = float(round(statistic.confidence * 100, 2))  # percent
        lift = float(round(statistic.lift, 2))

        _keep_best_rule(antecedents, consequents, consequent_conf, consequent_lift,
                        base_item_set, add_item_set, confidence, lift)
        # The global lists are updated through their own consequent lookup.
        # Reusing the call-local position here would overwrite an unrelated
        # rule (or raise IndexError) once several segments have been processed.
        _keep_best_rule(all_antecedents, all_consequents, all_conf, all_lift,
                        base_item_set, add_item_set, confidence, lift)

    print(f"*** Total number of strong rules: {len(consequents)} ***")
    print("Market Basket Analysis Rules", end="\n\n")
    for antecedent, consequent, conf, lift in zip(antecedents, consequents,
                                                  consequent_conf, consequent_lift):
        print(f"Item set 1: {antecedent}\nItem set 2: {consequent}\nconfidence = {conf}%\nlift = {lift}", end="\n\n")

# Generate rules for elite, cost-conscious and frequent customers.

In [102]:
# Run the rule mining once per segment, in this order: elite, cost-conscious, frequent.
dfs_list_ap = [elite_transactions, costcon_transactions, freq_transactions]
for segment_transactions in dfs_list_ap:
    apriori_rg(segment_transactions)

*** Total number of rules formulated: 1060 ***
*** Total number of strong rules: 7 ***
Market Basket Analysis Rules

Item set 1: 'BISCUITS', 'JUICES', 'COLD & FROZEN FOODS'
Item set 2: 'DAIRY'
confidence = 81.82%
lift = 1.7

Item set 1: 'DAIRY', 'TOILET BATHROOM CLEANERS', 'DISPOSABLES/ PARTY SUPPLIES'
Item set 2: 'FLOOR CLEANERS'
confidence = 80.3%
lift = 1.85

Item set 1: 'TEA', 'NAMKEENS', 'OIL'
Item set 2: 'BISCUITS'
confidence = 83.31%
lift = 3.25

Item set 1: 'WHOLE CEREALS', 'WHOLE SPICES', 'SUGAR'
Item set 2: 'DAL&PULSES'
confidence = 79.44%
lift = 6.23

Item set 1: 'DAL&PULSES', 'WHOLE CEREALS', 'SUGAR'
Item set 2: 'FLOURS'
confidence = 75.7%
lift = 4.56

Item set 1: 'DAL&PULSES', 'TEA', 'SUGAR'
Item set 2: 'OIL'
confidence = 70.38%
lift = 6.05

Item set 1: 'DETERGENTS', 'TEA', 'OIL'
Item set 2: 'UTENSIL SOAPS/SCRUBBER'
confidence = 70.18%
lift = 8.02

*** Total number of rules formulated: 290 ***
*** Total number of strong rules: 13 ***
Market Basket Analysis Rules

Item set 

# Model artifact naming convention generation

In [103]:
# Timestamped artifact name of the form "<YYYY-MM-DD>--<HH-MM>_MBA_rules".
dt_now = str(datetime.now())
list_dt = dt_now.split()

cur_date = list_dt[0]
# Keep hours and minutes only, joined with '-' instead of ':'.
curr_time = list_dt[1][:5].replace(':', '-')

name_conv = "--".join((cur_date, curr_time)) + "_MBA_rules"

# CSV conversion (OUTPUT)

In [104]:
# One row per accumulated rule: [antecedent, consequent, confidence, lift].
csv_conv = [
    [antecedent, consequent, confidence_, lift_]
    for antecedent, consequent, confidence_, lift_ in zip(
        all_antecedents, all_consequents, all_conf, all_lift
    )
]

# Stored under its own name so that the source data frame `df` is not
# overwritten; earlier cells can therefore still be re-run after this one.
rules_df = pd.DataFrame(csv_conv, columns=['Antecedent', 'Consequent', 'Confidence', 'Lift'])
rules_df.to_csv(f'{name_conv}.csv')