# Environment Setup

In [68]:
import random 
import numpy as np
import pandas as pd
import os
import sys
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import warnings
# Silence every warning for the rest of the session.
# NOTE: this also hides pandas deprecation / FutureWarning messages.
warnings.simplefilter("ignore")
# Never truncate wide frames, and render floats with zero decimal places.
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', '{:.0f}'.format)

import seaborn as sb


from sklearn.model_selection import train_test_split 
from sklearn.metrics import precision_recall_curve,roc_auc_score,fbeta_score,recall_score
from sklearn.metrics import plot_precision_recall_curve,average_precision_score,f1_score
from sklearn.metrics import plot_roc_curve
from sklearn import metrics


# Data preparation

In [69]:
# Load the raw transaction export and discard the 'Unnamed: 0' column
# (presumably a row index that was saved with the file — verify).
trans = pd.read_csv('Data/transaction_df.csv').drop(columns=['Unnamed: 0'])

In [70]:
# Count missing values in every column.
trans.isnull().sum()

id                            0
dcs_cust_acct_id              0
pos_busn_dt                   0
mcd_gbal_lcat_id_nu           0
pos_ord_key_id                0
fulfillment_channel           0
pos_tot_net_trn_am            0
pos_tot_tray_itm_qt           0
tot_pnt_earn_cnt_qt      304491
pnt_burn_cnt_qt         5086843
bas_pnt_earn_cnt_qt      307482
bonu_pnt_earn_cnt_qt    4985690
mobl_ord_fl                   0
dtype: int64

In [71]:
# Replace every missing value in the frame with 0.
trans = trans.fillna(value=0)

In [72]:
# Give the raw source columns readable names.
trans = trans.rename(
    columns={
        'dcs_cust_acct_id': 'customer_id',
        'pos_ord_key_id': 'order_id',
        'mcd_gbal_lcat_id_nu': 'restaurant_id',
        'pos_busn_dt': 'date',  # transaction date
        'fulfillment_channel': 'channel',
        'mobl_ord_fl': 'mobile_order',
        'pos_tot_net_trn_am': 'transaction_amount',
        'pos_tot_tray_itm_qt': '#items_purchased',
        'tot_pnt_earn_cnt_qt': 'total_points_earned',
        'bas_pnt_earn_cnt_qt': 'base_points_earned',
        'bonu_pnt_earn_cnt_qt': 'bonus_points_earned',
        'pnt_burn_cnt_qt': 'total_points_burned',
    }
)

In [73]:
# Flip the sign of the burned-points column (computed as 0 - value).
# Caution: this cell is not idempotent — running it twice flips the sign back.
trans['total_points_burned'] = trans['total_points_burned'].rsub(0)

In [74]:
# Parse the transaction date once, then derive the calendar parts from it.
trans['date'] = pd.to_datetime(trans['date'])
date_parts = trans['date'].dt
trans['day'] = date_parts.day
trans['month'] = date_parts.month
trans['year'] = date_parts.year
# Despite the column name, this holds the day of the week (pandas: Monday=0).
trans['weekofday'] = date_parts.dayofweek


In [75]:
# Columns carried forward from the raw transactions into the modelling frame.
feature = [
    # identifiers and channel
    'id',
    'order_id',
    'restaurant_id',
    'channel',
    # calendar fields
    'date',
    'year',
    'month',
    'weekofday',
    'day',
    # order size
    'transaction_amount',
    '#items_purchased',
    # loyalty points
    'total_points_earned',
    'total_points_burned',
    'base_points_earned',
    'bonus_points_earned',
    # mobile-order flag
    'mobile_order',
]

In [76]:
# Keep only the selected feature columns, in the order listed in `feature`.
df = trans.loc[:, feature]

In [77]:
# Preview the first five rows of the selected features.
df.head(5)

Unnamed: 0,id,order_id,restaurant_id,channel,date,year,month,weekofday,day,transaction_amount,#items_purchased,total_points_earned,total_points_burned,base_points_earned,bonus_points_earned,mobile_order
0,21879,POS0012:31703987,195500337555,Drive Thru,2021-07-01,2021,7,3,1,24,11,2352,0,2352,0,0
1,69320,POS0002:1058249158,195500296326,Front Counter,2021-07-01,2021,7,3,1,0,1,0,0,0,0,0
2,198919,POS0014:1063605199,195500286556,Drive Thru,2021-07-01,2021,7,3,1,12,7,1159,0,1159,0,1
3,200321,POS0012:277600577,195500321196,Drive Thru,2021-07-01,2021,7,3,1,6,4,618,0,618,0,0
4,121937,FOE0016:880040023,195500337555,Front Counter,2021-07-01,2021,7,3,1,0,1,0,1500,0,0,1


In [14]:
# Summary statistics, one row per column, rounded to two decimals.
# The global float_format set during setup still renders them without decimals.
df.describe().T.round(2)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
id,5727509,131849,76583,1,65897,131307,197714,265589
restaurant_id,5727509,195500327431,138580,195500017655,195500248332,195500286747,195500327372,195500893747
year,5727509,2021,0,2021,2021,2021,2021,2022
month,5727509,9,3,1,8,9,11,12
weekofday,5727509,3,2,0,1,3,5,6
day,5727509,15,9,1,8,15,22,31
transaction_amount,5727509,6,6,0,2,5,8,1000
#items_purchased,5727509,4,3,0,2,3,5,888
total_points_earned,5727509,733,773,0,209,500,958,107270
total_points_burned,5727509,-438,1364,-18000,0,0,0,0


# Feature Engineering


## Total Feature:
    - total_freq: buying frequency for each customer (F feature)
    - sum_transaction_amount: total monetary spend for each customer (M feature)
    - sum_total_points_earned
    - sum_base_points_earned
    - sum_bonus_points_earned 
    - sum_total_points_burned



In [78]:
# Total buying frequency for each customer: count the orders per id, then
# attach that count to every transaction row of the same id.
total_freq = (
    df.groupby('id')['order_id']
    .count()
    .rename('total_freq')
    .to_frame()
)
df = df.merge(total_freq, how='left', on='id')


In [79]:
# Total money spent and points earned / burned for each customer.
# The columns are selected with a list: indexing a GroupBy with a bare tuple
# of names was deprecated in pandas 1.0 and raises in pandas 2.0.
monetary_source_columns = ['transaction_amount',
                           'total_points_earned',
                           'base_points_earned',
                           'bonus_points_earned',
                           'total_points_burned']
total_monetary = df.groupby('id')[monetary_source_columns].sum()

# Output names, in the same order as the source columns above.
monetary_feature = ['sum_transaction_amount',
                    'sum_total_points_earned',
                    'sum_base_points_earned',
                    'sum_bonus_points_earned',
                    'sum_total_points_burned']
total_monetary.columns = monetary_feature

# Attach the per-customer totals to every transaction row of that customer.
df = pd.merge(df, total_monetary,
              how='left',
              on=['id'])

In [80]:
# Average money per order (labelled the M feature here):
# total spend divided by the number of orders.
df['avg_money_per_order'] = df['sum_transaction_amount'].div(df['total_freq'])



## Daily Feature
    - describes the daily frequency, transaction amount, items purchased, and points earned/burned for each customer


In [81]:
# Daily money spent, items purchased, and points earned / burned for each
# customer: one row per (id, date) pair.
# The columns are selected with a list: indexing a GroupBy with a bare tuple
# of names was deprecated in pandas 1.0 and raises in pandas 2.0.
daily_source_columns = ["transaction_amount",
                        "#items_purchased",
                        "total_points_earned",
                        "total_points_burned",
                        "base_points_earned",
                        "bonus_points_earned"]
daily_sum = trans.groupby(['id', 'date'])[daily_source_columns].sum()


In [82]:
# Names for the per-day aggregates: each source column prefixed with "daily_",
# listed in the same order the columns were aggregated.
daily_feature = [
    "daily_" + base_name
    for base_name in (
        "transaction_amount",
        "#items_purchased",
        "total_points_earned",
        "total_points_burned",
        "base_points_earned",
        "bonus_points_earned",
    )
]

daily_sum.columns = daily_feature


In [83]:
# Turn the (id, date) index levels back into ordinary columns.
daily_df = daily_sum.reset_index(drop=False)

In [84]:
# Days between each purchase date and the same customer's previous purchase
# date; the first date of every customer has no predecessor and stays NaN.
# The grouped diff() returns a result aligned to daily_df's own index, unlike
# groupby.apply, whose result index depends on `group_keys` and the pandas
# version. It relies on the rows being ordered by date within each id, which
# the preceding groupby(['id', 'date']) aggregation provides.
daily_df['days_since_last_purchase'] = daily_df.groupby('id')['date'].diff().dt.days


In [86]:
# Attach the per-day aggregates to every transaction of the same id and date.
df = df.merge(daily_df, how='left', on=['id', 'date'])

In [87]:
# One 0/1 indicator column per fulfilment channel.
# Vectorised comparisons replace the row-by-row Python lambdas; the result is
# the same integer flags, including 0 for rows whose channel is missing.
df['Drive_Thru'] = df['channel'].eq('Drive Thru').astype('int64')
df['Front_Counter'] = df['channel'].eq('Front Counter').astype('int64')
df['Curbside'] = df['channel'].eq('Curbside').astype('int64')

# Average Feature

In [66]:
# Optional resume point: load a previously saved frame only when no `df` has
# been built in this session. The file name is the one the final cell of this
# notebook writes, so an unconditional read during a top-to-bottom run would
# replace the freshly engineered `df` with older contents (or fail if the file
# does not exist yet).
# NOTE(review): a file saved after the cells below already contains their
# columns; re-running them on it would create duplicate columns — confirm
# which snapshot this was meant to load.
if 'df' not in globals():
    df = pd.read_csv("Engineered Transaction Data.csv")

In [88]:
# List the columns available before the average features are added.
df.columns

Index(['id', 'order_id', 'restaurant_id', 'channel', 'date', 'year', 'month',
       'weekofday', 'day', 'transaction_amount', '#items_purchased',
       'total_points_earned', 'total_points_burned', 'base_points_earned',
       'bonus_points_earned', 'mobile_order', 'total_freq',
       'sum_transaction_amount', 'sum_total_points_earned',
       'sum_base_points_earned', 'sum_bonus_points_earned',
       'sum_total_points_burned', 'avg_money_per_order',
       'daily_transaction_amount', 'daily_#items_purchased',
       'daily_total_points_earned', 'daily_total_points_burned',
       'daily_base_points_earned', 'daily_bonus_points_earned',
       'days_since_last_purchase', 'Drive_Thru', 'Front_Counter', 'Curbside'],
      dtype='object')

In [89]:
# Mean gap in days between purchases for each customer, attached to every row
# of that customer. The mean is taken over transaction rows and skips NaN.
avg_recency = (
    df.groupby('id')['days_since_last_purchase']
    .mean()
    .rename('avg_recency')
    .to_frame()
)
df = df.merge(avg_recency, how='left', on='id')

In [90]:
# Customers with no computable gap (no earlier purchase date) have a missing
# avg_recency; fill it with 300 days.
# Assigned back explicitly: an inplace fillna on `df.avg_recency` operates on
# an intermediate Series and is not guaranteed to update `df` (it does nothing
# under pandas Copy-on-Write).
# NOTE(review): 300 is an unexplained sentinel — confirm the intended value.
df['avg_recency'] = df['avg_recency'].fillna(300)

In [91]:
# Earliest purchase date for each customer, attached to every row of that customer.
first_day = (
    df[['id', 'date']]
    .groupby('id')
    .min()
    .reset_index()
    .rename(columns={'date': 'first_day'})
)
df = df.merge(first_day, how='left', on='id')

In [92]:
# Latest purchase date for each customer.
last_day = (
    df[['id', 'date']]
    .groupby('id')
    .max()
    .reset_index()
    .rename(columns={'date': 'last_day'})
)
# The inner join on (id, date) == (id, last_day) keeps only the transactions
# made on each customer's last purchase date; a customer with several orders
# that day keeps several rows.
df = df.merge(
    last_day,
    how="inner",
    left_on=['id', 'date'],
    right_on=['id', 'last_day'],
)


In [98]:
# Preview the engineered frame. The stored output also shows total_day and
# avg_order_per_day, which are only created in the cells further down, so it
# was produced after those cells had run.
df.head(5)

Unnamed: 0,id,order_id,restaurant_id,channel,date,year,month,weekofday,day,transaction_amount,#items_purchased,total_points_earned,total_points_burned,base_points_earned,bonus_points_earned,mobile_order,total_freq,sum_transaction_amount,sum_total_points_earned,sum_base_points_earned,sum_bonus_points_earned,sum_total_points_burned,avg_money_per_order,daily_transaction_amount,daily_#items_purchased,daily_total_points_earned,daily_total_points_burned,daily_base_points_earned,daily_bonus_points_earned,days_since_last_purchase,Drive_Thru,Front_Counter,Curbside,avg_recency,first_day,last_day,total_day,avg_order_per_day
0,261954,POS0001:719946723,195500337555,Front Counter,2021-07-02,2021,7,4,2,6,4,579,0,579,0,0,1,6,579,579,0,0,6,6,4,579,0,579,0,,0,1,0,300,2021-07-02,2021-07-02,1,1
1,81120,POS0012:669023567,195500295046,Drive Thru,2021-07-01,2021,7,3,1,23,11,0,0,0,0,1,1,23,0,0,0,0,23,23,11,0,0,0,0,,1,0,0,300,2021-07-01,2021-07-01,1,1
2,127457,FOE0019:988543733,195500266329,Curbside,2021-07-04,2021,7,6,4,0,1,0,1500,0,0,1,2,4,399,399,0,1500,2,4,4,399,1500,399,0,,0,0,1,300,2021-07-04,2021-07-04,1,2
3,127457,FOE0019:1092875964,195500266329,Curbside,2021-07-04,2021,7,6,4,4,3,399,0,399,0,1,2,4,399,399,0,1500,2,4,4,399,1500,399,0,,0,0,1,300,2021-07-04,2021-07-04,1,2
4,184564,POS0012:12375165,195500330037,Drive Thru,2021-07-04,2021,7,6,4,5,3,467,0,467,0,1,1,5,467,467,0,0,5,5,3,467,0,467,0,,1,0,0,300,2021-07-04,2021-07-04,1,1


In [93]:
# Length of each customer's purchase window in days, counting both the first
# and the last purchase date (hence the +1).
# Computed in a single expression so the cell can be re-run safely: the earlier
# three-step version added 1 again on every re-run of its last step and failed
# on `.dt` once the column already held integers.
df['total_day'] = (
    pd.to_datetime(df['last_day']) - pd.to_datetime(df['first_day'])
).dt.days + 1

In [96]:
# Average money per order: total spend divided by the number of orders.
# Same formula as the earlier avg_money_per_order cell.
df['avg_money_per_order'] = df['sum_transaction_amount'].div(df['total_freq'])

In [97]:
# Average orders per day: number of orders divided by the length of the
# customer's purchase window in days.
df['avg_order_per_day'] = df['total_freq'].div(df['total_day'])

In [99]:
# Save the engineered frame without the row index.
df.to_csv(path_or_buf="Engineered Transaction Data.csv", index=False)