In [1]:
import pandas as pd
import datetime
# Load the raw transaction-level table that the cells below aggregate per customer.
marketing = pd.read_csv(filepath_or_buffer="transaction_table.csv")

In [47]:
# Distinct customers present in the transaction table (missing ids are not counted)
marketing.loc[:, "cust_id"].nunique(dropna=True)

7920

# Frequency

In [None]:
#1 Visit frequency: number of distinct (customer, date, store) combinations per customer.
#   Each such combination is treated as one transaction/visit.
visitfreq = (
    marketing
    .drop_duplicates(subset=["cust_id", "tran_dt", "store_id"])
    .groupby("cust_id")
    .size()
    .rename("visitfreq")
    .to_frame()
)

In [None]:
#2 Store loyalty: number of distinct stores each customer appears in
#   (a low count suggests a loyal customer).
visitstore = (
    marketing
    .drop_duplicates(subset=["cust_id", "store_id"])
    .groupby("cust_id")
    .size()
    .rename("visitstore")
    .to_frame()
)

In [None]:
#3 Basket breadth (Family vs Single): average number of distinct products per visit.
#   Duplicate (customer, date, store, product) rows are collapsed first so that a
#   product is counted once per visit.
avgproduct = (
    marketing
    .drop_duplicates(subset=["cust_id", "tran_dt", "store_id", "prod_id"])
    .groupby(["cust_id", "tran_dt", "store_id"])
    .size()                      # distinct products in each visit
    .groupby(level="cust_id")
    .mean()                      # averaged over the customer's visits
    .to_frame("avgproduct")
)

# Monetary

In [None]:
#4 Share of the sale amount actually paid (cherry-pickers).
#   Per visit: sum(paid amount) / sum(sale amount); then averaged over each
#   customer's visits.
pctdiscount = (
    marketing
    .groupby(["cust_id", "tran_dt", "store_id"])[["tran_prod_sale_amt", "tran_prod_paid_amt"]]
    .sum()
    .assign(pctpaid=lambda visit: visit["tran_prod_paid_amt"] / visit["tran_prod_sale_amt"])
    .reset_index()
)
pctpaid = pctdiscount.groupby("cust_id")[["pctpaid"]].mean()

In [12]:
#5 Percentage of promotion items per transaction (cherry-pickers) - step 1
#   Total discount amount per (customer, date, store, product), so every product
#   appears exactly once per visit in the result.
marketsum = (
    marketing
    .groupby(["cust_id", "tran_dt", "store_id", "prod_id"])[["tran_prod_discount_amt"]]
    .sum()
    .reset_index()
)

In [None]:
#5 Percentage of promotion items per transaction (cherry-pickers) - step 2
#   promoprod: products per visit whose summed discount is non-zero
#   totalprod: all products per visit
promoprod = (
    marketsum.loc[marketsum["tran_prod_discount_amt"].ne(0)]
    .groupby(["cust_id", "tran_dt", "store_id"])
    .size()
    .to_frame("promoprod")
)
totalprod = (
    marketsum
    .groupby(["cust_id", "tran_dt", "store_id"])
    .size()
    .to_frame("totalprod")
)
# Visits without any discounted product are absent from promoprod; after the
# column-wise concat they show up as NaN and are turned into 0.
promotion = (
    pd.concat([promoprod, totalprod], axis=1)
    .fillna(0)
    .reset_index()
)

In [None]:
#5 Percentage of promotion items per transaction (cherry-pickers) - step 3
#   Per-visit share of discounted products, then the mean of that share per customer.
promotion["pctpromo"] = promotion["promoprod"].div(promotion["totalprod"])
pctpromo = promotion.groupby("cust_id")[["pctpromo"]].mean()

In [None]:
#6 Spend level (VIP customer): sale amount summed per visit, then averaged over
#   each customer's visits. Note this is a mean per visit, not a lifetime total.
totalspent = (
    marketing
    .groupby(["cust_id", "tran_dt", "store_id"])[["tran_prod_sale_amt"]]
    .sum()
    .groupby(level="cust_id")
    .mean()
    .rename(columns={"tran_prod_sale_amt": "dollarspent"})
)

# Date

In [3]:
# Derive a three-letter weekday label ("Mon" ... "Sun") from the transaction date.
# Parsing is vectorized instead of a per-row Python strptime loop, and
# day_name() returns English names by default, so the "Sat"/"Sun" comparison in
# the weekend cell does not depend on the machine's locale (unlike strftime('%a')).
marketing["weekday"] = (
    pd.to_datetime(marketing["tran_dt"], format="%Y-%m-%d")
    .dt.day_name()
    .str[:3]
)
marketing.head()

Unnamed: 0,cust_id,tran_id,tran_dt,store_id,prod_id,prod_unit,tran_prod_sale_amt,tran_prod_sale_qty,tran_prod_discount_amt,tran_prod_offer_cts,tran_prod_paid_amt,prod_unit_price,weekday
0,139662,2.01711e+18,2017-11-03,584,145519008,CT,2.89,4.0,0.0,0,2.89,0.7225,Fri
1,799924,2.017111e+18,2017-11-12,349,145519008,CT,2.89,4.0,-1.45,1,1.44,0.7225,Sun
2,1399898,2.017102e+18,2017-10-21,684,145519008,CT,2.89,4.0,-1.45,1,1.44,0.7225,Sat
3,1399898,2.017111e+18,2017-11-11,684,145519008,CT,2.89,4.0,-1.45,1,1.44,0.7225,Sat
4,1399898,2.017121e+18,2017-12-05,684,145519008,CT,2.89,4.0,-1.45,1,1.44,0.7225,Tue


In [4]:
# Binary flag: 1 when the transaction falls on Saturday or Sunday, 0 otherwise.
# The flag is assigned for every row directly, so no frame-wide fillna(0) is
# needed; that call would also overwrite unrelated missing values in every
# other column of `marketing` with 0.
marketing["weekend"] = marketing["weekday"].isin(["Sat", "Sun"]).astype(int)

In [None]:
#7 Weekend revenue / total revenue, per customer
#   Sale amount is summed per visit and weekend flag, then per customer and flag.
revenue = (
    marketing
    .groupby(["cust_id", "tran_dt", "store_id", "weekend"])[["tran_prod_sale_amt"]]
    .sum()
    .groupby(["cust_id", "weekend"])
    .sum()
)
# One row per customer, one column per weekend flag (0 = weekday, 1 = weekend);
# a customer with no sales under a flag gets 0 for that column.
revenuepivot = (
    revenue["tran_prod_sale_amt"]
    .unstack("weekend")
    .rename_axis(None, axis=1)
    .fillna(0)
)
revenuepivot["pctwknddrevenue"] = revenuepivot[1].div(revenuepivot[0].add(revenuepivot[1]))
revenuepivot = revenuepivot.drop(columns=[0, 1])

In [None]:
#8 Weekend transactions / all transactions, per customer
#   A transaction is one distinct (customer, date, store) combination; the weekend
#   flag is included in the de-duplication key.
count = (
    marketing
    .drop_duplicates(subset=["cust_id", "tran_dt", "store_id", "weekend"])
    .groupby(["cust_id", "weekend"])
    .size()
    .to_frame("count")
)
# One row per customer, one column per weekend flag (0 = weekday, 1 = weekend);
# a customer with no visits under a flag gets 0 for that column.
countpivot = (
    count["count"]
    .unstack("weekend")
    .rename_axis(None, axis=1)
    .fillna(0)
)
countpivot["pctwkndcount"] = countpivot[1].div(countpivot[0].add(countpivot[1]))
countpivot = countpivot.drop(columns=[0, 1])

# Concat 

In [41]:
# Assemble the customer-level feature table: the per-customer frames are placed
# side by side and aligned on their cust_id index.
table = pd.concat(
    [
        visitfreq,
        visitstore,
        avgproduct,
        pctpaid,
        pctpromo,
        totalspent,
        revenuepivot,
        countpivot,
    ],
    axis="columns",
)
table

Unnamed: 0_level_0,visitfreq,visitstore,avgproduct,pctpaid,pctpromo,tran_prod_sale_amt,pctwknddrevenue,pctcount
cust_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
29568,308,8,14.009740,0.901689,0.221216,40.422403,0.396651,0.340909
29909,400,10,14.947500,0.887080,0.254816,34.107875,0.309145,0.280000
39774,278,3,11.593525,0.825846,0.422230,40.609856,0.220049,0.248201
39856,600,3,10.595000,0.897885,0.230729,17.708833,0.327015,0.306667
59984,367,8,9.438692,0.871152,0.281169,27.865341,0.096816,0.111717
109693,343,9,11.612245,0.845842,0.331276,41.767755,0.197355,0.174927
109959,435,4,11.540230,0.941118,0.147392,20.551954,0.295983,0.257471
119781,541,6,9.384473,0.849779,0.291160,17.201941,0.268167,0.279113
139662,443,4,14.713318,0.948524,0.167072,23.288962,0.119478,0.130926
169587,333,5,5.558559,0.888135,0.211724,22.214264,0.384951,0.264264


In [45]:
# Persist the results. The index is written in both files (pandas default):
# cust_id for the feature table, the row index for the transaction table.
table.to_csv("customer.csv", index=True)
marketing.to_csv("marketingwknd.csv", index=True)