# EXAMPLES FOR EXPLAINING METHODS AND VISUALIZATION

In [1]:
import pandas as pd
import numpy as np
import math
import json

%matplotlib inline

# Load the three datasets; each file is read as line-delimited JSON records
json_lines_opts = {'orient': 'records', 'lines': True}
portfolio = pd.read_json('data/portfolio.json', **json_lines_opts)
profile = pd.read_json('data/profile.json', **json_lines_opts)
transcript = pd.read_json('data/transcript.json', **json_lines_opts)

In [332]:
def id_mapper(df , column):
    '''
    Build a lookup that gives every distinct value of a column a consecutive integer code.

    Codes start at 1 and are handed out in order of first appearance in the column.
    The codes are plain labels: their numeric value carries no meaning.
    note: this function was meant for shortening user ids and offer ids, but that turned out
    not to be necessary; it is only used to name the types of offers.

    INPUT:
        df - (DataFrame) frame that holds the column
        column - (str) name of the column to encode
    OUTPUT:
        coded_dict - (dict) original column value as key, integer code (1, 2, 3, ...) as value
    '''
    # dict.fromkeys keeps one entry per distinct value, in first-seen order
    first_seen = dict.fromkeys(df[column])
    coded_dict = {value: code for code, value in enumerate(first_seen, start=1)}

    return coded_dict

In [308]:
def offer_type_mapper(df=portfolio):
    '''
    Create a data frame relating each offer id to its encoded offer type.

    INPUT:  df - (DataFrame) - offers table with 'id', 'offer_type' and 'duration' columns;
                 portfolio as default dataframe
    OUTPUT:
            offer_type - (DataFrame) - independent frame with the columns 'id',
                         'offer_type' (integer code) and 'duration'
            coded_dict - (dict) - original offer type as key, integer code as value (see id_mapper)

    '''
    # get sequence to name type of offers
    coded_dict = id_mapper(df, 'offer_type')

    # Encode only the offer_type column. Every value is a key of coded_dict by
    # construction, so map() leaves no gaps and yields a numeric column.
    coded_df = df.assign(offer_type=df['offer_type'].map(coded_dict))

    # Hand back a real copy: callers add and rename columns on the result, and a bare
    # column selection of coded_df would raise SettingWithCopyWarning when they do.
    offer_type = coded_df[['id' , 'offer_type' , 'duration']].copy()

    return offer_type , coded_dict

In [12]:
# Show the offers table that offer_type_mapper works on
portfolio

Unnamed: 0,reward,channels,difficulty,duration,offer_type,id
0,10,"[email, mobile, social]",10,7,bogo,ae264e3637204a6fb9bb56bc8210ddfd
1,10,"[web, email, mobile, social]",10,5,bogo,4d5c57ea9a6940dd891ad53e9dbe8da0
2,0,"[web, email, mobile]",0,4,informational,3f207df678b143eea3cee63160fa8bed
3,5,"[web, email, mobile]",5,7,bogo,9b98b8c7a33c4b65b9aebfe6a799e6d9
4,5,"[web, email]",20,10,discount,0b1e1539f2cc45b7b9fa7c272da2e1d7
5,3,"[web, email, mobile, social]",7,7,discount,2298d6c36e964ae4a3e7e9706d1fb8c2
6,2,"[web, email, mobile, social]",10,10,discount,fafdcd668e3743c1bb461111dcafc2a4
7,0,"[email, mobile, social]",0,3,informational,5a8bc65990b245e5a138643cd4eb9837
8,5,"[web, email, mobile, social]",5,5,bogo,f19421c1d4aa40978ebb69ca19b0e20d
9,2,"[web, email, mobile]",10,7,discount,2906b810c7d4411798c6938adc9daaa5


In [2]:
# One-hot encode gender, then discard the original gender column and the membership date
gender_dummies = pd.get_dummies(profile['gender'])
profile_mod = (
    pd.concat([profile, gender_dummies], axis=1)
    .drop(columns=['gender', 'became_member_on'])
)

In [309]:
# map offer type
offer_type_lookup, dict_offer_type = offer_type_mapper(portfolio)

# presumably 'duration' is given in days and event times in hours - TODO confirm
HOURS_PER_DAY = 24

# Derive a new frame instead of assigning into the returned one: this avoids
# SettingWithCopyWarning when that frame is a selection of another frame, and the
# cell gives the same result however often it is re-run.
map_offer_type = (
    offer_type_lookup
    .assign(duration=offer_type_lookup['duration'] * HOURS_PER_DAY)
    .rename(columns={'id': 'offer id'})
)

map_offer_type

Unnamed: 0,offer id,offer_type,duration
0,ae264e3637204a6fb9bb56bc8210ddfd,1,168
1,4d5c57ea9a6940dd891ad53e9dbe8da0,1,120
2,3f207df678b143eea3cee63160fa8bed,2,96
3,9b98b8c7a33c4b65b9aebfe6a799e6d9,1,168
4,0b1e1539f2cc45b7b9fa7c272da2e1d7,3,240
5,2298d6c36e964ae4a3e7e9706d1fb8c2,3,168
6,fafdcd668e3743c1bb461111dcafc2a4,3,240
7,5a8bc65990b245e5a138643cd4eb9837,2,72
8,f19421c1d4aa40978ebb69ca19b0e20d,1,120
9,2906b810c7d4411798c6938adc9daaa5,3,168


# arrange_events(df=transcript):

In [310]:
# Expand the dictionary stored in each 'value' entry into one column per key.
# The frame is built from the list of dicts in a single call; apply(pd.Series)
# would construct one Series per row, which is very slow on a large frame.
# NOTE(review): assumes every 'value' entry is a dict - confirm against the raw data.
value_columns = pd.DataFrame(transcript['value'].tolist(), index=transcript.index)
df_no = pd.concat([transcript.drop(columns=['value']), value_columns], axis=1)

In [311]:
# The offer identifier sits in two columns ('offer id' and 'offer_id');
# take 'offer id' where present, otherwise 'offer_id', and keep a single column
unified_offer_id = df_no['offer id'].combine_first(df_no['offer_id'])
df_no['offer id'] = unified_offer_id
df = df_no.drop('offer_id', axis=1)

In [313]:
# Split the events into separate frames so that each 'offer completed' row can be
# joined with the transaction recorded for the same person at the same time
is_completed = df['event'] == 'offer completed'
is_transaction = df['event'] == 'transaction'

df1 = df.loc[is_completed, ['person', 'event', 'time', 'offer id', 'reward']]
df2 = df.loc[is_transaction, ['person', 'time', 'amount']]
# every remaining event (neither a completion nor a transaction)
df3 = df.loc[~(is_completed | is_transaction), ['person', 'event', 'time', 'offer id']]

In [314]:
# Pair completions with transactions of the same person and time; the outer join
# keeps completions without a transaction and transactions without a completion
df_trans_completed = df1.merge(df2, how='outer', on=['person', 'time'])

In [315]:
# Bring the remaining events and the completion/transaction rows back into one frame;
# the outer join on all shared columns keeps every row from both sides
df = df3.merge(df_trans_completed, how='outer', on=['person', 'time' , 'event' , 'offer id'])

In [317]:
# create columns of type of event with the value of time
# The pivot is keyed by the frame's own index, so every row keeps its position and only
# the column named after its own event receives that row's time; the other event
# columns stay NaN for that row. concat(axis=1) then aligns the new columns on the index.
# NOTE(review): the cell overwrites df with its widened version, so running it again on
# its own output would append the event columns a second time.
df = pd.concat([df, df.pivot_table(values='time', index=df.index, columns='event', aggfunc='first')], axis=1, sort=False)
df.head()

Unnamed: 0,person,event,time,offer id,reward,amount,offer completed,offer received,offer viewed
0,78afa995795e4d85b5d9ceeca43f5fef,offer received,0,9b98b8c7a33c4b65b9aebfe6a799e6d9,,,,0.0,
1,a03223e636434f42ac4c3df47e8bac43,offer received,0,0b1e1539f2cc45b7b9fa7c272da2e1d7,,,,0.0,
2,e2127556f4f64592b11af22de27a7932,offer received,0,2906b810c7d4411798c6938adc9daaa5,,,,0.0,
3,8ec6ce2a7e7949b1bf142def7d0e0586,offer received,0,fafdcd668e3743c1bb461111dcafc2a4,,,,0.0,
4,68617ca6246f4fbc85e91a2a49552598,offer received,0,4d5c57ea9a6940dd891ad53e9dbe8da0,,,,0.0,


In [318]:
# Rows without an offer id are labelled 'no offer' so that transactions not tied to an
# offer can still be tracked; 'person' is renamed to 'user id'
df = (
    df
    .assign(**{'offer id': df['offer id'].fillna(value='no offer')})
    .rename(columns={'person': 'user id'})
)

In [320]:
# Attach the encoded offer type and the duration to every event through its offer id
df = (
    df
    .merge(map_offer_type, how='outer', on=['offer id'])
    .rename(columns={'offer_type': 'offer type'})
)
df

Unnamed: 0,user id,event,time,offer id,reward,amount,offer completed,offer received,offer viewed,offer type,duration
0,78afa995795e4d85b5d9ceeca43f5fef,offer received,0,9b98b8c7a33c4b65b9aebfe6a799e6d9,,,,0.0,,1.0,168.0
1,ebe7ef46ea6f4963a7dd49f501b26779,offer received,0,9b98b8c7a33c4b65b9aebfe6a799e6d9,,,,0.0,,1.0,168.0
2,f082d80f0aac47a99173ba8ef8fc1909,offer received,0,9b98b8c7a33c4b65b9aebfe6a799e6d9,,,,0.0,,1.0,168.0
3,c0d210398dee4a0895b24444a5fcd1d2,offer received,0,9b98b8c7a33c4b65b9aebfe6a799e6d9,,,,0.0,,1.0,168.0
4,57dd18ec5ddc46828afb81ec5977bef2,offer received,0,9b98b8c7a33c4b65b9aebfe6a799e6d9,,,,0.0,,1.0,168.0
...,...,...,...,...,...,...,...,...,...,...,...
275912,b3a1272bc9904337b331bf348c3e8c17,,714,no offer,,1.59,,,,,
275913,68213b08d99a4ae1b0dcb72aebd9aa35,,714,no offer,,9.53,,,,,
275914,a00058cf10334a308c68e7631c529907,,714,no offer,,3.61,,,,,
275915,76ddbd6576844afe811f1a3c0fbb5bec,,714,no offer,,3.53,,,,,


# offer_merge()

In [333]:
# Take independent copies of two users' events. fill_amount() modifies the frame it is
# given, and doing that on a bare boolean-mask selection of df raises
# SettingWithCopyWarning and leaves it unclear which object was changed.
user_1 = df[df['user id'] == '78afa995795e4d85b5d9ceeca43f5fef'].copy()
user_2 = df[df['user id'] == 'a03223e636434f42ac4c3df47e8bac43'].copy()
user_2

Unnamed: 0,user id,event,time,offer id,reward,amount,offer completed,offer received,offer viewed,offer type,duration
16202,a03223e636434f42ac4c3df47e8bac43,offer received,0,0b1e1539f2cc45b7b9fa7c272da2e1d7,,,,0.0,,3.0,240.0
17548,a03223e636434f42ac4c3df47e8bac43,offer viewed,6,0b1e1539f2cc45b7b9fa7c272da2e1d7,,,,,6.0,3.0,240.0
23117,a03223e636434f42ac4c3df47e8bac43,offer received,504,0b1e1539f2cc45b7b9fa7c272da2e1d7,,,,504.0,,3.0,240.0
24742,a03223e636434f42ac4c3df47e8bac43,offer received,576,0b1e1539f2cc45b7b9fa7c272da2e1d7,,,,576.0,,3.0,240.0
26327,a03223e636434f42ac4c3df47e8bac43,offer viewed,624,0b1e1539f2cc45b7b9fa7c272da2e1d7,,,,,624.0,3.0,240.0
127473,a03223e636434f42ac4c3df47e8bac43,offer received,336,3f207df678b143eea3cee63160fa8bed,,,,336.0,,2.0,96.0
128752,a03223e636434f42ac4c3df47e8bac43,offer viewed,336,3f207df678b143eea3cee63160fa8bed,,,,,336.0,2.0,96.0
160432,a03223e636434f42ac4c3df47e8bac43,offer received,408,5a8bc65990b245e5a138643cd4eb9837,,,,408.0,,2.0,72.0
198163,a03223e636434f42ac4c3df47e8bac43,,234,no offer,,1.09,,,,,
202840,a03223e636434f42ac4c3df47e8bac43,,264,no offer,,3.5,,,,,


In [334]:
# Events of the first example user before fill_amount is applied
user_1

Unnamed: 0,user id,event,time,offer id,reward,amount,offer completed,offer received,offer viewed,offer type,duration
0,78afa995795e4d85b5d9ceeca43f5fef,offer received,0,9b98b8c7a33c4b65b9aebfe6a799e6d9,,,,0.0,,1.0,168.0
1361,78afa995795e4d85b5d9ceeca43f5fef,offer viewed,6,9b98b8c7a33c4b65b9aebfe6a799e6d9,,,,,6.0,1.0,168.0
12360,78afa995795e4d85b5d9ceeca43f5fef,offer completed,132,9b98b8c7a33c4b65b9aebfe6a799e6d9,5.0,19.89,132.0,,,1.0,168.0
94026,78afa995795e4d85b5d9ceeca43f5fef,offer received,504,f19421c1d4aa40978ebb69ca19b0e20d,,,,504.0,,1.0,120.0
98092,78afa995795e4d85b5d9ceeca43f5fef,offer viewed,582,f19421c1d4aa40978ebb69ca19b0e20d,,,,,582.0,1.0,120.0
101819,78afa995795e4d85b5d9ceeca43f5fef,offer completed,510,f19421c1d4aa40978ebb69ca19b0e20d,5.0,21.72,510.0,,,1.0,120.0
142458,78afa995795e4d85b5d9ceeca43f5fef,offer received,408,ae264e3637204a6fb9bb56bc8210ddfd,,,,408.0,,1.0,168.0
143736,78afa995795e4d85b5d9ceeca43f5fef,offer viewed,408,ae264e3637204a6fb9bb56bc8210ddfd,,,,,408.0,1.0,168.0
151956,78afa995795e4d85b5d9ceeca43f5fef,offer completed,510,ae264e3637204a6fb9bb56bc8210ddfd,10.0,21.72,510.0,,,1.0,168.0
155716,78afa995795e4d85b5d9ceeca43f5fef,offer received,168,5a8bc65990b245e5a138643cd4eb9837,,,,168.0,,2.0,72.0


In [338]:
def fill_amount(df, informational_type=2):
    '''
    Attribute a plain transaction to each viewed informational offer.

    For every 'offer viewed' row whose 'offer type' equals informational_type, look for
    'no offer' rows whose time lies in [view time, view time + duration]. The first such
    row (in frame order) hands its amount to the viewed row and is removed from the frame.
    A viewed row without any matching row is left untouched.

    INPUT:
        df - (DataFrame) events with the columns 'event', 'time', 'offer id', 'amount',
             'offer type' and 'duration'. Rows are not filtered by user, so the frame is
             expected to hold a single user's events. It is modified in place.
        informational_type - (int) code that marks informational offers in 'offer type'
    OUTPUT:
        df - (DataFrame) the same object that was passed in, after the update
    '''
    is_info_view = (df['event'] == 'offer viewed') & (df['offer type'] == informational_type)
    viewed = df.loc[is_info_view]
    # loop-invariant: rows that are not tied to any offer
    is_plain_transaction = df['offer id'] == 'no offer'

    attributed = {}   # index label of the viewed row -> amount assigned to it
    consumed = []     # index labels of the 'no offer' rows to remove afterwards

    view_windows = zip(viewed.index, viewed['time'].to_numpy(), viewed['duration'].to_numpy())
    for view_label, start, span in view_windows:
        in_window = is_plain_transaction & (df['time'] >= start) & (df['time'] <= start + span)
        candidates = df.loc[in_window]
        if candidates.empty:
            continue
        # NOTE(review): rows are only removed after the loop, so overlapping windows can
        # pick the same 'no offer' row for more than one view - confirm this is intended.
        attributed[view_label] = candidates['amount'].iloc[0]
        if candidates.index[0] not in consumed:
            consumed.append(candidates.index[0])

    if attributed:
        # Assign the column explicitly. An in-place fillna on df["amount"] acts on a
        # temporary object and does not reach df when Copy-on-Write is active.
        # fillna aligns on the index and only fills rows whose amount is still missing.
        df['amount'] = df['amount'].fillna(pd.Series(attributed, dtype='float64'))
    if consumed:
        df.drop(index=consumed, inplace=True)

    return df

In [339]:
# Run the informational-offer attribution on both example users.
# fill_amount changes the frame it receives and returns that same frame,
# so the names are simply rebound to the updated frames.
user_1 = fill_amount(user_1)
user_2 = fill_amount(user_2)
user_1

Unnamed: 0,user id,event,time,offer id,reward,amount,offer completed,offer received,offer viewed,offer type,duration
0,78afa995795e4d85b5d9ceeca43f5fef,offer received,0,9b98b8c7a33c4b65b9aebfe6a799e6d9,,,,0.0,,1.0,168.0
1361,78afa995795e4d85b5d9ceeca43f5fef,offer viewed,6,9b98b8c7a33c4b65b9aebfe6a799e6d9,,,,,6.0,1.0,168.0
12360,78afa995795e4d85b5d9ceeca43f5fef,offer completed,132,9b98b8c7a33c4b65b9aebfe6a799e6d9,5.0,19.89,132.0,,,1.0,168.0
94026,78afa995795e4d85b5d9ceeca43f5fef,offer received,504,f19421c1d4aa40978ebb69ca19b0e20d,,,,504.0,,1.0,120.0
98092,78afa995795e4d85b5d9ceeca43f5fef,offer viewed,582,f19421c1d4aa40978ebb69ca19b0e20d,,,,,582.0,1.0,120.0
101819,78afa995795e4d85b5d9ceeca43f5fef,offer completed,510,f19421c1d4aa40978ebb69ca19b0e20d,5.0,21.72,510.0,,,1.0,120.0
142458,78afa995795e4d85b5d9ceeca43f5fef,offer received,408,ae264e3637204a6fb9bb56bc8210ddfd,,,,408.0,,1.0,168.0
143736,78afa995795e4d85b5d9ceeca43f5fef,offer viewed,408,ae264e3637204a6fb9bb56bc8210ddfd,,,,,408.0,1.0,168.0
151956,78afa995795e4d85b5d9ceeca43f5fef,offer completed,510,ae264e3637204a6fb9bb56bc8210ddfd,10.0,21.72,510.0,,,1.0,168.0
155716,78afa995795e4d85b5d9ceeca43f5fef,offer received,168,5a8bc65990b245e5a138643cd4eb9837,,,,168.0,,2.0,72.0


In [336]:
# Events of the second example user
user_2

Unnamed: 0,user id,event,time,offer id,reward,amount,offer completed,offer received,offer viewed,offer type,duration
16202,a03223e636434f42ac4c3df47e8bac43,offer received,0,0b1e1539f2cc45b7b9fa7c272da2e1d7,,,,0.0,,3.0,240.0
17548,a03223e636434f42ac4c3df47e8bac43,offer viewed,6,0b1e1539f2cc45b7b9fa7c272da2e1d7,,,,,6.0,3.0,240.0
23117,a03223e636434f42ac4c3df47e8bac43,offer received,504,0b1e1539f2cc45b7b9fa7c272da2e1d7,,,,504.0,,3.0,240.0
24742,a03223e636434f42ac4c3df47e8bac43,offer received,576,0b1e1539f2cc45b7b9fa7c272da2e1d7,,,,576.0,,3.0,240.0
26327,a03223e636434f42ac4c3df47e8bac43,offer viewed,624,0b1e1539f2cc45b7b9fa7c272da2e1d7,,,,,624.0,3.0,240.0
127473,a03223e636434f42ac4c3df47e8bac43,offer received,336,3f207df678b143eea3cee63160fa8bed,,,,336.0,,2.0,96.0
128752,a03223e636434f42ac4c3df47e8bac43,offer viewed,336,3f207df678b143eea3cee63160fa8bed,,,,,336.0,2.0,96.0
160432,a03223e636434f42ac4c3df47e8bac43,offer received,408,5a8bc65990b245e5a138643cd4eb9837,,,,408.0,,2.0,72.0
198163,a03223e636434f42ac4c3df47e8bac43,,234,no offer,,1.09,,,,,
202840,a03223e636434f42ac4c3df47e8bac43,,264,no offer,,3.5,,,,,


In [330]:
def summarize_user_offers(user_df, offer_ids):
    '''
    Collapse one user's events into one row per offer (or per reception of an offer).

    The rows of each offer are averaged column by column, numeric columns only. When the
    same offer was received more than once, its rows are first tagged with a 'flag'
    (running count of 'offer received' rows, in row order) and averaged per flag, so
    every reception is treated independently.

    INPUT:
        user_df - (DataFrame) events of a single user with the columns 'offer id'
                  and 'offer received'
        offer_ids - (iterable) offer ids to look for; ids absent from user_df are skipped
    OUTPUT:
        (DataFrame) the averaged rows, with the columns of user_df first (non-numeric
        ones are left empty) followed by 'flag' when it was needed
    '''
    pieces = []

    for offer in offer_ids:
        # create data frame of an offer
        offer_df = user_df[user_df['offer id'] == offer].copy()
        if offer_df.empty:
            continue

        group_keys = ['offer id']
        # same offer received more than once: flag each reception to treat it independently
        if offer_df['offer received'].count() > 1:
            # each row gets the number of 'offer received' rows seen so far
            offer_df['flag'] = offer_df['offer received'].notna().cumsum()
            group_keys = ['flag', 'offer id']

        # numeric_only=True: text columns ('user id', 'event') cannot be averaged and
        # make mean() raise on pandas >= 2.0
        pieces.append(offer_df.groupby(group_keys).mean(numeric_only=True).reset_index())

    if not pieces:
        return user_df.head(0)

    # one concat at the end instead of growing a frame inside the loop
    # (DataFrame.append no longer exists in pandas >= 2.0)
    summary = pd.concat(pieces, sort=False)
    extra_cols = [col for col in summary.columns if col not in user_df.columns]
    return summary.reindex(columns=list(user_df.columns) + extra_cols)


offers_received_lst = df['offer id'].unique().tolist()
temp_df = summarize_user_offers(user_1, offers_received_lst).reset_index(drop=True)

temp_df


Unnamed: 0,user id,event,time,offer id,reward,amount,offer completed,offer received,offer viewed,offer type,duration
0,,,46.0,9b98b8c7a33c4b65b9aebfe6a799e6d9,5.0,19.89,132.0,0.0,6.0,1.0,168.0
1,,,532.0,f19421c1d4aa40978ebb69ca19b0e20d,5.0,21.72,510.0,504.0,582.0,1.0,120.0
2,,,442.0,ae264e3637204a6fb9bb56bc8210ddfd,10.0,21.72,510.0,408.0,408.0,1.0,168.0
3,,,192.0,5a8bc65990b245e5a138643cd4eb9837,,,,168.0,216.0,2.0,72.0
4,,,303.6,no offer,,23.532,,,,,


In [212]:
# Collapse user_2's events into one row per offer (or per reception of an offer).
# offers_received_lst is the list of offer ids built in an earlier cell.
offer_pieces = []

for offer in offers_received_lst:

    #create data frame of an offer
    offer_df = user_2[user_2['offer id'] == offer].copy()
    if offer_df.empty:
        # this user has no rows for this offer
        continue

    group_keys = ['offer id']
    # check if the same offer has been received more than once; if so, flag each
    # reception to treat it independently
    if offer_df['offer received'].count() > 1:
        # each row gets the number of 'offer received' rows seen so far (in row order)
        offer_df['flag'] = offer_df['offer received'].notna().cumsum()
        group_keys = ['flag', 'offer id']

    # numeric_only=True: text columns ('user id', 'event') cannot be averaged and
    # make mean() raise on pandas >= 2.0
    offer_pieces.append(offer_df.groupby(group_keys).mean(numeric_only=True).reset_index())

# One concat at the end instead of growing a frame inside the loop
# (DataFrame.append no longer exists in pandas >= 2.0)
temp_df = pd.concat(offer_pieces, sort=False) if offer_pieces else user_2.head(0)

# keep the column layout of user_2, followed by any extra column such as 'flag'
extra_cols = [col for col in temp_df.columns if col not in user_2.columns]
temp_df = temp_df.reindex(columns=list(user_2.columns) + extra_cols)

temp_df = temp_df.drop(columns='user id')

temp_df

Unnamed: 0,event,time,offer id,reward,amount,offer completed,offer received,offer viewed,offer type,duration,flag
0,,3,0b1e1539f2cc45b7b9fa7c272da2e1d7,,,,0.0,6.0,,240.0,1.0
1,,504,0b1e1539f2cc45b7b9fa7c272da2e1d7,,,,504.0,,,240.0,2.0
2,,600,0b1e1539f2cc45b7b9fa7c272da2e1d7,,,,576.0,624.0,,240.0,3.0
0,,336,3f207df678b143eea3cee63160fa8bed,,,,336.0,336.0,,96.0,
0,,408,5a8bc65990b245e5a138643cd4eb9837,,,,408.0,,,72.0,
0,,370,no offer,,1.55,,,,,,
