# Transcript

In [2]:
import pandas as pd
import numpy as np
import math
import json

%matplotlib inline

# Read the three datasets; each file holds one JSON record per line (lines=True).
portfolio = pd.read_json('data/portfolio.json', orient='records', lines=True)
profile = pd.read_json('data/profile.json', orient='records', lines=True)
transcript = pd.read_json('data/transcript.json', orient='records', lines=True)

In [None]:
def id_mapper(df , column):
    '''
    Build a lookup that gives every distinct value of a column a sequential code.
    INPUT:
    df - DataFrame containing `column` (used here with portfolio and profile)
    column - str - name of the column whose values are encoded
    OUTPUT:
    coded_dict - (dict) original value as key and its code as value; the code is
                 the 1-based order of first appearance rendered as a string
                 ('1', '2', ...)

    '''
    codes = {}

    for value in df[column]:
        if value in codes:
            continue
        # The next code is always one past the number of values already coded.
        codes[value] = str(len(codes) + 1)

    return codes

# Sequential string codes for user ids and offer ids.
# NOTE(review): in the visible notebook these dicts are only referenced from
# commented-out cells — confirm they are still needed.
user_coded_dict = id_mapper(profile , 'id')
offer_coded_dict = id_mapper(portfolio, 'id')

In [None]:
def arrange_events(df):
    '''
    Clean and rearrange the transcript DataFrame.

    INPUT:
    df - DataFrame with 'person', 'event', 'time' and 'value' columns, where
         'value' holds one dict per row (intended for `transcript`)
    OUTPUT:
    DataFrame with one row per non-transaction event (carrying the amount of a
    transaction by the same person at the same time, if any) plus one row per
    unmatched transaction. Columns: 'user id', 'offer id', 'reward', 'amount'
    and one column per non-transaction event type holding that event's time.

    '''

    # Expand the 'value' dicts into columns with a single constructor call;
    # Series.apply(pd.Series) builds one Series per row and is much slower.
    expanded = pd.DataFrame(df['value'].tolist(), index=df.index)

    # The dicts spell the offer key both as 'offer id' and 'offer_id', and only
    # some rows carry 'reward' / 'amount'. Guarantee that every column used
    # below exists even when the input never contains that key.
    for col in ('offer id', 'offer_id', 'reward', 'amount'):
        if col not in expanded.columns:
            expanded[col] = np.nan

    df = pd.concat([df.drop(['value'], axis=1), expanded], axis=1)

    # merge the 'offer id' and 'offer_id' columns into one
    df['offer id'] = df['offer id'].combine_first(df['offer_id'])
    df = df.drop(columns = ['offer_id'])

    # split into two DataFrames so a transaction can be attached to the offer
    # event that happened for the same person at the same time
    df1 = df[df['event'] != 'transaction'][['event' , 'person' , 'time' , 'offer id' , 'reward']]
    df2 = df[df['event'] == 'transaction'][['person' , 'time' , 'amount']]

    # outer merge keeps transactions that have no offer event at that time
    df = pd.merge(df1, df2, how='outer', on=['person', 'time'])

    # create one column per event type whose value is the time of the event
    df = pd.concat([df, df.pivot_table(values='time', index=df.index, columns='event', aggfunc='first')], axis=1, sort=False)

    # label transactions without an offer as 'no offer' so they can be tracked
    df['offer id'] = df['offer id'].fillna(value = 'no offer')
    df = df.rename(columns={'person':'user id'})

    # the event and time information now lives in the per-event columns
    df = df.drop(columns=['event' , 'time'])

    return df

In [None]:
def offer_mapper(df):
    '''
    Encode the offer types and split the result into two lookup tables.
    INPUT:
    df - DataFrame with 'id', 'offer_type' and 'duration' columns (portfolio)
    OUTPUT:
    offer_type - DataFrame with 'id' and the encoded 'offer_type'
    duration - DataFrame with 'id' and 'duration'

    '''
    type_codes = id_mapper(df, 'offer_type')

    # Swap every offer type name for its code; other columns are left as they are.
    encoded = df.replace({"offer_type":type_codes})

    return encoded[['id' , 'offer_type']] , encoded[['id' , 'duration']]

In [None]:
# Lookup tables: offer id -> encoded offer type, and offer id -> duration.
offer_type_mapper , duration_mapper = offer_mapper(portfolio)

In [None]:
# Rearranged transcript used by every per-user cell below.
arrange_transcript = arrange_events(transcript)

In [None]:
# Preview the rearranged transcript.
arrange_transcript.head()

In [None]:
def more_than_one_offer(df, offer_types=None):
    '''
    Collapse one user's event rows into one row per offer instance.

    The same offer id can be sent to a user more than once. Each row with an
    'offer received' time starts a new instance of that offer, and the rows
    that follow it for the same offer id are averaged into that instance.
    Instances that were completed without being viewed, or viewed only after
    completion, are removed.

    INPUT:
    df - DataFrame with a single user's rows, as produced by arrange_events
    offer_types - optional DataFrame with 'id' and 'offer_type' columns;
                  defaults to the module-level `offer_type_mapper`
    OUTPUT:
    DataFrame with one row per remaining offer instance, the columns of `df`
    plus 'id' and 'offer_type' ('4' marks rows whose offer id is not in
    `offer_types`, e.g. 'no offer' transactions)

    '''
    if offer_types is None:
        offer_types = offer_type_mapper

    pieces = []
    for offer in df['offer id'].unique().tolist():

        offer_df = df[df['offer id'] == offer].copy()

        if offer_df['offer received'].count() > 1:
            # Running count of 'offer received' rows: every row gets the number
            # of the offer instance it belongs to.
            offer_df['flag'] = offer_df['offer received'].notna().cumsum()
            group_keys = ['flag' , 'offer id']
        else:
            group_keys = ['offer id']

        # numeric_only keeps the string 'user id' column out of the mean, which
        # would otherwise raise on pandas >= 2.0.
        piece = offer_df.groupby(group_keys).mean(numeric_only=True).reset_index()
        pieces.append(piece.drop(columns='flag', errors='ignore'))

    # Concatenate once instead of appending inside the loop (DataFrame.append
    # was removed in pandas 2.0 and copies the whole frame on every call).
    if pieces:
        merged = pd.concat(pieces, ignore_index=True, sort=False)
    else:
        merged = df.head(0).copy()
    merged = merged.reindex(columns=df.columns)

    # The averaging dropped 'user id'; restore it (an empty input has no user).
    if len(df):
        merged['user id'] = df['user id'].iloc[0]

    merged[['reward' , 'amount']] = merged[['reward' , 'amount']].fillna(value = 0)

    merged = pd.merge(merged, offer_types, how='left', left_on=['offer id'] , right_on=['id'])
    merged['offer_type'] = merged['offer_type'].fillna(value = str(4))

    viewed = merged['offer viewed']
    completed = merged['offer completed']
    # Comparisons against NaN are False, so rows missing either time are only
    # removed by the second condition.
    viewed_after_completion = viewed > completed
    completed_without_view = viewed.isna() & completed.notna()

    return merged[~(viewed_after_completion | completed_without_view)].copy()

In [None]:
# Build, for every user in `profile`, four records keyed by offer-type code:
# mean amount, number of offers, number viewed and number completed.
user_id_lst = profile['id'].tolist()
amount_lst = []
offers_lst = []
offers_view_lst = []
offers_completed_lst = []

# Split the transcript by user once instead of scanning the whole frame for
# every user inside the loop.
events_by_user = {user: rows for user, rows in arrange_transcript.groupby('user id')}
no_events = arrange_transcript.head(0)

for user in user_id_lst:

    user_arrange_transcript = events_by_user.get(user, no_events)
    user_events = more_than_one_offer(user_arrange_transcript)
    by_type = user_events.groupby('offer_type')

    amount = {'user id' : user}
    offers = {'user id' : user}
    offers_view = {'user id' : user}
    offers_completed = {'user id' : user}

    # Select the column before aggregating so the string columns are never
    # averaged (that raises on pandas >= 2.0).
    amount.update(by_type['amount'].mean().to_dict())
    offers.update(by_type['offer id'].count().to_dict())
    offers_view.update(by_type['offer viewed'].count().to_dict())
    offers_completed.update(by_type['offer completed'].count().to_dict())

    amount_lst.append(amount)
    offers_lst.append(offers)
    offers_view_lst.append(offers_view)
    offers_completed_lst.append(offers_completed)

In [None]:
# Mean amount per user and offer-type code. Built from `amount_lst` (filled by
# the loop above) so the cell does not depend on variables that are only
# created further down the notebook.
amount_type = pd.DataFrame(amount_lst)
# A user without rows for a type gets that type's mean over all users;
# numeric_only keeps the string 'user id' column out of the mean.
amount_type = amount_type.fillna(amount_type.mean(numeric_only=True))
amount_type = amount_type.rename(columns={'1': 'type 1', '2': 'type 2', '3': 'type 3' , '4': 'no offer'})

amount_type.head()

In [None]:
# Offers per user and offer type; the '4' (no offer) bucket is left out and
# users without a given type count as 0.
user_offers = (
    pd.DataFrame(offers_lst)
    .drop(columns='4')
    .fillna(value=0)
    .rename(columns={'1': 'offers type 1', '2': 'offers type 2', '3': 'offers type 3'})
)

user_offers.head()

In [None]:
# Viewed offers per user and offer type; the '4' (no offer) bucket is left out
# and users without a given type count as 0.
offers_viewed = (
    pd.DataFrame(offers_view_lst)
    .drop(columns='4')
    .fillna(value=0)
    .rename(columns={'1': 'viewed type 1', '2': 'viewed type 2', '3': 'viewed type 3'})
)

offers_viewed.head()

In [None]:
# Completed offers per user and offer type; the '4' (no offer) bucket is left
# out and users without a given type count as 0.
offers_completed = (
    pd.DataFrame(offers_completed_lst)
    .drop(columns='4')
    .fillna(value=0)
    .rename(columns={'1': 'completed type 1', '2': 'completed type 2', '3': 'completed type 3'})
)

offers_completed.head()

In [None]:
# Preview the user profile table.
profile.head()

In [None]:
# One-hot encode gender. The frame is derived from `profile` so the cell runs
# on a fresh kernel and gives the same result when re-run.
# NOTE(review): assumes `profile_mod` was meant to start as a copy of `profile`
# (the later merge joins it on the raw 'id' column) — confirm.
profile_mod = pd.concat([profile.drop(['gender'], axis=1), pd.get_dummies(profile['gender'])], axis=1)

profile_mod.head()

In [None]:
# Attach the per-type mean amounts to each profile, then discard the duplicate
# id column and the membership date.
profile_amount = (
    pd.merge(profile_mod, amount_type, how='inner', left_on="id", right_on="user id")
    .drop(['id' , 'became_member_on'], axis=1)
)
profile_amount.head()

In [None]:
# Add the received / viewed / completed counts to the profile-amount table.
profile_amount_offer = (
    profile_amount
    .merge(user_offers, how='inner', on="user id")
    .merge(offers_viewed, how='inner', on="user id")
    .merge(offers_completed, how='inner', on="user id")
)

profile_amount_offer.head()

In [None]:
# Persist the profile + amount table without the index column.
profile_amount.to_csv('data/profile_amount.csv' , index=False)

In [None]:
# Persist the profile + amount + offer-count table without the index column.
profile_amount_offer.to_csv('data/profile_amount_offer.csv' , index=False)

# TEST

In [None]:
# Rows of two specific users, used as examples in the scratch cells below.
user1_test = arrange_transcript[arrange_transcript['user id'] == '78afa995795e4d85b5d9ceeca43f5fef']
user2_test = arrange_transcript[arrange_transcript['user id'] == 'a03223e636434f42ac4c3df47e8bac43']

In [None]:
# Display the first example user's rows.
user1_test

In [None]:
# Display the second example user's rows.
user2_test

In [None]:
# Independent copy of the second user's rows for one offer id (a 'flag' column is added to it later).
papapa = user2_test[user2_test['offer id'] == '0b1e1539f2cc45b7b9fa7c272da2e1d7'].copy()

In [None]:
# The second user's rows for another offer id.
pepepe = user2_test[user2_test['offer id'] == '3f207df678b143eea3cee63160fa8bed']
pepepe

In [None]:
# Empty frame that keeps only the column layout of `pepepe`.
pupupu = pepepe.head(0)
pupupu

In [None]:
# Stack `papapa` under the empty frame. DataFrame.append was removed in
# pandas 2.0; pd.concat works on every version.
pupupu = pd.concat([pupupu, papapa], sort=False)
pupupu

In [None]:
# Stack the rows of both offers. DataFrame.append was removed in pandas 2.0;
# pd.concat works on every version.
pipipi = pd.concat([papapa, pepepe], sort=False)
pipipi

In [None]:
# First distinct user id found in the stacked rows.
user_ida = pipipi['user id'].unique()[0]
user_ida

In [None]:
# Number each row with the count of 'offer received' rows seen so far, so the
# rows of one offer instance share the same number.
listilla = []
cter = 0

for index, row in papapa.iterrows():
    if not np.isnan(row['offer received']):
        # a new 'offer received' time starts the next instance
        cter+=1
        listilla.append(cter)
    else:
        listilla.append(cter)


listilla

In [None]:
# Store the instance numbers next to the rows they were computed from.
papapa['flag'] = listilla
papapa

In [None]:
# Average the rows of each offer instance; numeric_only keeps the string
# 'user id' column out of the mean (it raises on pandas >= 2.0 otherwise).
papapa.groupby(['flag' , 'offer id']).mean(numeric_only=True).reset_index().drop(columns='flag')

In [None]:
# List the column names of the example frame.
papapa.columns.tolist()

# df for each user

In [None]:
# One row per offer instance for the second example user.
user_2_mod = more_than_one_offer(user2_test)
user_2_mod

In [None]:
# Number of rows with an offer id, per offer-type code.
user_2_number_offers = user_2_mod.groupby('offer_type').count()['offer id'].to_dict()
user_2_number_offers

In [None]:
# Number of rows with a completion time, per offer-type code.
user_2_offers_completed = user_2_mod.groupby('offer_type').count()['offer completed'].to_dict()
user_2_offers_completed

In [None]:
# Mean amount per offer-type code for the second example user.
# .iloc[0] takes the first row by position; the label 0 may be missing because
# more_than_one_offer can drop rows without resetting the index.
user_2_amount = {'user id' : user_2_mod['user id'].iloc[0]}
# Select 'amount' before averaging so the string columns are never averaged.
user_2_amount.update(user_2_mod.groupby('offer_type')['amount'].mean().to_dict())
user_2_amount

In [None]:
# Mean reward per offer-type code; the column is selected before averaging so
# the string columns are never averaged (that raises on pandas >= 2.0).
user_2_reward = user_2_mod.groupby('offer_type')['reward'].mean().to_dict()
user_2_reward

In [None]:
# One row per offer instance for the first example user.
user_1_mod = more_than_one_offer(user1_test)
user_1_mod

In [None]:
# Number of rows with an offer id, per offer-type code.
user_1_number_offers = user_1_mod.groupby('offer_type').count()['offer id'].to_dict()
user_1_number_offers

In [None]:
# Number of rows with a completion time, per offer-type code.
user_1_offers_completed = user_1_mod.groupby('offer_type').count()['offer completed'].to_dict()
user_1_offers_completed

In [None]:
# Number of rows with a view time, per offer-type code.
user_1_offers_viewed = user_1_mod.groupby('offer_type').count()['offer viewed'].to_dict()
user_1_offers_viewed

In [None]:
# Mean amount per offer-type code; the column is selected before averaging so
# the string columns are never averaged (that raises on pandas >= 2.0).
user_1_amount = user_1_mod.groupby('offer_type')['amount'].mean().to_dict()
user_1_amount

In [None]:
# Mean reward per offer-type code; the column is selected before averaging so
# the string columns are never averaged (that raises on pandas >= 2.0).
user_1_reward = user_1_mod.groupby('offer_type')['reward'].mean().to_dict()
user_1_reward

# Creating dictionaries for all users

In [None]:
# NOTE(review): this head() is not the last expression of the cell, so its result is not displayed.
profile.head()
# Every user id from the profile table, in table order.
user_id_list = profile['id'].tolist()
# Length of the first five ids only (at most 5), not the number of users.
len(user_id_list[:5])

In [None]:
# Mean amount per offer-type code for every user, one dict per user.
amount_list = []

for user in user_id_list:

    user_arrange_transcript = arrange_transcript[arrange_transcript['user id'] == user]
    user_events = more_than_one_offer(user_arrange_transcript)

    amount = {'user id' : user}
    # Select 'amount' before averaging so the string columns are never
    # averaged (that raises on pandas >= 2.0).
    amount.update(user_events.groupby('offer_type')['amount'].mean().to_dict())

    amount_list.append(amount)

In [None]:
# One row per user, one column per offer-type code found in the dicts.
user_amount_type = pd.DataFrame(amount_list)
user_amount_type.head()

In [None]:
# Replace missing per-type amounts with the column mean; numeric_only keeps the
# string 'user id' column out of the mean (it raises on pandas >= 2.0 otherwise).
amount_type = user_amount_type.fillna(user_amount_type.mean(numeric_only=True))
amount_type.head()

In [None]:
# Preview the user profile table.
profile.head()

In [None]:
# Outer join keeps profiles without amounts and amounts without a profile.
df_to_model = pd.merge(profile, amount_type, how='outer' , left_on="id" , right_on="user id")

In [None]:
# Preview the joined table.
df_to_model.head()

# Don't know

In [None]:
# Print the view time and completion time of every row of the first example user.
for index, row in user1_test.iterrows():
    print(row['offer viewed'], row['offer completed'])

In [None]:
# Print and remove the rows whose offer was viewed after it was completed.
for index, row in user1_test.iterrows():
    if row['offer viewed'] > row['offer completed']:
        print(row['offer viewed'], row['offer completed'])
        # drop() expects the index label of the row, not the row Series itself.
        user1_test = user1_test.drop(index)
user1_test

In [None]:
# NOTE(review): `user2_offer` is first assigned in a later cell, so this cell
# fails when the notebook is run top to bottom. It also filters `user2_offer`
# but displays `user1_offer` — confirm which frame was intended.
for row in range(len(user2_offer)):
    # drop rows whose view time is later than the completion time
    if user2_offer.loc[row]['offer viewed'] > user2_offer.loc[row]['offer completed']:
        user2_offer = user2_offer.drop([row])
    else:
        pass
user1_offer

In [None]:
# Average every column per offer id. numeric_only keeps the string 'user id'
# column out of the mean (it raises on pandas >= 2.0 otherwise).
# NOTE(review): both names are overwritten, so re-running this cell groups the
# already grouped frames.
user1_test = user1_test.groupby('offer id').mean(numeric_only=True)
user2_test = user2_test.groupby('offer id').mean(numeric_only=True)
user1_test

In [None]:
# Preview the offer portfolio.
portfolio.head()

In [None]:
# Offer type name -> sequential string code.
offer_type_coded_dict = id_mapper(portfolio , 'offer_type')

In [None]:
# Offer id together with its encoded offer type.
portfolio_mod = portfolio.replace({"offer_type":offer_type_coded_dict})[['id' , 'offer_type']]
portfolio_mod

In [None]:
# Inspect the index used as the join key in the next cell.
user1_test.index

In [None]:
# Attach the encoded offer type to each example user's rows, joining the index
# of the user frame to the portfolio 'id'.
# Rows without a portfolio match get the code '4'; it is a string because the
# codes produced by id_mapper are strings, and mixing in an int would make the
# column unsortable.
user1_offer = pd.merge(user1_test, portfolio_mod, how='left', left_on=user1_test.index , right_on=['id'])
user1_offer['offer_type'] = user1_offer['offer_type'].fillna(value = str(4))

user2_offer = pd.merge(user2_test, portfolio_mod, how='left', left_on=user2_test.index , right_on=['id'])
user2_offer['offer_type'] = user2_offer['offer_type'].fillna(value = str(4))

user1_offer

In [None]:
# Remove the rows whose offer was viewed after it was completed. A boolean mask
# keeps the cell re-runnable: looking rows up by label 0..n-1 fails once some
# labels have been dropped. Comparisons against NaN are False, so rows missing
# either time are kept.
user1_offer = user1_offer[~(user1_offer['offer viewed'] > user1_offer['offer completed'])]
user1_offer

In [None]:
# Remove the rows whose offer was viewed after it was completed. A boolean mask
# keeps the cell re-runnable: looking rows up by label 0..n-1 fails once some
# labels have been dropped. Comparisons against NaN are False, so rows missing
# either time are kept.
user2_offer = user2_offer[~(user2_offer['offer viewed'] > user2_offer['offer completed'])]
user2_offer

In [None]:
# NOTE(review): `user_offer` is not assigned anywhere in the visible notebook
# (only `user1_offer` / `user2_offer` are) — confirm which frame was meant.
# Missing rewards and amounts are treated as 0.
user_offer[['reward' , 'amount']] = user_offer[['reward' , 'amount']].fillna(value = 0)
user_offer

In [None]:
# NOTE(review): `user_offer` is not assigned anywhere in the visible notebook.
# Elapsed time between receiving and viewing, and between viewing and completing.
user_offer['time_recived_viewed'] = user_offer['offer viewed'] - user_offer['offer received']
user_offer['time_viewed_completed'] = user_offer['offer completed'] - user_offer['offer viewed']
# The raw event times are dropped once the differences are stored.
user_offer = user_offer.drop(columns=['id' ,'offer completed' , 'offer received' , 'offer viewed'])
user_offer

In [None]:
# NOTE(review): `user_offer` is not assigned anywhere in the visible notebook.
# Column means per offer-type code.
user_offer.groupby('offer_type').mean()

In [None]:
# NOTE(review): `user_offer` is not assigned anywhere in the visible notebook.
# Mean amount per offer-type code.
total_amount = user_offer['amount'].groupby(user_offer['offer_type'])
total_amount.mean()

In [None]:
# NOTE(review): braces around a single value create a set, not a dict,
# despite the `_dic` name — confirm what was intended.
user_1_dic = {'offer'}

In [None]:
#profile_mod = profile.replace({"id": user_coded_dict})

In [None]:
#portfolio_mod = portfolio.replace({"id": offer_coded_dict})

In [None]:
# NOTE(review): `modify_transcript` is not assigned anywhere in the visible
# notebook — confirm which frame should be written.
modify_transcript.to_csv('data/events.csv')

# Testing and tables to explain

In [None]:
# Replace the 'value' column with one column per key found in its dicts.
transcript_mod = pd.concat([transcript.drop(['value'], axis=1), transcript['value'].apply(pd.Series)], axis=1)
transcript_mod.head()

In [None]:
# Compare the number of rows that carry an 'offer_id' with the number that carry a 'reward'.
print('All offer_id have a reward:' , (transcript_mod.groupby('offer_id')['person'].count().sum() == transcript_mod.groupby('reward')['person'].count().sum()))

In [None]:
# Fill the gaps in 'offer id' from 'offer_id', then drop the redundant column.
transcript_mod['offer id'] = transcript_mod['offer id'].combine_first(transcript_mod['offer_id'])#.fillna(value = 'no offer')
transcript_mod = transcript_mod.drop(columns = ['offer_id'])
#transcript_mod = transcript_mod.fillna(value = 0 , axis = 1)

In [None]:
# All transcript rows of one example person.
test_user = transcript_mod[transcript_mod['person'] == '78afa995795e4d85b5d9ceeca43f5fef']

In [None]:
# Display the example person's rows.
test_user

In [None]:
# Column means per offer id; numeric_only keeps the string 'person' and 'event'
# columns out of the mean (it raises on pandas >= 2.0 otherwise).
test_user.groupby('offer id').mean(numeric_only=True)

In [None]:
# Time and amount of the example person's transactions.
time_amount = test_user[test_user['event'] == 'transaction'][['time' , 'amount']]
time_amount

In [None]:
# The example person's non-transaction events.
otra_tabla =  test_user[test_user['event'] != 'transaction'][['event' , 'time' , 'offer id' , 'reward']]
otra_tabla

In [None]:
# Outer join on time: events and transactions at the same time share a row,
# and unmatched rows from either side are kept.
tada = pd.merge(otra_tabla, time_amount, how='outer', on='time')
tada

In [None]:
# Add one column per event type holding the time of that event on its row.
tada = pd.concat([tada, tada.pivot_table(values='time', index=tada.index, columns='event', aggfunc='first')], axis=1, sort=False)
tada

In [None]:
# Rows without an offer id are labelled 'no offer'; the event and time columns
# are dropped now that the per-event columns hold the times.
tada['offer id'] = tada['offer id'].fillna(value = 'no offer')
tada = tada.drop(columns=['event' , 'time'])
tada

In [None]:
# Column means per offer id, with 'no offer' as its own group.
tada.groupby('offer id').mean()

In [None]:
# NOTE(review): the grouping key comes from `user2_test`, a different frame
# than `tada`, and no cell in the visible notebook creates an 'offer type'
# column (the merges produce 'offer_type') — confirm the intended key.
total_amount = tada['amount'].groupby(user2_test['offer type'])
total_amount.mean()