In [1]:
import pandas as pd
import numpy as np
import math
import json
import matplotlib.pyplot as plt
import seaborn as sns
% matplotlib inline

# read in the json files
portfolio = pd.read_json('data/portfolio.json', orient='records', lines=True)
profile = pd.read_json('data/profile.json', orient='records', lines=True)
transcript = pd.read_json('data/transcript.json', orient='records', lines=True)

Gender, age and income correlate too much and it is a bit pointless to group customers by them. 
Let's add some variance to the profile data.

##### Adding how much customer has spent during the testing period

In [2]:
# Collecting transactions from transcript
transactions = transcript[transcript['event']=='transaction']

In [3]:
# Opening value field to get amount spent to an own column
transactions_opened = pd.concat([transactions.drop(['value'], axis=1), transactions['value'].apply(pd.Series)], axis=1)
transactions_opened.head()

Unnamed: 0,event,person,time,amount
12654,transaction,02c083884c7d45b39cc68e1314fec56c,0,0.83
12657,transaction,9fa9ae8f57894cc9a3b8a9bbe0fc1b2f,0,34.56
12659,transaction,54890f68699049c2a04d415abc25e717,0,13.23
12670,transaction,b2f1cd155b864803ad8334cdf13c4bd2,0,19.51
12671,transaction,fe97aa22dd3e48c8b143116a8403dd52,0,18.97


In [4]:
# Amount sum per customer

aggregation_functions = {'amount': 'sum'}
transactions_per_customer = transactions_opened.groupby(transactions_opened['person']).aggregate(aggregation_functions)

In [5]:
transactions_per_customer.sort_values(by=['amount']).tail()

Unnamed: 0_level_0,amount
person,Unnamed: 1_level_1
73afdeca19e349b98f09e928644610f8,1319.97
626df8678e2a4953b9098246418c9cfa,1321.42
ae6f43089b674728a50b8727252d3305,1327.74
f1d65ae63f174b8f80fa063adcaa63b7,1365.66
3c8d541112a74af99e88abbd0692f00e,1608.69


In [6]:
customers = profile
customers.rename(columns={'id':'customer_id'}, inplace=True)
customers = customers.set_index('customer_id')

In [7]:
# Adding amount 
customers = pd.concat([customers, transactions_per_customer], axis=1, sort=False)
customers.rename(columns={'amount':'amount_spent'}, inplace=True)

In [8]:
customers.head()

Unnamed: 0,age,became_member_on,gender,income,amount_spent
68be06ca386d4c31939f3a4f0e3dd783,118,20170212,,,20.4
0610b486422d4921ae7d2bf64640c50b,55,20170715,F,112000.0,77.01
38fe809add3b4fcf9315a9694bb96ff5,118,20180712,,,14.3
78afa995795e4d85b5d9ceeca43f5fef,75,20170509,F,100000.0,159.27
a03223e636434f42ac4c3df47e8bac43,118,20170804,,,4.65


In [9]:
len(customers[customers['amount_spent'].isnull()])

422

#### Adding offer events

In [10]:
# Collecting offer events from transcript
offer_events = transcript[(transcript['event']=='offer received') 
                          | (transcript['event']=='offer viewed')
                          | (transcript['event']=='offer completed')]

In [11]:
offer_events.tail()

Unnamed: 0,event,person,time,value
306497,offer completed,a6f84f4e976f44508c358cc9aba6d2b3,714,{'offer_id': '2298d6c36e964ae4a3e7e9706d1fb8c2...
306506,offer completed,b895c57e8cd047a8872ce02aa54759d6,714,{'offer_id': 'fafdcd668e3743c1bb461111dcafc2a4...
306507,offer viewed,8dda575c2a1d44b9ac8e8b07b93d1f8e,714,{'offer id': '0b1e1539f2cc45b7b9fa7c272da2e1d7'}
306509,offer completed,8431c16f8e1d440880db371a68f82dd0,714,{'offer_id': 'fafdcd668e3743c1bb461111dcafc2a4...
306527,offer completed,24f56b5e1849462093931b164eb803b5,714,{'offer_id': 'fafdcd668e3743c1bb461111dcafc2a4...


In [12]:
# one-hot-encode dataframe
offer_events_opened_orig = pd.concat([offer_events.drop(['value'], axis=1), offer_events['value'].apply(pd.Series)], axis=1)

In [13]:
offer_events_opened = offer_events_opened_orig.copy()

In [14]:
offer_events_opened_orig.head()

Unnamed: 0,event,person,time,offer id,offer_id,reward
0,offer received,78afa995795e4d85b5d9ceeca43f5fef,0,9b98b8c7a33c4b65b9aebfe6a799e6d9,,
1,offer received,a03223e636434f42ac4c3df47e8bac43,0,0b1e1539f2cc45b7b9fa7c272da2e1d7,,
2,offer received,e2127556f4f64592b11af22de27a7932,0,2906b810c7d4411798c6938adc9daaa5,,
3,offer received,8ec6ce2a7e7949b1bf142def7d0e0586,0,fafdcd668e3743c1bb461111dcafc2a4,,
4,offer received,68617ca6246f4fbc85e91a2a49552598,0,4d5c57ea9a6940dd891ad53e9dbe8da0,,


In [15]:
# It seems there might be offer ids in two columns, 'offer id' and 'offer_id'. 
## Combining those if that's the case.
def combine_offer_id_cols(data):
    data['offer id'] = data['offer id'].fillna(data['offer_id'])
    data.drop(['offer_id'], inplace=True, axis=1)
    return data

combine_offer_id_cols(offer_events_opened)

offer_events_opened.head(10)

Unnamed: 0,event,person,time,offer id,reward
0,offer received,78afa995795e4d85b5d9ceeca43f5fef,0,9b98b8c7a33c4b65b9aebfe6a799e6d9,
1,offer received,a03223e636434f42ac4c3df47e8bac43,0,0b1e1539f2cc45b7b9fa7c272da2e1d7,
2,offer received,e2127556f4f64592b11af22de27a7932,0,2906b810c7d4411798c6938adc9daaa5,
3,offer received,8ec6ce2a7e7949b1bf142def7d0e0586,0,fafdcd668e3743c1bb461111dcafc2a4,
4,offer received,68617ca6246f4fbc85e91a2a49552598,0,4d5c57ea9a6940dd891ad53e9dbe8da0,
5,offer received,389bc3fa690240e798340f5a15918d5c,0,f19421c1d4aa40978ebb69ca19b0e20d,
6,offer received,c4863c7985cf408faee930f111475da3,0,2298d6c36e964ae4a3e7e9706d1fb8c2,
7,offer received,2eeac8d8feae4a8cad5a6af0499a211d,0,3f207df678b143eea3cee63160fa8bed,
8,offer received,aa4862eba776480b8bb9c68455b8c2e1,0,0b1e1539f2cc45b7b9fa7c272da2e1d7,
9,offer received,31dda685af34476cad5bc968bdb01c53,0,0b1e1539f2cc45b7b9fa7c272da2e1d7,


In [16]:
offer_events_opened_test = offer_events_opened
offer_events_opened_test['day'] = offer_events_opened_test['time'] / 24
offer_events_opened_test.tail(20)

Unnamed: 0,event,person,time,offer id,reward,day
306419,offer viewed,5a3f7fcb450a4cb693ab5267b4d49b40,714,5a8bc65990b245e5a138643cd4eb9837,,29.75
306421,offer viewed,89417e13fcd54dd78b80f97f7f085cf1,714,ae264e3637204a6fb9bb56bc8210ddfd,,29.75
306423,offer completed,8771f96bc7f947c09a8f78ee2e14a1a7,714,9b98b8c7a33c4b65b9aebfe6a799e6d9,5.0,29.75
306430,offer completed,0355c6a5fdbc429ea5b05e8a9ecd2eae,714,9b98b8c7a33c4b65b9aebfe6a799e6d9,5.0,29.75
306433,offer viewed,ec1761735b034ae8874bc979f0cd5862,714,5a8bc65990b245e5a138643cd4eb9837,,29.75
306440,offer completed,f74033a8c6834303bd1b3ef0c98d11d8,714,0b1e1539f2cc45b7b9fa7c272da2e1d7,5.0,29.75
306441,offer viewed,d56386cf344c4829bbf420d1895dca37,714,5a8bc65990b245e5a138643cd4eb9837,,29.75
306444,offer completed,84ad352984b04b73b6dbff8d719de321,714,9b98b8c7a33c4b65b9aebfe6a799e6d9,5.0,29.75
306450,offer viewed,9b51e8797290403b90d09d864dec4b94,714,3f207df678b143eea3cee63160fa8bed,,29.75
306455,offer completed,f3e801caeafe4899b3b989b586e74ac7,714,0b1e1539f2cc45b7b9fa7c272da2e1d7,5.0,29.75


## TEST SET

In [118]:
test_set = (offer_events_opened_test[offer_events_opened_test.person=='389bc3fa690240e798340f5a15918d5c']).sort_values(by=['offer id'])

In [119]:
test_set

Unnamed: 0,event,person,time,offer id,reward,day
110831,offer received,389bc3fa690240e798340f5a15918d5c,336,2906b810c7d4411798c6938adc9daaa5,,14.0
132803,offer viewed,389bc3fa690240e798340f5a15918d5c,354,2906b810c7d4411798c6938adc9daaa5,,14.75
200085,offer completed,389bc3fa690240e798340f5a15918d5c,498,2906b810c7d4411798c6938adc9daaa5,2.0,20.75
179953,offer viewed,389bc3fa690240e798340f5a15918d5c,438,9b98b8c7a33c4b65b9aebfe6a799e6d9,,18.25
53179,offer received,389bc3fa690240e798340f5a15918d5c,168,9b98b8c7a33c4b65b9aebfe6a799e6d9,,7.0
77212,offer viewed,389bc3fa690240e798340f5a15918d5c,192,9b98b8c7a33c4b65b9aebfe6a799e6d9,,8.0
150601,offer received,389bc3fa690240e798340f5a15918d5c,408,9b98b8c7a33c4b65b9aebfe6a799e6d9,,17.0
200086,offer completed,389bc3fa690240e798340f5a15918d5c,498,9b98b8c7a33c4b65b9aebfe6a799e6d9,5.0,20.75
214275,offer viewed,389bc3fa690240e798340f5a15918d5c,504,f19421c1d4aa40978ebb69ca19b0e20d,,21.0
201577,offer received,389bc3fa690240e798340f5a15918d5c,504,f19421c1d4aa40978ebb69ca19b0e20d,,21.0


In [120]:
portfolio[(portfolio['id'] == '2906b810c7d4411798c6938adc9daaa5') 
          | (portfolio['id'] == '9b98b8c7a33c4b65b9aebfe6a799e6d9')
         | (portfolio['id'] == 'f19421c1d4aa40978ebb69ca19b0e20d')
         | (portfolio['id'] == 'fafdcd668e3743c1bb461111dcafc2a4') ]

Unnamed: 0,channels,difficulty,duration,id,offer_type,reward
3,"[web, email, mobile]",5,7,9b98b8c7a33c4b65b9aebfe6a799e6d9,bogo,5
6,"[web, email, mobile, social]",10,10,fafdcd668e3743c1bb461111dcafc2a4,discount,2
8,"[web, email, mobile, social]",5,5,f19421c1d4aa40978ebb69ca19b0e20d,bogo,5
9,"[web, email, mobile]",10,7,2906b810c7d4411798c6938adc9daaa5,discount,2


In [121]:
# Collect offer duration from portfolio 
offers = portfolio.copy()

test_set['offer_duration'] = test_set['offer id'].map(offers.set_index('id')['duration'])

In [122]:
test_set

Unnamed: 0,event,person,time,offer id,reward,day,offer_duration
110831,offer received,389bc3fa690240e798340f5a15918d5c,336,2906b810c7d4411798c6938adc9daaa5,,14.0,7
132803,offer viewed,389bc3fa690240e798340f5a15918d5c,354,2906b810c7d4411798c6938adc9daaa5,,14.75,7
200085,offer completed,389bc3fa690240e798340f5a15918d5c,498,2906b810c7d4411798c6938adc9daaa5,2.0,20.75,7
179953,offer viewed,389bc3fa690240e798340f5a15918d5c,438,9b98b8c7a33c4b65b9aebfe6a799e6d9,,18.25,7
53179,offer received,389bc3fa690240e798340f5a15918d5c,168,9b98b8c7a33c4b65b9aebfe6a799e6d9,,7.0,7
77212,offer viewed,389bc3fa690240e798340f5a15918d5c,192,9b98b8c7a33c4b65b9aebfe6a799e6d9,,8.0,7
150601,offer received,389bc3fa690240e798340f5a15918d5c,408,9b98b8c7a33c4b65b9aebfe6a799e6d9,,17.0,7
200086,offer completed,389bc3fa690240e798340f5a15918d5c,498,9b98b8c7a33c4b65b9aebfe6a799e6d9,5.0,20.75,7
214275,offer viewed,389bc3fa690240e798340f5a15918d5c,504,f19421c1d4aa40978ebb69ca19b0e20d,,21.0,5
201577,offer received,389bc3fa690240e798340f5a15918d5c,504,f19421c1d4aa40978ebb69ca19b0e20d,,21.0,5


### Adding last possible hour to get the offer

In [123]:
completions_in = test_set[test_set['event']=='offer received'].copy()
completions_in['last_hour'] = completions_in['time'] + (completions_in['offer_duration'] * 24)
completions_in

Unnamed: 0,event,person,time,offer id,reward,day,offer_duration,last_hour
110831,offer received,389bc3fa690240e798340f5a15918d5c,336,2906b810c7d4411798c6938adc9daaa5,,14.0,7,504
53179,offer received,389bc3fa690240e798340f5a15918d5c,168,9b98b8c7a33c4b65b9aebfe6a799e6d9,,7.0,7,336
150601,offer received,389bc3fa690240e798340f5a15918d5c,408,9b98b8c7a33c4b65b9aebfe6a799e6d9,,17.0,7,576
201577,offer received,389bc3fa690240e798340f5a15918d5c,504,f19421c1d4aa40978ebb69ca19b0e20d,,21.0,5,624
5,offer received,389bc3fa690240e798340f5a15918d5c,0,f19421c1d4aa40978ebb69ca19b0e20d,,0.0,5,120
245126,offer received,389bc3fa690240e798340f5a15918d5c,576,fafdcd668e3743c1bb461111dcafc2a4,,24.0,10,816


###  Adding counter of unique offers (because one cust can get same offer twice)

In [124]:
test_set = test_set.sort_values(by=['time'])

In [125]:
test_set['unique_offer_cum'] = test_set.groupby(['person', 'offer id', 'event']).cumcount()

In [126]:
test_set

Unnamed: 0,event,person,time,offer id,reward,day,offer_duration,unique_offer_cum
12650,offer viewed,389bc3fa690240e798340f5a15918d5c,0,f19421c1d4aa40978ebb69ca19b0e20d,,0.0,5,0
5,offer received,389bc3fa690240e798340f5a15918d5c,0,f19421c1d4aa40978ebb69ca19b0e20d,,0.0,5,0
32626,offer completed,389bc3fa690240e798340f5a15918d5c,60,f19421c1d4aa40978ebb69ca19b0e20d,5.0,2.5,5,0
53179,offer received,389bc3fa690240e798340f5a15918d5c,168,9b98b8c7a33c4b65b9aebfe6a799e6d9,,7.0,7,0
77212,offer viewed,389bc3fa690240e798340f5a15918d5c,192,9b98b8c7a33c4b65b9aebfe6a799e6d9,,8.0,7,0
110831,offer received,389bc3fa690240e798340f5a15918d5c,336,2906b810c7d4411798c6938adc9daaa5,,14.0,7,0
132803,offer viewed,389bc3fa690240e798340f5a15918d5c,354,2906b810c7d4411798c6938adc9daaa5,,14.75,7,0
150601,offer received,389bc3fa690240e798340f5a15918d5c,408,9b98b8c7a33c4b65b9aebfe6a799e6d9,,17.0,7,1
179953,offer viewed,389bc3fa690240e798340f5a15918d5c,438,9b98b8c7a33c4b65b9aebfe6a799e6d9,,18.25,7,1
200085,offer completed,389bc3fa690240e798340f5a15918d5c,498,2906b810c7d4411798c6938adc9daaa5,2.0,20.75,7,0


### Adding viewed and completion time

In [127]:
#test_set.loc[test_set['event'] == 'offer completed', 'completion time'] = test_set['time']
#test_set.loc[test_set['event'] == 'offer viewed', 'viewed time'] = test_set['time']

In [128]:
test_set = pd.concat([test_set, 
                    pd.get_dummies(test_set['event'], 
                    prefix=None)], 
                    axis=1)
test_set.drop(['event'], axis=1, inplace=True)

In [129]:
test_set

Unnamed: 0,person,time,offer id,reward,day,offer_duration,unique_offer_cum,completion time,viewed time,offer completed,offer received,offer viewed
12650,389bc3fa690240e798340f5a15918d5c,0,f19421c1d4aa40978ebb69ca19b0e20d,,0.0,5,0,,0.0,0,0,1
5,389bc3fa690240e798340f5a15918d5c,0,f19421c1d4aa40978ebb69ca19b0e20d,,0.0,5,0,,,0,1,0
32626,389bc3fa690240e798340f5a15918d5c,60,f19421c1d4aa40978ebb69ca19b0e20d,5.0,2.5,5,0,60.0,,1,0,0
53179,389bc3fa690240e798340f5a15918d5c,168,9b98b8c7a33c4b65b9aebfe6a799e6d9,,7.0,7,0,,,0,1,0
77212,389bc3fa690240e798340f5a15918d5c,192,9b98b8c7a33c4b65b9aebfe6a799e6d9,,8.0,7,0,,192.0,0,0,1
110831,389bc3fa690240e798340f5a15918d5c,336,2906b810c7d4411798c6938adc9daaa5,,14.0,7,0,,,0,1,0
132803,389bc3fa690240e798340f5a15918d5c,354,2906b810c7d4411798c6938adc9daaa5,,14.75,7,0,,354.0,0,0,1
150601,389bc3fa690240e798340f5a15918d5c,408,9b98b8c7a33c4b65b9aebfe6a799e6d9,,17.0,7,1,,,0,1,0
179953,389bc3fa690240e798340f5a15918d5c,438,9b98b8c7a33c4b65b9aebfe6a799e6d9,,18.25,7,1,,438.0,0,0,1
200085,389bc3fa690240e798340f5a15918d5c,498,2906b810c7d4411798c6938adc9daaa5,2.0,20.75,7,0,498.0,,1,0,0


In [130]:
test_set.loc[test_set['offer received'] == 1, 'received time'] = test_set['time']
test_set.loc[test_set['offer viewed'] == 1, 'viewed time'] = test_set['time']
test_set.loc[test_set['offer completed'] == 1, 'completion time'] = test_set['time']

In [131]:
aggregation_functions = {
    'offer received': 'sum', 
    'offer viewed': 'sum',
    'offer completed': 'sum', 
    'reward': 'sum',
    'offer_duration': 'first',
    'received time': 'first',
    'viewed time': 'first',
    'completion time': 'first'}

In [132]:
test_set = test_set.groupby(['person','offer id','unique_offer_cum']).aggregate(aggregation_functions)

In [133]:
test_set

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,offer received,offer viewed,offer completed,reward,offer_duration,received time,viewed time,completion time
person,offer id,unique_offer_cum,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
389bc3fa690240e798340f5a15918d5c,2906b810c7d4411798c6938adc9daaa5,0,1,1,1,2.0,7,336.0,354.0,498.0
389bc3fa690240e798340f5a15918d5c,9b98b8c7a33c4b65b9aebfe6a799e6d9,0,1,1,1,5.0,7,168.0,192.0,498.0
389bc3fa690240e798340f5a15918d5c,9b98b8c7a33c4b65b9aebfe6a799e6d9,1,1,1,0,0.0,7,408.0,438.0,
389bc3fa690240e798340f5a15918d5c,f19421c1d4aa40978ebb69ca19b0e20d,0,1,1,1,5.0,5,0.0,0.0,60.0
389bc3fa690240e798340f5a15918d5c,f19421c1d4aa40978ebb69ca19b0e20d,1,1,1,1,5.0,5,504.0,504.0,600.0
389bc3fa690240e798340f5a15918d5c,fafdcd668e3743c1bb461111dcafc2a4,0,1,1,1,2.0,10,576.0,588.0,600.0


In [52]:
test_set.index.values

array([ ('389bc3fa690240e798340f5a15918d5c', '2906b810c7d4411798c6938adc9daaa5', 0),
       ('389bc3fa690240e798340f5a15918d5c', '9b98b8c7a33c4b65b9aebfe6a799e6d9', 0),
       ('389bc3fa690240e798340f5a15918d5c', '9b98b8c7a33c4b65b9aebfe6a799e6d9', 1),
       ('389bc3fa690240e798340f5a15918d5c', 'f19421c1d4aa40978ebb69ca19b0e20d', 0),
       ('389bc3fa690240e798340f5a15918d5c', 'f19421c1d4aa40978ebb69ca19b0e20d', 1),
       ('389bc3fa690240e798340f5a15918d5c', 'fafdcd668e3743c1bb461111dcafc2a4', 0)], dtype=object)

In [38]:
test_set.iloc[1]

offer received       1.0
offer viewed         1.0
offer completed      1.0
reward               5.0
offer_duration       7.0
received time      168.0
viewed time        192.0
completion time    498.0
Name: (389bc3fa690240e798340f5a15918d5c, 9b98b8c7a33c4b65b9aebfe6a799e6d9, 0), dtype: float64

In [51]:
test_set.iloc[[4]]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,offer received,offer viewed,offer completed,reward,offer_duration,received time,viewed time,completion time
person,offer id,unique_offer_cum,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
389bc3fa690240e798340f5a15918d5c,f19421c1d4aa40978ebb69ca19b0e20d,1,1,1,1,5.0,5,504.0,504.0,600.0


Depending on the customer actions let's separate the offer rows by customer reaction.

- Effected: customer viewed and completed the offer in time - marketing was successful
- Churn: customer did not view or complete the offer
- Disinterested: customer saw the offer but did not complete - the offer was not interested enough
- Active: customer did not saw the offer but completed it anyway or saw it and completed it outside the time frame - an active customer, marketing not really needed

In [134]:
test_set2 = test_set.copy()

conditions = [
    (test_set['viewed time'].isnull()) & (test_set['completion time'].isnull()),
    (test_set['viewed time'].notnull()) & (test_set['completion time'].isnull()),
    ((test_set['completion time']-test_set['received time'])/24 < test_set['offer_duration']),
    (test_set['completion time'].notnull())]
choices = ['churn', 'disinterested', 'effected', 'active']
test_set2['status'] = np.select(conditions, choices, default='None')


In [135]:
test_set2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,offer received,offer viewed,offer completed,reward,offer_duration,received time,viewed time,completion time,status
person,offer id,unique_offer_cum,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
389bc3fa690240e798340f5a15918d5c,2906b810c7d4411798c6938adc9daaa5,0,1,1,1,2.0,7,336.0,354.0,498.0,effected
389bc3fa690240e798340f5a15918d5c,9b98b8c7a33c4b65b9aebfe6a799e6d9,0,1,1,1,5.0,7,168.0,192.0,498.0,active
389bc3fa690240e798340f5a15918d5c,9b98b8c7a33c4b65b9aebfe6a799e6d9,1,1,1,0,0.0,7,408.0,438.0,,disinterested
389bc3fa690240e798340f5a15918d5c,f19421c1d4aa40978ebb69ca19b0e20d,0,1,1,1,5.0,5,0.0,0.0,60.0,effected
389bc3fa690240e798340f5a15918d5c,f19421c1d4aa40978ebb69ca19b0e20d,1,1,1,1,5.0,5,504.0,504.0,600.0,effected
389bc3fa690240e798340f5a15918d5c,fafdcd668e3743c1bb461111dcafc2a4,0,1,1,1,2.0,10,576.0,588.0,600.0,effected


# Will the customer complete an offer?

In [136]:
test_set2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,offer received,offer viewed,offer completed,reward,offer_duration,received time,viewed time,completion time,status
person,offer id,unique_offer_cum,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
389bc3fa690240e798340f5a15918d5c,2906b810c7d4411798c6938adc9daaa5,0,1,1,1,2.0,7,336.0,354.0,498.0,effected
389bc3fa690240e798340f5a15918d5c,9b98b8c7a33c4b65b9aebfe6a799e6d9,0,1,1,1,5.0,7,168.0,192.0,498.0,active
389bc3fa690240e798340f5a15918d5c,9b98b8c7a33c4b65b9aebfe6a799e6d9,1,1,1,0,0.0,7,408.0,438.0,,disinterested
389bc3fa690240e798340f5a15918d5c,f19421c1d4aa40978ebb69ca19b0e20d,0,1,1,1,5.0,5,0.0,0.0,60.0,effected
389bc3fa690240e798340f5a15918d5c,f19421c1d4aa40978ebb69ca19b0e20d,1,1,1,1,5.0,5,504.0,504.0,600.0,effected
389bc3fa690240e798340f5a15918d5c,fafdcd668e3743c1bb461111dcafc2a4,0,1,1,1,2.0,10,576.0,588.0,600.0,effected


In [145]:
test_set3 = test_set2.reset_index()
test_set3

Unnamed: 0,person,offer id,unique_offer_cum,offer received,offer viewed,offer completed,reward,offer_duration,received time,viewed time,completion time,status
0,389bc3fa690240e798340f5a15918d5c,2906b810c7d4411798c6938adc9daaa5,0,1,1,1,2.0,7,336.0,354.0,498.0,effected
1,389bc3fa690240e798340f5a15918d5c,9b98b8c7a33c4b65b9aebfe6a799e6d9,0,1,1,1,5.0,7,168.0,192.0,498.0,active
2,389bc3fa690240e798340f5a15918d5c,9b98b8c7a33c4b65b9aebfe6a799e6d9,1,1,1,0,0.0,7,408.0,438.0,,disinterested
3,389bc3fa690240e798340f5a15918d5c,f19421c1d4aa40978ebb69ca19b0e20d,0,1,1,1,5.0,5,0.0,0.0,60.0,effected
4,389bc3fa690240e798340f5a15918d5c,f19421c1d4aa40978ebb69ca19b0e20d,1,1,1,1,5.0,5,504.0,504.0,600.0,effected
5,389bc3fa690240e798340f5a15918d5c,fafdcd668e3743c1bb461111dcafc2a4,0,1,1,1,2.0,10,576.0,588.0,600.0,effected


In [144]:
test_set3

0    1.0
1    0.0
2    0.0
3    1.0
4    1.0
5    1.0
Name: success, dtype: float64

In [146]:
test_set3['success'] = 0
test_set3

Unnamed: 0,person,offer id,unique_offer_cum,offer received,offer viewed,offer completed,reward,offer_duration,received time,viewed time,completion time,status,success
0,389bc3fa690240e798340f5a15918d5c,2906b810c7d4411798c6938adc9daaa5,0,1,1,1,2.0,7,336.0,354.0,498.0,effected,0
1,389bc3fa690240e798340f5a15918d5c,9b98b8c7a33c4b65b9aebfe6a799e6d9,0,1,1,1,5.0,7,168.0,192.0,498.0,active,0
2,389bc3fa690240e798340f5a15918d5c,9b98b8c7a33c4b65b9aebfe6a799e6d9,1,1,1,0,0.0,7,408.0,438.0,,disinterested,0
3,389bc3fa690240e798340f5a15918d5c,f19421c1d4aa40978ebb69ca19b0e20d,0,1,1,1,5.0,5,0.0,0.0,60.0,effected,0
4,389bc3fa690240e798340f5a15918d5c,f19421c1d4aa40978ebb69ca19b0e20d,1,1,1,1,5.0,5,504.0,504.0,600.0,effected,0
5,389bc3fa690240e798340f5a15918d5c,fafdcd668e3743c1bb461111dcafc2a4,0,1,1,1,2.0,10,576.0,588.0,600.0,effected,0


In [147]:
test_set3.loc[test_set3['status'] == 'effected', 'success'] = 1

In [148]:
test_set3

Unnamed: 0,person,offer id,unique_offer_cum,offer received,offer viewed,offer completed,reward,offer_duration,received time,viewed time,completion time,status,success
0,389bc3fa690240e798340f5a15918d5c,2906b810c7d4411798c6938adc9daaa5,0,1,1,1,2.0,7,336.0,354.0,498.0,effected,1
1,389bc3fa690240e798340f5a15918d5c,9b98b8c7a33c4b65b9aebfe6a799e6d9,0,1,1,1,5.0,7,168.0,192.0,498.0,active,0
2,389bc3fa690240e798340f5a15918d5c,9b98b8c7a33c4b65b9aebfe6a799e6d9,1,1,1,0,0.0,7,408.0,438.0,,disinterested,0
3,389bc3fa690240e798340f5a15918d5c,f19421c1d4aa40978ebb69ca19b0e20d,0,1,1,1,5.0,5,0.0,0.0,60.0,effected,1
4,389bc3fa690240e798340f5a15918d5c,f19421c1d4aa40978ebb69ca19b0e20d,1,1,1,1,5.0,5,504.0,504.0,600.0,effected,1
5,389bc3fa690240e798340f5a15918d5c,fafdcd668e3743c1bb461111dcafc2a4,0,1,1,1,2.0,10,576.0,588.0,600.0,effected,1


In [149]:
test_set4 = test_set3.copy()

In [150]:
test_set4 = test_set4[['person', 'offer id', 'success']]
test_set4

Unnamed: 0,person,offer id,success
0,389bc3fa690240e798340f5a15918d5c,2906b810c7d4411798c6938adc9daaa5,1
1,389bc3fa690240e798340f5a15918d5c,9b98b8c7a33c4b65b9aebfe6a799e6d9,0
2,389bc3fa690240e798340f5a15918d5c,9b98b8c7a33c4b65b9aebfe6a799e6d9,0
3,389bc3fa690240e798340f5a15918d5c,f19421c1d4aa40978ebb69ca19b0e20d,1
4,389bc3fa690240e798340f5a15918d5c,f19421c1d4aa40978ebb69ca19b0e20d,1
5,389bc3fa690240e798340f5a15918d5c,fafdcd668e3743c1bb461111dcafc2a4,1


In [107]:
test_set3 = test_set3[['person', 'status']]
test_set3 = pd.concat([test_set3.drop(['status'], axis=1), test_set3['status'].apply(pd.Series)], axis=1)
test_set3

Unnamed: 0,person,0
0,389bc3fa690240e798340f5a15918d5c,effected
1,389bc3fa690240e798340f5a15918d5c,active
2,389bc3fa690240e798340f5a15918d5c,disinterested
3,389bc3fa690240e798340f5a15918d5c,effected
4,389bc3fa690240e798340f5a15918d5c,effected
5,389bc3fa690240e798340f5a15918d5c,effected


In [110]:
test_set3 = test_set3[['person', 'status']]

In [111]:
test_set3

Unnamed: 0,person,status
0,389bc3fa690240e798340f5a15918d5c,effected
1,389bc3fa690240e798340f5a15918d5c,active
2,389bc3fa690240e798340f5a15918d5c,disinterested
3,389bc3fa690240e798340f5a15918d5c,effected
4,389bc3fa690240e798340f5a15918d5c,effected
5,389bc3fa690240e798340f5a15918d5c,effected


In [None]:
test_set3 = pd.concat([test_set3.drop(['status'], axis=1), test_set3['status'].apply(pd.Series)], axis=1)

In [None]:
# removing index column name so that it can be concatenated to customers dataframe
del customer_offer_events.index.name

In [None]:
customers = pd.concat([customers, customer_offer_events], axis=1, sort=False)

In [None]:
# Add percentages of offers viewed and offers completed
# Note - at this point it does not matter if offer affected person (it was viewed before completion)
customers['view_pct'] = customers['offer viewed'] / customers['offer received']
customers['completion_pct'] = customers['offer completed'] / customers['offer received']