In [12]:
import pandas as pd
import numpy as np

transactions = pd.read_pickle(r'data\03_int\int_transactions.pkl')
offers = pd.read_pickle(r'data/03_int/int_transcript_offers.pkl')

In [13]:
#Count the number of offer_ids per customer_id in the offers dataframe
offers_per_cust = offers.groupby('customer_id').agg(
    num_offers_received=('offer_id', 'count'),
    num_offers_viewed=('offer_viewed', 'count'),
    num_offers_completed=('offer_completed', 'count')
).reset_index()

offers_per_cust.sort_values('num_offers_received', ascending=False, inplace=True)
offers_per_cust.head()

Unnamed: 0,customer_id,num_offers_received,num_offers_viewed,num_offers_completed
16993,ffff82501cea40309d5fdd7edcca4a07,6,6,6
11164,a63cf729b0b44a12be0993e414676489,6,6,5
2113,2119673616e04d3eb001c92dee19ddc5,6,6,5
11070,a4f3350b08934d41a80e526317842b40,6,5,2
2108,20fce895bebe4d5c9ce55fe83ea9ef90,6,5,4


In [14]:
# Count transactions and sum transaction amounts per customer_id
total_transactions = transactions.groupby('customer_id').agg(
        total_transactions=('transaction_amount', 'count'),
        total_transaction_amount=('transaction_amount', 'sum')
).reset_index()

total_transactions.sort_values(by='total_transactions', ascending=False, inplace=True)
total_transactions.head()

Unnamed: 0,customer_id,total_transactions,total_transaction_amount
7887,79d9d4f86aca4bed9290350fb43817c2,36,173.41
9239,8dbfa485249f409aa223a2130f40634a,36,76.46
9702,94de646f7b6041228ca7dec82adb97d2,35,90.23
11652,b1f4ece7d49342628a9ed77aee2cde58,32,133.02
6118,5e60c6aa3b834e44b822ea43a3efea26,32,103.66


In [15]:
### This counts the number of transactions during the offer period ###
merged = pd.merge(transactions, offers, on='customer_id')

# Sort by 'transaction' and 'offer_received'
merged.sort_values(['transaction', 'offer_received'], inplace=True)

merged.head()

Unnamed: 0,customer_id,transaction,transaction_amount,offer_id,offer_type,offer_received,offer_viewed,offer_completed,time_to_completion,offer_effect,difficulty,duration_hrs,expiration,reward
4187,0206e1388c34454caba2b7fce3123943,0,16.62,4d5c57ea9a6940dd891ad53e9dbe8da0,bogo,0.0,0.0,0.0,0.0,1.0,10,120,120.0,10.0
4357,020cd0f8047142e18a754303f9337d53,0,20.5,fafdcd668e3743c1bb461111dcafc2a4,discount,0.0,12.0,0.0,0.0,0.0,10,240,240.0,2.0
5687,029e063479234fb1b6c8727c3d45de62,0,37.34,ae264e3637204a6fb9bb56bc8210ddfd,bogo,0.0,,0.0,0.0,-1.0,10,168,168.0,10.0
5983,02c083884c7d45b39cc68e1314fec56c,0,0.83,ae264e3637204a6fb9bb56bc8210ddfd,bogo,0.0,0.0,,,0.0,10,168,168.0,
7663,0383a12d3a2d4fbb86ec83bd0c25eead,0,2.65,5a8bc65990b245e5a138643cd4eb9837,informational,0.0,0.0,,,,0,72,72.0,


In [16]:
### This counts the number of transactions during the offer period ###
merged = pd.merge(transactions, offers, on='customer_id')

# Sort by 'transaction' and 'offer_received'
merged.sort_values(['transaction', 'offer_received'], inplace=True)

# Drop duplicate transactions, keeping the first occurrence
merged.drop_duplicates(subset='transaction', keep='first', inplace=True)

# Filter transactions that occurred during the offer period
filtered_transactions = merged[(merged['transaction'] >= merged['offer_received']) & 
                               (merged['transaction'] <= merged['offer_completed'])]

# Aggregate the filtered transactions
aggregated = filtered_transactions.groupby(['customer_id', 'offer_id']).agg(
    total_transactions=('transaction_amount', 'count'),
    total_transaction_amount=('transaction_amount', 'sum'),
    transaction_list=('transaction', list),
    amount_list=('transaction_amount', list)
).reset_index()

aggregated.sort_values(by='total_transactions', ascending=False, inplace=True)

aggregated.head()

Unnamed: 0,customer_id,offer_id,total_transactions,total_transaction_amount,transaction_list,amount_list
2,00840a2ca5d2408e982d56544dc14ffd,2906b810c7d4411798c6938adc9daaa5,5,22.29,"[168, 234, 246, 294, 528]","[6.86, 3.07, 5.11, 5.46, 1.79]"
4,0099bf30e4cb4265875266eb3eb25eab,fafdcd668e3743c1bb461111dcafc2a4,3,9.44,"[126, 156, 222]","[3.24, 1.13, 5.07]"
0,0020c2b971eb4e9188eac86d93036a77,fafdcd668e3743c1bb461111dcafc2a4,1,17.63,[54],[17.63]
1,00715b6e55c3431cb56ff7307eb19675,ae264e3637204a6fb9bb56bc8210ddfd,1,27.26,[12],[27.26]
3,0091d2b6a5ea4defaa8393e4e816db60,4d5c57ea9a6940dd891ad53e9dbe8da0,1,18.26,[6],[18.26]


In [17]:
### Combining offers with aggregated transactions to check the code ###
# merge offers to aggregated on customer_id and offer_id
combined = pd.merge(offers, aggregated, on=['customer_id', 'offer_id'])
combined.head()

Unnamed: 0,customer_id,offer_id,offer_type,offer_received,offer_viewed,offer_completed,time_to_completion,offer_effect,difficulty,duration_hrs,expiration,reward,total_transactions,total_transaction_amount,transaction_list,amount_list
0,0020c2b971eb4e9188eac86d93036a77,fafdcd668e3743c1bb461111dcafc2a4,discount,0.0,12.0,54.0,42.0,2.0,10,240,240.0,2.0,1,17.63,[54],[17.63]
1,0020c2b971eb4e9188eac86d93036a77,fafdcd668e3743c1bb461111dcafc2a4,discount,0.0,12.0,54.0,42.0,2.0,10,240,240.0,2.0,1,17.63,[54],[17.63]
2,00715b6e55c3431cb56ff7307eb19675,ae264e3637204a6fb9bb56bc8210ddfd,bogo,0.0,36.0,12.0,12.0,0.0,10,168,168.0,10.0,1,27.26,[12],[27.26]
3,00840a2ca5d2408e982d56544dc14ffd,2906b810c7d4411798c6938adc9daaa5,discount,0.0,510.0,540.0,30.0,2.0,10,168,168.0,2.0,5,22.29,"[168, 234, 246, 294, 528]","[6.86, 3.07, 5.11, 5.46, 1.79]"
4,0091d2b6a5ea4defaa8393e4e816db60,4d5c57ea9a6940dd891ad53e9dbe8da0,bogo,0.0,6.0,6.0,0.0,1.0,10,120,120.0,10.0,1,18.26,[6],[18.26]


In [18]:
### We want to know how much of the total transaction amount was due to the offer ###
# Step 1: Sum total_transaction_amount and total_transactions per customer_id
# group aggregated by customer_id. 
total_offer_transactions = aggregated.groupby('customer_id').agg(
                                                                total_offer_transaction_amount=('total_transaction_amount', 'sum'),
                                                                total_offer_transactions=('total_transactions', 'sum')
                                                            ).reset_index()

# merge total_transactions to total_offer_transactions on customer_id
transaction_ratio = pd.merge(total_offer_transactions, total_transactions, on='customer_id')

#Merge offers_per_cust to transaction_ratio on customer_id
transaction_ratio = pd.merge(offers_per_cust, transaction_ratio, on='customer_id')
transaction_ratio.head()

Unnamed: 0,customer_id,num_offers_received,num_offers_viewed,num_offers_completed,total_offer_transaction_amount,total_offer_transactions,total_transactions,total_transaction_amount
0,00715b6e55c3431cb56ff7307eb19675,6,4,5,27.26,1,15,375.12
1,00d791e20c564add8056498e40eb56cc,5,5,4,5.26,1,19,59.62
2,00840a2ca5d2408e982d56544dc14ffd,5,2,3,22.29,5,17,62.93
3,0020c2b971eb4e9188eac86d93036a77,5,4,3,17.63,1,8,196.86
4,0091d2b6a5ea4defaa8393e4e816db60,4,4,4,18.26,1,12,279.16
