# Import Libraries and Data

In [47]:
import pandas as pd
import numpy as np

In [48]:
transactions = pd.read_pickle(r'data\03_int\int_transactions.pkl')
offers = pd.read_pickle(r'data/03_int/int_offers.pkl')
offers_per_cust = pd.read_pickle(r'data\04_fct\fct_offers_per_customer.pkl')
trans_per_cust = pd.read_pickle(r'data\04_fct\fct_transactions_per_cust.pkl')

# Match Transactions and Offers

In [49]:
# Join the transactions and offers dataframes on 'customer_id'
merged = pd.merge(transactions, offers, on='customer_id')

# Sort by 'transaction' and 'offer_received'
merged.sort_values(['transaction', 'offer_received'], inplace=True)

# Filter transactions that occurred during the offer period
filtered_transactions = merged[(merged['transaction'] >= merged['offer_received']) & 
                               (merged['transaction'] <= merged['offer_completed'])]

# Aggregate the filtered transactions
aggregated_transactions = filtered_transactions.groupby(['customer_id', 'offer_id']).agg(
    total_transactions=('transaction_amount', 'count')
    ,total_transaction_amount=('transaction_amount', 'sum')
    # ,transaction_list=('transaction', list) # uncomment to use as a check
    # ,amount_list=('transaction_amount', list) # uncomment to use as a check
).reset_index()

aggregated_transactions['total_transaction_amount'] = round(aggregated_transactions['total_transaction_amount'], 2)

aggregated_transactions.sort_values(by='total_transactions', ascending=False, inplace=True)
aggregated_transactions.head()


Unnamed: 0,customer_id,offer_id,total_transactions,total_transaction_amount
442,041fd7c6b796430a867c4e03d2ff8f4d,0b1e1539f2cc45b7b9fa7c272da2e1d7,22,78.37
11760,677605ce51a948d4ba74fa54229119e4,f19421c1d4aa40978ebb69ca19b0e20d,20,107.49
8169,479134f6760f4d69b5f368075ac1aca1,4d5c57ea9a6940dd891ad53e9dbe8da0,18,49.4
27713,f473bfcb688e4c42be46bb9441577e06,2906b810c7d4411798c6938adc9daaa5,18,70.9
13228,74fcd98b46fc47cba9423e23ce0b294f,0b1e1539f2cc45b7b9fa7c272da2e1d7,18,59.72


In [50]:
df_matched = pd.merge(offers, aggregated_transactions, on=['customer_id', 'offer_id'], how='left')

# Fill NaN values with 0
df_matched['total_transactions'] = df_matched['total_transactions'].fillna(0)
df_matched['total_transaction_amount'] = df_matched['total_transaction_amount'].fillna(0)

# Save data
df_matched.to_pickle(r'data\04_fct\fct_matched_offers.pkl')
df_matched.to_csv(r'data\04_fct\fct_matched_offers.csv', index=False)

df_matched.head()

Unnamed: 0,customer_id,offer_id,offer_received,offer_viewed,offer_completed,viewed_before_completion,difficulty,reward,duration_hrs,email,mobile,social,web,is_bogo,is_discount,is_informational,expiration,total_transactions,total_transaction_amount
0,0009655768c64bdeb2e877511632db8f,5a8bc65990b245e5a138643cd4eb9837,168.0,192.0,,,0,0,72,1,1,1,0,0,0,1,240.0,0.0,0.0
1,0009655768c64bdeb2e877511632db8f,3f207df678b143eea3cee63160fa8bed,336.0,372.0,,,0,0,96,1,1,0,1,0,0,1,432.0,0.0,0.0
2,0009655768c64bdeb2e877511632db8f,f19421c1d4aa40978ebb69ca19b0e20d,408.0,456.0,414.0,0.0,5,5,120,1,1,1,1,1,0,0,528.0,1.0,8.57
3,0009655768c64bdeb2e877511632db8f,fafdcd668e3743c1bb461111dcafc2a4,504.0,540.0,528.0,0.0,10,2,240,1,1,1,1,0,1,0,744.0,1.0,14.11
4,0009655768c64bdeb2e877511632db8f,2906b810c7d4411798c6938adc9daaa5,576.0,,576.0,0.0,10,2,168,1,1,0,1,0,1,0,744.0,1.0,10.27


# Join the aggregated transaction data with the offer data

In [51]:
### We want to know how much of the total transaction amount was due to the offers ###
# Sum total_transaction_amount and total_transactions per customer_id
total_offer_transactions = aggregated_transactions.groupby('customer_id').agg(
                                                                total_offer_transaction_amount=('total_transaction_amount', 'sum'),
                                                                total_offer_transactions=('total_transactions', 'sum')
                                                            ).reset_index()

# Merge total_transactions to total_offer_transactions on customer_id
transaction_ratio = pd.merge(total_offer_transactions, trans_per_cust, on='customer_id')

# Merge offers_per_cust to transaction_ratio on customer_id
transaction_ratio = pd.merge(offers_per_cust, transaction_ratio, on='customer_id')
transaction_ratio.sort_values(by='total_transactions', ascending=False, inplace=True)

# Export Data
transaction_ratio.to_pickle(r'data\04_fct\fct_agg_transactions_and_offers.pkl')
transaction_ratio.to_csv(r'data\04_fct\fct_agg_transactions_and_offers.csv', index=False)

transaction_ratio.head()

Unnamed: 0,customer_id,offers_received,offers_viewed,informational_offers,offers_completed,offers_viewed_before_completion,viewed_before_completion_reward,viewed_after_completion_reward,eligible_rewards,percent_of_offers_viewed,percent_of_offers_completed,percent_of_offers_viewed_before_completion,total_offer_transaction_amount,total_offer_transactions,total_transactions,total_transaction_amount
1800,79d9d4f86aca4bed9290350fb43817c2,5,3,1,4,2.0,5,7,12,0.6,1.0,0.5,110.66,15,36,173.41
469,8dbfa485249f409aa223a2130f40634a,5,5,0,3,3.0,10,0,30,1.0,0.6,1.0,44.76,22,36,76.46
1704,94de646f7b6041228ca7dec82adb97d2,5,4,0,4,3.0,12,5,27,0.8,0.8,0.75,55.85,17,35,90.23
4942,b1f4ece7d49342628a9ed77aee2cde58,4,4,0,4,4.0,12,0,12,1.0,1.0,1.0,88.18,23,32,133.02
272,5e60c6aa3b834e44b822ea43a3efea26,6,5,1,5,4.0,20,5,25,0.833333,1.0,0.8,85.54,25,32,103.66
