There are a couple of different types of transactions:
 * Transactions that helped complete the offer 
 * Transactions that completed the offer -> done
 * Transactions that were prompted by an informational ad
 * Transactions that had nothing to do with any of the offers

We need to identify all of these transactions

In [51]:
import pandas as pd
import numpy as np

# Load the staged transcript events.
# Paths are written with forward slashes: they resolve on Windows and on
# POSIX systems, whereas a backslash is only a separator on Windows.
transcript = pd.read_pickle('data/02_stg/stg_transcript.pkl')

# Keep only the rows whose event is a transaction
transactions = transcript[transcript['event'] == 'transaction']

# Drop unneeded columns
transactions = transactions.drop(columns=['offer_id', 'event', 'reward'])

# The event time is renamed to 'transaction'; it is later used as the merge
# key against the offers' 'offer_completed' time
transactions = transactions.rename(columns={'time_hrs': 'transaction'})

# Start from a clean 0..n-1 index after the filtering
transactions = transactions.reset_index(drop=True)

# Persist the intermediate table as both CSV and pickle
transactions.to_csv('data/03_int/int_transactions.csv', index=False)
transactions.to_pickle('data/03_int/int_transactions.pkl')

transactions.head()

Unnamed: 0,customer_id,transaction,transaction_amount
0,0009655768c64bdeb2e877511632db8f,228,22.16
1,0009655768c64bdeb2e877511632db8f,414,8.57
2,0009655768c64bdeb2e877511632db8f,528,14.11
3,0009655768c64bdeb2e877511632db8f,552,13.56
4,0009655768c64bdeb2e877511632db8f,576,10.27


In [54]:
### Merge completed offers with transactions ###
# Read the intermediate offers table
offers = pd.read_pickle(r'data/03_int/int_transcript_offers.pkl')

# Keep only the offers that have a completion time
has_completion = offers['offer_completed'].notna()
completed_offers = offers.loc[has_completion]

# Attach the transaction recorded for the same customer at the completion
# time (left join, so every completed offer row is kept)
comp_offers_trans = pd.merge(
    completed_offers,
    transactions,
    how='left',
    left_on=['customer_id', 'offer_completed'],
    right_on=['customer_id', 'transaction'],
)

# 'transaction' duplicates 'offer_completed' after the join; drop it together
# with 'duration_hrs'
comp_offers_trans = comp_offers_trans.drop(columns=['transaction', 'duration_hrs'])

# True when the completing transaction alone was smaller than the offer's
# difficulty, i.e. earlier transactions must have contributed as well
comp_offers_trans['req_mult_trans'] = comp_offers_trans['difficulty'].gt(
    comp_offers_trans['transaction_amount']
)
comp_offers_trans.head()

Unnamed: 0,customer_id,offer_id,offer_type,offer_received,offer_viewed,offer_completed,time_to_completion,offer_effect,difficulty,expiration,reward,transaction_amount,req_mult_trans
0,0009655768c64bdeb2e877511632db8f,f19421c1d4aa40978ebb69ca19b0e20d,bogo,408.0,456.0,414.0,6.0,0.0,5,528.0,5.0,8.57,False
1,0009655768c64bdeb2e877511632db8f,fafdcd668e3743c1bb461111dcafc2a4,discount,504.0,540.0,528.0,24.0,0.0,10,744.0,2.0,14.11,False
2,0009655768c64bdeb2e877511632db8f,2906b810c7d4411798c6938adc9daaa5,discount,576.0,,576.0,0.0,-1.0,10,744.0,2.0,10.27,False
3,0011e0d4e6b944f998e987f904e8c1e5,2298d6c36e964ae4a3e7e9706d1fb8c2,discount,168.0,186.0,252.0,66.0,2.0,7,336.0,3.0,11.93,False
4,0011e0d4e6b944f998e987f904e8c1e5,0b1e1539f2cc45b7b9fa7c272da2e1d7,discount,408.0,432.0,576.0,144.0,2.0,20,648.0,5.0,22.05,False


In [55]:
def get_customer_data(customer_id, df, output_path):
    """Select one customer's rows from *df*, save them as CSV and return them.

    Args:
        customer_id: Value to match against the ``customer_id`` column.
        df: DataFrame containing a ``customer_id`` column.
        output_path: Destination of the CSV file (written without the index).

    Returns:
        The rows of *df* whose ``customer_id`` equals *customer_id*, with
        their original index labels.
    """
    is_customer = df['customer_id'] == customer_id
    customer_rows = df.loc[is_customer]
    customer_rows.to_csv(output_path, index=False)
    return customer_rows

# Export the completed offers of one example customer for manual inspection.
# NOTE(review): the result is assigned back to comp_offers_trans, so from here
# on that name holds only this customer's rows, not the full merged table.
example_customer_id = '0009655768c64bdeb2e877511632db8f'
example_output_path = r'data\03_int\int_example_customer_1_completed_offers_transactions.csv'
comp_offers_trans = get_customer_data(
    example_customer_id,
    comp_offers_trans,
    example_output_path,
)
comp_offers_trans.head()

Unnamed: 0,customer_id,offer_id,offer_type,offer_received,offer_viewed,offer_completed,time_to_completion,offer_effect,difficulty,expiration,reward,transaction_amount,req_mult_trans
0,0009655768c64bdeb2e877511632db8f,f19421c1d4aa40978ebb69ca19b0e20d,bogo,408.0,456.0,414.0,6.0,0.0,5,528.0,5.0,8.57,False
1,0009655768c64bdeb2e877511632db8f,fafdcd668e3743c1bb461111dcafc2a4,discount,504.0,540.0,528.0,24.0,0.0,10,744.0,2.0,14.11,False
2,0009655768c64bdeb2e877511632db8f,2906b810c7d4411798c6938adc9daaa5,discount,576.0,,576.0,0.0,-1.0,10,744.0,2.0,10.27,False


In [53]:
### Identify the transactions that were not used to complete an offer ###
# Offers without a completion time
not_completed = offers['offer_completed'].isna()
uncompleted_offers = offers.loc[not_completed]

# Left join every transaction to the completed offer (if any) that has the
# same customer and a completion time equal to the transaction time
orphan_transactions = pd.merge(
    transactions,
    completed_offers,
    how='left',
    left_on=['customer_id', 'transaction'],
    right_on=['customer_id', 'offer_completed'],
)

# Rows with no matching completed offer have a missing 'offer_completed';
# keep those rows and only the transaction columns
unmatched = orphan_transactions['offer_completed'].isna()
orphan_transactions = orphan_transactions.loc[
    unmatched, ['customer_id', 'transaction', 'transaction_amount']
]

orphan_transactions.head()

Unnamed: 0,customer_id,transaction,transaction_amount
0,0009655768c64bdeb2e877511632db8f,228,22.16
3,0009655768c64bdeb2e877511632db8f,552,13.56
5,0009655768c64bdeb2e877511632db8f,660,12.36
6,0009655768c64bdeb2e877511632db8f,690,28.16
7,0009655768c64bdeb2e877511632db8f,696,18.41
