In [85]:
import pandas as pd
import numpy as np

# Load the transcript data
transcript = pd.read_pickle(r'data\02_stg\stg_transcript.pkl')
offers = pd.read_pickle(r'data\02_stg\stg_transcript.pkl')
portfolio = pd.read_pickle(r'data\02_stg\stg_portfolio_featurized_channels.pkl')

#Drop the 'transaction' rows
offers = offers[offers['event'] != 'transaction']

offers.head()

Unnamed: 0,customer_id,offer_id,event,time_hrs,transaction_amount,reward
0,0009655768c64bdeb2e877511632db8f,5a8bc65990b245e5a138643cd4eb9837,offer received,168,,
1,0009655768c64bdeb2e877511632db8f,5a8bc65990b245e5a138643cd4eb9837,offer viewed,192,,
3,0009655768c64bdeb2e877511632db8f,3f207df678b143eea3cee63160fa8bed,offer received,336,,
4,0009655768c64bdeb2e877511632db8f,3f207df678b143eea3cee63160fa8bed,offer viewed,372,,
5,0009655768c64bdeb2e877511632db8f,f19421c1d4aa40978ebb69ca19b0e20d,offer received,408,,


In [86]:
#Group transcript by customer_id and count the number of offers received
offers_received = offers.groupby('customer_id')['offer_id'].count().reset_index()
offers_received.columns = ['customer_id', 'offers_received']
offers_received = offers_received.sort_values('offers_received', ascending=False)
offers_received.head()

#Looks like all customers received an offer during the experiment

Unnamed: 0,customer_id,offers_received
16993,ffff82501cea40309d5fdd7edcca4a07,18
12654,bd2cdd691aca4bb0a0e039979ee5de5c,18
12837,c0231649f05d40889e3a6e1172303b37,18
8385,7e45faba0a8346379d0c296f1df8ae1d,18
2135,2193267a91d747dfa7197bb39c78b2e5,18


In [87]:
portfolio.head()

Unnamed: 0,offer_id,offer_type,difficulty,reward,duration_days,duration_hrs,email,mobile,social,web
0,ae264e3637204a6fb9bb56bc8210ddfd,bogo,10,10,7,168,1,1,1,0
1,4d5c57ea9a6940dd891ad53e9dbe8da0,bogo,10,10,5,120,1,1,1,1
2,3f207df678b143eea3cee63160fa8bed,informational,0,0,4,96,1,1,0,1
3,9b98b8c7a33c4b65b9aebfe6a799e6d9,bogo,5,5,7,168,1,1,0,1
4,0b1e1539f2cc45b7b9fa7c272da2e1d7,discount,20,5,10,240,1,0,0,1


In [97]:
### Here I'm pivoting out the offer events so that we can understand the funnel completion rates ###
# Pivot out the events
df_pivot_offers = offers.pivot_table(index=['customer_id', 'offer_id'], 
                            values='time_hrs', 
                            columns='event', 
                            aggfunc='first').reset_index()
#Re order columns
df_pivot_offers = df_pivot_offers[['customer_id', 'offer_id', 'offer received', 'offer viewed', 'offer completed']]

#Rename columns
df_pivot_offers.columns = ['customer_id', 'offer_id', 'offer_received', 'offer_viewed', 'offer_completed']

def time_to_complete(row):
    # Check if the offer was viewed
    offer_viewed = pd.notnull(row['offer_viewed'])
    # Check if the offer was completed
    offer_completed = pd.notnull(row['offer_completed'])
    
    # If the offer was viewed and completed, and the time of viewing is less than or equal to the time of completion
    if offer_viewed and offer_completed and row['offer_viewed'] <= row['offer_completed']:
        # Return the time difference between completion and viewing
        return row['offer_completed'] - row['offer_viewed']
    # If the offer was not viewed but was completed
    elif offer_completed:
        # Return the time difference between completion and receiving the offer
        return row['offer_completed'] - row['offer_received']
    else:
        # If the offer was neither viewed nor completed, return NaN
        return np.nan

# Apply the function to each row
df_pivot_offers['time_to_completion'] = df_pivot_offers.apply(time_to_complete, axis=1)

#Sort columns by customer_id and offer_received to make customer segmentation easier
df_pivot_offers = df_pivot_offers.sort_values(by=['customer_id', 'offer_received'])

#Reset index
df_pivot_offers = df_pivot_offers.reset_index(drop=True)

df_pivot_offers.head()

Unnamed: 0,customer_id,offer_id,offer_received,offer_viewed,offer_completed,time_to_completion
0,0009655768c64bdeb2e877511632db8f,5a8bc65990b245e5a138643cd4eb9837,168.0,192.0,,
1,0009655768c64bdeb2e877511632db8f,3f207df678b143eea3cee63160fa8bed,336.0,372.0,,
2,0009655768c64bdeb2e877511632db8f,f19421c1d4aa40978ebb69ca19b0e20d,408.0,456.0,414.0,6.0
3,0009655768c64bdeb2e877511632db8f,fafdcd668e3743c1bb461111dcafc2a4,504.0,540.0,528.0,24.0
4,0009655768c64bdeb2e877511632db8f,2906b810c7d4411798c6938adc9daaa5,576.0,,576.0,0.0


In [98]:
### Here I'm creating a dataframe so that we join rewards to offers completed###

#Create a new dataframe 'offers_completed' where event = 'offer completed' and reward is not null
offers_completed = offers[(offers['event'] == 'offer completed') & (offers['reward'].notnull())]

#Drop columns
offers_completed = offers_completed.drop(columns=['event', 'time_hrs', 'transaction_amount'])

# Display the first few rows
offers_completed.head()

Unnamed: 0,customer_id,offer_id,reward
7,0009655768c64bdeb2e877511632db8f,f19421c1d4aa40978ebb69ca19b0e20d,5.0
11,0009655768c64bdeb2e877511632db8f,fafdcd668e3743c1bb461111dcafc2a4,2.0
16,0009655768c64bdeb2e877511632db8f,2906b810c7d4411798c6938adc9daaa5,2.0
33,0011e0d4e6b944f998e987f904e8c1e5,2298d6c36e964ae4a3e7e9706d1fb8c2,3.0
41,0011e0d4e6b944f998e987f904e8c1e5,0b1e1539f2cc45b7b9fa7c272da2e1d7,5.0


In [99]:
#Merge df_pivot_offers with offers_completed
df_merged = pd.merge(df_pivot_offers, offers_completed, on=['customer_id', 'offer_id'], how='left')
df_merged.head()

Unnamed: 0,customer_id,offer_id,offer_received,offer_viewed,offer_completed,time_to_completion,reward
0,0009655768c64bdeb2e877511632db8f,5a8bc65990b245e5a138643cd4eb9837,168.0,192.0,,,
1,0009655768c64bdeb2e877511632db8f,3f207df678b143eea3cee63160fa8bed,336.0,372.0,,,
2,0009655768c64bdeb2e877511632db8f,f19421c1d4aa40978ebb69ca19b0e20d,408.0,456.0,414.0,6.0,5.0
3,0009655768c64bdeb2e877511632db8f,fafdcd668e3743c1bb461111dcafc2a4,504.0,540.0,528.0,24.0,2.0
4,0009655768c64bdeb2e877511632db8f,2906b810c7d4411798c6938adc9daaa5,576.0,,576.0,0.0,2.0


In [100]:
# Select the columns to merge
portfolio_subset = portfolio[['offer_id', 'offer_type', 'difficulty', 'duration_hrs']]

# Merge df_merged with the subset of portfolio
df_merged = pd.merge(df_merged, portfolio_subset, on='offer_id', how='left')

# Created 'expiration' column
df_merged['expiration'] = df_merged['offer_received'] + df_merged['duration_hrs']

#Re- arrange columns
df_merged = df_merged[['customer_id', 'offer_id', 'offer_type', 'difficulty', 'duration_hrs', 'offer_received', 'offer_viewed', 'offer_completed', 'time_to_completion','expiration', 'reward']]

# Display the first few rows
df_merged.head()

Unnamed: 0,customer_id,offer_id,offer_type,difficulty,duration_hrs,offer_received,offer_viewed,offer_completed,time_to_completion,expiration,reward
0,0009655768c64bdeb2e877511632db8f,5a8bc65990b245e5a138643cd4eb9837,informational,0,72,168.0,192.0,,,240.0,
1,0009655768c64bdeb2e877511632db8f,3f207df678b143eea3cee63160fa8bed,informational,0,96,336.0,372.0,,,432.0,
2,0009655768c64bdeb2e877511632db8f,f19421c1d4aa40978ebb69ca19b0e20d,bogo,5,120,408.0,456.0,414.0,6.0,528.0,5.0
3,0009655768c64bdeb2e877511632db8f,fafdcd668e3743c1bb461111dcafc2a4,discount,10,240,504.0,540.0,528.0,24.0,744.0,2.0
4,0009655768c64bdeb2e877511632db8f,2906b810c7d4411798c6938adc9daaa5,discount,10,168,576.0,,576.0,0.0,744.0,2.0


In [106]:
def calculate_offer_effect(row):
    offer_viewed = pd.notnull(row['offer_viewed'])
    offer_completed = pd.notnull(row['offer_completed'])
    
    if offer_viewed and offer_completed:
        if row['offer_viewed'] < row['offer_completed']:
            # The offer was viewed and completed. This is the ideal behavior
            return 2
        if row['offer_viewed'] == row['offer_completed']:
            # The offer was viewed roughly at the same time it was completed
            # Not ideal, but I am considering it as a positive effect because the customer is associating their purchase with the offer
            return 1
        else:
            # The offer was viewed after it was completed. This indicates that the customer was probably already going to make the purchase
            return 0
    elif offer_viewed and row['offer_viewed'] <= (row['expiration'] - 4):
        # The offer was viewed at least 4 hours before the expiration, but not completed
        return 0
    elif not offer_viewed and offer_completed:
        # The offer was not viewed, but completed
        return -1
    else:
        # The offer was not viewed and not completed
        return -2

# Apply the function to each row
df_merged['offer_effect'] = df_merged.apply(calculate_offer_effect, axis=1)

# re-arrange columns: customer_id, offer_id, offer_type, offer_received, offer_viewed, offer_completed, time_to_completion, offer_effect, difficulty, duration_hrs, expiration, reward
cols = ['customer_id', 'offer_id', 'offer_type', 'offer_received', 'offer_viewed', 'offer_completed', 'time_to_completion', 'offer_effect', 'difficulty', 'duration_hrs', 'expiration', 'reward']
df_merged = df_merged[cols]

# Display the first few rows
df_merged.head()

Unnamed: 0,customer_id,offer_id,offer_type,offer_received,offer_viewed,offer_completed,time_to_completion,offer_effect,difficulty,duration_hrs,expiration,reward
0,0009655768c64bdeb2e877511632db8f,5a8bc65990b245e5a138643cd4eb9837,informational,168.0,192.0,,,0,0,72,240.0,
1,0009655768c64bdeb2e877511632db8f,3f207df678b143eea3cee63160fa8bed,informational,336.0,372.0,,,0,0,96,432.0,
2,0009655768c64bdeb2e877511632db8f,f19421c1d4aa40978ebb69ca19b0e20d,bogo,408.0,456.0,414.0,6.0,0,5,120,528.0,5.0
3,0009655768c64bdeb2e877511632db8f,fafdcd668e3743c1bb461111dcafc2a4,discount,504.0,540.0,528.0,24.0,0,10,240,744.0,2.0
4,0009655768c64bdeb2e877511632db8f,2906b810c7d4411798c6938adc9daaa5,discount,576.0,,576.0,0.0,-1,10,168,744.0,2.0


In [102]:
#Interesting. In this example, the customer is completing the offer before the offer is viewed.
#filter transcript where customer_id 0009655768c64bdeb2e877511632db8f and offer_id = fafdcd668e3743c1bb461111dcafc2a4
transcript[(transcript['customer_id'] == '0009655768c64bdeb2e877511632db8f') & (transcript['offer_id'] == 'fafdcd668e3743c1bb461111dcafc2a4')]

Unnamed: 0,customer_id,offer_id,event,time_hrs,transaction_amount,reward
9,0009655768c64bdeb2e877511632db8f,fafdcd668e3743c1bb461111dcafc2a4,offer received,504,,
11,0009655768c64bdeb2e877511632db8f,fafdcd668e3743c1bb461111dcafc2a4,offer completed,528,,2.0
12,0009655768c64bdeb2e877511632db8f,fafdcd668e3743c1bb461111dcafc2a4,offer viewed,540,,


In [103]:
example_customer_pivot = df_merged[df_merged['customer_id'] == '0009655768c64bdeb2e877511632db8f']

# re-arrange columns: customer_id, offer_id, offer_type, offer_received, offer_viewed, offer_completed, time_to_completion, offer_effect, difficulty, duration_hrs, expiration, reward
cols = ['customer_id', 'offer_id', 'offer_type', 'offer_received', 'offer_viewed', 'offer_completed', 'time_to_completion', 'offer_effect', 'difficulty', 'duration_hrs', 'expiration', 'reward']
example_customer_pivot = example_customer_pivot[cols]


example_customer_pivot.to_csv(r'data\03_int\int_example_customer_pivot.csv', index=False)
example_customer_pivot.head()

Unnamed: 0,customer_id,offer_id,offer_type,offer_received,offer_viewed,offer_completed,time_to_completion,offer_effect,difficulty,duration_hrs,expiration,reward
0,0009655768c64bdeb2e877511632db8f,5a8bc65990b245e5a138643cd4eb9837,informational,168.0,192.0,,,0,0,72,240.0,
1,0009655768c64bdeb2e877511632db8f,3f207df678b143eea3cee63160fa8bed,informational,336.0,372.0,,,0,0,96,432.0,
2,0009655768c64bdeb2e877511632db8f,f19421c1d4aa40978ebb69ca19b0e20d,bogo,408.0,456.0,414.0,6.0,0,5,120,528.0,5.0
3,0009655768c64bdeb2e877511632db8f,fafdcd668e3743c1bb461111dcafc2a4,discount,504.0,540.0,528.0,24.0,0,10,240,744.0,2.0
4,0009655768c64bdeb2e877511632db8f,2906b810c7d4411798c6938adc9daaa5,discount,576.0,,576.0,0.0,-1,10,168,744.0,2.0


In [104]:
example_customer_pivot_2 = df_merged[df_merged['customer_id'] == 'ffff82501cea40309d5fdd7edcca4a07']

# re-arrange columns: customer_id, offer_id, offer_type, offer_received, offer_viewed, offer_completed, time_to_completion, offer_effect, difficulty, duration_hrs, expiration, reward
cols = ['customer_id', 'offer_id', 'offer_type', 'offer_received', 'offer_viewed', 'offer_completed', 'time_to_completion', 'offer_effect', 'difficulty', 'duration_hrs', 'expiration', 'reward']
example_customer_pivot_2 = example_customer_pivot_2[cols]

example_customer_pivot_2.to_csv(r'data\03_int\int_example_customer_pivot_2.csv', index=False)
example_customer_pivot_2.head()

Unnamed: 0,customer_id,offer_id,offer_type,offer_received,offer_viewed,offer_completed,time_to_completion,offer_effect,difficulty,duration_hrs,expiration,reward
67865,ffff82501cea40309d5fdd7edcca4a07,fafdcd668e3743c1bb461111dcafc2a4,discount,0.0,6.0,60.0,54.0,2,10,240,240.0,2.0
67866,ffff82501cea40309d5fdd7edcca4a07,0b1e1539f2cc45b7b9fa7c272da2e1d7,discount,168.0,174.0,198.0,24.0,2,20,240,408.0,5.0
67867,ffff82501cea40309d5fdd7edcca4a07,2906b810c7d4411798c6938adc9daaa5,discount,336.0,354.0,384.0,30.0,2,10,168,504.0,2.0
67868,ffff82501cea40309d5fdd7edcca4a07,2906b810c7d4411798c6938adc9daaa5,discount,336.0,354.0,384.0,30.0,2,10,168,504.0,2.0
67869,ffff82501cea40309d5fdd7edcca4a07,2906b810c7d4411798c6938adc9daaa5,discount,336.0,354.0,384.0,30.0,2,10,168,504.0,2.0


In [105]:
# So none of the information offers have a time to completion. We can extrapolate by the expiration to see if there was an action taken
# # Filter df_merged where offer_type is 'information' and time_to_completion is not null
# info_offers = df_merged[(df_merged['offer_type'] == 'informational') & (df_merged['time_to_completion'].notnull())]
# info_offers.head()

Notes
- Customers completed the offer before they saw it
    - Probably need to come up with some metric for this. Probably a boolean 'completed_before_viewed' and 'completed_before_viewed_hrs'
- offer_type 'informational' do not have values for 'time_to_completion', but they do have expiration dates. We can use expiration dates to see if the information offer types trigger transactions
- We want to count the transactions that happened during the discount offers and outside the discount offers. We also want to know the sum of the transactions

We also want to look to see if there's some industry standard metrics around this