In [None]:
"""
Event log (306648 events x 4 fields)
Records every offer- and transaction-related event (offer received, offer viewed, transaction, offer completed) together with its timestamp.
    event (str) - record description (e.g. transaction, offer received, offer viewed)
    person (str) - customer id
    time (int) - time in hours since start of test. The data begins at time t=0
    value (dict) - payload whose keys depend on the event type
        offer id: (string/hash) offer identifier; not present for "transaction" events
        amount: (numeric) money spent in "transaction"
        reward: (numeric) money gained from "offer completed"

"""
from pathlib import Path

import pandas as pd
# Stage the raw event log: flatten the 'value' payload into columns, unify the
# offer identifier, order the events per customer and persist the result.

# Input and output locations. Built with pathlib so the same code runs on
# Windows, Linux and macOS (hard-coded backslash separators only resolve on
# Windows).
RAW_TRANSCRIPT_PATH = Path('data') / '01_raw' / 'transcript.json'
STG_DIR = Path('data') / '02_stg'

# Read the raw event log (line-delimited JSON, one record per line)
transcript = pd.read_json(RAW_TRANSCRIPT_PATH, lines=True)

# Normalize the 'value' column (one new column per dictionary key) and join it
# with the original DataFrame on the row index
transcript = transcript.join(pd.json_normalize(transcript['value']))

# Rename columns. The offer identifier arrives under two different keys,
# 'offer id' and 'offer_id'; they are kept apart here and coalesced below.
# NOTE(review): presumably 'offer_id' is the key used by "offer completed"
# events - confirm against the raw data.
transcript = transcript.rename(
    columns={
        'person': 'customer_id',
        'offer id': 'offer_id',
        'offer_id': 'offer_reward_id',
        'time': 'time_hrs',
        'amount': 'transaction_amount',
    }
)

# Drop the 'value' column now that its content lives in dedicated columns
transcript = transcript.drop(['value'], axis=1)

# Coalesce the 'offer_id' and 'offer_reward_id' columns: keep 'offer_id' where
# it is set and fall back to 'offer_reward_id' otherwise
transcript['offer_id'] = transcript['offer_id'].combine_first(transcript['offer_reward_id'])

# Sort the data by 'customer_id' and 'time_hrs'
transcript = transcript.sort_values(['customer_id', 'time_hrs'])

# Reset the index so it follows the new row order
transcript = transcript.reset_index(drop=True)

# Re-arrange the columns (this also discards the helper 'offer_reward_id')
cols = ['customer_id', 'offer_id', 'event', 'time_hrs', 'transaction_amount', 'reward']
transcript = transcript[cols]

# Save the data, creating the staging directory first so a fresh checkout
# does not fail on a missing folder
STG_DIR.mkdir(parents=True, exist_ok=True)
transcript.to_csv(STG_DIR / 'stg_transcript.csv', index=False)
transcript.to_pickle(STG_DIR / 'stg_transcript.pkl')

transcript.head()

Unnamed: 0,customer_id,offer_id,event,time_hrs,transaction_amount,reward
0,0009655768c64bdeb2e877511632db8f,5a8bc65990b245e5a138643cd4eb9837,offer received,168,,
1,0009655768c64bdeb2e877511632db8f,5a8bc65990b245e5a138643cd4eb9837,offer viewed,192,,
2,0009655768c64bdeb2e877511632db8f,,transaction,228,22.16,
3,0009655768c64bdeb2e877511632db8f,3f207df678b143eea3cee63160fa8bed,offer received,336,,
4,0009655768c64bdeb2e877511632db8f,3f207df678b143eea3cee63160fa8bed,offer viewed,372,,
