## 5 -> Customer Journey Analysis

### Importing Data

In [1]:
import pandas as pd
import numpy as np

# Load the feature-engineered transactions and parse InvoiceDate into a
# datetime column in one chained expression; later cells sort and diff on it.
df = (
  pd.read_csv('../data/processed/feature_engineered.csv')
    .assign(InvoiceDate=lambda frame: pd.to_datetime(frame['InvoiceDate']))
)

### Journey Events

In [2]:
def create_journey_events(df):
  """Build a per-customer, chronologically ordered event table.

  Every input row becomes one event. Rows are ordered by ``CustomerID`` and
  then ``InvoiceDate``, and two columns are added:

  * ``purchase_number`` -- 1-based position of the row within its customer's
    date-sorted rows.
  * ``days_since_last_purchase`` -- whole-day component of the gap between
    the row's ``InvoiceDate`` and the same customer's previous row; ``NaN``
    for each customer's first row.

  Parameters
  ----------
  df : pandas.DataFrame
    Must contain ``CustomerID`` and a datetime-typed ``InvoiceDate`` column.
    The frame passed in is not modified.

  Returns
  -------
  pandas.DataFrame
    Sorted copy of ``df`` with the two extra columns. Original index labels
    are kept (the index is not reset).
  """
  events = df.sort_values(['CustomerID', 'InvoiceDate'])

  # Group the *sorted* frame. Numbering rows via the unsorted ``df`` would
  # rank them by input order rather than by date, so ``purchase_number``
  # could disagree with the date-based gap computed below.
  by_customer = events.groupby('CustomerID')

  # Compute both derived series before attaching either one, so the groupby
  # only ever sees the frame in its freshly sorted state.
  purchase_number = by_customer.cumcount() + 1
  days_gap = by_customer['InvoiceDate'].diff().dt.days

  events['purchase_number'] = purchase_number
  events['days_since_last_purchase'] = days_gap

  return events

# Derive the journey table from the loaded transactions and preview the
# first five rows.
journey_events = df.pipe(create_journey_events)
print(journey_events.head(n=5))

       InvoiceNo StockCode                        Description  Quantity  \
37120     541431     23166     MEDIUM CERAMIC TOP STORAGE JAR     74215   
10515     537626     85116    BLACK CANDELABRA T-LIGHT HOLDER        12   
10516     537626     22375  AIRLINE BAG VINTAGE JET SET BROWN         4   
10517     537626     71477  COLOUR GLASS. STAR T-LIGHT HOLDER        12   
10518     537626     22492            MINI PAINT SET VINTAGE         36   

              InvoiceDate  UnitPrice  CustomerID         Country  TotalAmount  \
37120 2011-01-18 10:01:00       1.04       12346  United Kingdom      77183.6   
10515 2010-12-07 14:57:00       2.10       12347         Iceland         25.2   
10516 2010-12-07 14:57:00       4.25       12347         Iceland         17.0   
10517 2010-12-07 14:57:00       3.25       12347         Iceland         39.0   
10518 2010-12-07 14:57:00       0.65       12347         Iceland         23.4   

       Year  Month  DayOfWeek  HourOfDay  TimeOfDay  purchase_

### Journey Summary

In [3]:
# Summarise the journey by purchase position. For each purchase_number this
# gives the count of non-null CustomerID rows, the mean TotalAmount and the
# mean gap in days since the previous row, all rounded to two decimals.
# Named aggregation sets the output column names directly.
journey_summary = (
  journey_events
    .groupby('purchase_number')
    .agg(
      total_orders=('CustomerID', 'count'),
      avg_order_amount=('TotalAmount', 'mean'),
      avg_days_gap_between_purchases=('days_since_last_purchase', 'mean'),
    )
    .round(2)
)
print(journey_summary.head())

                 total_orders  avg_order_amount  \
purchase_number                                   
1                        4338             57.24   
2                        4267             39.47   
3                        4213             69.69   
4                        4159             27.66   
5                        4111             27.38   

                 avg_days_gap_between_purchases  
purchase_number                                  
1                                           NaN  
2                                          1.84  
3                                          1.61  
4                                          1.99  
5                                          1.42  


### Exporting Data

In [4]:
# Persist both outputs. The event table drops its row index, while the
# summary keeps its index because that index is purchase_number.
journey_events.to_csv(path_or_buf='../data/processed/customer_journey.csv', index=False)
journey_summary.to_csv(path_or_buf='../data/processed/journey_summary.csv', index=True)