## 4 -> Customer Metrics

### Reading Data

In [22]:
import pandas as pd
import numpy as np

df = pd.read_csv('../data/processed/feature_engineered.csv')
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])

### Calculating Customer Metrics

In [23]:
customer_metrics = df.groupby('CustomerID').agg({
  'InvoiceNo':'count', #total no of orders per customer
  'TotalAmount':['sum','mean'], #total amount ordered and average order amount
  'InvoiceDate': ['min', 'max'], #first and last purchase date
}).round(2)

In [24]:
customer_metrics.columns = ['order_count', 'total_spend', 'avg_order_value', 'first_purchase',  'last_purchase']

In [25]:
customer_metrics['customer_lifespan'] = (
  pd.to_datetime(customer_metrics['last_purchase']) - pd.to_datetime(customer_metrics['first_purchase'])
).dt.days

In [26]:
customer_metrics['customer_lifespan'] = np.where(customer_metrics['customer_lifespan'] == 0, 1, customer_metrics['customer_lifespan'])

customer_metrics['purchase_frequency'] = (
  customer_metrics['order_count'] / 
    (customer_metrics['customer_lifespan'] / 30)
).round(2)

In [27]:
customer_metrics.head()

Unnamed: 0_level_0,order_count,total_spend,avg_order_value,first_purchase,last_purchase,customer_lifespan,purchase_frequency
CustomerID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
12346,1,77183.6,77183.6,2011-01-18 10:01:00,2011-01-18 10:01:00,1,30.0
12347,182,4310.0,23.68,2010-12-07 14:57:00,2011-12-07 15:52:00,365,14.96
12348,31,1797.24,57.98,2010-12-16 19:09:00,2011-09-25 13:13:00,282,3.3
12349,73,1757.55,24.08,2011-11-21 09:51:00,2011-11-21 09:51:00,1,2190.0
12350,17,334.4,19.67,2011-02-02 16:01:00,2011-02-02 16:01:00,1,510.0


### Exporting Data

In [28]:
customer_metrics.to_csv('../data/processed/customer_metrics.csv')