# 03_Feature_Engineering.ipynb

## 1. Create 7‑day & 30‑day rolling order counts per customer  
## 2. Compute recency (days since last order) per customer  
## 3. Combine the features and save them to `data/processed/features.csv`


In [10]:
import pandas as pd

# Load cleaned transactions
# Read the CSV and convert the order_date column to datetimes in one chained
# expression, so later cells can use it as a time index.
df = (
    pd.read_csv("../data/processed/cleaned_transactions.csv")
    .assign(order_date=lambda tx: pd.to_datetime(tx['order_date']))
)

# 1️⃣ 7‑day rolling order count per customer
# For every order row, count that customer's non-null order_id values inside
# the trailing 7-day window that ends at the row's own order_date.
#
# Offset-based windows ('7D') need the datetime index in chronological order,
# so sort before indexing. mergesort is stable: orders sharing a timestamp
# keep their original relative order.
rolling_7 = (
    df
    .sort_values('order_date', kind='mergesort')
    .set_index('order_date')
    .groupby('customer_id')['order_id']
    .rolling('7D')
    .count()
    .reset_index(name='orders_7d')
)

# Preview
rolling_7.head()


Unnamed: 0,customer_id,order_date,orders_7d
0,C001,2025-01-01,1.0
1,C001,2025-01-03,2.0
2,C002,2025-01-02,1.0


In [11]:
# 2️⃣ 30‑day rolling order count per customer
# For every order row, count that customer's non-null order_id values inside
# the trailing 30-day window that ends at the row's own order_date.
#
# Offset-based windows ('30D') need the datetime index in chronological order,
# so sort before indexing. mergesort is stable: orders sharing a timestamp
# keep their original relative order.
rolling_30 = (
    df
    .sort_values('order_date', kind='mergesort')
    .set_index('order_date')
    .groupby('customer_id')['order_id']
    .rolling('30D')
    .count()
    .reset_index(name='orders_30d')
)


# 3️⃣ Customer recency: days since last order
# One row per customer holding the most recent order_date seen for them.
last_order = (
    df
    .groupby('customer_id')['order_date']
    .max()
    .rename('last_order_date')
    .reset_index()
)

# Illustration only: attach each customer's last order date to the 30-day
# rolling rows and show the resulting recency next to them.
recency = rolling_30.merge(last_order, on='customer_id')

# NOTE(review): recency is measured against the wall clock at run time, so the
# displayed values change from one run day to the next.
days_since_last = (pd.Timestamp.today() - recency['last_order_date']).dt.days
recency['recency_days'] = days_since_last

recency.head()


Unnamed: 0,customer_id,order_date,orders_30d,last_order_date,recency_days
0,C001,2025-01-01,1.0,2025-01-03,104
1,C001,2025-01-03,2.0,2025-01-03,104
2,C002,2025-01-02,1.0,2025-01-02,105


In [12]:
# 4️⃣ Combine 7d & 30d features and recency
# rolling_7 and rolling_30 hold one row per order. A customer with several
# orders on the same order_date therefore repeats the (customer_id, order_date)
# key, and merging on those two columns alone would pair every duplicate with
# every other one (k rows become k*k). Numbering the repeats within each key
# and joining on that counter as well keeps the merge row-for-row.
merge_keys = ['customer_id', 'order_date']

orders_7d_keyed = rolling_7.assign(
    _dup_rank=rolling_7.groupby(merge_keys).cumcount()
)
orders_30d_keyed = rolling_30.assign(
    _dup_rank=rolling_30.groupby(merge_keys).cumcount()
)

feats = (
    orders_7d_keyed
    .merge(orders_30d_keyed, on=merge_keys + ['_dup_rank'], how='outer')
    .drop(columns='_dup_rank')  # helper key only; not a feature
    .merge(last_order, on='customer_id', how='left')
)

# Capture "now" once so every row is measured against the same instant.
# NOTE(review): recency depends on the day the notebook is run.
as_of = pd.Timestamp.today()
feats['recency_days'] = (as_of - feats['last_order_date']).dt.days

feats.head()


Unnamed: 0,customer_id,order_date,orders_7d,orders_30d,last_order_date,recency_days
0,C001,2025-01-01,1.0,1.0,2025-01-03,104
1,C001,2025-01-03,2.0,2.0,2025-01-03,104
2,C002,2025-01-02,1.0,1.0,2025-01-02,105


In [15]:
import os

# Output locations for the engineered features.
processed_dir = "../data/processed"
features_path = "../data/processed/features.csv"

# Create the target folder when it is missing; an existing folder is left as is.
os.makedirs(processed_dir, exist_ok=True)

# Write the feature table without the DataFrame index column.
feats.to_csv(features_path, index=False)

print("Features saved to ../data/processed/features.csv")


Features saved to ../data/processed/features.csv
