In [5]:
import pandas as pd
import sys
import os

# Make the sibling source folder importable so the project-local module
# imported below can be found. The relative path is resolved against the
# current working directory at the time this runs.
# Guard against duplicates: re-running this cell would otherwise append the
# same entry to sys.path again on every execution.
SRC_DIR = os.path.abspath('../src')
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

# Now import the functions DIRECTLY from the module (no `src.` prefix!)
from feature_engineering import (
    extract_time_features,
    generate_aggregate_features,
    generate_rfm_features
)

# Read the cleaned transaction table, parsing the transaction start
# timestamp into datetimes at load time.
df = pd.read_csv(
    '../data/processed/cleaned_data_with_rfm.csv',
    parse_dates=['TransactionStartTime'],
)

# Step 1: derive the time-based columns.
df = extract_time_features(df)

# Step 2: build the aggregate and RFM feature tables from the enriched frame.
# NOTE(review): presumably each table has one row per CustomerId -- confirm,
# since duplicate keys would multiply rows in the left joins below.
agg_features = generate_aggregate_features(df)
rfm_features = generate_rfm_features(df)

# Step 3: left-join both feature tables onto the transactions by customer,
# aggregates first, then RFM.
# NOTE(review): the input file name suggests RFM columns may already be
# present; if so pandas will suffix the overlapping names -- verify.
df = (
    df
    .merge(agg_features, on='CustomerId', how='left')
    .merge(rfm_features, on='CustomerId', how='left')
)

# Optional cleanup (disabled): stray 'Unnamed: 16' / 'Unnamed: 17' columns
# could be dropped here if they appear in the data.

# Persist the enriched table without the index column and report success.
df.to_csv('../data/processed/processed_transactions.csv', index=False)
print("✅ Processed data saved.")


✅ Processed data saved.
