In [1]:
import pandas as pd

# Load the three raw tables. The timestamp columns are listed in parse_dates so
# pandas attempts to convert them to datetimes while reading.
users = pd.read_csv(
    "../data/users.csv",
    parse_dates=["signup_date"],
)
transactions = pd.read_csv(
    "../data/transactions.csv",
    parse_dates=["transaction_time"],
)
repayments = pd.read_csv(
    "../data/repayments.csv",
    parse_dates=["due_date", "repayment_date"],
)

# Transaction features
# Night-time window, inclusive at both ends and wrapping past midnight:
# hours 23, 0, 1, 2, 3, 4 and 5 are flagged as night transactions.
NIGHT_START_HOUR = 23
NIGHT_END_HOUR = 5

transactions["hour"] = transactions["transaction_time"].dt.hour

# Vectorised mask instead of a per-row Python lambda. A missing hour compares
# False on both sides, so such rows are flagged 0.
transactions["is_night_txn"] = (
    (transactions["hour"] >= NIGHT_START_HOUR)
    | (transactions["hour"] <= NIGHT_END_HOUR)
).astype("int64")

# One row per user_id: number of transactions (non-null transaction_id values),
# mean and total amount, and the share of transactions flagged as night-time
# (the mean of a 0/1 flag is a ratio).
txn_features = transactions.groupby("user_id").agg(
    txn_count=("transaction_id", "count"),
    avg_amount=("amount", "mean"),
    night_txn_ratio=("is_night_txn", "mean"),
    total_spend=("amount", "sum"),
).reset_index()

# Repayment features
# Whole days between the due date and the actual repayment date; the value is
# negative when repayment_date is earlier than due_date.
lateness = repayments["repayment_date"] - repayments["due_date"]
repayments["delay_days"] = lateness.dt.days


def _count_defaults(statuses):
    """Return how many entries of *statuses* equal the string "default"."""
    return statuses.eq("default").sum()


# One row per user_id: number of defaulted repayments and mean delay in days.
repayments_by_user = repayments.groupby("user_id")
repayment_features = repayments_by_user.agg(
    default_count=("repayment_status", _count_defaults),
    avg_delay=("delay_days", "mean"),
).reset_index()

# Attach the per-user aggregates to the user table. Left joins keep every user,
# so users without transactions or repayments get NaN in the aggregate columns.
features = users.merge(txn_features, on="user_id", how="left")
features = features.merge(repayment_features, on="user_id", how="left")

# Zero-fill only the columns added by the merges. A blanket fillna(0) would also
# overwrite missing values in the original user columns (e.g. signup_date),
# silently turning absent profile data into the literal 0.
engineered_cols = [col for col in features.columns if col not in users.columns]
features[engineered_cols] = features[engineered_cols].fillna(0)
# NOTE(review): after this fill, a user with no repayment rows has avg_delay == 0,
# the same value as a user who always repaid exactly on the due date — confirm
# this is the intended encoding for downstream consumers.

features.to_csv("../data/features.csv", index=False)
