### CFPI MOMENTUM DIRECTION PREDICTION

### 1. Feature + label construction

In [1]:

import pandas as pd
import numpy as np

FEATURE_COLS = [
    "fuel_pressure",
    "index",
    "fx_pressure",
    "policy_pressure",
    "cfpi_momentum_1m",
    "cfpi_momentum_3m",
    "cfpi_volatility_3m"
]

def add_ml_features(df):
    """
    Adds ML features needed for CFPI direction prediction
    """
    df = df.sort_values(["state", "date"]).copy()

    df["cfpi_momentum_1m"] = df.groupby("state")["cfpi"].diff(1)
    df["cfpi_momentum_3m"] = df.groupby("state")["cfpi"].diff(3)

    df["cfpi_volatility_3m"] = (
        df.groupby("state")["cfpi"]
        .rolling(3)
        .std()
        .reset_index(level=0, drop=True)
    )

    return df


def build_ml_dataset(df):
    """
    Training dataset builder
    """
    df = add_ml_features(df)

    df["cfpi_next"] = df.groupby("state")["cfpi"].shift(-1)
    df["target_up"] = (df["cfpi_next"] > df["cfpi"]).astype(int)

    ml_df = df[FEATURE_COLS + ["target_up"]].dropna()

    X = ml_df[FEATURE_COLS]
    y = ml_df["target_up"]

    return X, y, FEATURE_COLS


### Model training + evaluation

In [2]:
import joblib
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import roc_auc_score, classification_report

MODEL_PATH = "../models/cfpi_direction_model.pkl"

def train_model(df):
    X, y, feature_cols = build_ml_dataset(df)

    tscv = TimeSeriesSplit(n_splits=5)
    auc_scores = []

    for train_idx, test_idx in tscv.split(X):
        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

        model = LogisticRegression(max_iter=1000)
        model.fit(X_train, y_train)

        y_pred_prob = model.predict_proba(X_test)[:, 1]
        auc = roc_auc_score(y_test, y_pred_prob)
        auc_scores.append(auc)

    print(f"Mean CV AUC: {sum(auc_scores) / len(auc_scores):.3f}")

    # Train final model on full data
    final_model = LogisticRegression(max_iter=1000)
    final_model.fit(X, y)

    joblib.dump(
        {"model": final_model, "features": feature_cols},
        MODEL_PATH
    )

    print("Model saved to", MODEL_PATH)


df = pd.read_csv("../data/data.csv", parse_dates=["date"])
train_model(df)


Mean CV AUC: 0.849
Model saved to ../models/cfpi_direction_model.pkl


### 3. Prediction using ML Model

In [6]:
MODEL_PATH = "../models/cfpi_direction_model.pkl"
OUTPUT_PATH = "../data/cfpi_direction_forecast.csv"

def predict_direction(df):
    bundle = joblib.load(MODEL_PATH)
    model = bundle["model"]

    df = add_ml_features(df)

    df = df.sort_values(["state", "date"]).copy()
    latest = df.groupby("state").tail(1)

    X_latest = latest[FEATURE_COLS].dropna()
    probs = model.predict_proba(X_latest)[:, 1]

    output = latest.loc[X_latest.index, ["date", "state", "cfpi"]].copy()
    output["prob_cfpi_up_next_month"] = probs
    output["direction_signal"] = (
        output["prob_cfpi_up_next_month"] > 0.6
    ).map({True: "Likely Increase", False: "Stable / Decrease"})

    output.to_csv(OUTPUT_PATH, index=False)
    print("CFPI direction forecast generated.")



df = pd.read_csv("../data/data.csv", parse_dates=["date"])
predict_direction(df)


CFPI direction forecast generated.


In [None]:
MODEL_PATH = "../models/cfpi_direction_model.pkl"
OUTPUT_PATH = "../data/cfpi_direction_forecast.csv"

def predict_direction(df):
    bundle = joblib.load(MODEL_PATH)
    model = bundle["model"]

    df = add_ml_features(df)

    df = df.sort_values(["state", "date"]).copy()
    latest = df.groupby("state").tail(1)

    X_latest = latest[FEATURE_COLS].dropna()
    probs = model.predict_proba(X_latest)[:, 1]

    output = latest.loc[X_latest.index, ["date", "state", "cfpi"]].copy()
    output["prob_cfpi_up_next_month"] = probs
    output["direction_signal"] = (
        output["prob_cfpi_up_next_month"] > 0.6
    ).map({True: "Likely Increase", False: "Stable / Decrease"})

    output.to_csv(OUTPUT_PATH, index=False)
    print("CFPI direction forecast generated.")



df = pd.read_csv("../data/data.csv", parse_dates=["date"])
predict_direction(df)


array([0.50397286, 0.30503292, 0.21098839, 0.08624679, 0.14373794,
       0.2304521 , 0.58404015, 0.10840165, 0.28983254, 0.37538625,
       0.33692419, 0.17859821, 0.16830379, 0.12506242, 0.34988533,
       0.40911084])