In [1]:
pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import pandas as pd
import numpy as np
import xgboost as xgb
import pickle
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Fix the global NumPy seed so every run draws the same synthetic sample.
np.random.seed(42)
n_samples = 1500  # Increased sample size for better rule learning

# --- 1. Synthetic Data Generation ---
# Each feature is drawn independently from a uniform distribution over
# (low, high). The draws happen in the order listed here, which matters for
# reproducing the exact same values under the fixed seed.
feature_ranges = {
    # CPI Inflation: Target 4% (+/- 2%)
    'CPI_Inflation': (2.0, 8.5),
    # GDP Growth
    'GDP_Growth': (3.0, 9.0),
    # Crude Oil (Geopolitical Proxy)
    'Crude_Oil_Price': (50, 110),
    # RBI Sentiment: -1 (Hawkish) to +1 (Dovish)
    'RBI_Sentiment': (-1, 1),
    # FinMin Sentiment: scale is -1 (Concerned) to +1 (Pro-Growth),
    # but only the [-0.5, 1] part of it is sampled here.
    'FinMin_Sentiment': (-0.5, 1),
}
data = {
    feature: np.random.uniform(low, high, n_samples)
    for feature, (low, high) in feature_ranges.items()
}
df = pd.DataFrame(data)

# --- 2. Define Economic Logic (The Rules) ---
# Target Encoding: 0 = CUT, 1 = PAUSE, 2 = HIKE
def determine_rate_decision(row):
    """Map one observation to a rate decision using fixed threshold rules.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) indexed by the keys
            'CPI_Inflation', 'Crude_Oil_Price', 'RBI_Sentiment',
            'GDP_Growth' and 'FinMin_Sentiment'.

    Returns:
        int: 0 for CUT, 1 for PAUSE, 2 for HIKE.

    The rules are checked in priority order; the first match wins:
        1. Crisis inflation (CPI above 6.0)                      -> HIKE
        2. Geopolitical shock (oil above 95 and RBI sentiment
           below -0.2)                                           -> HIKE
        3. Recession / low growth (GDP below 5.0 and CPI
           below 5.5)                                            -> CUT
        4. "Growth push" (FinMin sentiment above 0.5 and CPI
           below 5.0)                                            -> CUT
        5. Otherwise                                             -> PAUSE
    """
    CUT, PAUSE, HIKE = 0, 1, 2
    inflation = row['CPI_Inflation']

    # Rules 1 and 2: either one alone is enough to hike.
    should_hike = inflation > 6.0 or (
        row['Crude_Oil_Price'] > 95 and row['RBI_Sentiment'] < -0.2
    )
    if should_hike:
        return HIKE

    # Rules 3 and 4: both cut rules also require inflation to be contained.
    should_cut = (row['GDP_Growth'] < 5.0 and inflation < 5.5) or (
        row['FinMin_Sentiment'] > 0.5 and inflation < 5.0
    )

    # Default when no rule fires: wait and watch.
    return CUT if should_cut else PAUSE

# Label every synthetic observation by running the rule function row by row.
df['Rate_Decision'] = df.apply(determine_rate_decision, axis=1)

# Model inputs are the five indicator columns; the target is the rule label.
feature_columns = [
    'CPI_Inflation',
    'GDP_Growth',
    'Crude_Oil_Price',
    'RBI_Sentiment',
    'FinMin_Sentiment',
]
X = df[feature_columns]
y = df['Rate_Decision']

# --- 3. Train Model ---
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Settings for the three-class classifier. 'use_label_encoder=False' is
# intentionally not passed: it was removed to silence a warning.
xgb_params = {
    'objective': 'multi:softprob',
    'num_class': 3,
    'eval_metric': 'mlogloss',
    'n_estimators': 200,
    'learning_rate': 0.05,
}
model = xgb.XGBClassifier(**xgb_params)
model.fit(X_train, y_train)

# --- 4. Save Model V1 ---
filename = 'rbi_rate_model_v1.pkl'
# Write through a context manager so the file handle is flushed and closed
# as soon as the dump finishes, instead of being left open until it happens
# to be garbage-collected.
# NOTE: pickle files can execute arbitrary code when loaded; only load this
# artifact from a trusted location.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

print(f"Model V1 Saved as {filename}!")
print("Model Performance:")
# Per-class precision/recall/F1 of the fitted model on the held-out test rows.
print(classification_report(y_test, model.predict(X_test)))

Model V1 Saved as rbi_rate_model_v1.pkl!
Model Performance:
              precision    recall  f1-score   support

           0       0.99      0.97      0.98        76
           1       0.98      1.00      0.99        94
           2       1.00      0.99      1.00       130

    accuracy                           0.99       300
   macro avg       0.99      0.99      0.99       300
weighted avg       0.99      0.99      0.99       300

