In [1]:
import os

import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

In [2]:
# Location of the fixed-deposit training CSV. Setting the FD_DATA_PATH
# environment variable overrides it so the notebook can run on another machine;
# when the variable is unset the original workstation path is used unchanged.
FD_DATA_PATH = os.environ.get(
    "FD_DATA_PATH",
    r"C:\Users\guna5\OneDrive\Desktop\HOME-LOAN\DataSets\Fixed_Deposit_6.75_BaseRate.csv",
)

# Load the raw table; later cells read both the features and the target from it.
df = pd.read_csv(FD_DATA_PATH)

In [3]:
# Split the table into the feature matrix (every column except the target)
# and the regression target column 'BPS Adjustment (%)'.
X = df.drop('BPS Adjustment (%)', axis='columns')
y = df.loc[:, 'BPS Adjustment (%)']

In [4]:
# Feature columns grouped by the preprocessing each group receives later:
# categorical columns are one-hot encoded, numeric columns are standardised.
categorical_cols = [
    'Gender',
    'Marital Status',
    'Employment',
    'Base Rate Type',
]
numeric_cols = [
    'Age',
    'Income (₹)',
    'FD Amount (₹)',
    'FD Tenure (yrs)',
    'Credit Score',
    'Base Rate (%)',
]

In [5]:
# Column-wise preprocessing that runs as the first pipeline step:
#   * 'num' - standardise the columns listed in numeric_cols.
#   * 'cat' - one-hot encode the columns listed in categorical_cols, dropping
#             the first level of each column.
# handle_unknown='ignore' stops transform/predict from raising when a category
# shows up that was absent from the training split. Per the scikit-learn docs
# such a value is encoded as all zeros, i.e. like the dropped first level.
# NOTE(review): combining drop= with handle_unknown='ignore' requires
# scikit-learn >= 1.0 - confirm the installed version.
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numeric_cols),
    ('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_cols)
])

In [6]:
# Full estimator: the column preprocessor followed by a 100-tree random forest
# regressor. The seed is fixed so the forest's randomness is repeatable.
model_pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('model', RandomForestRegressor(random_state=42, n_estimators=100)),
    ]
)

In [7]:
# Hold out 20% of the rows as a test split; the seed fixes the shuffle.
# NOTE(review): X_test / y_test are not used by any cell visible here, so the
# fitted model is never scored on held-out data.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Fit the preprocessing steps and the forest on the training split. fit()
# returns the pipeline itself, which is what this cell displays.
model_pipeline.fit(X=X_train, y=y_train)

Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('num', StandardScaler(),
                                                  ['Age', 'Income (₹)',
                                                   'FD Amount (₹)',
                                                   'FD Tenure (yrs)',
                                                   'Credit Score',
                                                   'Base Rate (%)']),
                                                 ('cat',
                                                  OneHotEncoder(drop='first'),
                                                  ['Gender', 'Marital Status',
                                                   'Employment',
                                                   'Base Rate Type'])])),
                ('model', RandomForestRegressor(random_state=42))])

In [9]:
# Collect the fitted parts and column metadata in a plain dict.
# NOTE(review): this dict is not what is written to disk below - only the
# pipeline is dumped. The dict also omits the fitted one-hot encoder, and
# 'label_encoders' is always empty, so it could not reproduce the
# preprocessing on its own. Confirm what the code loading fd_model.pkl expects.
fd_model_objects = dict(
    model=model_pipeline['model'],
    # Fitted StandardScaler, looked up by its transformer name rather than position.
    scaler=model_pipeline['preprocessor'].named_transformers_['num'],
    label_encoders={},
    categorical_columns=categorical_cols,
    feature_columns=list(X.columns),
)

# Persist the whole fitted pipeline (preprocessing + model). dump() returns the
# list of written file names, which is what this cell displays.
joblib.dump(value=model_pipeline, filename='fd_model.pkl')

['fd_model.pkl']

In [10]:
# Show the raw input columns, in order, that were recorded from X.
expected_feature_columns = fd_model_objects['feature_columns']
print("Model's expected feature columns:", expected_feature_columns)

Model's expected feature columns: ['Age', 'Gender', 'Marital Status', 'Employment', 'Income (₹)', 'FD Amount (₹)', 'FD Tenure (yrs)', 'Credit Score', 'Base Rate (%)', 'Base Rate Type']
