In [1]:
import os
import pickle

import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [2]:

# Location of the training CSV. The default is the original author's local
# path; set the HOME_LOAN_DATA environment variable to run the notebook on a
# machine where the file lives somewhere else.
DATA_PATH = os.environ.get(
    "HOME_LOAN_DATA",
    r"C:\Users\guna5\OneDrive\Desktop\HOME-LOAN\DataSets\updated-dataset.csv",
)
df = pd.read_csv(DATA_PATH)

# 'Dynamic Rate (%)' is the regression target; 'Base Rate (%)' is dropped from
# the features together with it.
drop_cols = ['Base Rate (%)', 'Dynamic Rate (%)']
X = df.drop(columns=drop_cols)
y = df['Dynamic Rate (%)']


In [3]:
# Display the column labels of the loaded frame (features plus both rate columns).
df.columns

Index(['Age', 'Gender', 'Marital Status', 'Dependents', 'Employment',
       'Income (₹)', 'Years in Job', 'Credit Score', 'Active Loans',
       'Missed Payments (12M)', 'Loan Amount (₹)', 'LTV (%)', 'DTI (%)',
       'Loan Tenure (yrs)', 'Repayment Type', 'Co-applicant Present',
       'Co-applicant Income (₹)', 'Existing Customer', 'Salary Account',
       'Base Rate Type', 'Base Rate (%)', 'Dynamic Rate (%)'],
      dtype='object')

In [4]:
# Display the distinct values present in the 'Repayment Type' column.
df['Repayment Type'].unique()

array(['Step-up EMI', 'Standard EMI'], dtype=object)

In [5]:

# Rebuild the feature frame from the raw data so this cell is idempotent.
# The loop below overwrites the string columns of X with integer codes; if the
# cell were re-run on the already-encoded X, no object columns would be found
# and le_dict / cat_cols would silently become empty.
X = df.drop(columns=drop_cols)

# Label-encode every object-dtype column, keeping one fitted encoder per column
# so the same mapping can be applied at prediction time.
cat_cols = X.select_dtypes(include=['object']).columns
le_dict = {}
for col in cat_cols:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col].astype(str))
    le_dict[col] = le

# Standardise all (now numeric) features.
# NOTE(review): the scaler is fit on every row, before the train/test split
# that follows, so held-out rows contribute to the scaling statistics.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [6]:
# Hold out 20% of the scaled rows for evaluation; the fixed random_state makes
# the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Fit a 100-tree random forest regressor on the training portion. The fitted
# estimator returned by fit() is the cell's displayed output.
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

RandomForestRegressor(random_state=42)

In [7]:
from sklearn.metrics import *
import numpy as np
def evaluate_model(model, X_test, y_test):
    """Score a fitted regressor on held-out data and print a short report.

    Parameters
    ----------
    model
        Object exposing ``predict(X)``.
    X_test
        Feature rows passed to ``model.predict``.
    y_test
        True target values matching ``X_test``.

    Returns
    -------
    tuple
        ``(mae, rmse, r2)``, where ``rmse`` is the square root of the mean
        squared error.
    """
    predictions = model.predict(X_test)

    mae = mean_absolute_error(y_test, predictions)
    mse = mean_squared_error(y_test, predictions)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, predictions)

    report_lines = [
        "\nModel Evaluation Metrics:",
        f"Mean Absolute Error (MAE): {mae:.4f}",
        f"Root Mean Squared Error (RMSE): {rmse:.4f}",
        f"R-squared (R²): {r2:.4f}",
    ]
    for line in report_lines:
        print(line)

    return mae, rmse, r2

# Evaluate on the held-out split; the three metrics are kept as globals because
# the save cell stores them alongside the model.
mae, rmse, r2 = evaluate_model(model, X_test, y_test)


Model Evaluation Metrics:
Mean Absolute Error (MAE): 0.6698
Root Mean Squared Error (RMSE): 0.8193
R-squared (R²): 0.5960


In [8]:
# Single source of truth for the output file, so the confirmation message
# below always names the file that was actually written.
model_path = 'home_loan_model.pkl'

# Everything needed to reproduce preprocessing and prediction later: the fitted
# model, the fitted scaler, the per-column label encoders, the feature order,
# and the metrics measured on the held-out split.
model_objects = {
    'model': model,
    'scaler': scaler,
    'label_encoders': le_dict,
    'feature_columns': list(X.columns),
    'target_column': 'Dynamic Rate (%)',
    'categorical_columns': list(cat_cols),
    'evaluation_metrics': {
        'MAE': mae,
        'RMSE': rmse,
        'R2': r2
    }
}

with open(model_path, 'wb') as f:
    pickle.dump(model_objects, f, protocol=pickle.HIGHEST_PROTOCOL)

print(f"\nModel and preprocessing objects saved to {model_path}")
print("\nModel Performance Summary:")
print(f"- MAE on test set: {mae:.4f}")
print(f"- RMSE on test set: {rmse:.4f}")
print(f"- R² on test set: {r2:.4f}")


Model and preprocessing objects saved to random_forest_model.pkl

Model Performance Summary:
- MAE on test set: 0.6698
- RMSE on test set: 0.8193
- R² on test set: 0.5960


In [9]:
import pickle
import pandas as pd

def load_model(pickle_file):
    """Read and return the object stored in a pickle file.

    Parameters
    ----------
    pickle_file
        Path of the file to open in binary mode.

    Returns
    -------
    The unpickled object.

    Notes
    -----
    Unpickling can execute arbitrary code, so only pass files that come from
    a trusted source.
    """
    with open(pickle_file, 'rb') as handle:
        loaded = pickle.load(handle)
    return loaded

def make_prediction(model_objects, input_data):
    """Predict the target for a single input record.

    Parameters
    ----------
    model_objects : dict
        Bundle with the keys ``'model'``, ``'scaler'``, ``'label_encoders'``
        and ``'feature_columns'``. ``'categorical_columns'`` is optional and
        defaults to an empty list.
    input_data : mapping
        One record keyed by feature name. Keys that are not listed in
        ``feature_columns`` are ignored.

    Returns
    -------
    The first element of ``model.predict`` for the prepared row.

    Raises
    ------
    KeyError
        If ``input_data`` lacks one or more entries of ``feature_columns``.

    Notes
    -----
    Categorical values are matched against each encoder's ``classes_`` exactly
    first, then ignoring case and surrounding whitespace. A value that still
    does not match falls back to the encoder's first class, as before, but a
    ``UserWarning`` is now emitted so the substitution is visible. Errors from
    ``pd.to_numeric`` on non-numeric values are propagated unchanged.
    """
    import warnings

    model = model_objects['model']
    scaler = model_objects['scaler']
    label_encoders = model_objects['label_encoders']
    feature_columns = model_objects['feature_columns']
    cat_cols = model_objects.get('categorical_columns', [])

    raw_df = pd.DataFrame([input_data])
    missing = [col for col in feature_columns if col not in raw_df.columns]
    if missing:
        raise KeyError(f"input_data is missing required feature(s): {missing}")

    # Explicit copy: the column assignments below must not target a view of
    # raw_df (avoids pandas' SettingWithCopyWarning).
    input_df = raw_df[feature_columns].copy()

    for col in cat_cols:
        if col not in label_encoders:
            continue
        le = label_encoders[col]

        # Lookup tables from the textual form of each known class back to the
        # class label itself; the folded table tolerates case/whitespace drift.
        exact = {str(label): label for label in le.classes_}
        folded = {}
        for label in le.classes_:
            folded.setdefault(str(label).strip().casefold(), label)

        resolved = []
        for raw in input_df[col].astype(str):
            if raw in exact:
                resolved.append(exact[raw])
                continue
            match = folded.get(raw.strip().casefold())
            if match is None:
                match = le.classes_[0]
                warnings.warn(
                    f"Unknown value {raw!r} for column {col!r}; "
                    f"falling back to {match!r}",
                    UserWarning,
                    stacklevel=2,
                )
            resolved.append(match)

        input_df[col] = le.transform(pd.Series(resolved, index=input_df.index))

    input_df = input_df.apply(pd.to_numeric, errors='raise')

    X_input = scaler.transform(input_df)
    return model.predict(X_input)[0]

if __name__ == "__main__":
    # Load the bundle of model + preprocessing objects from the working directory.
    model_objects = load_model('home_loan_model.pkl')
    
    print("Categorical columns with encoders:", list(model_objects['label_encoders'].keys()))
    
    # Hand-written example record: one entry per feature the model expects.
    custom_input = {
        'Age': 35,
        # NOTE(review): lower-case 'male' — confirm this spelling matches a
        # label the 'Gender' encoder was fit on.
        'Gender': 'male',
        'Marital Status': 'Single',
        'Dependents': 1,
        'Employment': 'Salaried',
        'Income (₹)': 4500000,
        'Years in Job': 25,
        'Credit Score': 780,
        'Active Loans': 2,
        'Missed Payments (12M)': 0,
        'Loan Amount (₹)': 1000000,
        'LTV (%)': 70,
        'DTI (%)': 30,
        'Loan Tenure (yrs)': 35,
        'Repayment Type': 'Standard EMI',
        'Co-applicant Present': 'No',
        'Co-applicant Income (₹)': 0,
        'Existing Customer': 'Yes',
        'Salary Account': 'Yes',
        'Base Rate Type': 'Floating'
    }
    
    # Any failure while preparing or scoring the record is reported as a
    # message instead of a traceback.
    try:
        prediction = make_prediction(model_objects, custom_input)
        print(f"\nPredicted Dynamic Rate: {prediction:.2f}%")
    except Exception as e:
        print(f"\nPrediction failed: {str(e)}")

Categorical columns with encoders: ['Gender', 'Marital Status', 'Employment', 'Repayment Type', 'Co-applicant Present', 'Existing Customer', 'Salary Account', 'Base Rate Type']

Predicted Dynamic Rate: 6.84%
