In [None]:
import pandas as pd
import numpy as np
import joblib
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report





In [None]:
# Read the CSV file into the `train` DataFrame
train = pd.read_csv('train_ctrUa4K.csv')

# Impute missing values in two passes (the order matters):
#   1. numeric columns receive their own column median;
#   2. whatever is still missing receives the column's most frequent value,
#      taken from the first row of DataFrame.mode(), which is computed on the
#      frame *after* the median pass.
train = train.fillna(train.median(numeric_only=True))
train = train.fillna(train.mode().iloc[0])



In [None]:
# Encode the binary categorical columns as 0/1 integers.
# Series.map() turns every value that is missing from the mapping into NaN, so
# re-running this cell on already-encoded data (or meeting an unexpected
# category) would silently blank the whole column. Guard against both cases.
binary_encodings = {
    'Gender': {'Male': 0, 'Female': 1},
    'Married': {'No': 0, 'Yes': 1},
    'Loan_Status': {'N': 0, 'Y': 1},
}
for column, mapping in binary_encodings.items():
    # Values that are already missing stay missing after map(); only the
    # non-null values need to be validated.
    present = train[column].dropna()

    # Already encoded (e.g. the cell was executed twice): leave the column as is.
    if len(present) > 0 and present.isin(list(mapping.values())).all():
        continue

    # Fail loudly instead of letting map() convert unknown categories to NaN.
    unknown = sorted(set(present.unique()) - set(mapping), key=str)
    if unknown:
        raise ValueError(
            f"Column '{column}' contains values with no encoding: {unknown}"
        )

    train[column] = train[column].map(mapping)

# Predictors: five columns of `train`; target: the Loan_Status column
X = train[
    [
        'Gender',
        'Married',
        'ApplicantIncome',
        'LoanAmount',
        'Credit_History',
    ]
]
y = train['Loan_Status']

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=10,
)



In [None]:
# Fit two tree-ensemble classifiers on the training split.
# Both are configured with 200 estimators, a depth limit of 10 and seed 10.

# Random Forest
rf_model = RandomForestClassifier(
    n_estimators=200,
    max_depth=10,
    random_state=10,
)
rf_model.fit(x_train, y_train)

# XGBoost (log-loss is used as the evaluation metric)
xgb_model = XGBClassifier(
    n_estimators=200,
    max_depth=10,
    use_label_encoder=False,
    eval_metric='logloss',
    random_state=10,
)
xgb_model.fit(x_train, y_train)



In [None]:
# Score each fitted model on the held-out split and report its accuracy

# Random Forest
rf_pred = rf_model.predict(x_test)
print("Random Forest Accuracy:", accuracy_score(y_test, rf_pred))

# XGBoost
xgb_pred = xgb_model.predict(x_test)
print("XGBoost Accuracy:", accuracy_score(y_test, xgb_pred))



In [None]:
# Persist both fitted models to the working directory with joblib.
# NOTE: these are pickle-based files; only load them back from trusted sources.
joblib.dump(value=rf_model, filename='random_forest.pkl')
joblib.dump(value=xgb_model, filename='xgboost.pkl')

print("Models saved successfully!")