# Loan Prediction: Model Training and Evaluation

## 1. Load and Preprocess Data

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Load the dataset fresh on every run so this cell is idempotent.
df = pd.read_csv('data/loan_train.csv')

# Drop Loan_ID so it is not part of the feature matrix built below.
df = df.drop('Loan_ID', axis=1)

# Handle missing values: mode for the categorical columns, median for the
# numeric ones. Columns not listed here are left untouched.
#
# The fill is done with a single `df.fillna(dict)` that is assigned back to
# `df`. The previous `df[col].fillna(..., inplace=True)` form is a chained
# in-place call on a column selection: pandas 2.x warns about it, and with
# Copy-on-Write enabled it updates a temporary and leaves `df` unfilled.
mode_filled_cols = ['Gender', 'Married', 'Dependents', 'Self_Employed']
median_filled_cols = ['LoanAmount', 'Loan_Amount_Term', 'Credit_History']

fill_values = {col: df[col].mode()[0] for col in mode_filled_cols}
fill_values.update({col: df[col].median() for col in median_filled_cols})
df = df.fillna(value=fill_values)

assert not df[list(fill_values)].isna().any().any(), "imputation left missing values"

# Categorical feature encoding. A separate encoder is fitted per column and
# kept in `label_encoders`, so each integer code can be mapped back to its
# original label later (e.g. label_encoders['Loan_Status'].inverse_transform).
# `le` is still bound to the last encoder fitted ('Dependents') after the loop.
label_encoders = {}
for col in ['Gender', 'Married', 'Education', 'Self_Employed', 'Property_Area', 'Loan_Status', 'Dependents']:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Split data into features (X) and target (y)
X = df.drop('Loan_Status', axis=1)
y = df['Loan_Status']

# NOTE(review): the imputation statistics and label encodings above are
# computed on the full frame, i.e. before this split, so held-out rows
# contribute to them. Consider fitting them on the training rows only.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## 2. Model Training and Evaluation

In [None]:
# Fixed seed for the stochastic estimators so that Restart & Run All
# reproduces the same metrics.
RANDOM_STATE = 42

# Candidate classifiers, keyed by the display name used in the results table.
models = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'Random Forest': RandomForestClassifier(random_state=RANDOM_STATE),
    'XGBoost': xgb.XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=RANDOM_STATE)
}

# One list per output column; the key order here is the column order of
# `results_df`.
results = {
    'Model': [],
    'Accuracy': [],
    'Precision': [],
    'Recall': [],
    'F1-Score': [],
    'ROC-AUC': [],
    'CV_Accuracy': []
}

for name, model in models.items():
    # 5-fold cross-validated accuracy on the training split only.
    # cross_val_score fits clones, so `model` is still unfitted afterwards.
    cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')

    # Fit on the full training split, then score the held-out test split.
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # Column 1 holds the predicted probability of the second class.
    y_pred_proba = model.predict_proba(X_test)[:, 1]

    # Compute the whole row first and append it in one step, so a failure in
    # any metric cannot leave the column lists with different lengths.
    row = {
        'Model': name,
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred),
        'Recall': recall_score(y_test, y_pred),
        'F1-Score': f1_score(y_test, y_pred),
        'ROC-AUC': roc_auc_score(y_test, y_pred_proba),
        'CV_Accuracy': cv_scores.mean(),
    }
    for column, value in row.items():
        results[column].append(value)

results_df = pd.DataFrame(results)

## 3. Model Comparison

In [None]:
# Show the per-model metrics table (one row per model) as the cell output.
results_df