# Loan Prediction: Hyperparameter Tuning

## 1. Load and Preprocess Data

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset
df = pd.read_csv('data/loan_train.csv')

# Drop Loan_ID so it does not end up among the feature columns
df = df.drop('Loan_ID', axis=1)

# Handle Missing Values
# The filled column is assigned back to the frame rather than calling
# `df[col].fillna(..., inplace=True)`: an inplace call on a column selection
# is chained assignment, which pandas 2.x warns about and which stops
# updating `df` once Copy-on-Write is enabled.
# Columns filled with their most frequent value:
for col in ['Gender', 'Married', 'Dependents', 'Self_Employed']:
    df[col] = df[col].fillna(df[col].mode()[0])
# Columns filled with their median:
for col in ['LoanAmount', 'Loan_Amount_Term', 'Credit_History']:
    df[col] = df[col].fillna(df[col].median())

# Categorical Feature Encoding
# One encoder object is refit for every column, so after the loop `le` only
# holds the classes of the last column processed ('Dependents').
le = LabelEncoder()
for col in ['Gender', 'Married', 'Education', 'Self_Employed', 'Property_Area', 'Loan_Status', 'Dependents']:
    df[col] = le.fit_transform(df[col])

# Split data into features (X) and target (y)
X = df.drop('Loan_Status', axis=1)
y = df['Loan_Status']

# Split data into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## 2. Hyperparameter Tuning with GridSearchCV

In [None]:
# Candidate values for each random-forest hyperparameter; the grid search
# below tries every combination of them.
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# Base estimator; the seed is fixed so repeated runs are comparable.
rf = RandomForestClassifier(random_state=42)

# 5-fold cross-validated search scored on accuracy.
# n_jobs=-1 and verbose=2 are passed through to GridSearchCV unchanged.
grid_search = GridSearchCV(
    estimator=rf,
    param_grid=param_grid,
    cv=5,
    n_jobs=-1,
    verbose=2,
    scoring='accuracy',
)

# Fit on the training split only; the test split is kept for the final check.
grid_search.fit(X_train, y_train)

## 3. Best Parameters and Score

In [None]:
# Show the best parameter combination found by the grid search and its
# best_score_ value, one per line.
print(
    f'Best Parameters: {grid_search.best_params_}',
    f'Best Score: {grid_search.best_score_}',
    sep='\n',
)

## 4. Evaluate Tuned Model

In [None]:
# Take the best estimator found by the search and predict on the test split.
best_rf = grid_search.best_estimator_
y_pred = best_rf.predict(X_test)

# Each entry pairs a heading with the metric function printed beneath it.
reports = (
    ('Accuracy of the best model on the test set:', accuracy_score),
    ('Classification report of the best model on the test set:', classification_report),
)
for heading, metric in reports:
    print(heading)
    print(metric(y_test, y_pred))