In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import joblib

# Load the applicant dataset (CSV produced by the previous step)
df = pd.read_csv('loan_applicants.csv')

# Separate features and target
X = df.drop('Loan_Approved', axis=1)
y = df['Loan_Approved']

# Categorical columns to one-hot encode; every other column is passed through as-is
cat_cols = ['Loan_Type', 'Gender', 'Marital_Status']

# Split BEFORE fitting any preprocessing so the encoder never sees held-out rows.
# The split depends only on the row count and random_state, so the partition is
# the same as when the already-encoded frame was split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the encoder on the training rows only. handle_unknown='ignore' makes a
# category that appears only at test/inference time encode as all zeros
# instead of raising.
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoder.fit(X_train_raw[cat_cols])


def encode_features(frame):
    """Return `frame` with `cat_cols` replaced by their one-hot columns.

    Uses the already-fitted module-level `encoder`. The non-categorical columns
    come first, followed by the encoded columns, and the row index of `frame`
    is preserved so the result stays aligned with the matching target rows.
    """
    encoded = pd.DataFrame(
        encoder.transform(frame[cat_cols]),
        columns=encoder.get_feature_names_out(cat_cols),
        index=frame.index,
    )
    return pd.concat([frame.drop(columns=cat_cols), encoded], axis=1)


X_train = encode_features(X_train_raw)
X_test = encode_features(X_test_raw)

# Full encoded feature frame, kept for any later cell that expects it.
# It is only transformed here; nothing is fitted on it.
X_processed = encode_features(X)

# Initialize and train the model (fixed seed for reproducibility)
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict on the held-out data
y_pred = model.predict(X_test)

# Evaluate the model
# NOTE(review): a perfect score on held-out data usually means some feature
# encodes the label (target leakage) — inspect model.feature_importances_ and
# the raw columns before trusting this number.
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Save the model and encoder for later use. At inference time, apply the saved
# encoder to the same `cat_cols` and keep the same column order as X_train.
# NOTE(review): the file names say "loan_default" but the target is
# 'Loan_Approved' — confirm the intended naming with downstream consumers.
joblib.dump(model, 'loan_default_model.pkl')
joblib.dump(encoder, 'loan_default_encoder.pkl')


Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00       749
           1       1.00      1.00      1.00       251

    accuracy                           1.00      1000
   macro avg       1.00      1.00      1.00      1000
weighted avg       1.00      1.00      1.00      1000



['loan_default_encoder.pkl']