In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import time

# Read the 'Data' sheet of the personal-loan workbook into a DataFrame.
# NOTE(review): the path is absolute and machine-specific, so this cell only
# runs where that exact file exists.
df = pd.read_excel(
    'C:\\Users\\a1787\\Documents\\00term4\\1(4880)IntroductiontoDataAnalysis\\assignment2\\Bank_Personal_Loan_Modelling.xlsx',
    sheet_name='Data',
)

# Target variable: the 'Personal Loan' column.
y = df['Personal Loan']

# Feature matrix: every column except the target and the four columns
# excluded from modelling.
X = df.drop(columns=['Personal Loan', 'ID', 'ZIP Code', 'Age', 'Experience'])

# Overwrite any negative feature value with 0.
X[X < 0] = 0

# Bin Income into the intervals (0, 116] and (116, 225], then one-hot encode
# the bin. The raw Income column stays in X; an Income outside (0, 225] gets
# no bin and therefore all-zero indicator columns.
X = pd.get_dummies(
    X.assign(IncomeBin=pd.cut(x=df['Income'], bins=[0, 116, 225])),
    columns=['IncomeBin'],
)

# Hold out 25% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.25,
    random_state=0,
)

# Scale the features with RobustScaler (MinMaxScaler and StandardScaler are the
# imported alternatives that could be swapped in here). The scaler is fitted on
# the training rows only and then applied to both partitions.
scaler = RobustScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a random forest of 10 entropy-criterion trees on the scaled training
# data; random_state is fixed so repeated runs build the same forest.
classifier = RandomForestClassifier(
    n_estimators=10,
    criterion='entropy',
    random_state=0,
).fit(X_train_scaled, y_train)

# Predicted class labels for the held-out rows.
predictions = classifier.predict(X_test_scaled)

# The four label-based metrics all compare the same (truth, prediction) pair.
accuracy, precision, recall, f1 = (
    metric(y_test, predictions)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# ROC AUC is scored from column 1 of predict_proba rather than from hard labels.
positive_scores = classifier.predict_proba(X_test_scaled)[:, 1]
auc = roc_auc_score(y_test, positive_scores)

# Report every metric under the run's heading.
print("Results with Best Hyperparameters(robust):")
for label, value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1-Score:", f1),
    ("Area Under Curve:", auc),
):
    print(label, value)

Results with Best Hyperparameters(robust):
Accuracy: 0.988
Precision: 0.979381443298969
Recall: 0.8796296296296297
F1-Score: 0.926829268292683
Area Under Curve: 0.9837598105986899
