In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn import metrics
from sklearn.exceptions import ConvergenceWarning
import warnings

# Read the 'Data' sheet of the personal-loan workbook into a DataFrame.
# NOTE(review): the path is absolute and machine-specific, so this cell only
# runs where the workbook exists at exactly this location.
df = pd.read_excel(
    'C:\\Users\\a1787\\Documents\\00term4\\1(4880)IntroductiontoDataAnalysis\\assignment2\\Bank_Personal_Loan_Modelling.xlsx',
    sheet_name='Data',
)

# Label to predict.
y = df['Personal Loan']

# Feature frame: every column except the label and the four columns that are
# excluded from the model (ID, ZIP Code, Age, Experience).
X = df.drop(columns=['Personal Loan', 'ID', 'ZIP Code', 'Age', 'Experience'])

# Clamp any negative entry in the remaining features to 0.
X = X.mask(X < 0, 0)

# Bucket Income into the two right-closed intervals (0, 116] and (116, 225]
# and one-hot encode the bucket. The raw Income column stays in X alongside
# the indicator columns. Incomes outside (0, 225] fall into no bucket and end
# up with both indicators unset.
X['IncomeBin'] = pd.cut(x=df['Income'], bins=[0, 116, 225])
X = pd.get_dummies(X, columns=['IncomeBin'])

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

# Scale the features with RobustScaler (MinMaxScaler and StandardScaler are
# imported as well and can be swapped in here). The scaler is fitted on the
# training split only and then applied unchanged to the test split, so no
# test-set statistics enter the preprocessing.
scaler = RobustScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Suppress scikit-learn's ConvergenceWarning. filterwarnings installs a
# process-wide filter, so the suppression also applies to every later cell.
# NOTE(review): this hides the signal that the solver stopped before
# converging; consider raising max_iter instead of silencing the warning.
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# L1-penalised logistic regression. C is the inverse regularisation strength,
# so C=1000 applies only a very weak penalty. The liblinear solver shuffles
# the data internally, so random_state is pinned (matching the seed used for
# the train/test split) to make the fitted coefficients reproducible.
model = LogisticRegression(
    penalty='l1',
    C=1000,
    solver='liblinear',
    random_state=0,
)

# Fit on the scaled training split.
model.fit(X_train_scaled, y_train)

# Hard class predictions for the scaled test split.
y_pred = model.predict(X_test_scaled)

# Probability of the second class in model.classes_ for each test row
# (the positive class, assuming the labels are 0/1 -- TODO confirm).
predictions_prob = model.predict_proba(X_test_scaled)[:, 1]

# ROC AUC is computed from the predicted probabilities rather than from the
# hard labels, so it reflects ranking quality across all thresholds.
auc = roc_auc_score(y_test, predictions_prob)

# Root mean squared error between the true labels and the hard predictions.
# NOTE(review): assuming 0/1 labels, each squared error is 1 for a
# misclassified row and 0 otherwise, so this equals sqrt(1 - accuracy) and
# carries no information beyond the accuracy figure.
RMSE = np.sqrt(metrics.mean_squared_error(y_test, y_pred))

# Report the test-set metrics. The header is built from the scaler object that
# was actually fitted, so the label cannot drift out of sync with the code
# (it previously claimed "standard Scaler" while RobustScaler was in use).
print(f"Results with Best Hyperparameters and {type(scaler).__name__}:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1-Score:", f1_score(y_test, y_pred))
print("Area Under Curve:", auc)
print(f'RMSE: {RMSE}')


Results with Best Hyperparameters and standard Scaler:
Accuracy: 0.9664
Precision: 0.9125
Recall: 0.6759259259259259
F1-Score: 0.7765957446808511
Area Under Curve: 0.9433741973146528
RMSE: 0.1833030277982336
