In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

df = pd.read_csv("bank-additional-full.csv", sep=';')  # UCI bank dataset uses ; separator

print(df[['age', 'job', 'marital']].describe())

               age
count  41188.00000
mean      40.02406
std       10.42125
min       17.00000
25%       32.00000
50%       38.00000
75%       47.00000
max       98.00000


In [3]:
y = df['y'].map({'yes': 1, 'no': 0})

X = pd.get_dummies(df.drop('y', axis=1), drop_first=True) 

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [4]:
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

model = LogisticRegression(
    max_iter=1000,
    C=0.5,               # Controls regularization strength
    solver='liblinear',  # Good for small to mid datasets
    penalty='l2'         # L2 regularization (Ridge)
)

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print("\n📊 Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\n📝 Classification Report:\n", classification_report(y_test, y_pred))

✅ Accuracy: 0.9113862588006798

📊 Confusion Matrix:
 [[7105  198]
 [ 532  403]]

📝 Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.97      0.95      7303
           1       0.67      0.43      0.52       935

    accuracy                           0.91      8238
   macro avg       0.80      0.70      0.74      8238
weighted avg       0.90      0.91      0.90      8238



In [6]:
acceptance_by_job = df.groupby("job")["y"].value_counts(normalize=True).unstack()['yes'] * 100
print("Loan acceptance rates by job:\n", acceptance_by_job.sort_values(ascending=False))

Loan acceptance rates by job:
 job
student          31.428571
retired          25.232558
unemployed       14.201183
admin.           12.972558
management       11.217510
unknown          11.212121
technician       10.826042
self-employed    10.485574
housemaid        10.000000
entrepreneur      8.516484
services          8.138070
blue-collar       6.894316
Name: yes, dtype: float64


In [7]:
acceptance_by_education = df.groupby("education")["y"].value_counts(normalize=True).unstack()['yes'] * 100
print("Loan acceptance rates by education:\n", acceptance_by_job.sort_values(ascending=False))

Loan acceptance rates by education:
 job
student          31.428571
retired          25.232558
unemployed       14.201183
admin.           12.972558
management       11.217510
unknown          11.212121
technician       10.826042
self-employed    10.485574
housemaid        10.000000
entrepreneur      8.516484
services          8.138070
blue-collar       6.894316
Name: yes, dtype: float64


Analysis shows that students (31%), retired individuals (25%), and the unemployed (14%) were the most likely to accept the personal loan offer. These groups may be more receptive due to their limited or irregular income streams. In contrast, blue-collar workers (6.9%), entrepreneurs, and services staff had the lowest acceptance rates, possibly due to lower trust in financial products or existing financial constraints.

In [8]:
import numpy as np

feature_names = X.columns
coefficients = model.coef_[0]

coef_df = pd.DataFrame({
    "Feature": feature_names,
    "Coefficient": coefficients
}).sort_values(by="Coefficient", ascending=False)

print(coef_df.head(10))   
print(coef_df.tail(10))   

                        Feature  Coefficient
1                      duration     1.202538
6                cons.price.idx     1.093903
8                     euribor3m     0.613639
38                    month_aug     0.282367
42                    month_mar     0.225432
9                   nr.employed     0.186687
52             poutcome_success     0.156999
51         poutcome_nonexistent     0.147374
7                 cons.conf.idx     0.083215
29  education_university.degree     0.078284
              Feature  Coefficient
15  job_self-employed    -0.051292
10    job_blue-collar    -0.082058
31    default_unknown    -0.112053
41          month_jun    -0.122427
44          month_nov    -0.128785
2            campaign    -0.138329
3               pdays    -0.190758
43          month_may    -0.241569
37  contact_telephone    -0.304799
5        emp.var.rate    -2.477794


Based on logistic regression coefficients, customers with a successful past campaign, contacted in August, and having a university degree are significantly more likely to accept a personal loan offer. Conversely, those contacted via telephone, in May, or with repeated campaign calls tend to be less responsive. The duration of the call is the strongest predictor, but it can only be used after the contact is made.