In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import pickle

In [2]:
# Load the semicolon-separated bank dataset into a DataFrame.
df = pd.read_csv("bank.csv", sep=";")

In [3]:
# Encode the target column as 0/1.
# The dtype guard makes this cell safe to re-run: calling .map() again on the
# already-encoded column would find no matching keys and turn every label into NaN.
if not pd.api.types.is_numeric_dtype(df["y"]):
    y_encoded = df["y"].map({"no": 0, "yes": 1})
    # .map() yields NaN for any value that is not a key of the mapping; fail
    # here rather than handing NaN targets to the model later.
    if y_encoded.isna().any():
        raise ValueError("Unexpected values in target column 'y'; expected only 'no' or 'yes'")
    df["y"] = y_encoded

In [4]:
# Columns treated as categorical; each one is expanded into indicator columns.
categorical_cols = [
    "job",
    "marital",
    "education",
    "default",
    "housing",
    "loan",
    "contact",
    "month",
    "poutcome",
]

# drop_first=True omits the first level of every category, so a column with
# k levels becomes k-1 indicator columns.
df_encoded = pd.get_dummies(data=df, columns=categorical_cols, drop_first=True)

In [5]:
# Separate the target (y) from the feature matrix (every other column).
y = df_encoded["y"]
X = df_encoded.drop("y", axis="columns")

In [6]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [7]:
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)

In [8]:
# Train a logistic-regression classifier (constructor defaults) on the training split.
model = LogisticRegression()
model.fit(X=X_train, y=y_train)

In [9]:
# Predict on each split once, then score the predictions against the true labels.
train_pred = model.predict(X_train)
test_pred = model.predict(X_test)

train_acc = accuracy_score(y_train, train_pred)
test_acc = accuracy_score(y_test, test_pred)

In [10]:
# Serialize the fitted classifier to disk.
# NOTE(review): only `model` is written; the fitted `scaler` is not persisted in
# the visible cells — confirm whether consumers of this file need it.
model_filename = "credit_scoring_model.pkl"
with open(model_filename, mode="wb") as out_file:
    out_file.write(pickle.dumps(model))

In [11]:
# Report both accuracies and the output path, one item per line.
print(
    f"Training Accuracy: {train_acc:.2%}",
    f"Testing Accuracy: {test_acc:.2%}",
    f"Model saved as {model_filename}",
    sep="\n",
)

Training Accuracy: 90.85%
Testing Accuracy: 89.17%
Model saved as credit_scoring_model.pkl
