In [48]:
!pip install pandas scikit-learn joblib




In [50]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import joblib
import os
from sklearn.metrics import classification_report

# Load dataset
# header=1 takes the column names from the workbook's second row; the names are
# then stripped of surrounding whitespace so they can be selected reliably.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00350/default%20of%20credit%20card%20clients.xls"
df = pd.read_excel(url, header=1)
df = df.rename(columns=lambda x: x.strip())

# Select features
features = ['LIMIT_BAL', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_0']
X = df[features]
y = df['default payment next month']

# Train/test split (fixed random_state so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
# The scaler is fit on the training rows only; later cells reuse it via transform().
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Train model
model = LogisticRegression()
model.fit(X_train_scaled, y_train)

# 🔧 Create models folder if it doesn't exist
os.makedirs('credit-risk-analyzer/models', exist_ok=True)

# Persist both artifacts before reporting success. The classifier was fit on
# standardized inputs, so the fitted scaler is needed to prepare any data that
# is later passed to the reloaded model.
joblib.dump(model, 'credit-risk-analyzer/models/logistic_model.pkl')
joblib.dump(scaler, 'credit-risk-analyzer/models/scaler.pkl')

print("✅ Model trained and saved successfully!")



✅ Model trained and saved successfully!


In [41]:
# Evaluate on the held-out split. The scaler was fit on the training rows, so
# the test rows only go through transform() here.
X_test_scaled = scaler.transform(X_test)
y_pred = model.predict(X_test_scaled)

# Per-class precision / recall / F1 for the hard class predictions.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.82      0.97      0.89      4687
           1       0.66      0.23      0.34      1313

    accuracy                           0.81      6000
   macro avg       0.74      0.60      0.61      6000
weighted avg       0.78      0.81      0.77      6000



In [46]:
# Standardize the held-out rows with the already-fitted scaler, then take the
# second predict_proba column (the probability assigned to class 1).
X_test_scaled = scaler.transform(X_test)
prob_default = model.predict_proba(X_test_scaled)[:, 1]

# Simple linear mapping: probability 1 -> 300, probability 0 -> 850.
score_floor = 300
score_span = 550
credit_scores = score_floor + (1 - prob_default) * score_span

# Display the first few credit scores
score_table = pd.DataFrame({'Credit Score': credit_scores})
display(score_table.head())

Unnamed: 0,Credit Score
0,732.751269
1,745.293363
2,730.869389
3,742.419947
4,727.602393


In [47]:

# Write the fitted classifier to disk. This cell stores only the classifier;
# inputs must still be standardized with the fitted `scaler` before they are
# passed to the reloaded model. The bare dump() call is left as the final
# expression so its return value is shown as the cell output.
model_file = 'credit-risk-analyzer/models/logistic_model.pkl'
joblib.dump(model, model_file)

['credit-risk-analyzer/models/logistic_model.pkl']

In [45]:
# Define custom input values (keys match the feature columns used for training)
custom_input = dict(
    LIMIT_BAL=50000,
    EDUCATION=2,  # University
    MARRIAGE=1,   # Married
    AGE=35,
    PAY_0=0,      # No delay in payment
)

# One-row frame, standardized with the same scaler that was fit on the training data
custom_input_df = pd.DataFrame([custom_input])
custom_input_scaled = scaler.transform(custom_input_df)

# Probability assigned to class 1 for the single input row
pred_prob_custom = model.predict_proba(custom_input_scaled)[0, 1]

# Same linear mapping used for the test-set scores: 300 + (1 - p) * 550
score_custom = 300 + (1 - pred_prob_custom) * 550

# Pick the recommendation line using a 0.5 probability cut-off
is_high_risk = pred_prob_custom > 0.5
recommendation = (
    "Recommendation: High Risk – Not Recommended for Credit"
    if is_high_risk
    else "Recommendation: Low Risk – Eligible for Credit"
)

# Print the results
print(f"Custom Input: {custom_input}")
print(f"Predicted Risk of Default: {round(pred_prob_custom*100, 2)}%")
print(f"Calculated Credit Score: {int(score_custom)}")
print(recommendation)

Custom Input: {'LIMIT_BAL': 50000, 'EDUCATION': 2, 'MARRIAGE': 1, 'AGE': 35, 'PAY_0': 0}
Predicted Risk of Default: 24.81%
Calculated Credit Score: 713
Recommendation: Low Risk – Eligible for Credit
