In [None]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [None]:
import pandas as pd
import numpy as np

# Number of synthetic loan applicants to generate.
num_samples = 10000

# Seed NumPy's global generator so every run draws the same applicants.
np.random.seed(42)

# NOTE: the columns are drawn in the order they appear in this dict, so
# reordering them would change the generated values even with the same seed.
data = pd.DataFrame({
    'Age': np.random.randint(low=18, high=70, size=num_samples),
    'Employment Status': np.random.choice(
        ['Employed', 'Unemployed', 'Self-Employed'],
        size=num_samples,
        p=[0.6, 0.2, 0.2],
    ),
    'Annual Income': np.random.randint(low=10000, high=150000, size=num_samples),
    'Loan Amount Requested': np.random.randint(low=1000, high=50000, size=num_samples),
    'Credit Score': np.random.randint(low=300, high=850, size=num_samples),
})

# Labelling rule: an applicant is a default risk (1) when the credit score is
# below 600, or when they are unemployed with an annual income below 30000.
low_credit = data['Credit Score'] < 600
unemployed_low_income = (
    (data['Employment Status'] == 'Unemployed') & (data['Annual Income'] < 30000)
)
data['Default Risk'] = np.where(low_credit | unemployed_low_income, 1, 0)

# Persist the labelled dataset without the index column.
data.to_excel('/content/sample_data/credit.xlsx', index=False)
print("Data generated and saved to 'credit.xlsx'")


Data generated and saved to 'credit.xlsx'


In [None]:
# Read the saved workbook back into a DataFrame and preview its first rows.
df = pd.read_excel(io='/content/sample_data/credit.xlsx')
df.head(n=5)

Unnamed: 0,Age,Employment Status,Annual Income,Loan Amount Requested,Credit Score,Default Risk
0,56,Unemployed,22402,32005,458,1
1,69,Employed,112880,4302,359,1
2,46,Employed,129764,9511,798,0
3,32,Self-Employed,41050,21991,616,0
4,60,Employed,105808,40136,787,0


In [None]:
# Build the design matrix from `df`, the frame reloaded from the workbook,
# rather than from the in-memory `data` frame of the generation cell. This way
# the dataset that was actually loaded is the one the model is trained on, and
# the cell no longer depends on the generation cell having run in this kernel.
X = df[['Age', 'Annual Income', 'Loan Amount Requested', 'Credit Score', 'Employment Status']]

# One-hot encode the employment status. drop_first=True drops the first
# category as the baseline, so only the remaining categories get an
# 'Employment Status_<value>' column.
X = pd.get_dummies(X, columns=['Employment Status'], drop_first=True)

# Binary target: 1 = default risk, 0 = no default risk.
y = df['Default Risk']

In [None]:
# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Fitting LogisticRegression directly on the raw features stopped at the
# solver's iteration limit (see the convergence warning this cell used to
# emit). The features live on very different scales (ages in the tens,
# incomes in the tens of thousands), so standardise them inside a pipeline
# and allow the solver more iterations. The pipeline exposes the same
# fit/predict interface, so downstream cells keep using `model` unchanged.
model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
model.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
# Predict on both splits so that training and test performance can be
# compared in the evaluation cells.
y_train_pred = model.predict(X_train)
y_pred = model.predict(X_test)

In [None]:
# Score the held-out predictions against the true test labels.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

# Report each metric under its heading (heading and value on separate lines).
print(f'Accuracy on Test Data: {accuracy:.2f}')
print('Confusion Matrix (Test Data):', conf_matrix, sep='\n')
print('Classification Report (Test Data):', class_report, sep='\n')

Accuracy on Test Data: 0.91
Confusion Matrix (Test Data):
[[1197  142]
 [ 142 1519]]
Classification Report (Test Data):
              precision    recall  f1-score   support

           0       0.89      0.89      0.89      1339
           1       0.91      0.91      0.91      1661

    accuracy                           0.91      3000
   macro avg       0.90      0.90      0.90      3000
weighted avg       0.91      0.91      0.91      3000



In [None]:
# Score the training-set predictions against the true training labels, for
# comparison with the test-set metrics.
accuracy_train = accuracy_score(y_train, y_train_pred)
conf_matrix_train = confusion_matrix(y_train, y_train_pred)
class_report_train = classification_report(y_train, y_train_pred)

# Report each metric under its heading (heading and value on separate lines).
print(f'Accuracy on Training Data: {accuracy_train:.2f}')
print('Confusion Matrix (Training Data):', conf_matrix_train, sep='\n')
print('Classification Report (Training Data):', class_report_train, sep='\n')

Accuracy on Training Data: 0.89
Confusion Matrix (Training Data):
[[2726  341]
 [ 416 3517]]
Classification Report (Training Data):
              precision    recall  f1-score   support

           0       0.87      0.89      0.88      3067
           1       0.91      0.89      0.90      3933

    accuracy                           0.89      7000
   macro avg       0.89      0.89      0.89      7000
weighted avg       0.89      0.89      0.89      7000



In [None]:
def predict_credit_score(age, employment_status, annual_income, loan_amount, credit_score,
                         estimator=None, feature_columns=None):
    """Classify a single applicant as 'High Risk' or 'Low Risk'.

    The applicant is encoded into a one-row DataFrame with exactly the
    columns the model was trained on, then passed to ``predict``.

    The employment-status dummy columns are derived from the training
    columns instead of being hard-coded. The training matrix is built with
    ``drop_first=True``, so the baseline category has no column of its own;
    hard-coding an 'Employment Status_Employed' column while omitting
    'Employment Status_Self-Employed' caused self-employed applicants to be
    encoded as the baseline category.

    Parameters
    ----------
    age, annual_income, loan_amount, credit_score : number
        Numeric applicant attributes.
    employment_status : str
        Employment status label. A label with no matching
        'Employment Status_<label>' training column is encoded as all
        zeros, i.e. as the baseline category.
    estimator : object with ``predict``, optional
        Fitted classifier; defaults to the notebook-level ``model``.
    feature_columns : sequence of str, optional
        Training column names in training order; defaults to ``X.columns``.

    Returns
    -------
    str
        'High Risk' if the predicted class is 1, otherwise 'Low Risk'.
    """
    # Fall back to the notebook globals only when no override is supplied.
    estimator = model if estimator is None else estimator
    feature_columns = X.columns if feature_columns is None else feature_columns

    dummy_prefix = 'Employment Status_'
    active_dummy = f'{dummy_prefix}{employment_status}'

    row = {
        'Age': age,
        'Annual Income': annual_income,
        'Loan Amount Requested': loan_amount,
        'Credit Score': credit_score,
    }
    # Set every employment dummy the model knows about: 1 for the column that
    # matches this applicant's status, 0 for all the others.
    for column in feature_columns:
        if str(column).startswith(dummy_prefix):
            row[column] = int(column == active_dummy)

    # Reindex to guarantee the same column order as the training matrix.
    input_data = pd.DataFrame([row]).reindex(columns=feature_columns, fill_value=0)

    prediction = estimator.predict(input_data)[0]
    return 'High Risk' if prediction == 1 else 'Low Risk'

In [None]:
# Score one example applicant; keyword arguments make each value's meaning explicit.
new_customer_prediction = predict_credit_score(
    age=27,
    employment_status='Employed',
    annual_income=40000,
    loan_amount=30000,
    credit_score=700,
)


In [None]:
# Show the risk label returned for the example applicant.
print('New Customer Prediction: {}'.format(new_customer_prediction))

New Customer Prediction: Low Risk
