In [1]:
from google.colab import drive
drive.mount ('/content/drive')

Mounted at /content/drive


In [3]:
filepath =  '/content/drive/My Drive/Colab Notebooks/Datasets/DSDP - creditsetcategorical.csv'

In [16]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from keras.models import Sequential
from keras.layers import Dense

In [5]:
data = pd.read_csv(filepath, sep=',')

In [6]:
data.head(50)

Unnamed: 0,income,age,loan,LTI,default10yr
0,66155.9251,59.017015,8106.532131,0.122537,No
1,34415.15397,48.117153,6564.745018,0.190752,No
2,57317.17006,63.108049,8020.953296,0.13994,No
3,42709.5342,45.751972,6103.64226,0.142911,No
4,66952.68885,18.584336,8770.099235,0.13099,Yes
5,24904.06414,57.471607,15.498598,0.000622,No
6,48430.35961,26.809132,5722.581981,0.118161,No
7,24500.14198,32.897548,2971.00331,0.121265,Yes
8,40654.89254,55.496853,4755.82528,0.11698,No
9,25075.87277,39.776378,1409.230371,0.056199,No


In [7]:
data.dtypes

income         float64
age            float64
loan           float64
LTI            float64
default10yr     object
dtype: object

In [8]:
data.shape

(2000, 5)

In [9]:
data.isnull().values.any()

False

In [10]:
data.isnull().sum()

income         0
age            0
loan           0
LTI            0
default10yr    0
dtype: int64

In [12]:
# convert 'default10yr' to binary
data['default10yr'] = data['default10yr'].apply(lambda x: 1 if x == 'Yes' else 0)

In [13]:
X = data.drop('default10yr', axis=1)
y = data['default10yr']

In [14]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [15]:
models = {
    "Decision Tree": DecisionTreeClassifier(),
    "AdaBoost": AdaBoostClassifier(),
    "Random Forest": RandomForestClassifier(),
    "SVM": SVC(),
    "Naive Bayes": GaussianNB()
}

In [17]:
results = {}
for model in models:
    models[model].fit(X_train, y_train)
    y_pred = models[model].predict(X_val)
    acc = accuracy_score(y_val, y_pred)
    report = classification_report(y_val, y_pred)
    matrix = confusion_matrix(y_val, y_pred)
    cross_val_acc = cross_val_score(models[model], X, y, cv=10).mean()  # 10-fold cross validation
    results[model] = {"Accuracy": acc, "Cross Val Accuracy": cross_val_acc, "Report": report, "Matrix": matrix}

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [18]:
# ANN model
ann = Sequential()
ann.add(Dense(16, activation='relu', input_shape=(X_train.shape[1],)))
ann.add(Dense(16, activation='relu'))
ann.add(Dense(1, activation='sigmoid'))
ann.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
ann.fit(X_train, y_train, epochs=100, batch_size=10, verbose=0)
y_pred = (ann.predict(X_val) > 0.5).astype("int32")
acc = accuracy_score(y_val, y_pred)
report = classification_report(y_val, y_pred)
matrix = confusion_matrix(y_val, y_pred)
results["ANN"] = {"Accuracy": acc, "Cross Val Accuracy": "N/A", "Report": report, "Matrix": matrix}



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [19]:
# print results
for model in results:
    print(f"{model}:\nAccuracy: {results[model]['Accuracy']}\nCross Val Accuracy: {results[model]['Cross Val Accuracy']}\nReport:\n{results[model]['Report']}\nConfusion Matrix:\n{results[model]['Matrix']}\n")

Decision Tree:
Accuracy: 0.9975
Cross Val Accuracy: 0.999
Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       330
           1       1.00      0.99      0.99        70

    accuracy                           1.00       400
   macro avg       1.00      0.99      1.00       400
weighted avg       1.00      1.00      1.00       400

Confusion Matrix:
[[330   0]
 [  1  69]]

AdaBoost:
Accuracy: 0.9975
Cross Val Accuracy: 0.999
Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       330
           1       1.00      0.99      0.99        70

    accuracy                           1.00       400
   macro avg       1.00      0.99      1.00       400
weighted avg       1.00      1.00      1.00       400

Confusion Matrix:
[[330   0]
 [  1  69]]

Random Forest:
Accuracy: 0.9975
Cross Val Accuracy: 0.999
Report:
              precision    recall  f1-score   support

           0  

In [20]:
# user input values
#creditsetsample 1
income = float(input("Enter yearly income: "))
age = float(input("Enter age: "))
loan = float(input("Enter loan amount: "))
lti = float(input("Enter loan to income ratio (LTI): "))

# make a prediction with each model
user_data = np.array([[income, age, loan, lti]])

prediction_dt = models['Decision Tree'].predict(user_data)
prediction_ab = models['AdaBoost'].predict(user_data)
prediction_rf = models['Random Forest'].predict(user_data)

# print predictions
print("Will the user default the loan within 10 years?")
print(f"Decision Tree Prediction: {'Yes' if prediction_dt[0] else 'No'}")
print(f"AdaBoost Prediction: {'Yes' if prediction_ab[0] else 'No'}")
print(f"Random Forest Prediction: {'Yes' if prediction_rf[0] else 'No'}")

Enter yearly income: 42710
Enter age: 46
Enter loan amount: 6104
Enter loan to income ratio (LTI): 0.143
Will the user default the loan within 10 years?
Decision Tree Prediction: No
AdaBoost Prediction: No
Random Forest Prediction: No




In [21]:
# user input values
#creditsetsample 2
income = float(input("Enter yearly income: "))
age = float(input("Enter age: "))
loan = float(input("Enter loan amount: "))
lti = float(input("Enter loan to income ratio (LTI): "))

# make a prediction with each model
user_data = np.array([[income, age, loan, lti]])

prediction_dt = models['Decision Tree'].predict(user_data)
prediction_ab = models['AdaBoost'].predict(user_data)
prediction_rf = models['Random Forest'].predict(user_data)

# print predictions
print("Will the user default the loan within 10 years?")
print(f"Decision Tree Prediction: {'Yes' if prediction_dt[0] else 'No'}")
print(f"AdaBoost Prediction: {'Yes' if prediction_ab[0] else 'No'}")
print(f"Random Forest Prediction: {'Yes' if prediction_rf[0] else 'No'}")

Enter yearly income: 66953
Enter age: 19
Enter loan amount: 8770
Enter loan to income ratio (LTI): 0.131
Will the user default the loan within 10 years?
Decision Tree Prediction: Yes
AdaBoost Prediction: Yes
Random Forest Prediction: Yes




In [22]:
# user input values
#creditsetsample 3
income = float(input("Enter yearly income: "))
age = float(input("Enter age: "))
loan = float(input("Enter loan amount: "))
lti = float(input("Enter loan to income ratio (LTI): "))

# make a prediction with each model
user_data = np.array([[income, age, loan, lti]])

prediction_dt = models['Decision Tree'].predict(user_data)
prediction_ab = models['AdaBoost'].predict(user_data)
prediction_rf = models['Random Forest'].predict(user_data)

# print predictions
print("Will the user default the loan within 10 years?")
print(f"Decision Tree Prediction: {'Yes' if prediction_dt[0] else 'No'}")
print(f"AdaBoost Prediction: {'Yes' if prediction_ab[0] else 'No'}")
print(f"Random Forest Prediction: {'Yes' if prediction_rf[0] else 'No'}")

Enter yearly income: 24904
Enter age: 57
Enter loan amount: 15
Enter loan to income ratio (LTI): 0.001
Will the user default the loan within 10 years?
Decision Tree Prediction: No
AdaBoost Prediction: No
Random Forest Prediction: No




In [None]:
# user input values
#run this for other values :)
income = float(input("Enter yearly income: "))
age = float(input("Enter age: "))
loan = float(input("Enter loan amount: "))
lti = float(input("Enter loan to income ratio (LTI): "))

# make a prediction with each model
user_data = np.array([[income, age, loan, lti]])

prediction_dt = models['Decision Tree'].predict(user_data)
prediction_ab = models['AdaBoost'].predict(user_data)
prediction_rf = models['Random Forest'].predict(user_data)

# print predictions
print("Will the user default the loan within 10 years?")
print(f"Decision Tree Prediction: {'Yes' if prediction_dt[0] else 'No'}")
print(f"AdaBoost Prediction: {'Yes' if prediction_ab[0] else 'No'}")
print(f"Random Forest Prediction: {'Yes' if prediction_rf[0] else 'No'}")