In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
dataset=pd.read_csv("reduced_dataset.csv")

In [4]:
from sklearn.preprocessing import LabelEncoder

# Copy dataframe
dataset = dataset.copy()

# Columns to encode
categorical_cols = ['Education',
  'EmploymentType',
  'MaritalStatus',
  'HasMortgage',
  'HasDependents',
  'LoanPurpose',
  'HasCoSigner'
]

# Apply LabelEncoder
le = LabelEncoder()
for col in categorical_cols:
    dataset[col] = le.fit_transform(dataset[col].astype(str))

In [5]:
dataset = dataset.drop(columns=["LoanID"], errors="ignore")

In [6]:
independent=dataset[['Age', 'Income', 'LoanAmount', 'CreditScore',
       'MonthsEmployed', 'NumCreditLines', 'InterestRate', 'LoanTerm',
       'DTIRatio', 'Education', 'EmploymentType', 'MaritalStatus',
       'HasMortgage', 'HasDependents', 'LoanPurpose', 'HasCoSigner']]

In [7]:
dependent=dataset[['Default']]

In [8]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(independent,dependent,test_size=1/3,random_state=0)

In [9]:
#SVM Classification
from sklearn.svm import SVC
classifier=SVC(kernel='rbf', random_state=0)
classifier=classifier.fit(X_train,y_train)
y_predict=classifier.predict(X_test)
from sklearn.metrics import confusion_matrix
cm=confusion_matrix(y_test,y_predict)
from sklearn.metrics import classification_report
cl_report=classification_report(y_test,y_predict)
print(cm)
print(cl_report)

[[303   0]
 [ 31   0]]
              precision    recall  f1-score   support

           0       0.91      1.00      0.95       303
           1       0.00      0.00      0.00        31

    accuracy                           0.91       334
   macro avg       0.45      0.50      0.48       334
weighted avg       0.82      0.91      0.86       334



In [10]:
from sklearn.feature_selection import SelectKBest, f_regression

def selectkbest(independent, dependent, n):
    test = SelectKBest(score_func=f_regression, k=n)
    fit1 = test.fit(independent, dependent.values.ravel())
    selectk_features = fit1.transform(independent)
    return selectk_features

kbest = selectkbest(independent, dependent, 3)

In [11]:
from sklearn.feature_selection import SelectKBest, f_classif

# Select top 5 features
selector = SelectKBest(score_func=f_classif, k=7)
X_new = selector.fit_transform(independent, dependent)

# Get selected feature names
selected_features = independent.columns[selector.get_support()]
print(selected_features)

Index(['Age', 'Income', 'LoanAmount', 'MonthsEmployed', 'InterestRate',
       'EmploymentType', 'HasDependents'],
      dtype='object')


In [12]:
import pickle
filename="model_svm.sav"
pickle.dump(y_predict,open(filename,'wb'))

dataset

In [14]:
dataset

Unnamed: 0,Age,Income,LoanAmount,CreditScore,MonthsEmployed,NumCreditLines,InterestRate,LoanTerm,DTIRatio,Education,EmploymentType,MaritalStatus,HasMortgage,HasDependents,LoanPurpose,HasCoSigner,Default
0,55,112656,92393,581,113,2,23.54,36,0.15,3,2,2,1,1,3,0,0
1,56,91569,131575,641,54,1,15.19,12,0.43,1,1,0,1,1,2,1,0
2,26,78169,75417,569,105,3,18.02,12,0.29,2,1,1,1,1,2,1,0
3,26,63033,10804,326,118,1,14.71,24,0.41,1,1,2,0,0,1,1,0
4,24,29665,21182,662,102,3,15.02,60,0.69,3,3,2,0,1,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,21,35959,200465,310,1,3,23.76,48,0.47,3,0,2,1,1,3,0,1
996,32,57400,68970,574,39,2,16.47,60,0.87,2,3,0,1,1,4,0,0
997,38,47736,200653,705,54,2,12.44,48,0.47,2,2,1,1,1,1,0,1
998,18,93942,202636,820,107,4,13.04,48,0.66,2,2,2,1,1,0,1,0


In [15]:
# Save the trained model
filename = "model_svm.sav"
pickle.dump(classifier, open(filename, 'wb'))

# Load the model
loaded_model = pickle.load(open("model_svm.sav", 'rb'))

# Use the loaded model for prediction
result = loaded_model.predict([[67,29180,218027,703,109,1,4.26,60,0.75,3,0,0,1,1,2,1]])
print("Prediction:", result)

Prediction: [0]
