In [1]:
# Importing the libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import os
import sys
import warnings
warnings.filterwarnings('ignore')


In [2]:
# Importing the dataset
df_transformed = pd.read_csv('preprocessed_data.csv')

In [3]:
from sklearn.model_selection import train_test_split

# Splitting the dataset into training and testing sets with stratification
X = df_transformed.drop('Approved', axis=1)
y = df_transformed['Approved']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Confirming the split
(X_train.shape, X_test.shape), (y_train.shape, y_test.shape)


(((552, 7), (138, 7)), ((552,), (138,)))

In [4]:
# Initialize the SVM model with default parameters
svm_baseline = SVC()

# Performing 5-fold cross-validation to evaluate the baseline model
cv_scores = cross_val_score(svm_baseline, X_train, y_train, cv=5)

# Calculating the mean cross-validation score
mean_cv_score = cv_scores.mean()

mean_cv_score

0.8495659295659295

In [5]:
from sklearn.model_selection import GridSearchCV

# Defining the parameter grid
param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'rbf', 'sigmoid'],
    'gamma': [0.001, 0.01, 0.1, 1]
}

# Initialize Grid Search with the SVM model and the defined parameter grid
grid_search = GridSearchCV(SVC(random_state=42), param_grid, cv=5, scoring='accuracy')

# Fitting Grid Search to the training data
grid_search.fit(X_train, y_train)

# Best parameters and best score
best_parameters = grid_search.best_params_
best_score = grid_search.best_score_

best_parameters, best_score


({'C': 100, 'gamma': 0.1, 'kernel': 'rbf'}, 0.8586076986076987)

In [6]:
from sklearn.preprocessing import PolynomialFeatures

# Initializing the PolynomialFeatures generator with degree 2
poly = PolynomialFeatures(degree=2)

# Applying polynomial feature transformation to the training and testing sets
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Re-training the SVM model with the optimal parameters found and polynomial features
svm_optimized = SVC(C=100, gamma=1, kernel='poly', degree=3, random_state=42)
svm_optimized.fit(X_train_poly, y_train)

# Predictions on the test set
y_pred = svm_optimized.predict(X_test_poly)

# Evaluation metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Calculating metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)

accuracy, precision, recall, f1, roc_auc

(0.8695652173913043,
 0.9056603773584906,
 0.7868852459016393,
 0.8421052631578947,
 0.8609750904832872)

In [7]:
from imblearn.over_sampling import SMOTE

# Initialize the SMOTE object
smote = SMOTE(random_state=42)

# Applying SMOTE to the training data
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Confirming the resampling
(X_train_resampled.shape, y_train_resampled.shape)

# Re-training the SVM model with the optimal parameters found and the resampled data
svm_optimized_resampled = SVC(C=100, gamma=1, kernel='poly', degree=3, random_state=42)
svm_optimized_resampled.fit(X_train_resampled, y_train_resampled)

# Predictions on the test set
y_pred_resampled = svm_optimized_resampled.predict(X_test)

# Calculating metrics
accuracy_resampled = accuracy_score(y_test, y_pred_resampled)
precision_resampled = precision_score(y_test, y_pred_resampled)
recall_resampled = recall_score(y_test, y_pred_resampled)
f1_resampled = f1_score(y_test, y_pred_resampled)
roc_auc_resampled = roc_auc_score(y_test, y_pred_resampled)

accuracy_resampled, precision_resampled, recall_resampled, f1_resampled, roc_auc_resampled


(0.8478260869565217,
 0.8571428571428571,
 0.7868852459016393,
 0.8205128205128205,
 0.8414945710027677)

In [8]:
from joblib import load
from sklearn.metrics import classification_report, roc_auc_score

# Apply the same transformations to X_test
X_test_transformed = poly.transform(X_test)

# Now you can use svm_optimized to make predictions
# Predict the test set
y_pred_test = svm_optimized.predict(X_test_transformed)

# Evaluate the predictions
print("Classification Report:")
print(classification_report(y_test, y_pred_test))

roc_auc = roc_auc_score(y_test, y_pred_test)
print(f"ROC-AUC Score: {roc_auc}")

Classification Report:
              precision    recall  f1-score   support

           0       0.85      0.94      0.89        77
           1       0.91      0.79      0.84        61

    accuracy                           0.87       138
   macro avg       0.88      0.86      0.87       138
weighted avg       0.87      0.87      0.87       138

ROC-AUC Score: 0.8609750904832872


In [9]:
from sklearn.preprocessing import PolynomialFeatures

# Initializing the PolynomialFeatures generator with degree 2
poly = PolynomialFeatures(degree=2)

# Applying polynomial feature transformation to the training and testing sets
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Re-training the SVM model with the optimal parameters found and polynomial features
svm_optimized = SVC(C=100, gamma=1, kernel='poly', degree=6, random_state=42)
svm_optimized.fit(X_train_poly, y_train)

# Predictions on the test set
y_pred = svm_optimized.predict(X_test_poly)

# Evaluation metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Calculating metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_pred)

accuracy, precision, recall, f1, roc_auc

(0.8333333333333334,
 0.8653846153846154,
 0.7377049180327869,
 0.7964601769911505,
 0.823397913561848)

In [10]:
from imblearn.over_sampling import SMOTE

# Initialize the SMOTE object
smote = SMOTE(random_state=42)

# Applying SMOTE to the training data
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Confirming the resampling
(X_train_resampled.shape, y_train_resampled.shape)

# Re-training the SVM model with the optimal parameters found and the resampled data
svm_optimized_resampled = SVC(C=100, gamma=1, kernel='poly', degree=6, random_state=42)
svm_optimized_resampled.fit(X_train_resampled, y_train_resampled)

# Predictions on the test set
y_pred_resampled = svm_optimized_resampled.predict(X_test)

# Calculating metrics
accuracy_resampled = accuracy_score(y_test, y_pred_resampled)
precision_resampled = precision_score(y_test, y_pred_resampled)
recall_resampled = recall_score(y_test, y_pred_resampled)
f1_resampled = f1_score(y_test, y_pred_resampled)
roc_auc_resampled = roc_auc_score(y_test, y_pred_resampled)

accuracy_resampled, precision_resampled, recall_resampled, f1_resampled, roc_auc_resampled


(0.8333333333333334,
 0.8653846153846154,
 0.7377049180327869,
 0.7964601769911505,
 0.823397913561848)

In [22]:
from joblib import load
from sklearn.metrics import classification_report, roc_auc_score

# Apply the same transformations to X_test
X_test_transformed = poly.transform(X_test)

# Now you can use svm_optimized to make predictions
# Predict the test set
y_pred_test = svm_optimized.predict(X_test_transformed)

# Evaluate the predictions
print("Classification Report:")
print(classification_report(y_test, y_pred_test))

roc_auc = roc_auc_score(y_test, y_pred_test)
print(f"ROC-AUC Score: {roc_auc}")

Classification Report:
              precision    recall  f1-score   support

           0       0.81      0.91      0.86        77
           1       0.87      0.74      0.80        61

    accuracy                           0.83       138
   macro avg       0.84      0.82      0.83       138
weighted avg       0.84      0.83      0.83       138

ROC-AUC Score: 0.823397913561848
