In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the training CSV from the project-relative dataset folder into a DataFrame
csv_path = 'dataset/train_data_cleaned.csv'
train_df = pd.read_csv(csv_path)

In [None]:
#See what the dataset is like
# A bare expression on the last line of a cell is rendered as the cell's output,
# so this shows the loaded DataFrame for a quick visual check.
train_df

In [None]:
# Only train_df is used in this notebook; per the original note it holds over
# 40,000 entries, which is enough to carve a test split out of it.
# Target: the label column on its own. Features: every remaining column.
y = train_df['credit_card_default']
X = train_df.drop(columns=['credit_card_default'])

# Human-readable class names used when reporting results.
# presumably label 0 = "no-default" and 1 = "default" -- TODO confirm against the data
target_names = ["no-default", "default"]

print(X.shape, y.shape)

In [None]:
#Split the dataset
from sklearn.model_selection import train_test_split

# Hold out a test split (scikit-learn's default share) with a fixed seed so the
# partition is reproducible across runs.
# stratify=y keeps the proportion of each class the same in the train and test
# splits, so the reported metrics are not distorted by an unlucky draw of
# either class.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, stratify=y
)

# Show the target column as the cell output
y

In [None]:
# Standardise the features
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, so no statistics from the test
# split leak into preprocessing.
X_scaler = StandardScaler()
X_scaler.fit(X_train)

# Apply the same fitted transformation to both splits
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

# Show the scaled training matrix as the cell output
X_train_scaled

In [None]:
# Support-vector classifier with a linear kernel
from sklearn.svm import SVC

# fit() returns the estimator itself, so construction and training can be chained
model = SVC(kernel='linear').fit(X_train_scaled, y_train)

# Show the fitted estimator as the cell output
model

In [None]:
# Model accuracy: score the fitted classifier on the scaled held-out split
test_accuracy = model.score(X_test_scaled, y_test)
print('Test Acc: %.3f' % test_accuracy)

In [None]:
# Calculate the classification report
from sklearn.metrics import classification_report

# Predict once on the scaled test split; `predictions` is reused by the
# confusion-matrix and value-count cells further down.
predictions = model.predict(X_test_scaled)

# Per-class precision / recall / F1, labelled with the readable class names
report_text = classification_report(
    y_test,
    predictions,
    target_names=target_names,
)
print(report_text)

In [None]:
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Confusion matrix of the test-split predictions: rows are the true classes,
# columns are the predicted classes.
cm = confusion_matrix(y_test, predictions)

# Label both axes with the class names so the figure can be read on its own.
# The names are applied in the same order as in the classification report.
ax = sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    xticklabels=target_names,
    yticklabels=target_names,
)
# The trailing semicolon suppresses the textual repr of the return value.
ax.set(xlabel="Predicted", ylabel="Actual", title="SVM confusion matrix (test split)");

In [None]:
# Wrap the predicted and the true labels in single-column frames so their
# class counts can be inspected separately.
# Note: actual_df keeps y_test's original row index, while predictions_df gets
# a fresh 0..n-1 index, so the two frames are not row-aligned by index.
predictions_df = pd.DataFrame(dict(Prediction=predictions))
actual_df = pd.DataFrame(dict(Actual=y_test))

In [None]:
# Count how many test rows the model assigned to each predicted class
predictions_df.value_counts()

In [None]:
# Count how many test rows truly belong to each class, for comparison with the
# predicted-class counts
actual_df.value_counts()

In [None]:
#Save the model
import os

import joblib

filename = 'saved_models/SVM_trained.joblib'

# joblib.dump opens the target path for writing and does not create missing
# parent folders, so make sure the output directory exists first. Without this
# the cell fails with FileNotFoundError on a fresh checkout.
os.makedirs(os.path.dirname(filename), exist_ok=True)

# NOTE(review): only the classifier is persisted here. X_scaler, which produced
# the scaled inputs the model was trained on, is not saved by this cell.
joblib.dump(model, filename)

In [None]:
# Reload the persisted estimator and re-score it on the same scaled test split
# as a round-trip check of the saved file.
# joblib files are pickle-based: only load files from a trusted source.
loaded_model = joblib.load(filename)
loaded_accuracy = loaded_model.score(X_test_scaled, y_test)
print('Test Acc: %.3f' % loaded_accuracy)