In [30]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [31]:
df = pd.read_csv("default.csv")
df.head()

Unnamed: 0,default,student,balance,income
0,No,No,729.526495,44361.625074
1,No,Yes,817.180407,12106.1347
2,No,No,1073.549164,31767.138947
3,No,No,529.250605,35704.493935
4,No,No,785.655883,38463.495879


In [32]:
#verificación de balanceo de datos.
df['default'].value_counts()

No     9667
Yes     333
Name: default, dtype: int64

In [33]:
#proceso de balanceo de data.
nYes = len(df[df['default'] == "Yes"])
yes = df[df['default'] == "Yes"]
no = df[df['default'] == "No"]
no = no.sample(2*nYes, random_state=2022)
df = no.append(yes)
df = df.sample(frac=1, random_state=2022)
df

Unnamed: 0,default,student,balance,income
1801,No,Yes,970.515947,16584.828289
9296,No,No,1176.791505,30579.249224
651,Yes,No,780.172569,51656.874064
9856,Yes,No,1973.822147,27340.012269
7827,Yes,Yes,2083.228376,20103.602739
...,...,...,...,...
740,Yes,Yes,1504.290178,13965.186045
5830,No,No,709.257520,23249.936914
3335,No,No,331.167032,44752.229292
9315,No,No,347.371078,18488.147316


In [34]:
X = df[['balance', 'income']]
y = df['default']

In [35]:
#Ingeniería de caracteristicas - Codificación del Target.
lableEncoder = LabelEncoder()
lableEncoder.fit(['No', 'Yes'])
y = lableEncoder.transform(y.values)

In [36]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, shuffle=True, random_state=2022)

In [37]:
from sklearn.svm import SVC

In [38]:
svm = SVC(kernel="linear", C=1)
svm

SVC(C=1, kernel='linear')

In [39]:
svm.fit(X_train, y_train) #entrenamos modelo.

SVC(C=1, kernel='linear')

In [40]:
y_preds_svm = svm.predict(X_test)

In [41]:
y_preds_svm

array([0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1,
       0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0,
       0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0,
       0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0,
       1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0])

In [42]:
print("Accuracy: ", accuracy_score(y_test, y_preds_svm))

Accuracy:  0.87


In [43]:
conf_matrix = pd.crosstab(y_test, y_preds_svm, rownames=["observación"], colnames=["Predicción"])
print("Matriz de Confusión: \n\n", conf_matrix)

Matriz de Confusión: 

 Predicción     0   1
observación         
0            175  15
1             24  86


In [44]:
TP = conf_matrix.iloc[1,1]
TN = conf_matrix.iloc[0,0]
FN = conf_matrix.iloc[1,0]
FP = conf_matrix.iloc[0,1]

In [45]:
print("Sentitividad: ", TP/(TP+FN))

Sentitividad:  0.7818181818181819


In [46]:
print("Especificidad: ", TN/(TN+FP))

Especificidad:  0.9210526315789473


In [47]:
from Plotting_Decision_Boundaries import plotting_db

In [48]:
X_total = np.vstack((X_train, X_test))
y_total = np.hstack((y_train, y_test))

In [49]:
#plotting_db(X=X_total, y=y_total, classifier=svm)

### Naive - Bayes

In [50]:
from sklearn.naive_bayes import GaussianNB

In [51]:
clf_nb = GaussianNB()
clf_nb

GaussianNB()

In [52]:
clf_nb.fit(X_train, y_train)

GaussianNB()

In [53]:
y_preds_nb = clf_nb.predict(X_test)

In [54]:
print("Accuracy: ", accuracy_score(y_test, y_preds_nb))

Accuracy:  0.8666666666666667


In [55]:
conf_matrix = pd.crosstab(y_test, y_preds_svm, rownames=["observación"], colnames=["Predicción"])
print("Matriz de Confusión: \n\n", conf_matrix)

Matriz de Confusión: 

 Predicción     0   1
observación         
0            175  15
1             24  86


In [56]:
TP = conf_matrix.iloc[1,1]
TN = conf_matrix.iloc[0,0]
FN = conf_matrix.iloc[1,0]
FP = conf_matrix.iloc[0,1]

In [57]:
print("Sentitividad: ", TP/(TP+FN))

Sentitividad:  0.7818181818181819


In [58]:
print("Especificidad: ", TN/(TN+FP))

Especificidad:  0.9210526315789473
