In [1]:
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.compose import make_column_transformer

In [None]:
# Load the credit data set; the file uses ';' as its field separator.
dataset = pd.read_csv(
    filepath_or_buffer='Credit2.csv',
    sep=';',
)
dataset

Unnamed: 0,ID,checking_status,credit_history,duration,credit_amount,installment_commitment,residence_since,age,existing_credits,num_dependents,class
0,1,<0,critical/other existing credit,6,1169,4,4,67,2,1,good
1,2,0<=X<200,existing paid,48,5951,2,2,22,1,1,bad
2,3,no checking,critical/other existing credit,12,2096,2,3,49,1,2,good
3,4,<0,existing paid,42,7882,2,4,45,1,2,good
4,5,<0,delayed previously,24,4870,3,4,53,2,2,bad
...,...,...,...,...,...,...,...,...,...,...,...
995,996,no checking,existing paid,12,1736,3,4,31,1,1,good
996,997,<0,existing paid,30,3857,4,4,40,1,1,good
997,998,no checking,existing paid,12,804,4,4,38,1,1,good
998,999,<0,existing paid,45,1845,4,4,23,1,1,bad


In [None]:
# Split the frame into target and features as plain NumPy arrays.
# Column 0 is skipped because it carries no semantic value (see original note);
# columns 1..9 are the predictors and column 10 is the class label.
y = dataset.iloc[:, 10].values
x = dataset.iloc[:, 1:10].values
# From here on we work with an array, no longer a DataFrame.
x

array([['<0', 'critical/other existing credit', 6, ..., 67, 2, 1],
       ['0<=X<200', 'existing paid', 48, ..., 22, 1, 1],
       ['no checking', 'critical/other existing credit', 12, ..., 49, 1,
        2],
       ...,
       ['no checking', 'existing paid', 12, ..., 38, 1, 1],
       ['<0', 'existing paid', 45, ..., 23, 1, 1],
       ['0<=X<200', 'critical/other existing credit', 45, ..., 27, 1, 1]],
      dtype=object)

In [None]:
# Label-encode the checking_status column (feature column 0) in place.
# The categories are replaced by integer codes from 0 to 3.
# NOTE(review): integer codes imply an ordering between categories — confirm
# that this is acceptable for the model, otherwise one-hot encode this column too.
labelencoder = LabelEncoder()
labelencoder.fit(x[:, 0])
x[:, 0] = labelencoder.transform(x[:, 0])
x

array([[1, 'critical/other existing credit', 6, ..., 67, 2, 1],
       [0, 'existing paid', 48, ..., 22, 1, 1],
       [3, 'critical/other existing credit', 12, ..., 49, 1, 2],
       ...,
       [3, 'existing paid', 12, ..., 38, 1, 1],
       [1, 'existing paid', 45, ..., 23, 1, 1],
       [0, 'critical/other existing credit', 45, ..., 27, 1, 1]],
      dtype=object)

In [None]:
# One-hot encode the credit_history column (feature column 1); every other
# column is passed through unchanged. This is expected to add 5 indicator columns.
#
# scikit-learn 1.2 renamed OneHotEncoder's `sparse` argument to `sparse_output`
# and 1.4 removed the old spelling, so try the current name first and fall back
# to the legacy one on older installations. Either way the encoder returns a
# dense array.
try:
    credit_history_encoder = OneHotEncoder(categories='auto', sparse_output=False)
except TypeError:
    # scikit-learn < 1.2 does not know `sparse_output`.
    credit_history_encoder = OneHotEncoder(categories='auto', sparse=False)

onehotencoder = make_column_transformer((credit_history_encoder, [1]), remainder="passthrough")
x = onehotencoder.fit_transform(x)
x

array([[0.0, 1.0, 0.0, ..., 67, 2, 1],
       [0.0, 0.0, 0.0, ..., 22, 1, 1],
       [0.0, 1.0, 0.0, ..., 49, 1, 2],
       ...,
       [0.0, 0.0, 0.0, ..., 38, 1, 1],
       [0.0, 0.0, 0.0, ..., 23, 1, 1],
       [0.0, 1.0, 0.0, ..., 27, 1, 1]], dtype=object)

In [None]:
# Drop the first column (one of the one-hot indicator columns) to avoid the
# dummy variable trap.
# NOTE(review): this cell is not idempotent — every re-run removes one more
# column from x, so execute it exactly once after the one-hot encoding cell.
x = x[:, 1:]
x

array([[1.0, 0.0, 0.0, ..., 67, 2, 1],
       [0.0, 0.0, 1.0, ..., 22, 1, 1],
       [1.0, 0.0, 0.0, ..., 49, 1, 2],
       ...,
       [0.0, 0.0, 1.0, ..., 38, 1, 1],
       [0.0, 0.0, 1.0, ..., 23, 1, 1],
       [1.0, 0.0, 0.0, ..., 27, 1, 1]], dtype=object)

In [None]:
# Encode the class label as integers (in the displayed data 'bad' becomes 0
# and 'good' becomes 1). The fitted encoder is kept so predictions can be
# mapped back to the original labels if needed.
labelencoder_y = LabelEncoder().fit(y)
y = labelencoder_y.transform(y)
y

array([1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0,
       1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)
# Sanity check: report the size of each partition.
print(*(len(part) for part in (x_train, x_test, y_train, y_test)))

800 200 800 200


In [None]:
# Feature scaling (z-score standardisation).
# The scaler learns its statistics from the training set only and the same
# transformation is then applied to both partitions, so no test-set
# information leaks into training.
sc = StandardScaler().fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)
x_test

array([[-0.65270587, -0.30966177,  0.95357636, ..., -0.50870719,
        -0.71596668, -0.42214126],
       [-0.65270587, -0.30966177,  0.95357636, ..., -0.85315557,
        -0.71596668,  2.36887531],
       [-0.65270587, -0.30966177,  0.95357636, ...,  0.61075002,
        -0.71596668, -0.42214126],
       ...,
       [ 1.53208366, -0.30966177, -1.04868371, ...,  0.26630165,
         1.04100677, -0.42214126],
       [-0.65270587,  3.22932987, -1.04868371, ...,  0.52463793,
         4.55495365, -0.42214126],
       [-0.65270587, -0.30966177,  0.95357636, ...,  0.52463793,
        -0.71596668,  2.36887531]])

In [None]:
# Feed-forward binary classifier: two hidden ReLU layers with 6 units each and
# a single sigmoid output unit, trained with Adam on binary cross-entropy.
#
# The input width is taken from the training matrix instead of being
# hard-coded, so the network keeps matching the data if the encoding steps
# above ever produce a different number of feature columns.
n_features = x_train.shape[1]

classifier = Sequential()
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu', input_dim = n_features))
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
# NOTE(review): 1000 epochs with no validation data or early stopping —
# consider monitoring a validation split to detect overfitting.
classifier.fit(x_train, y_train, batch_size = 10, epochs = 1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x7f70f2702220>

In [None]:
# Score the test set and turn the sigmoid outputs into boolean class
# predictions using a 0.5 decision threshold.
y_pred = classifier.predict(x_test) > 0.5
y_pred



array([[False],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [False],
       [False],
       [False],
       [ True],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [False],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [ True],
       [ True],
       [False],
       [False],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [False],
       [ True],
       [False],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [

In [None]:
# Confusion matrix of the test-set predictions (rows: true class, columns:
# predicted class). confusion_matrix is already imported in the first cell of
# the notebook, so it is not re-imported here.
cm = confusion_matrix(y_test, y_pred)
cm

array([[ 26,  32],
       [ 20, 122]])