# Formação Cientista de Dados - Fernando Amaral e Jones Granatyr
# Deep Learning

In [1]:
import pandas as pd
from keras.layers import Dense
from keras.layers import Input
from keras.models import Sequential
from keras.utils import set_random_seed
from sklearn.compose import make_column_transformer
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.preprocessing import StandardScaler

In [6]:
# Load the credit dataset; the file uses ';' as the field separator.
dataset = pd.read_csv(
    "../dados/Credit2.csv",
    sep=";",
)
dataset

Unnamed: 0,ID,checking_status,credit_history,duration,credit_amount,installment_commitment,residence_since,age,existing_credits,num_dependents,class
0,1,<0,critical/other existing credit,6,1169,4,4,67,2,1,good
1,2,0<=X<200,existing paid,48,5951,2,2,22,1,1,bad
2,3,no checking,critical/other existing credit,12,2096,2,3,49,1,2,good
3,4,<0,existing paid,42,7882,2,4,45,1,2,good
4,5,<0,delayed previously,24,4870,3,4,53,2,2,bad
...,...,...,...,...,...,...,...,...,...,...,...
995,996,no checking,existing paid,12,1736,3,4,31,1,1,good
996,997,<0,existing paid,30,3857,4,4,40,1,1,good
997,998,no checking,existing paid,12,804,4,4,38,1,1,good
998,999,<0,existing paid,45,1845,4,4,23,1,1,bad


In [7]:
# Split the frame into target and features.
# Column 0 is skipped because it carries no semantic value;
# columns 1-9 become the features and column 10 is the target.
y = dataset.iloc[:, 10].values
X = dataset.iloc[:, 1:10].values
# X is now a NumPy array and no longer a DataFrame
X

array([['<0', 'critical/other existing credit', 6, ..., 67, 2, 1],
       ['0<=X<200', 'existing paid', 48, ..., 22, 1, 1],
       ['no checking', 'critical/other existing credit', 12, ..., 49, 1,
        2],
       ...,
       ['no checking', 'existing paid', 12, ..., 38, 1, 1],
       ['<0', 'existing paid', 45, ..., 23, 1, 1],
       ['0<=X<200', 'critical/other existing credit', 45, ..., 27, 1, 1]],
      dtype=object)

In [8]:
# Label-encode the checking_status column (column 0 of X).
# Each category is replaced by an integer code from 0 to 3.
labelencoder = LabelEncoder()
labelencoder.fit(X[:, 0])
X[:, 0] = labelencoder.transform(X[:, 0])
X

array([[1, 'critical/other existing credit', 6, ..., 67, 2, 1],
       [0, 'existing paid', 48, ..., 22, 1, 1],
       [3, 'critical/other existing credit', 12, ..., 49, 1, 2],
       ...,
       [3, 'existing paid', 12, ..., 38, 1, 1],
       [1, 'existing paid', 45, ..., 23, 1, 1],
       [0, 'critical/other existing credit', 45, ..., 27, 1, 1]],
      dtype=object)

In [9]:
# One-hot encode the credit_history column (column 1 of X); this is expected to
# yield 5 indicator columns. remainder="passthrough" keeps every other column as is.
# Note: the `sparse` argument was renamed to `sparse_output` in scikit-learn 1.2
# and the old name was removed in 1.4.
onehotencoder = make_column_transformer(
    (OneHotEncoder(categories='auto', sparse_output=False), [1]),
    remainder="passthrough",
)
onehotencoder.fit(X)
X = onehotencoder.transform(X)
X

array([[0.0, 1.0, 0.0, ..., 67, 2, 1],
       [0.0, 0.0, 0.0, ..., 22, 1, 1],
       [0.0, 1.0, 0.0, ..., 49, 1, 2],
       ...,
       [0.0, 0.0, 0.0, ..., 38, 1, 1],
       [0.0, 0.0, 0.0, ..., 23, 1, 1],
       [0.0, 1.0, 0.0, ..., 27, 1, 1]], dtype=object)

In [10]:
# Drop the first indicator column to avoid the dummy variable trap.
# NOTE: X is reassigned from itself, so running this cell again removes one more column.
X = X[:, 1:]
X

array([[1.0, 0.0, 0.0, ..., 67, 2, 1],
       [0.0, 0.0, 1.0, ..., 22, 1, 1],
       [1.0, 0.0, 0.0, ..., 49, 1, 2],
       ...,
       [0.0, 0.0, 1.0, ..., 38, 1, 1],
       [0.0, 0.0, 1.0, ..., 23, 1, 1],
       [1.0, 0.0, 0.0, ..., 27, 1, 1]], dtype=object)

In [11]:
# Label-encode the target class into integer codes
labelencoder_Y = LabelEncoder()
labelencoder_Y.fit(y)
y = labelencoder_Y.transform(y)
y

array([1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0,
       1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,

In [12]:
# Train/test split: 20% of the rows are held out for testing,
# and random_state pins the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size = 0.2,
    random_state = 0,
)
# Show the size of each part: X_train, X_test, y_train, y_test
print(*(len(part) for part in (X_train, X_test, y_train, y_test)))

800 200 800 200


In [13]:
# Feature scaling (z-score standardisation).
# The scaler statistics are learned from the training split only
# and then applied unchanged to the test split.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)
X_test

array([[-0.65270587, -0.30966177,  0.95357636, ..., -0.50870719,
        -0.71596668, -0.42214126],
       [-0.65270587, -0.30966177,  0.95357636, ..., -0.85315557,
        -0.71596668,  2.36887531],
       [-0.65270587, -0.30966177,  0.95357636, ...,  0.61075002,
        -0.71596668, -0.42214126],
       ...,
       [ 1.53208366, -0.30966177, -1.04868371, ...,  0.26630165,
         1.04100677, -0.42214126],
       [-0.65270587,  3.22932987, -1.04868371, ...,  0.52463793,
         4.55495365, -0.42214126],
       [-0.65270587, -0.30966177,  0.95357636, ...,  0.52463793,
        -0.71596668,  2.36887531]])

In [14]:
# Build and train a small feed-forward binary classifier:
# two hidden ReLU layers of 6 units each and a single sigmoid output unit.

# Seed Python, NumPy and the Keras backend so that weight initialisation and
# batch shuffling are repeatable between runs (same seed value as the split above).
set_random_seed(0)

# Take the input width from the training matrix instead of hard-coding it, so the
# model stays in sync with the preprocessing if the number of encoded columns changes.
n_features = X_train.shape[1]

classifier = Sequential()
# An explicit Input layer replaces the `input_dim` argument on the first Dense layer.
classifier.add(Input(shape = (n_features,)))
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
classifier.add(Dense(units = 6, kernel_initializer = 'uniform', activation = 'relu'))
classifier.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
# fit() is kept as the last expression so the returned History object is displayed.
classifier.fit(X_train, y_train, batch_size = 10, epochs = 100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7c8590>

In [15]:
# Score the test split and threshold the sigmoid output at 0.5,
# giving a boolean prediction per row.
y_pred = classifier.predict(X_test) > 0.5
y_pred



array([[False],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [False],
       [False],
       [False],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [False],
       [False],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [ True],
       [ True],
       [ True],
       [False],
       [False],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [False],
       [ True],
       [ True],
       [ True],
       [False],
       [ True],
       [

In [16]:
# Confusion matrix: rows are the true classes, columns are the predicted classes
cm = confusion_matrix(y_true = y_test, y_pred = y_pred)
cm

array([[ 24,  34],
       [ 20, 122]], dtype=int64)