Autor: Felipe Colombelli
```
```
GitHub: @colombelli

In [0]:
# Utilizaremos dois datasets do scikit-learn
from sklearn import datasets as ds


# Breast cancer identification
# https://scikit-learn.org/stable/datasets/index.html#breast-cancer-dataset
# https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_breast_cancer.html#sklearn.datasets.load_breast_cancer
x, y = ds.load_breast_cancer(return_X_y=True)


# House pricing em Boston
# https://scikit-learn.org/stable/datasets/index.html#boston-dataset
# https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_boston.html#sklearn.datasets.load_boston
x2, y2 = ds.load_boston(return_X_y=True)

### Proposta de solução para o breast cancer dataset

In [0]:
import tensorflow as tf

# Normalização dos dados e transformação das labels para OHE
x = tf.keras.utils.normalize(x, axis=1)
y = tf.keras.utils.to_categorical(y, 2)  

In [0]:
# Investigação do formato

print(x.shape, y.shape)

(569, 30) (569, 2)


In [0]:
# Separação dos dados em treino/teste

n_train = round(x.shape[0] * 0.8)
x_train = x[0:n_train]
x_test = x[n_train:]

y_train = y[0:n_train]
y_test = y[n_train:]

In [0]:
# Proposta de modelo: 30 - 64 relu - 32 relu - 2 softmax + cross entropy

model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(64, activation=tf.nn.relu, input_dim=30))   
model.add(tf.keras.layers.Dense(32, activation=tf.nn.relu)) 
model.add(tf.keras.layers.Dense(2, activation=tf.nn.softmax)) 

opt = tf.keras.optimizers.Adam(lr=0.001)
model.compile(loss='categorical_crossentropy',
             metrics=['accuracy'], optimizer=opt)  

model.fit(x_train, y_train, epochs=10, batch_size=5)

print("\n\n")

val_loss, val_acc = model.evaluate(x_test, y_test)
print("\nLoss: ", val_loss, "\nAccuracy: ", val_acc)

Train on 455 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




Loss:  0.20683436174141734 
Accuracy:  0.92105263


### Proposta de solução para o house pricing

In [0]:
# Investigação do formato

print(x2.shape, y2.shape)

(506, 13) (506,)


In [0]:
# Separação dos dados em treino/teste

n_train = round(x2.shape[0] * 0.8)
x2_train = x2[0:n_train]
x2_test = x2[n_train:]

y2_train = y2[0:n_train]
y2_test = y2[n_train:]

In [0]:
# O modelo proposto aqui visa generalizar ao máximo pois estava apresentando
# alto grau de overfitting, por isso esta quantidade de hidden layers.

model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(6, activation=tf.nn.relu, input_dim=13))   
model.add(tf.keras.layers.Dense(6, activation=tf.nn.relu)) 
model.add(tf.keras.layers.Dense(6, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(6, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(6, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(6, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(6, activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(1, activation=tf.nn.relu)) 

opt = tf.keras.optimizers.Adam(lr=0.0001)
model.compile(loss='mean_squared_error', optimizer=opt)  

model.fit(x2_train, y2_train, epochs=30, batch_size=1)

print("\n\n")

val_loss = model.evaluate(x2_test, y2_test)
print("\nLoss: ", val_loss)

Train on 405 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30




Loss:  16.93345551443572


In [0]:
# Pegando as predições do modelo para os casos de teste

y2_pred = model.predict(x2_test)

In [0]:
# Printando os valores de predição e valores reais

print("Predição    ~    Real\n")
for i, pred in enumerate(y2_pred):
  print("%.2f" %pred[0], "      ~    ", y2_test[i])

Predição    ~    Real

14.60       ~     5.0
17.11       ~     11.9
18.17       ~     27.9
16.74       ~     17.2
14.73       ~     27.5
10.50       ~     15.0
12.20       ~     17.2
10.45       ~     17.9
14.34       ~     16.3
9.56       ~     7.0
11.06       ~     7.2
11.77       ~     7.5
12.51       ~     10.4
8.36       ~     8.8
12.54       ~     8.4
17.75       ~     16.7
17.96       ~     14.2
17.37       ~     20.8
11.95       ~     13.4
12.53       ~     11.7
11.34       ~     8.3
12.87       ~     10.2
11.28       ~     10.9
13.71       ~     11.0
12.66       ~     9.5
13.78       ~     14.5
13.42       ~     14.1
14.77       ~     16.1
14.39       ~     14.3
13.95       ~     11.7
13.47       ~     13.4
12.42       ~     9.6
11.19       ~     8.7
11.57       ~     8.4
18.38       ~     12.8
17.62       ~     10.5
18.53       ~     17.1
19.21       ~     18.4
18.58       ~     15.4
15.47       ~     10.8
12.31       ~     11.8
17.78       ~     14.9
18.87       ~     12.6
1