In [None]:
import os

import pandas as pd
import io
import requests
import numpy as np
import math
import itertools    

from sklearn import preprocessing
from sklearn import model_selection
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import confusion_matrix

import tensorflow as tf
from tensorflow import keras

# Show the installed TensorFlow version string. `tf.version` is a module, so
# printing it directly only shows the module repr, not the version.
print(tf.__version__)

from keras.layers import Dense
from keras.layers import Dropout
from keras.models import Sequential
from keras.optimizers import SGD
from keras.optimizers import Adam, Adamax, Adagrad, Ftrl

import matplotlib.pyplot as plt


from rbf_tf.rbflayer import RBFLayer, InitCentersRandom
from rbf_tf.kmeans_initializer import InitCentersKMeans

# Download the training split of the UCI thyroid-disease (ANN) data set.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/thyroid-disease/ann-train.data"
# Fetch the file exactly once, with a timeout, and fail loudly on an HTTP
# error instead of trying to parse an error page as CSV.
response = requests.get(url, timeout=30)
response.raise_for_status()
s = response.content
# The file has no header row, so columns get integer labels. The single-space
# delimiter is kept as-is because later cells delete columns by those labels.
dataTrain = pd.read_csv(io.StringIO(s.decode('utf-8')), delimiter=' ', header=None)

print(dataTrain.shape)
dataTrain.head()

In [None]:
# Discard five columns of the training frame by their integer labels.
# NOTE(review): 22 and 23 are presumably empty columns created by trailing
# delimiters in the raw file, and 11, 12, 14 a feature-selection choice -
# confirm against the data.
dataTrain = dataTrain.drop(columns=[22, 23, 11, 12, 14])

In [None]:
# Preview the first five rows of the training frame after the column removal.
dataTrain.head(5)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
dataTrain.hist(bins=50, figsize=(20,15))
plt.show()

In [None]:
# Predictor data: every remaining column except column 21, which is used as
# the target in the next cell.
inputTrain = dataTrain.drop(columns=[21])
inputTrain.head()

In [None]:
# Target data: column 21 with 1 subtracted from every value
# (presumably converting 1-based class labels to 0-based - verify).
outputTrain = dataTrain[21].sub(1)
outputTrain.head()

In [None]:
# Download the test split of the UCI thyroid-disease (ANN) data set.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/thyroid-disease/ann-test.data"
# Fetch the file exactly once, with a timeout, and fail loudly on an HTTP
# error instead of trying to parse an error page as CSV.
response = requests.get(url, timeout=30)
response.raise_for_status()
s = response.content
# The file has no header row, so columns get integer labels. The single-space
# delimiter is kept as-is because later cells delete columns by those labels.
dataTest = pd.read_csv(io.StringIO(s.decode('utf-8')), delimiter=' ', header=None)

print(dataTest.shape)
dataTest.head()

In [None]:
# Discard the same five column labels from the test frame that are removed
# from the training frame, so both frames keep an identical set of columns.
dataTest = dataTest.drop(columns=[22, 23, 11, 12, 14])

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
dataTest.hist(bins=50, figsize=(20,15))
plt.show()

In [None]:
# Split the test frame the same way as the training frame: column 21 is the
# label (with 1 subtracted), every other column is a predictor.
outputTest = dataTest[21].sub(1)
inputTest = dataTest.drop(columns=[21])

In [None]:
# Fit the min-max scaler on the training predictors only, then apply that same
# fitted scaling to both the training and the test predictors.
scaler = preprocessing.MinMaxScaler().fit(inputTrain)
xtrainN, xtestN = (scaler.transform(frame) for frame in (inputTrain, inputTest))

In [None]:
# One-hot encode the integer training labels; the network ends in a 3-unit
# softmax layer, so it is trained against one column per class.
outputTrain_one_hot = keras.utils.to_categorical(np.asarray(outputTrain))

In [None]:
# Disabled exploratory snippet (class proportions and manual inverse-frequency
# weights), kept for reference:
# '''print("Class proportions in the dataset")
# counts = np.bincount(outputTrain)
# print(counts)
# print("Training data:",  counts/ float(len(outputTrain)))
# print("Test data:", np.bincount(outputTest) / float(len(outputTest)))

# peso_classe1 = 1.0/counts[0]
# peso_classe2 = 1.0/counts[1]
# peso_classe3 = 1.0/counts[2]
# '''

# 'balanced' class weights computed from the training labels. `classes` and
# `y` are passed by keyword: recent scikit-learn releases reject them as
# positional arguments.
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=np.unique(outputTrain),
    y=outputTrain,
)
print(class_weights)
# Map position -> weight for Keras' `class_weight` argument. np.unique returns
# the classes sorted, so position equals label only if labels are 0..k-1
# (assumed here - verify).
d_class_weights = dict(enumerate(class_weights))

In [None]:
 # Hold out 30% of the scaled training data for validation. The split is
 # stratified on the integer labels and uses a fixed seed so it is repeatable.
 X_train, X_validation, y_train, y_validation = model_selection.train_test_split(
     xtrainN, outputTrain_one_hot,
     stratify=outputTrain,
     random_state=1,
     test_size=0.3,
 )

In [None]:
# RBF network: an input sized to the number of feature columns, a 700-unit RBF
# layer whose centers are initialised by k-means on the training features
# (betas=13.0), and a 3-unit softmax output layer.
model = Sequential()
model.add(keras.Input(shape=(X_train.shape[-1],)))
model.add(RBFLayer(700, initializer=InitCentersKMeans(X_train), betas=13.0))
model.add(Dense(3, activation="softmax"))
model.summary()

# model = Sequential(
#     [
#         keras.Input(shape=(X_train.shape[-1],)),
#         RBFLayer(700,
# #                       initializer=InitCentersRandom(X_train),
#                      initializer=InitCentersKMeans(X_train),
#                       betas=8.0),
# #                       input_shape=(X_train.shape[-1],)),
# #         Dense(256, activation="selu", input_shape=(X_train.shape[-1],)
# #         ),
# #         Dense(256, activation="selu"),
# #         Dropout(0.3),
# #         Dense(256, activation="selu"),
# #         Dropout(0.3),
#         Dense(3, activation="softmax"),
#     ]
# )
# model.summary()

In [None]:
# Confusion counts plus precision and recall, tracked on both the training
# and the validation data during fitting.
metrics = [
    keras.metrics.FalseNegatives(name="fn"),
    keras.metrics.FalsePositives(name="fp"),
    keras.metrics.TrueNegatives(name="tn"),
    keras.metrics.TruePositives(name="tp"),
    keras.metrics.Precision(name="precision"),
    keras.metrics.Recall(name="recall"),
]

# The targets are one-hot vectors and the output layer is a 3-way softmax, so
# the matching loss is categorical cross-entropy; binary cross-entropy would
# score each output unit as an independent yes/no problem.
# Alternative optimizer previously tried: Ftrl(learning_rate=0.001, name="Ftrl").
model.compile(
    optimizer=Adam(1e-3), loss="categorical_crossentropy", metrics=metrics
)

# Checkpoint the model to an .h5 file; the epoch number is substituted into the filename.
callbacks = [keras.callbacks.ModelCheckpoint("thyroid_model_at_epoch_{epoch}.h5")]

history = model.fit(
    X_train,
    y_train,
    batch_size=200,
    epochs=500,
    verbose=1,
    callbacks=callbacks,
    validation_data=(X_validation, y_validation),
    class_weight=d_class_weights,  # class weights passed to Keras for the loss
)

# Learning curves. The second curve comes from the validation split, not the
# test set, and is labelled accordingly.
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='validation')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend()


In [None]:
# Predict on the min-max-scaled training features (xtrainN). The model was
# fitted on scaled data, so feeding it the raw `inputTrain` frame would give
# meaningless outputs.
TrainPredictions = model.predict(xtrainN)
# Collapse the per-class probabilities to the index of the most likely class.
TrainPredictions = np.argmax(TrainPredictions, axis=1)

In [None]:
# Confusion matrix of true vs. predicted class on the full training set.
conf = confusion_matrix(outputTrain, TrainPredictions)

classes = [0, 1, 2]
tick_marks = np.arange(len(classes))

# Draw the matrix as a green heat map with one tick per class on each axis.
plt.imshow(conf, cmap=plt.cm.Greens, interpolation='nearest')
plt.title("Train Confusion Matrix")
plt.colorbar()
plt.xticks(tick_marks, classes)
plt.yticks(tick_marks, classes)

# Write each count into its cell; cells above half the maximum count get
# white text, the rest black.
fmt = 'd'
thresh = conf.max() / 2.
for (i, j), count in np.ndenumerate(conf):
    plt.text(
        j, i, format(count, fmt),
        horizontalalignment="center",
        color="black" if not count > thresh else "white",
    )

plt.tight_layout()
plt.ylabel('True label')
plt.xlabel('Predicted label')

In [None]:
# Metric helpers are imported in this cell; the names stay available to the
# cells that follow.
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import classification_report

print('Confusion Matrix\n')
print(conf)

print('\nAccuracy: {:.2f}\n'.format(accuracy_score(outputTrain, TrainPredictions)))

# Precision / recall / F1 under each averaging scheme. A blank line separates
# the groups; the last group is followed directly by the report heading.
for averaging in ('micro', 'macro', 'weighted'):
    heading = averaging.capitalize()
    trailer = '' if averaging == 'weighted' else '\n'
    print('{} Precision: {:.2f}'.format(
        heading, precision_score(outputTrain, TrainPredictions, average=averaging)))
    print('{} Recall: {:.2f}'.format(
        heading, recall_score(outputTrain, TrainPredictions, average=averaging)))
    print('{} F1-score: {:.2f}{}'.format(
        heading, f1_score(outputTrain, TrainPredictions, average=averaging), trailer))

print('\nClassification Report\n')
print(classification_report(outputTrain, TrainPredictions, target_names=['Class 1', 'Class 2', 'Class 3']))

In [None]:
# Predict on the min-max-scaled test features (xtestN). The model was fitted
# on scaled data, so feeding it the raw `inputTest` frame would give
# meaningless outputs.
TestPredictions = model.predict(xtestN)
# Collapse the per-class probabilities to the index of the most likely class.
TestPredictions = np.argmax(TestPredictions, axis=1)

In [None]:
# Confusion matrix of true vs. predicted class on the test set.
conf = confusion_matrix(outputTest, TestPredictions)

classes = [0, 1, 2]
tick_marks = np.arange(len(classes))

# Draw the matrix as a green heat map with one tick per class on each axis.
plt.imshow(conf, cmap=plt.cm.Greens, interpolation='nearest')
plt.title("Test Confusion Matrix")
plt.colorbar()
plt.xticks(tick_marks, classes)
plt.yticks(tick_marks, classes)

# Write each count into its cell; cells above half the maximum count get
# white text, the rest black.
fmt = 'd'
thresh = conf.max() / 2.
for (i, j), count in np.ndenumerate(conf):
    plt.text(
        j, i, format(count, fmt),
        horizontalalignment="center",
        color="black" if not count > thresh else "white",
    )

plt.tight_layout()
plt.ylabel('True label')
plt.xlabel('Predicted label')

In [None]:
from sklearn.metrics import classification_report

print('Confusion Matrix\n')
print(conf)

# accuracy_score, precision_score, recall_score and f1_score are not imported
# in this cell; they must already be in the kernel namespace from an earlier cell.
print('\nAccuracy: {:.2f}\n'.format(accuracy_score(outputTest, TestPredictions)))

# Precision / recall / F1 under each averaging scheme. A blank line separates
# the groups; the last group is followed directly by the report heading.
for averaging in ('micro', 'macro', 'weighted'):
    heading = averaging.capitalize()
    trailer = '' if averaging == 'weighted' else '\n'
    print('{} Precision: {:.2f}'.format(
        heading, precision_score(outputTest, TestPredictions, average=averaging)))
    print('{} Recall: {:.2f}'.format(
        heading, recall_score(outputTest, TestPredictions, average=averaging)))
    print('{} F1-score: {:.2f}{}'.format(
        heading, f1_score(outputTest, TestPredictions, average=averaging), trailer))

print('\nClassification Report\n')
print(classification_report(outputTest, TestPredictions, target_names=['Class 1', 'Class 2', 'Class 3']))