In [1]:
from google.colab import drive
drive.mount('/content/drive')

import os 
os.chdir('/content/drive/My Drive/Mestrado/2021.1/Redes Neurais Artificiais/RNA/')

Mounted at /content/drive


In [13]:
import numpy as np
import pandas as pd
import math

from models import utils, metrics, plots

from sklearn.datasets import load_iris, load_breast_cancer
import pandas as pd
import numpy as np
import random
import seaborn as sns
from copy import deepcopy

In [3]:
list_col_names = ['incidencia_pelvica', 
                  'inclinacao_pelvica', 
                  'angulo_lordose_lombar', 
                  'inclinacao_sacral', 
                  'radio_pelvico',
                  'grau_espondilolistese', 'target']

df_column = pd.read_csv('./datasets/vertebral_column/column_3C.dat', names = list_col_names, sep=' ')

dict_ = dict(zip(df_column['target'].unique().tolist(), range(len(df_column['target'].unique().tolist()))))

df_column['target'] = df_column['target'].apply(lambda row: dict_[row])

for col in df_column.columns:
    if col != 'target':
        df_column = utils.normalize_col(df_column, col)

In [4]:
df_column['target'].value_counts()

1    150
2    100
0     60
Name: target, dtype: int64

In [106]:
def process(y):
    if y == 0:
        return [1, 0, 0]
    if y == 1:
        return [0, 1, 0]
    if y == 2:
        return [0, 0, 1]

def back(y):
    if y == [1, 0, 0]:
        return 0
    if y == [0, 1, 0]:
        return 1
    if y == [0, 0, 1]:
        return 2

def process_one_class(y):
    if y == 0:
        return [1, 0]
    if y == 1:
        return [0, 1]

def back_one_class(y):
    if y == [1, 0]:
        return 0
    if y == [0, 1]:
        return 1

def process_six_class(y):
    if y == 0:
        return [1, 0, 0, 0, 0, 0]
    if y == 1:
        return [0, 1, 0, 0, 0, 0]
    if y == 2:
        return [0, 0, 1, 0, 0, 0]
    if y == 3:
        return [0, 0, 0, 1, 0, 0]
    if y == 4:
        return [0, 0, 0, 0, 1, 0]
    if y == 5:
        return [0, 0, 0, 0, 0, 1]

def back_six_class(y):
    if y == [1, 0, 0, 0, 0, 0]: return 0
    if y == [0, 1, 0, 0, 0, 0]: return 1
    if y == [0, 0, 1, 0, 0, 0]: return 2
    if y == [0, 0, 0, 1, 0, 0]: return 3
    if y == [0, 0, 0, 0, 1, 0]: return 4
    if y == [0, 0, 0, 0, 0, 1]: return 5

In [6]:
X_train, y_train, X_test, y_test = utils.split_train_test(df_column, 'target', train_size=0.8, stratify=True)

df_train = X_train.copy()
df_train['target'] = [process(y) for y in y_train]

df_test = X_test.copy()
df_test['target'] = [process(y) for y in y_test]

In [107]:
class RBF():

    def __init__(self, n_neuron_hide=5, sigma=2, qt_classes=3):

        self.q = n_neuron_hide
        self.sigma = sigma
        self.qt_classes = qt_classes

    def logistic(u):
        return 1.0 / (1.0 + np.exp(-u))

    def tanh(u):
        return (1 - np.exp(-u)) / (1 + np.exp(-u))
    
    def degrau(u, threshold=0):
        if u >= threshold: return 1
        else: return 0

    def get_optimal_weigths(df_train, col_target='target'):
        '''
        Função da OLAM
        '''
        X = np.asmatrix(df_train.drop([col_target], axis=1).values)
        D = np.asmatrix(np.array(df_train[col_target].tolist()))

        try:
            W = ((X.T * X).I * X.T * D)
        except:
            W = np.linalg.pinv(X) * D
        return W

    def predict_row(row, W, qt_classes=3):
        '''
        Função da OLAM
        '''
        result = (row.tolist() * W).tolist()[0]
        result_ = [RBF.logistic(y) for y in result]
        if qt_classes <= 2:
            df = pd.DataFrame(zip([process_one_class(i) for i in range(qt_classes)], result_), columns=['class', 'prob'])
        elif qt_classes == 6:
            df = pd.DataFrame(zip([process_six_class(i) for i in range(qt_classes)], result_), columns=['class', 'prob'])
        else:
            df = pd.DataFrame(zip([process(i) for i in range(qt_classes)], result_), columns=['class', 'prob'])
        output = df.sort_values(by='prob', ascending=False).iloc[0][0]
        return output


    def predict_dataframe(df, W, qt_classes=3):
        '''
        Função da OLAM
        '''
        list_result = []
        for idx, _ in df.iterrows():
            list_result.append(RBF.predict_row(df.iloc[idx], W, qt_classes=qt_classes))

        return list_result
    
    
    def fit(self, X_train, y_train):

        df_train = X_train.copy()
        df_train['target'] = y_train

        self.padroes_centro = np.random.uniform(size=(self.q, len(X_train.columns))) #row, col
        list_h = []
        for idx, row in X_train.iterrows():

            x = X_train.iloc[idx].tolist()
            
            h = []
            for padrao in self.padroes_centro:
                sub = np.subtract(np.array(x), np.array(padrao))
                calc = np.exp(-(np.dot(np.array(sub).T, np.array(sub)) / 2 * (self.sigma ** 2)))
                h.append(calc)

            list_h.append(h)

        M = np.asmatrix(list_h)
        input_olam = np.insert(M, 0, -1, axis=1)
        df_input_olam = pd.DataFrame(input_olam, columns = [f'h{i}' for i in range(input_olam.shape[1])])
        df_input_olam = pd.DataFrame(input_olam, columns = [f'h{i}' for i in range(input_olam.shape[1])])
        df_input_olam['target'] = df_train['target']

        self.weigths = RBF.get_optimal_weigths(df_input_olam, col_target='target')

    
    def predict(self, X_test):

        list_h_test = []
        for idx, row in X_test.iterrows():

            x = X_test.iloc[idx].tolist()
            
            h = []
            for padrao in self.padroes_centro:
                
                sub = np.subtract(np.array(x), np.array(padrao))
                calc = np.exp(-(np.dot(np.array(sub).T, np.array(sub)) / 2 * (self.sigma ** 2)))
                h.append(calc)

            list_h_test.append(h)

        M_teste = np.asmatrix(list_h_test)
        input_olam_test = np.insert(M_teste, 0, -1, axis=1)
        df_input_olam_test = pd.DataFrame(input_olam_test, columns = [f'h{i}' for i in range(input_olam_test.shape[1])])
        
        return RBF.predict_dataframe(df_input_olam_test, self.weigths, self.qt_classes)


In [26]:
model_rbf = RBF(n_neuron_hide=5, sigma=2, qt_classes=3)

In [9]:
model_rbf.fit(df_train.drop(['target'], axis=1).copy(), df_train['target'])

In [10]:
df_test['predict'] = model_rbf.predict(X_test.copy())

In [11]:
y_true = df_test['target'].apply(lambda x: back(x))
y_pred = df_test['predict'].apply(lambda x: back(x))

In [12]:
metrics.taxa_acerto(y_true, y_pred)

0.7741935483870968

### Câncer de Mama

In [97]:
data = load_breast_cancer()
df_cancer = pd.DataFrame(data.data, columns=data.feature_names)
df_cancer['target'] = pd.Series(data.target)

for col in df_cancer.columns:
    if col != 'target':
        df_cancer = utils.normalize_col(df_cancer, col)

df_cancer['target'].value_counts() # 0 = 'malignant', 1 = 'benign'

1    357
0    212
Name: target, dtype: int64

In [98]:
X_train, y_train, X_test, y_test = utils.split_train_test(df_cancer, 'target', train_size=0.8, stratify=True)

df_train = X_train.copy()
df_train['target'] = [process_one_class(y) for y in y_train]

df_test = X_test.copy()
df_test['target'] = [process_one_class(y) for y in y_test]

In [99]:
model_rbf = RBF(n_neuron_hide=100, sigma=5, qt_classes=2)

In [100]:
model_rbf.fit(df_train.drop(['target'], axis=1).copy(), df_train['target'])

In [101]:
df_test['predict'] = model_rbf.predict(X_test.copy())

  # Remove the CWD from sys.path while we load stuff.


In [102]:
y_true = df_test['target'].apply(lambda x: back_one_class(x))
y_pred = df_test['predict'].apply(lambda x: back_one_class(x))

In [103]:
metrics.taxa_acerto(y_true, y_pred)

0.7964601769911505

### Dermatologia

In [104]:
list_col_names = [
      'erythema',
      'scaling',
      'definite borders',
      'itching',
      'koebner phenomenon',
      'polygonal papules',
      'follicular papules',
      'oral mucosal involvement',
      'knee and elbow involvement',
     'scalp involvement',
     'family history',
     'melanin incontinence',
     'eosinophils in the infiltrate',
     'PNL infiltrate',
     'fibrosis of the papillary dermis',
     'exocytosis',
     'acanthosis',
     'hyperkeratosis',
     'parakeratosis',
     'clubbing of the rete ridges',
     'elongation of the rete ridges',
     'thinning of the suprapapillary epidermis',
     'spongiform pustule',
     'munro microabcess',
     'focal hypergranulosis',
     'disappearance of the granular layer',
     'vacuolisation and damage of basal layer',
     'spongiosis',
     'saw-tooth appearance of retes',
     'follicular horn plug',
     'perifollicular parakeratosis',
     'inflammatory monoluclear inflitrate',
     'band-like infiltrate',
     'age', 
     'target']

df_dermatology = pd.read_csv('./datasets/dermatology/dermatology.data', names = list_col_names)
df_dermatology['age'] =  pd.to_numeric(df_dermatology['age'], downcast='integer', errors='coerce')

df_dermatology['target'] = df_dermatology['target'].apply(lambda y: (y - 1)) # Formatando a variável resposta para começar do 0

df_dermatology = df_dermatology.fillna(df_dermatology.mean()).astype('int32') # Preenchendo valores null com média

for col in df_dermatology.columns:
    if col != 'target':
        df_dermatology = utils.normalize_col(df_dermatology, col)

In [105]:
df_dermatology['target'].value_counts()

0    112
2     72
1     61
4     52
3     49
5     20
Name: target, dtype: int64

In [108]:
X_train, y_train, X_test, y_test = utils.split_train_test(df_dermatology, 'target', train_size=0.8, stratify=True)

df_train = X_train.copy()
df_train['target'] = [process_six_class(y) for y in y_train]

df_test = X_test.copy()
df_test['target'] = [process_six_class(y) for y in y_test]

In [129]:
model_rbf = RBF(n_neuron_hide=20, sigma=0.1, qt_classes=6)

In [130]:
model_rbf.fit(df_train.drop(['target'], axis=1).copy(), df_train['target'])

In [131]:
df_test['predict'] = model_rbf.predict(X_test.copy())

In [132]:
y_true = df_test['target'].apply(lambda x: back_six_class(x))
y_pred = df_test['predict'].apply(lambda x: back_six_class(x))

In [133]:
 metrics.taxa_acerto(y_true, y_pred)

0.9166666666666666