# Exercício 02 - Comparação de Redes Neurais Rasas

Aluno: Frederico Luis de Azevedo

Professor: Dr. Francisco de Assis Boldt

## Bibliotecas e Inicialização

In [69]:
import time

import numpy as np

# Plotting
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

from sklearn.datasets import make_classification
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.utils import Bunch



## Classificadores Utilizados

A comparação das redes neurais será feita utilizando os seguintes classificadores:
- SVM com Kernel linear
- SGD com função de perda 'hinge' (SVM linear treinada com descida de gradiente estocástica)

In [71]:
# Linear classifier trained by stochastic gradient descent on the hinge loss.
sgd = SGDClassifier(loss="hinge")

# Support vector machine with a linear kernel.
svm = SVC(kernel="linear")

In [80]:
# (display name, estimator) pairs, in the order they are evaluated and reported.
clfs = [("SGD", sgd), ("SVM", svm)]

## Bases de dados

Para este exercício serão construídas as seguintes bases de dados binárias:
- Base 1: 10.000 registros com 20 características. A base será balanceada
- Base 2: 10.000 registros com 20 características. A base será desbalanceada

In [92]:
# Base 1: balanced binary problem (about 50% of the samples in each class).
# A fixed random_state makes the generated data, and therefore every score
# reported further down, reproducible across kernel restarts.
X, y = make_classification(
    n_samples=10000,
    n_features=20,
    weights=[0.5, 0.5],
    random_state=42,
)

# Class distribution check. The counts are only approximately equal because
# make_classification randomly flips a small fraction of labels by default
# (see its flip_y parameter).
unique, counts = np.unique(y, return_counts=True)
dict(zip(unique, counts))

{0: 4996, 1: 5004}

In [97]:
# Base 2: imbalanced binary problem (about 10% class 0, 90% class 1).
# A fixed random_state makes the generated data, and therefore every score
# reported further down, reproducible across kernel restarts.
X2, y2 = make_classification(
    n_samples=10000,
    n_features=20,
    weights=[0.1, 0.9],
    random_state=42,
)

# Class distribution check (the stray empty print() was removed).
unique, counts = np.unique(y2, return_counts=True)
print(dict(zip(unique, counts)))

{0: 1042, 1: 8958}


## Avaliação de Performance

A avaliação da performance das redes neurais será feita com uma versão da classe PerformanceEvaluator criada na disciplina de Reconhecimento de Padrões

In [94]:
class PerformanceEvaluator():
    """Fit and score several classifiers on several named datasets.

    Results are printed to stdout; nothing is returned or stored.

    Parameters
    ----------
    datasets : iterable of (name, dataset) pairs
        ``dataset`` must expose ``.data`` (features) and ``.target`` (labels).
    cross_val : bool, default False
        When True, each classifier is scored with 5-fold cross-validation.
        When False, a single hold-out split (33% test) is used.
    """

    def __init__(self, datasets, cross_val = False):
        self.datasets = datasets
        self.cross_val = cross_val

    def score(self, clf, X, y):
        """Fit ``clf`` on a random 67% of (X, y) and return its score on the rest.

        The split is not seeded here, so repeated calls use different
        partitions and may return slightly different scores.
        """
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
        clf.fit(X_train, y_train)
        return clf.score(X_test, y_test)

    @staticmethod
    def _report(label, value):
        """Print one right-aligned ``label: value`` line of the report."""
        print('{:>25}: {}'.format(label, value))

    def evaluate(self, clfs):
        """Evaluate every classifier on every dataset and print a report.

        Parameters
        ----------
        clfs : iterable of (name, classifier) pairs
            Each classifier must implement ``fit`` and ``score``.

        Returns
        -------
        None
        """
        for name, dataset in self.datasets:
            print('-------- {} --------'.format(name))
            print(' ')

            for clf_name, clf in clfs:
                # perf_counter is monotonic, so the measured duration cannot be
                # distorted by system clock adjustments the way time.time() can.
                start_time = time.perf_counter()
                if self.cross_val:
                    scores = cross_val_score(clf, dataset.data, dataset.target, cv=5)
                    self._report(clf_name, scores)
                    self._report('Mean', scores.mean())
                    self._report('Std Deviation', scores.std())
                    self._report('Median', np.median(scores))
                    self._report('Best', scores.max())
                else:
                    self._report(clf_name, self.score(clf, dataset.data, dataset.target))

                elapsed_time = time.perf_counter() - start_time
                # Whole-second resolution: sub-second runs are shown as 00:00:00.
                self._report('Time Spent', time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))
                print(' ')

            print(' ')

In [95]:
# Wrap each (features, labels) pair in a Bunch so it exposes the .data and
# .target attributes, and pair it with the display name used in the report.
datasets = [
    (label, Bunch(data=features, target=labels))
    for label, features, labels in (
        ('Base 1', X, y),
        ('Base 2', X2, y2),
    )
]

In [96]:
# Single hold-out evaluation (no cross-validation) of every classifier in
# `clfs` on every dataset; the report is printed to the cell output.
pe = PerformanceEvaluator(datasets=datasets, cross_val=False)
pe.evaluate(clfs=clfs)

-------- Base 1 --------
 
                      SGD: 0.8839393939393939
               Time Spent: 00:00:00
 
                      SVM: 0.8948484848484849
               Time Spent: 00:00:01
 
 
-------- Base 2 --------
 
                      SGD: 0.9448484848484848
               Time Spent: 00:00:00
 
                      SVM: 0.943030303030303
               Time Spent: 00:00:00
 
 
