In [8]:
#definindo imports
from sklearn.base import BaseEstimator, ClassifierMixin
from collections import Counter
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

from sklearn.metrics import accuracy_score

from scipy.sparse import coo_matrix
from sklearn.utils import shuffle

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_classification

from sklearn.model_selection import train_test_split

In [9]:
class PerceptronJM(BaseEstimator, ClassifierMixin):
    
    def __init__(self, num_epocas = 10000, taxa_aprendizado = 0.01):
        self.num_epocas = num_epocas
        self.taxa_aprendizado = taxa_aprendizado
        self.bias = 0
        self.pesos = None
        self.erros = None
        self.matriz_pesos = []
        self.acuracia = []
        
    def funcao_treina(self, x):
        
        #insere o bias no vetor de atributos
        x_bias = np.hstack((self.bias, x))
        
        #calcula o campo induzido
        v = np.dot(self.pesos, x_bias)
        
        #calcula a saída do perceptron
        y_aux = self.funcao_ativacao(v)
        
        return y_aux, x_bias

    def funcao_ativacao(self, x):
        return 1 if (np.dot(self.pesos, x) >= self.bias) else 0

    def fit(self, X, Y):
        
        #converte o X e Y para o formato do numpy
        #isso garante funcionamento caso eles venham de dataframe
        X = np.array(X)
        Y = np.array(Y)
        
        #inicia o vetor de pesos
        self.pesos = np.ones(X.shape[1])
        
        max_acuracia = 0

        #para todas as epocas
        for i in range(self.num_epocas):
            k = 0
            for x, y in zip(X, Y):
                
                y_pred = self.funcao_ativacao(x)
                if y == 1 and y_pred == 0:
                    self.pesos = self.pesos + self.taxa_aprendizado * x
                    self.bias  = self.bias  - self.taxa_aprendizado * 1
                elif y == 0 and y_pred == 1:
                    self.pesos = self.pesos - self.taxa_aprendizado * x
                    self.bias  = self.bias  + self.taxa_aprendizado * 1
                
            self.matriz_pesos.append(self.pesos)    
            self.acuracia.append(accuracy_score(self.predict(X), Y))
            if (self.acuracia[i] > max_acuracia):
                max_acuracia = self.acuracia[i]
                chkptw = self.pesos
                chkptb = self.bias
        #checkpoint (Save the weights and b value)
        self.pesos = chkptw
        self.bias  = chkptb
    
    def predict(self, X):
        
        X = np.array(X)        
        result = []
        
        for x in X:
            y_predict = self.funcao_ativacao(x)
            result.append(y_predict)
        
        return np.array(result)

In [10]:
from sklearn.linear_model import Perceptron

names = ["Percep. - JM", "Percep. Scikit", "Percep. - JM - 50k", "Percep. Scikit - 50k",]

classifiers = [
    PerceptronJM(num_epocas = 10000),
    Perceptron(max_iter = 10000, random_state = 42),
    PerceptronJM(num_epocas = 50000),
    Perceptron(max_iter = 50000, random_state = 42)
]

In [11]:
# teste 1 - tudo default
X, y = make_classification(random_state = 42)
linearly_separable_teste_1 = (X, y)

# teste 2 - copia do https://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html#sphx-glr-auto-examples-classification-plot-classifier-comparison-py
X, y = make_classification(n_features=2, n_redundant=0, n_informative=2, random_state = 42, n_clusters_per_class=1)
rng = np.random.RandomState(42)
X += 2 * rng.uniform(size=X.shape)
linearly_separable_teste_2 = (X, y)

# teste 3 - aumentando número de amostras
X, y = make_classification(n_samples = 5000, random_state = 42)
linearly_separable_teste_3 = (X, y)

# teste 4 - aumentando número de amostras e features
X, y = make_classification(n_samples = 5000, n_features = 30, random_state = 42)
linearly_separable_teste_4 = (X, y)

# teste 5 - aumentando número de amostras e features, mas diminuindo o class_sep
X, y = make_classification(n_samples = 5000, n_features = 30, class_sep = 0.8, random_state = 42)
linearly_separable_teste_5 = (X, y)

datasets = [
    linearly_separable_teste_1,
    linearly_separable_teste_2,
    linearly_separable_teste_3,
    linearly_separable_teste_4,
    linearly_separable_teste_5
]

In [12]:
for ds_cnt, ds in enumerate(datasets):
    # preprocess dataset, split into training and test part
    X, y = ds
    X = StandardScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.4, random_state=42)

    # iterate over classifiers
    for name, clf in zip(names, classifiers):
        clf.fit(X_train, y_train)
        score = clf.score(X_test, y_test)
        
        print("Teste: {}\nClassificador: {}\nScore: {}\n".format(ds_cnt+1, name, score))

Teste: 1
Classificador: Percep. - JM
Score: 0.825

Teste: 1
Classificador: Percep. Scikit
Score: 0.85

Teste: 1
Classificador: Percep. - JM - 50k
Score: 0.825

Teste: 1
Classificador: Percep. Scikit - 50k
Score: 0.85

Teste: 2
Classificador: Percep. - JM
Score: 0.85

Teste: 2
Classificador: Percep. Scikit
Score: 0.85

Teste: 2
Classificador: Percep. - JM - 50k
Score: 0.85

Teste: 2
Classificador: Percep. Scikit - 50k
Score: 0.85

Teste: 3
Classificador: Percep. - JM
Score: 0.7465

Teste: 3
Classificador: Percep. Scikit
Score: 0.8155

Teste: 3
Classificador: Percep. - JM - 50k
Score: 0.7465

Teste: 3
Classificador: Percep. Scikit - 50k
Score: 0.8155

Teste: 4
Classificador: Percep. - JM
Score: 0.8175

Teste: 4
Classificador: Percep. Scikit
Score: 0.791

Teste: 4
Classificador: Percep. - JM - 50k
Score: 0.8175

Teste: 4
Classificador: Percep. Scikit - 50k
Score: 0.791

Teste: 5
Classificador: Percep. - JM
Score: 0.742

Teste: 5
Classificador: Percep. Scikit
Score: 0.751

Teste: 5
Classif