#### Implementação do MLP (Perceptron Multicamadas)

In [10]:
import warnings
warnings.filterwarnings('ignore')

In [11]:
from sklearn.decomposition import PCA
from sklearn import metrics
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
 
# Load data
# The workbook is fetched from a Google Drive share link: the file id is the
# path segment right before the trailing "edit?..." part of the link, and it is
# appended to Drive's direct-download endpoint.
# Alternative local copy (disabled): "lib/drybean/Dry_Bean_Dataset.xlsx"
url = 'https://docs.google.com/spreadsheets/d/1N-KOihE3XDDfHEsaj8dt9-y8jrVOv8wB/edit?usp=sharing&ouid=102198786401215745194&rtpof=true&sd=true'
path = ''.join(['https://drive.google.com/uc?export=download&id=', url.rsplit('/', 2)[1]])

# Read the workbook into a pandas DataFrame.
df = pd.read_excel(path, engine='openpyxl')

df.head()

Unnamed: 0,Area,Perimeter,MajorAxisLength,MinorAxisLength,AspectRation,Eccentricity,ConvexArea,EquivDiameter,Extent,Solidity,roundness,Compactness,ShapeFactor1,ShapeFactor2,ShapeFactor3,ShapeFactor4,Class
0,28395,610.291,208.178117,173.888747,1.197191,0.549812,28715,190.141097,0.763923,0.988856,0.958027,0.913358,0.007332,0.003147,0.834222,0.998724,SEKER
1,28734,638.018,200.524796,182.734419,1.097356,0.411785,29172,191.27275,0.783968,0.984986,0.887034,0.953861,0.006979,0.003564,0.909851,0.99843,SEKER
2,29380,624.11,212.82613,175.931143,1.209713,0.562727,29690,193.410904,0.778113,0.989559,0.947849,0.908774,0.007244,0.003048,0.825871,0.999066,SEKER
3,30008,645.884,210.557999,182.516516,1.153638,0.498616,30724,195.467062,0.782681,0.976696,0.903936,0.928329,0.007017,0.003215,0.861794,0.994199,SEKER
4,30140,620.134,201.847882,190.279279,1.060798,0.33368,30417,195.896503,0.773098,0.990893,0.984877,0.970516,0.006697,0.003665,0.9419,0.999166,SEKER


In [12]:
# Split the frame into a feature matrix and a label vector.
# Feature columns are selected by name instead of by position (`columns[:-1]`),
# so the target is excluded even if 'Class' is not the last column.
# Columns that pandas auto-named "Unnamed: N" are skipped as well: the displayed
# head shows an 'Unnamed: 0' column that looks like a saved row index, and a row
# index must not be fed to the model as a feature.
# NOTE(review): confirm 'Unnamed: 0' is not a genuine measurement.
features = df.columns[np.array(
    [name != 'Class' and not str(name).startswith('Unnamed:') for name in df.columns],
    dtype=bool,
)]
x = df.loc[:, features].values   # feature matrix
y = df.loc[:, 'Class'].values    # class labels

In [13]:
# Split the dataset: 20% of the samples are held out for testing. The split is
# stratified on the labels so the class proportions of the training targets stay
# similar to those of the test targets, and the random state is fixed so the
# same partition is produced on every run.
train_features, test_features, train_targets, test_targets = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=123,
    stratify=y,
)

In [32]:
def buildMPLPerceptron(train_features, test_features, train_targets, test_targets, num_neurons = 2, activation_function = 'relu', optimization_function = 'sgd', learning = 'invscaling', alp = 0.0001, max_iter = 35, random_state = 457):
    """Train an ``MLPClassifier`` and return its accuracy on the test split.

    The fitted model is also stored in the module-level name ``classifier``
    (overwritten on every call) so it can be inspected after the call.

    Parameters
    ----------
    train_features, train_targets
        Samples and labels used to fit the network.
    test_features, test_targets
        Samples and labels used to compute the accuracy.
    num_neurons
        Forwarded as ``hidden_layer_sizes``; an int is treated by scikit-learn
        as a single hidden layer of that width.
    activation_function
        Forwarded as ``activation``.
    optimization_function
        Forwarded as ``solver``.
    learning
        Forwarded as ``learning_rate`` (per the scikit-learn documentation this
        schedule is only used by the ``'sgd'`` solver).
    alp
        Forwarded as ``alpha``.
    max_iter
        Maximum number of training iterations. Defaults to 35, the value that
        used to be hard-coded here.
    random_state
        Seed handed to the classifier. Defaults to 457, the value that used to
        be hard-coded here.

    Returns
    -------
    The accuracy of the predictions on ``test_features`` as a fraction,
    rounded to two decimal places.
    """
    global classifier
    classifier = MLPClassifier(
        hidden_layer_sizes=num_neurons,
        max_iter=max_iter,
        activation=activation_function,
        solver=optimization_function,
        verbose=False,
        random_state=random_state,
        learning_rate=learning,
        alpha=alp,
    )
    classifier.fit(train_features, train_targets)
    predictions = classifier.predict(test_features)
    score = np.round(metrics.accuracy_score(test_targets, predictions), 2)

    return score

In [15]:
# Baseline run: request 2 hidden neurons, everything else at the defaults.
score = buildMPLPerceptron(train_features, test_features, train_targets, test_targets, num_neurons = 2)
# Fixed one-decimal formatting: multiplying the rounded float by 100 can
# otherwise print artifacts such as 14.000000000000002.
print(f"Accuracy: {score*100:.1f}%")

Accuracy: 26.0%


In [16]:
# Vary the number of neurons: train one model for each value from 1 to 14 and
# record the resulting accuracy keyed by that value.
scores_dict = {}
for i in range(1, 15):
    score = buildMPLPerceptron(
        train_features,
        test_features,
        train_targets,
        test_targets,
        num_neurons=i,
    )
    scores_dict[i] = score

In [17]:
# Display the accuracy recorded for each neuron count.
scores_dict

{1: 0.15,
 2: 0.26,
 3: 0.26,
 4: 0.14,
 5: 0.14,
 6: 0.26,
 7: 0.1,
 8: 0.19,
 9: 0.26,
 10: 0.1,
 11: 0.04,
 12: 0.19,
 13: 0.26,
 14: 0.26}

In [18]:
# Key with the highest recorded accuracy (the first such key wins on ties).
max_idx = max(scores_dict, key=scores_dict.get)
# Fixed one-decimal formatting: multiplying the rounded float by 100 can
# otherwise print artifacts such as 14.000000000000002.
print(f"Melhor resultado com {max_idx} neurônios. Acurácia: {scores_dict[max_idx]*100:.1f}%")

Melhor resultado com 2 neurônios. Acurácia: 26.0%


In [19]:
# Test different activation functions. MLPClassifier accepts
# activation in {'identity', 'logistic', 'tanh', 'relu'} (default 'relu');
# one model is trained per option with the other arguments left at their
# defaults, and the accuracy is recorded keyed by the activation name.
activation = ['identity', 'logistic', 'tanh', 'relu']
scores_dict = {}
for act in activation:
    score = buildMPLPerceptron(
        train_features,
        test_features,
        train_targets,
        test_targets,
        activation_function=act,
    )
    scores_dict[act] = score


In [20]:
# Display the accuracy recorded for each activation function.
scores_dict

{'identity': 0.49, 'logistic': 0.12, 'tanh': 0.12, 'relu': 0.26}

In [21]:
# Key with the highest recorded accuracy (the first such key wins on ties).
max_idx = max(scores_dict, key=scores_dict.get)
# Fixed one-decimal formatting: multiplying the rounded float by 100 can
# otherwise print artifacts such as 14.000000000000002.
print(f"Melhor resultado com função de ativação {max_idx}. Acurácia: {scores_dict[max_idx]*100:.1f}%")

Melhor resultado com função de ativação identity. Acurácia: 49.0%


In [33]:
# Vary alpha: train one model per candidate value (passed through the `alp`
# argument) and record the accuracy keyed by that value.
alpha = [1, 0.5, 0.1, 0.05, 0.01, 0.005, 0.001, 0.0005, 0.0001]
scores_dict = {}
for a in alpha:
    score = buildMPLPerceptron(
        train_features,
        test_features,
        train_targets,
        test_targets,
        alp=a,
    )
    scores_dict[a] = score

In [34]:
# Display the accuracy recorded for each alpha value.
scores_dict

{1: 0.26,
 0.5: 0.26,
 0.1: 0.26,
 0.05: 0.26,
 0.01: 0.26,
 0.005: 0.26,
 0.001: 0.26,
 0.0005: 0.26,
 0.0001: 0.26}

In [35]:
# Key with the highest recorded accuracy (the first such key wins on ties).
max_idx = max(scores_dict, key=scores_dict.get)
# Fixed one-decimal formatting: multiplying the rounded float by 100 can
# otherwise print artifacts such as 14.000000000000002.
print(f"Melhor resultado com alpha {max_idx}. Acurácia: {scores_dict[max_idx]*100:.1f}%")

Melhor resultado com alpha 1. Acurácia: 26.0%
