###Configs Iniciais

In [1]:
!pip install sklearn_extensions



In [2]:
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn import metrics 
from sklearn.preprocessing import StandardScaler, MinMaxScaler

from sklearn_extensions.extreme_learning_machines.elm import GenELMClassifier
from sklearn_extensions.extreme_learning_machines.random_layer import RBFRandomLayer, MLPRandomLayer

In [3]:
mushrooms_db = pd.read_csv('/content/sample_data/mushrooms.csv')
mushrooms_db.head(10)

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,stalk-root,stalk-surface-above-ring,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,e,e,s,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,e,c,s,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,e,c,s,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,e,e,s,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,t,e,s,s,w,w,p,w,o,e,n,a,g
5,e,x,y,y,t,a,f,c,b,n,e,c,s,s,w,w,p,w,o,p,k,n,g
6,e,b,s,w,t,a,f,c,b,g,e,c,s,s,w,w,p,w,o,p,k,n,m
7,e,b,y,w,t,l,f,c,b,n,e,c,s,s,w,w,p,w,o,p,n,s,m
8,p,x,y,w,t,p,f,c,n,p,e,e,s,s,w,w,p,w,o,p,k,v,g
9,e,b,s,y,t,a,f,c,b,g,e,c,s,s,w,w,p,w,o,p,k,s,m


DETALHAMENTO DAS VARIÁVEIS:
<ol>
<li>cap-shape: bell=b, conical=c, convex=x, flat=f, knobbed=k, sunken=s</li>
<li>cap-surface: fibrous=f, grooves=g, scaly=y, smooth=s</li>
<li>cap-color: brown=n, buff=b, cinnamon=c, gray=g, green=r, pink=p, purple=u, red=e, white=w, yellow=y</li>
<li>bruises: bruises=t, no=f</li>
<li>odor: almond=a, anise=l, creosote=c, fishy=y, foul=f, musty=m, none=n, pungent=p, spicy=s</li>
<li>gill-attachment: attached=a, descending=d, free=f, notched=n</li>
<li>gill-spacing: close=c, crowded=w, distant=d</li>
<li>gill-size: broad=b, narrow=n</li>
<li>gill-color: black=k, brown=n, buff=b, chocolate=h, gray=g, green=r, orange=o, pink=p, purple=u, red=e, white=w, yellow=y</li>
<li>stalk-shape: enlarging=e, tapering=t</li>
<li>stalk-root: bulbous=b, club=c, cup=u, equal=e, rhizomorphs=z, rooted=r, missing=?</li>
<li>stalk-surface-above-ring: fibrous=f, scaly=y, silky=k, smooth=s</li>
<li>stalk-surface-below-ring: fibrous=f, scaly=y, silky=k, smooth=s</li>
<li>stalk-color-above-ring: brown=n, buff=b, cinnamon=c, gray=g, orange=o, pink=p, red=e, white=w, yellow=y</li>
<li>stalk-color-below-ring: brown=n, buff=b, cinnamon=c, gray=g, orange=o, pink=p, red=e, white=w, yellow=y</li>
<li>veil-type: partial=p, universal=u</li>
<li>veil-color: brown=n, orange=o, white=w, yellow=y</li>
<li>ring-number: none=n, one=o, two=t</li>
<li>ring-type: cobwebby=c, evanescent=e, flaring=f, large=l, none=n, pendant=p, sheathing=s, zone=z</li>
<li>spore-print-color: black=k, brown=n, buff=b, chocolate=h, green=r, orange=o, purple=u, white=w, yellow=y</li>
<li>population: abundant=a, clustered=c, numerous=n, scattered=s, several=v, solitary=y</li>
<li>habitat: grasses=g, leaves=l, meadows=m, paths=p, urban=u, waste=w, woods=d</li>
</ol>

### Checar valores faltantes (missing values)

In [4]:
mushrooms_db.isnull().sum()

class                       0
cap-shape                   0
cap-surface                 0
cap-color                   0
bruises                     0
odor                        0
gill-attachment             0
gill-spacing                0
gill-size                   0
gill-color                  0
stalk-shape                 0
stalk-root                  0
stalk-surface-above-ring    0
stalk-surface-below-ring    0
stalk-color-above-ring      0
stalk-color-below-ring      0
veil-type                   0
veil-color                  0
ring-number                 0
ring-type                   0
spore-print-color           0
population                  0
habitat                     0
dtype: int64

In [5]:
mushrooms_db.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8124 entries, 0 to 8123
Data columns (total 23 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   class                     8124 non-null   object
 1   cap-shape                 8124 non-null   object
 2   cap-surface               8124 non-null   object
 3   cap-color                 8124 non-null   object
 4   bruises                   8124 non-null   object
 5   odor                      8124 non-null   object
 6   gill-attachment           8124 non-null   object
 7   gill-spacing              8124 non-null   object
 8   gill-size                 8124 non-null   object
 9   gill-color                8124 non-null   object
 10  stalk-shape               8124 non-null   object
 11  stalk-root                8124 non-null   object
 12  stalk-surface-above-ring  8124 non-null   object
 13  stalk-surface-below-ring  8124 non-null   object
 14  stalk-color-above-ring  

###Converter variáveis categóricas

In [6]:
mushrooms_aux = pd.get_dummies(mushrooms_db,drop_first=False)
mushrooms_aux.head()

Unnamed: 0,class_e,class_p,cap-shape_b,cap-shape_c,cap-shape_f,cap-shape_k,cap-shape_s,cap-shape_x,cap-surface_f,cap-surface_g,cap-surface_s,cap-surface_y,cap-color_b,cap-color_c,cap-color_e,cap-color_g,cap-color_n,cap-color_p,cap-color_r,cap-color_u,cap-color_w,cap-color_y,bruises_f,bruises_t,odor_a,odor_c,odor_f,odor_l,odor_m,odor_n,odor_p,odor_s,odor_y,gill-attachment_a,gill-attachment_f,gill-spacing_c,gill-spacing_w,gill-size_b,gill-size_n,gill-color_b,...,stalk-color-below-ring_n,stalk-color-below-ring_o,stalk-color-below-ring_p,stalk-color-below-ring_w,stalk-color-below-ring_y,veil-type_p,veil-color_n,veil-color_o,veil-color_w,veil-color_y,ring-number_n,ring-number_o,ring-number_t,ring-type_e,ring-type_f,ring-type_l,ring-type_n,ring-type_p,spore-print-color_b,spore-print-color_h,spore-print-color_k,spore-print-color_n,spore-print-color_o,spore-print-color_r,spore-print-color_u,spore-print-color_w,spore-print-color_y,population_a,population_c,population_n,population_s,population_v,population_y,habitat_d,habitat_g,habitat_l,habitat_m,habitat_p,habitat_u,habitat_w
0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,...,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0
1,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,...,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0
2,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,...,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0
3,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,1,1,0,0,1,0,...,0,0,0,1,0,1,0,0,1,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0
4,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,1,1,0,0,...,0,0,0,1,0,1,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0


### Classificação

1º: Separar o conjunto em variavel resposta e características de treinamento

In [7]:
X = mushrooms_aux.iloc[:].values
y = mushrooms_aux.iloc[:,0].values

In [8]:
X.shape

(8124, 119)

In [9]:
X

array([[0, 1, 0, ..., 0, 1, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 1, ..., 0, 0, 0],
       ...,
       [1, 0, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0]], dtype=uint8)

In [10]:
y

array([0, 1, 1, ..., 1, 0, 1], dtype=uint8)

###Normalização do Conjunto

In [11]:
X = StandardScaler().fit_transform(X)

In [12]:
X

array([[-1.0366127 ,  1.0366127 , -0.24272523, ..., -0.40484176,
         4.59086996, -0.15558197],
       [ 0.96468045, -0.96468045, -0.24272523, ..., -0.40484176,
        -0.21782364, -0.15558197],
       [ 0.96468045, -0.96468045,  4.11988487, ..., -0.40484176,
        -0.21782364, -0.15558197],
       ...,
       [ 0.96468045, -0.96468045, -0.24272523, ..., -0.40484176,
        -0.21782364, -0.15558197],
       [-1.0366127 ,  1.0366127 , -0.24272523, ..., -0.40484176,
        -0.21782364, -0.15558197],
       [ 0.96468045, -0.96468045, -0.24272523, ..., -0.40484176,
        -0.21782364, -0.15558197]])

Agora dividir em treino e teste (teste com 30%)

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = .3, random_state=0)

In [14]:
X_train.shape

(5686, 119)

####Modelo de Rede Neural ELM



In [15]:
nh = 20
funcoes = ['tanh', 'sine', 'tribas', 'sigmoid', 'hardlim', 'softlim', 'gaussian', 'multiquadric', 'inv_multiquadric']


In [16]:
#EML baseado em MLP

acc = []
dictionary = {}

for funcao in range(len(funcoes)):
  srhl = MLPRandomLayer(n_hidden=nh, activation_func=funcoes[funcao], random_state=0)
  elm_model = GenELMClassifier(hidden_layer=srhl)
  elm_model.fit(X_train, y_train)
  acc.append(elm_model.score(X_test, y_test))

dictionary = {'Função':funcoes,'Acuracia':acc}

acc_df = pd.DataFrame(dictionary).sort_values(by='Acuracia', ascending=False)
acc_df

Unnamed: 0,Função,Acuracia
3,sigmoid,0.938884
5,softlim,0.932322
0,tanh,0.92781
4,hardlim,0.926169
7,multiquadric,0.790812
8,inv_multiquadric,0.685398
6,gaussian,0.583265
2,tribas,0.541017
1,sine,0.513536


In [17]:
#EML baseado em rede RBF

acc = []
dictionary = {}

for funcao in range(len(funcoes)):
  srhl_rbf = RBFRandomLayer(n_hidden=nh, rbf_width=1, random_state=0,activation_func=funcoes[funcao])
  elm_model = GenELMClassifier(hidden_layer=srhl_rbf)
  elm_model.fit(X_train, y_train)
  acc.append(elm_model.score(X_test, y_test))

dictionary = {'Função':funcoes,'Acuracia':acc}
# Put the accuracies in a data frame.
acc_df = pd.DataFrame(dictionary).sort_values(by='Acuracia', ascending=False)
acc_df

Unnamed: 0,Função,Acuracia
6,gaussian,0.839212
0,tanh,0.817473
3,sigmoid,0.803527
7,multiquadric,0.792863
8,inv_multiquadric,0.791222
1,sine,0.788351
4,hardlim,0.521739
5,softlim,0.521739
2,tribas,0.478261
