<a href="https://colab.research.google.com/github/badaroz/reconhecimento-padroes/blob/main/problema_essemble.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [136]:
from sklearn.linear_model import LogisticRegression,Perceptron
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import fetch_openml
from sklearn.ensemble import VotingClassifier,StackingClassifier,RandomForestClassifier,ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split,cross_validate,GridSearchCV

import pandas as pd
import numpy as np
import matplotlib.pylab as plt

In [163]:
request = fetch_openml(data_id=1489,as_frame=True)
X = request.data
y = request.target

In [164]:
X.shape,y.shape

((5404, 5), (5404,))

In [165]:
X

Unnamed: 0,V1,V2,V3,V4,V5
0,0.489927,-0.451528,-1.047990,-0.598693,-0.020418
1,-0.641265,0.109245,0.292130,-0.916804,0.240223
2,0.870593,-0.459862,0.578159,0.806634,0.835248
3,-0.628439,-0.316284,1.934295,-1.427099,-0.136583
4,-0.596399,0.015938,2.043206,-1.688448,-0.948127
...,...,...,...,...,...
5399,-0.658318,1.331760,-0.081621,1.794253,-1.082181
5400,-0.044375,-0.010512,0.030989,-0.019379,1.281061
5401,0.246882,-0.793228,1.190101,1.423194,-1.303036
5402,-0.778907,-0.383111,1.727029,-1.432389,-1.208085


In [166]:
y

0       1
1       1
2       1
3       1
4       1
       ..
5399    1
5400    2
5401    2
5402    1
5403    2
Name: Class, Length: 5404, dtype: category
Categories (2, object): ['1', '2']

Voting

In [167]:
Xtr, Xte, ytr, yte = train_test_split(X, y)
Xtr.shape, Xte.shape, ytr.shape, yte.shape

((4053, 5), (1351, 5), (4053,), (1351,))

In [175]:
voting = VotingClassifier([
    ('knn', KNeighborsClassifier()),
    ('log', LogisticRegression()),
    ('tree', DecisionTreeClassifier())
])


voting.fit(Xtr, ytr)
vote_pred = voting.predict(Xte)
vote_hits = vote_pred == yte
vote_hits, sum(vote_hits)/len(vote_hits)

(3532     True
 593      True
 4919     True
 1712    False
 4920     True
         ...  
 414      True
 2445     True
 1175     True
 170     False
 4301     True
 Name: Class, Length: 1351, dtype: bool, 0.8652849740932642)

In [176]:
parametros = {
    'log__solver':['lbfgs', 'liblinear'],
    'tree__max_depth': list(range(1,10,2)),
    'knn__n_neighbors': list(range(1,10,2))   
}

modelo_gv = GridSearchCV(voting,param_grid= parametros)

In [177]:
scores = cross_validate(modelo_gv, X, y)
scores, np.mean(scores['test_score'])

({'fit_time': array([23.81906271, 20.88467431, 22.13368177, 22.93412542, 20.98750806]),
  'score_time': array([0.0702858 , 0.04444885, 0.08903885, 0.04260349, 0.07003856]),
  'test_score': array([0.86586494, 0.88251619, 0.87881591, 0.88251619, 0.87592593])},
 0.8771278308836126)

In [178]:
modelo_gv.fit(X, y)
modelo_gv.best_estimator_

VotingClassifier(estimators=[('knn', KNeighborsClassifier(n_neighbors=1)),
                             ('log', LogisticRegression()),
                             ('tree', DecisionTreeClassifier(max_depth=9))])

Stacking

In [172]:
stacking = StackingClassifier([
    ('voting', voting),
    ('extratrees', ExtraTreesClassifier(random_state=42)),
    ('randomforest', RandomForestClassifier(random_state=42))
], cv=3, passthrough=True)

In [173]:
stacking.fit(Xtr, ytr) 
sc_pr = stacking.predict(Xte)
schits = sc_pr == yte
schits, sum(schits)/len(schits)

(3532     True
 593      True
 4919     True
 1712     True
 4920    False
         ...  
 414      True
 2445     True
 1175     True
 170     False
 4301     True
 Name: Class, Length: 1351, dtype: bool, 0.9104367135455218)

In [174]:
parametros = {
    'voting__tree__max_depth': list(range(1,10,2)),
    'voting__knn__n_neighbors': list(range(1,10,2)),
    'voting__per__solver':[ 'lbfgs', 'liblinear']
}

modelo_sg = GridSearchCV(stacking, param_grid=parametros)

scores = cross_validate(modelo_sg, X, y)
scores, np.mean(scores['test_score'])

({'fit_time': array([4.23308969, 4.17568612, 4.19631076, 4.16768694, 4.16813588]),
  'score_time': array([0.14453268, 0.1513555 , 0.14486885, 0.14264035, 0.14429903]),
  'test_score': array([0.91396855, 0.91859389, 0.90841813, 0.91489362, 0.91203704])},
 0.9135822455202659)