# Dataset 

In [53]:
from sklearn.datasets import fetch_openml
import matplotlib.pyplot as plt
# Download OpenML dataset 31. `data_id` is documented as an integer; the
# original string '31' is rejected by scikit-learn releases that validate
# parameter types, so pass a real int.
dataset = fetch_openml(data_id=31)
X, y = dataset.data, dataset.target
# Display the feature-matrix and target shapes as a sanity check.
X.shape, y.shape

((1000, 20), (1000,))

# Divisão em treinamento e teste

In [54]:
from sklearn.model_selection import train_test_split

# Hold out part of the data for the final evaluation; the fixed seed keeps
# the partition identical across runs (test_size is left at its default).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=10,
)
# Show the shape of each of the four partitions.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))

((750, 20), (250, 20), (750,), (250,))

# Modelos de classificação

In [64]:
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier, StackingClassifier

import warnings
# Install a blanket "ignore" filter: from here on, warnings of every category
# raised in this kernel process are suppressed (no message/category/module
# restriction is given).
# NOTE(review): this also hides solver convergence and deprecation warnings
# emitted while fitting the models below -- consider narrowing the filter.
warnings.filterwarnings('ignore')

# Voting ensemble over three heterogeneous base classifiers.
# random_state is pinned to 10 (the seed already used for the train/test
# split): SVC(probability=True) estimates probabilities with an internally
# shuffled cross-validation and LogisticRegression uses the seed when the
# 'liblinear' solver is selected by the grid. Left unseeded, soft-voting
# results could differ from one run to the next.
voting = VotingClassifier(estimators=[
    ('logistic', LogisticRegression(random_state=10)),
    ('svc', SVC(probability=True, random_state=10)),
    ('knn', KNeighborsClassifier()),
])

# Search space for the ensemble. Keys of the form `<name>__<param>` address
# the base estimators registered above; `voting` is the ensemble's own option.
# NOTE(review): scikit-learn documents `decision_function_shape` as ignored
# for binary classification -- if the target is binary, these two values only
# double the search cost. Confirm before dropping the entry.
voting_params = {
    'voting': ['hard', 'soft'],
    'logistic__solver': ['liblinear', 'lbfgs'],
    'svc__decision_function_shape': ['ovo', 'ovr'],
    'svc__C': [10, 100],
    'knn__n_neighbors': [3, 5, 9],
}

# Exhaustive search over the grid above, using all available cores.
grid_voting = GridSearchCV(voting, param_grid=voting_params, n_jobs=-1)

# Stacking ensemble: the base estimators below feed a final estimator
# (left at StackingClassifier's default).
# The decision tree uses splitter='random', so it is stochastic; it is now
# seeded with the same value as the random forest (10) so that repeated runs
# of this cell build identical models.
stacking = StackingClassifier(estimators=[
    ('tree', DecisionTreeClassifier(splitter='random', random_state=10)),
    ('randomforest', RandomForestClassifier(random_state=10)),
    ('knn', KNeighborsClassifier()),
])

# Search space for the stack. `passthrough` is the ensemble's own option;
# the `<name>__<param>` keys address the base estimators registered above.
stacking_params = {
    'passthrough': [True, False],
    'tree__max_depth': [None, 7, 14],
    'knn__weights': ['uniform', 'distance'],
    'randomforest__max_depth': [None, 7, 14],
}

# Exhaustive search over the grid above, using all available cores.
grid_stacking = GridSearchCV(stacking, param_grid=stacking_params, n_jobs=-1)

# Overall model: a two-step pipeline chaining both grid searches.
# The voting search sits in the non-final (transformer) position, so the
# final stacking search is fitted on whatever the voting step's `transform`
# produces rather than on the raw features.
pipemodel = Pipeline(steps=[
    ('voting', grid_voting),
    ('stacking', grid_stacking),
])

# The outer search only varies the number of CV folds used by each inner
# grid search.
params_model = {
    'voting__cv': [3, 5],
    'stacking__cv': [3, 5],
}

# Fit the nested search on the training partition (fit returns the fitted
# search object itself).
model = GridSearchCV(
    estimator=pipemodel,
    param_grid=params_model,
    n_jobs=-1,
).fit(X_train, y_train)

# Element-wise correctness mask on the held-out set, followed by its mean,
# i.e. the fraction of correct predictions (accuracy).
predict = model.predict(X_test)
hits = predict == y_test
hits, hits.mean()


(array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        False,  True, False, False,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True, False,  True,  True,
        False, False, False,  True, False,  True,  True,  True,  True,
        False, False,  True,  True, False, False,  True,  True, False,
         True,  True,  True,  True,  True,  True, False,  True,  True,
         True,  True, False, False, False,  True,  True,  True,  True,
        False,  True,  True, False, False, False,  True,  True, False,
         True,  True, False, False, False,  True,  True, False,  True,
         True, False,  True,  True,  True, False,  True, False,  True,
         True,  True,  True,  True,  True, False, False, False, False,
        False,  True,  True,  True,  True,  True, False,  True,  True,
         True,  True,  True,  True,  True,  True, False,  True, False,
         True, False,  True,  True,  True,  True,  True,  True,  True,
      