### Avaliação Aula 7 - Voting Classifier

In [5]:
import os

import numpy as np

from si.ensemble.voting_classifier import VotingClassifier
from si.io.csv_file import read_csv
from si.model_selection.cross_validation import k_fold_cross_validation
from si.model_selection.grid_search import grid_search_cv
from si.model_selection.randomized_search_cv import randomized_search_cv
from si.model_selection.split import train_test_split
from si.models.decision_tree_classifier import DecisionTreeClassifier
from si.models.knn_classifier import KNNClassifier
from si.models.logistic_regression import LogisticRegression

In [3]:
# Location of the breast-bin CSV. The datasets directory can be overridden with
# the SI_DATASETS_DIR environment variable so the notebook is not tied to one
# machine; when the variable is unset, the original local datasets directory is
# used as the default.
filename = os.path.join(
    os.environ.get(
        "SI_DATASETS_DIR",
        r'C:\Users\Fofinha\Desktop\UNI\MESTRADO\2o ANO\Sistemas Inteligentes\si\datasets',
    ),
    'breast_bin',
    'breast-bin.csv',
)

In [4]:
# Parse the CSV at `filename` into the dataset reused by the later cells
# (train/test split, cross validation, grid search, randomized search).
# NOTE(review): the stacking section reads the same file with features=True --
# confirm which value is correct for this file.
breast_bin = read_csv(
    filename,
    features=False,
    label=True,
)

In [4]:
# Split the dataset with test_size=0.2 and a fixed random_state of 0.
(train_dataset, test_dataset) = train_test_split(
    breast_bin,
    test_size=0.2,
    random_state=0,
)

In [5]:
# Base estimators for the voting ensemble: a KNN classifier with k=3, plus a
# logistic regression and a decision tree built with their constructor defaults.
knn_model, logistic_model, dt_model = (
    KNNClassifier(k=3),
    LogisticRegression(),
    DecisionTreeClassifier(),
)

In [6]:
# Combine the three base classifiers into a single voting ensemble.
voting = VotingClassifier(
    [
        knn_model,
        logistic_model,
        dt_model,
    ]
)

# Train on the training split. As the last expression of the cell, the value
# returned by fit() is echoed as the cell output.
voting.fit(train_dataset)

<si.ensemble.voting_classifier.VotingClassifier at 0x21f0f28d1d0>

In [7]:
# Predict with the fitted ensemble on the test split; the returned array is
# displayed as the cell output.
voting.predict(test_dataset)

array([1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0.,
       0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0.,
       0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 1., 1., 0., 0., 0.,
       1., 0., 0., 1., 1., 1., 0., 0., 1., 0., 1., 1., 0., 0., 0., 0., 1.,
       0., 0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 1., 1., 0., 0., 1.,
       0., 1., 0., 0., 1., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
       0., 1., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0.,
       0., 1., 0., 1., 0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 1., 1., 0.,
       0., 0., 1.])

In [8]:
# Score the fitted ensemble on the test split; the value is displayed as the
# cell output.
voting.score(test_dataset)

0.9712230215827338

### Avaliação Aula 7 - Stacking Classifier

In [1]:
from si.io.csv_file import read_csv
from si.model_selection.split import stratified_train_test_split
from si.models.knn_classifier import KNNClassifier
from si.models.logistic_regression import LogisticRegression
from si.models.decision_tree_classifier import DecisionTreeClassifier
from si.ensemble.stacking_classifier import StackingClassifier


In [2]:
# Location of the breast-bin CSV. The datasets directory can be overridden with
# the SI_DATASETS_DIR environment variable so the notebook is not tied to one
# machine; when the variable is unset, the original local datasets directory is
# used as the default.
filename_breast = os.path.join(
    os.environ.get(
        "SI_DATASETS_DIR",
        r"C:\Users\Fofinha\Desktop\UNI\MESTRADO\2o ANO\Sistemas Inteligentes\si\datasets",
    ),
    "breast_bin",
    "breast-bin.csv",
)

# Load the comma-separated file into a dataset.
breast = read_csv(filename_breast, sep=",", features=True, label=True)

# Stratified split with test_size=0.20 and a fixed random_state of 42.
train_data, test_data = stratified_train_test_split(breast, test_size=0.20, random_state=42)


In [3]:
#knnregressor
knn = KNNClassifier(k=3)

#logistic regression
LG=LogisticRegression(l2_penalty=1, alpha=0.001, max_iter=1000)

#decisiontreee
DT=DecisionTreeClassifier(min_sample_split=3, max_depth=3, mode='gini')

#final model
final_modelo=knn
modelos=[knn,LG,DT]
exercise=StackingClassifier(modelos,final_modelo)
exercise.fit(train_data)
print(exercise.score(test_data))

0.9784172661870504


### Avaliação Cross Validation

In [7]:
# Logistic regression built with its constructor defaults.
model = LogisticRegression()

# Cross validate the model on the full dataset with cv=5 and seed=1; the
# result is a list of scores, summarised below by its mean and standard
# deviation.
scores_ = k_fold_cross_validation(
    model,
    breast_bin,
    cv=5,
    seed=1,
)
print(scores_)
print(f'Mean score: {np.mean(scores_)} +/- {np.std(scores_)}')

[0.9424460431654677, 0.9712230215827338, 0.9928057553956835, 0.9640287769784173, 0.9712230215827338]
Mean score: 0.9683453237410072 +/- 0.01615103907959975


### Avaliação Grid Search

In [8]:
# Logistic regression built with its constructor defaults.
model = LogisticRegression()

# Candidate values for each hyperparameter: two values for each of three
# hyperparameters.
parameter_grid_ = {
    'l2_penalty': (1, 10),
    'alpha': (0.001, 0.0001),
    'max_iter': (1000, 2000),
}

# Run the grid search on the full dataset with cv=3.
results_ = grid_search_cv(
    model,
    breast_bin,
    hyperparameter_grid=parameter_grid_,
    cv=3,
)

# Show the complete result dictionary, then only the list of scores.
print(results_)
print(results_["scores"])

# Report the best score recorded by the search.
best_score = results_['best_score']
print(f"Best score: {best_score}")

{'scores': [0.9669540229885056, 0.9669540229885057, 0.9669540229885057, 0.9698275862068965, 0.9669540229885057, 0.9683908045977012, 0.9669540229885056, 0.9669540229885057], 'hyperparameters': [{'l2_penalty': 1, 'alpha': 0.001, 'max_iter': 1000}, {'l2_penalty': 1, 'alpha': 0.001, 'max_iter': 2000}, {'l2_penalty': 1, 'alpha': 0.0001, 'max_iter': 1000}, {'l2_penalty': 1, 'alpha': 0.0001, 'max_iter': 2000}, {'l2_penalty': 10, 'alpha': 0.001, 'max_iter': 1000}, {'l2_penalty': 10, 'alpha': 0.001, 'max_iter': 2000}, {'l2_penalty': 10, 'alpha': 0.0001, 'max_iter': 1000}, {'l2_penalty': 10, 'alpha': 0.0001, 'max_iter': 2000}], 'best_hyperparameters': {'l2_penalty': 1, 'alpha': 0.0001, 'max_iter': 2000}, 'best_score': 0.9698275862068965}
[0.9669540229885056, 0.9669540229885057, 0.9669540229885057, 0.9698275862068965, 0.9669540229885057, 0.9683908045977012, 0.9669540229885056, 0.9669540229885057]
Best score: 0.9698275862068965


### Avaliação Aula 7 - Randomized Search

In [9]:
# Logistic regression built with its constructor defaults.
model = LogisticRegression()

# Seed NumPy's global RNG so repeated runs of this cell can give the same
# result.
# NOTE(review): this only helps if randomized_search_cv draws from np.random --
# confirm against its implementation.
np.random.seed(42)

# Candidate values to sample from. max_iter is an iteration count, so its
# candidates are generated as integers; a plain np.linspace(1000, 2000, 200)
# would yield non-integer values such as 1005.03.
parameter_grid_ = {
    'l2_penalty': np.linspace(1, 10, 10),
    'alpha': np.linspace(0.001, 0.0001, 100),
    'max_iter': np.linspace(1000, 2000, 200, dtype=int),
}

# Run the randomized search on the full dataset with cv=3 and n_iter=100.
results_ = randomized_search_cv(
    model,
    breast_bin,
    hyperparameter_grid=parameter_grid_,
    cv=3,
    n_iter=100,
)

# Show the complete result dictionary, then only the list of scores.
print(results_)
print(results_["scores"])

# Report the best score recorded by the search.
best_score = results_['best_score']
print(f"Best score: {best_score}")

{'scores': [0.9669540229885057, 0.9669540229885057, 0.9669540229885056, 0.9669540229885057, 0.9669540229885056, 0.9669540229885057, 0.9669540229885056, 0.9669540229885057, 0.9669540229885056, 0.9669540229885056, 0.9669540229885057, 0.9669540229885057, 0.9669540229885057, 0.9669540229885057, 0.9669540229885057, 0.9655172413793104, 0.9669540229885057, 0.9669540229885057, 0.9669540229885057, 0.9669540229885057, 0.9669540229885057, 0.9669540229885057, 0.9669540229885057, 0.9669540229885057, 0.9669540229885057, 0.9669540229885057, 0.9669540229885056, 0.9683908045977012, 0.9669540229885057, 0.9669540229885057, 0.9669540229885057, 0.9669540229885057, 0.9683908045977011, 0.9669540229885057, 0.9669540229885057, 0.9669540229885057, 0.9669540229885056, 0.9669540229885057, 0.9669540229885056, 0.9669540229885057, 0.9683908045977012, 0.9669540229885057, 0.9669540229885057, 0.9669540229885056, 0.9669540229885057, 0.9669540229885057, 0.9669540229885056, 0.9669540229885057, 0.9669540229885057, 0.966954