### Voting Classifier

In [None]:
from si.io.csv_file import read_csv
from si.model_selection.split import train_test_split
from si.models.knn_classifier import KNNClassifier
from si.models.logistic_regression import LogisticRegression
from si.models.decision_tree_classifier import DecisionTreeClassifier
from si.ensemble.voting_classifier import VotingClassifier
from si.model_selection.cross_validation import k_fold_cross_validation
from si.model_selection.grid_search import grid_search_cv
from si.model_selection.randomized_search import randomized_search_cv
import numpy as np

In [1]:

# Absolute path to the breast-bin CSV on the author's machine; adjust it when running elsewhere.
data = r'C:\Users\joana\OneDrive\Documentos\GitHub\si\datasets\breast_bin\breast-bin.csv'
# Load the CSV into the dataset object consumed by the cells below.
breast_bin = read_csv(
    data,
    label=True,
    features=False,
)

NameError: name 'read_csv' is not defined

In [None]:
# Split breast_bin into train and test datasets (test_size=0.2) with a fixed random_state.
train_dataset, test_dataset = train_test_split(
    breast_bin,
    random_state=0,
    test_size=0.2,
)

In [None]:
# Base estimators handed to the voting ensemble below: KNN (k=3), logistic
# regression and decision tree, the latter two with their default settings.
knn_model, log_model, DT_model = (
    KNNClassifier(k=3),
    LogisticRegression(),
    DecisionTreeClassifier(),
)

In [None]:
# Ensemble built from the three base models defined above.
voting = VotingClassifier(
    [knn_model, log_model, DT_model],
)

# Fit on the training split; kept as the cell's last expression so the
# notebook still displays whatever fit() returns.
voting.fit(train_dataset)

In [None]:
# Predictions of the fitted ensemble on the test split.
test_predictions = voting.predict(test_dataset)
print('Predict:', test_predictions)
# Score of the fitted ensemble on the same split.
test_score = voting.score(test_dataset)
print('Score', test_score)

### Stacking Classifier

In [None]:
from si.io.csv_file import read_csv
from si.model_selection.split import stratified_train_test_split
from si.models.knn_classifier import KNNClassifier
from si.models.logistic_regression import LogisticRegression
from si.models.decision_tree_classifier import DecisionTreeClassifier
from si.ensemble.stacking_classifier import StackingClassifier

In [None]:
# Absolute path to the breast-bin CSV on the author's machine; adjust it when running elsewhere.
data = r'C:\Users\joana\OneDrive\Documentos\GitHub\si\datasets\breast_bin\breast-bin.csv'
# Reload the dataset for the stacking experiment.
breast_bin = read_csv(
    data,
    label=True,
    features=False,
)

In [None]:
# Split the data for the stacking experiment. The first result must be bound
# to `train_dataset`, the name the fit call further down reads; a misspelled
# target would leave that call using a stale variable from an earlier section.
# NOTE(review): this section imports stratified_train_test_split but calls
# train_test_split (imported in the Voting Classifier section) — confirm which
# split is intended.
train_dataset, test_dataset = train_test_split(breast_bin, test_size=0.2, random_state=0)

In [None]:

# KNN classifier (k=3) used by the stacking ensemble below.
knn = KNNClassifier(k=3)

In [None]:

# Logistic regression base model with explicitly chosen hyperparameters.
LG = LogisticRegression(
    max_iter=1000,
    alpha=0.001,
    l2_penalty=1,
)

In [None]:

# Decision tree base model with explicitly chosen hyperparameters.
DT = DecisionTreeClassifier(
    mode='gini',
    max_depth=3,
    min_sample_split=3,
)

In [None]:

# Base models whose outputs feed the stacking ensemble.
modelos = [knn, LG, DT]

# Final model: a separate KNN instance instead of reusing `knn`, so the same
# object does not play both the base-model and final-model roles.
# NOTE(review): assumes StackingClassifier fits the final model in place, which
# would overwrite the fitted state of `knn` if the very same object were also a
# base model — confirm against the StackingClassifier implementation.
model = KNNClassifier(k=3)

exercise = StackingClassifier(modelos, model)
exercise.fit(train_dataset)
print(exercise.score(test_dataset))

### Cross Validation

In [None]:
# Estimator evaluated by k-fold cross-validation.
model = LogisticRegression()

# Seed NumPy's global RNG so any NumPy-based randomness in the run is repeatable.
np.random.seed(42)

# Run the cross-validation with cv=5 and show the returned scores.
scores = k_fold_cross_validation(model, breast_bin, cv=5)
print("Scores for each fold:", scores)

# Summarise the scores as mean +/- standard deviation.
mean_score = np.mean(scores)
std_score = np.std(scores)
print(f"Mean score: {mean_score} +/- {std_score}")

### Grid Search

In [None]:
# Fresh estimator with default settings, passed to grid_search_cv below.
model = LogisticRegression()

# Define the parameter grid for grid search.
# Keys use the hyperparameter names this notebook passes to LogisticRegression
# (l2_penalty, alpha, max_iter) rather than scikit-learn's 'penalty'/'C', and
# every candidate value is numeric.
# NOTE(review): the l2_penalty candidates reuse the values previously listed
# under 'C'; confirm they are sensible regularisation strengths for this model.
parameter_grid = {
    'l2_penalty': [0.1, 1.0, 10.0],
    'max_iter': [500, 1000, 1500],
}

# Perform grid search with cross-validation (cv=3).
# The grid is passed positionally so the call does not depend on the keyword
# name, which differs from the `hyperparameter_grid=` keyword used for
# randomized_search_cv in this notebook.
# NOTE(review): assumes the grid is grid_search_cv's third positional
# parameter — confirm against its signature.
grid_search = grid_search_cv(model, breast_bin, parameter_grid, cv=3)

# Full result returned by the search.
print(grid_search)

# Entry stored under the "scores" key.
print(grid_search["scores"])

# Entry stored under the 'best_score' key.
print(grid_search['best_score'])

### Randomized Search

In [None]:
# Fresh estimator with default settings, passed to randomized_search_cv below.
model = LogisticRegression()
# Candidate values for the randomized search, keyed by the hyperparameter
# names this notebook passes to LogisticRegression.
parameter_grid_ = {
    # Numeric candidates: l2_penalty is given a number elsewhere in this
    # notebook (l2_penalty=1), so a string such as 'l2' is not a valid value.
    'l2_penalty': np.linspace(1, 10, 10),
    # NOTE(review): alpha is presumably the learning rate; candidates are kept
    # at or below the 0.001 used elsewhere in this notebook — confirm the range.
    'alpha': np.linspace(0.001, 0.0001, 100),
    # dtype=int keeps every iteration count a whole number.
    'max_iter': np.linspace(1000, 2000, 200, dtype=int),
}
# Run the randomized search over parameter_grid_ with n_iter=100 and cv=3.
randomized_search = randomized_search_cv(
    model,
    breast_bin,
    hyperparameter_grid=parameter_grid_,
    n_iter=100,
    cv=3,
)

# Full result returned by the search.
print(randomized_search)

# Entry stored under the "scores" key.
print(randomized_search["scores"])

# Entry stored under the 'best_score' key.
print(randomized_search['best_score'])