In [207]:
from sklearn.datasets import load_breast_cancer
from sklearn.svm import SVC
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import accuracy_score
from statistics import mean
from scikitplot.metrics import plot_confusion_matrix
from sklearn.preprocessing import StandardScaler, RobustScaler, QuantileTransformer
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.manifold import TSNE
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.tree import DecisionTreeClassifier

In [218]:
def addestraSVC(dataset, c, gamma, kernel, dim):
    X = dataset['data']
    y = dataset['target']
    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=30, stratify=y)
    
    sc = StandardScaler()
    
    X_train = sc.fit_transform(X_train)
    X_test = sc.fit_transform(X_test)
    
    pca = PCA(n_components=dim)
    
    X_train = pca.fit_transform(X_train)
    X_test = pca.fit_transform(X_test)
    
    model = SVC(gamma = gamma, C=c, kernel = kernel)
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    
    return accuracy_score(y_test, y_pred)

def addrestraDecisionTreeClassifier(dataset, criterion, max_depth, dim):
    X = dataset['data']
    y = dataset['target']
    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=30, stratify=y)
    
    sc = StandardScaler()
    
    X_train = sc.fit_transform(X_train)
    X_test = sc.fit_transform(X_test)
    
    pca = PCA(n_components=dim)
    
    X_train = pca.fit_transform(X_train)
    X_test = pca.fit_transform(X_test)
    
    model = DecisionTreeClassifier(criterion=criterion, max_depth=max_depth)
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    
    return accuracy_score(y_test, y_pred)

#funzione che calcola il valore degli iperparametri per SVC
def trovaIperparametri(dataset, model, numero_dimensioni):
    
    X = dataset['data']
    y = dataset['target']
    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=30, stratify=y)

    if model == SVC:
        steps = [
            ('scaler', StandardScaler()),
            ('reduce_dim', PCA()),
            ('SVM', SVC()), 
        ]

        pipeline = Pipeline(steps)

        valori_C = np.arange(0.1, 1.0, 0.1)
        valori_gamma = [0.1, 0.01]
        valori_kernel = ['linear', 'poly', 'rbf', 'sigmoid']

        params = { 'SVM__C': valori_C,
                  'SVM__gamma': valori_gamma,
                  'SVM__kernel': valori_kernel,
                  'reduce_dim__n_components': np.arange(1, 30, 1),
                }
    elif model == DecisionTreeClassifier:
        
        steps = [
            ('scaler', StandardScaler()),
            ('reduce_dim', PCA()),
            ('tree', DecisionTreeClassifier()),
        ]
        
        pipeline= Pipeline(steps)
        
        params = {'reduce_dim__n_components': np.arange(1, 30, 1),
                  'tree__criterion': ['gini', 'entropy'],
                  'tree__max_depth': [1, 10, 100, 1000],
                 }
    
    grid = GridSearchCV(pipeline, param_grid=params, cv=3)
    
    grid.fit(X_train, y_train)
    
    
    return grid.best_params_

In [217]:
dataset = load_breast_cancer()

trovaIperparametri(dataset, DecisionTreeClassifier, 30)

{'reduce_dim__n_components': 3,
 'tree__criterion': 'gini',
 'tree__max_depth': 100}

In [199]:
addestraSVC(dataset, 0.8, 0.1, 'linear', 13)

0.9824561403508771

In [219]:
addrestraDecisionTreeClassifier(dataset, 'gini', 100, 30)

0.9298245614035088