# Strojno učenje: Evalvacija in izboljšanje modela

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
from sklearn.datasets import make_blobs
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [None]:
# generate a toy dataset (fixed seed so the cell is repeatable)
X, y = make_blobs(random_state=0)

# hold out part of the samples for testing; the rest is used for training
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# build the classifier first, then train it on the training portion only
logreg = LogisticRegression()
logreg.fit(X_train, y_train)

# report how the fitted model scores on the held-out test samples
print(f"Test set score: {logreg.score(X_test, y_test):.2f}")

## Cross-Validation

### Cross-Validation in scikit-learn

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression

In [None]:
# load the iris dataset; later cells read iris.data and iris.target
iris = load_iris()
# unfitted classifier with the iteration limit set to 1000
# NOTE(review): presumably raised to avoid convergence warnings on iris — confirm
logreg = LogisticRegression(max_iter=1000)

In [None]:
# cross-validate logreg on the full iris data with the default `cv` setting
scores = cross_val_score(logreg, iris.data, iris.target)

In [None]:
# show the scores returned by the most recent cross_val_score call
print(f"Cross-validation scores: {scores}")

In [None]:
# repeat the cross-validation, this time requesting cv=10
scores = cross_val_score(logreg, iris.data, iris.target, cv=10)

In [None]:
# show the scores returned by the most recent cross_val_score call
print(f"Cross-validation scores: {scores}")

In [None]:
# summarize the individual scores by their mean, rounded to two decimals
print(f"Average cross-validation score: {scores.mean():.2f}")

### Benefits of Cross-Validation

### Stratified k-Fold Cross-Validation

In [None]:
from sklearn.datasets import load_iris
# reload iris and print the raw label array to inspect the order of the labels
iris = load_iris()
print(f"Iris labels:\n{iris.target}")

In [None]:
# illustration helper from the local plot_helpers module
# NOTE(review): presumably draws how stratified k-fold splits the data — confirm in plot_helpers
from plot_helpers import plot_stratified_cross_validation
plot_stratified_cross_validation()

### More control over cross-validation

In [None]:
from sklearn.model_selection import KFold

# explicit splitter with 5 splits; `shuffle` is not passed, so KFold's default applies
kfold = KFold(n_splits=5)

In [None]:
# pass the splitter object as `cv` instead of an integer
print(f"Cross-validation scores:\n{cross_val_score(logreg, iris.data, iris.target, cv=kfold)}")

In [None]:
# same experiment with only 3 splits (`shuffle` still left at its default)
# NOTE(review): if the iris labels are sorted by class, each test fold may hold a
# class the model never saw during training — compare with the printed label array
kfold = KFold(n_splits=3)
print(f"Cross-validation scores:\n{cross_val_score(logreg, iris.data, iris.target, cv=kfold)}")

In [None]:
# 3 splits again, but with shuffling enabled; random_state=0 seeds the shuffle
kfold = KFold(n_splits=3, shuffle=True, random_state=0)
print(f"Cross-validation scores:\n{cross_val_score(logreg, iris.data, iris.target, cv=kfold)}")

### Leave-one-out cross-validation

In [None]:
from sklearn.model_selection import LeaveOneOut

# leave-one-out splitter, passed as the `cv` argument below
loo = LeaveOneOut()

# `scores` receives one entry per cross-validation iteration
scores = cross_val_score(logreg, iris.data, iris.target, cv=loo)

# number of iterations that were run, and the mean of their scores
print("Number of cv iterations: ", len(scores))
print(f"Mean accuracy: {scores.mean():.2f}")

### Shuffle-split cross-validation

In [None]:
# illustration helper from the local plot_helpers module
# NOTE(review): presumably visualizes shuffle-split sampling — confirm in plot_helpers
from plot_helpers import plot_shuffle_split
plot_shuffle_split()

In [None]:
from sklearn.model_selection import ShuffleSplit

# 10 random splits, each using half of the samples for training and the other
# half for testing; random_state pins the random splits so that re-running the
# cell reproduces the same scores
shuffle_split = ShuffleSplit(test_size=.5, train_size=.5, n_splits=10, random_state=0)

# one score per random split
scores = cross_val_score(logreg, iris.data, iris.target, cv=shuffle_split)

print(f"Cross-validation scores:\n{scores}")

### Cross-validation with groups

In [None]:
from sklearn.model_selection import GroupKFold

# create a small synthetic dataset (12 samples)
X, y = make_blobs(n_samples=12, random_state=0)

# group label of every sample: the first three samples belong to group 0,
# the next four to group 1, the next two to group 2 and the last three to group 3
groups = [0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3]

# the group labels are forwarded to the GroupKFold splitter, which builds 3 splits
scores = cross_val_score(logreg, X, y, groups=groups, cv=GroupKFold(n_splits=3))

print(f"Cross-validation scores:\n{scores}")

## Grid Search

### Simple Grid Search

In [None]:
# naive grid search implementation
# NOTE: the parameters are chosen by their score on the test set, so the
# reported best score was used for model selection and is an optimistic
# estimate of performance on new data
from sklearn.svm import SVC

X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state=0)

print(f"Size of training set: {X_train.shape[0]} size of test set: {X_test.shape[0]}")

# reset both trackers together so a re-run can never report parameters that
# were left in the notebook namespace by an earlier search
best_score = 0
best_parameters = None

for gamma in [0.001, 0.01, 0.1, 1, 10, 100]:
    for C in [0.001, 0.01, 0.1, 1, 10, 100]:
        # for each combination of parameters, train an SVC
        svm = SVC(gamma=gamma, C=C)
        svm.fit(X_train, y_train)
        # evaluate the SVC on the test set
        score = svm.score(X_test, y_test)
        # if we got a better score, store the score and parameters
        # (strict '>' keeps the first combination that reaches the best score)
        if score > best_score:
            best_score = score
            best_parameters = {'C': C, 'gamma': gamma}

print(f"Best score: {best_score:.2f}")
print(f"Best parameters: {best_parameters}")

### The Danger of Overfitting the Parameters and the Validation Set

In [None]:
# illustration helper from the local plot_helpers module
# NOTE(review): presumably shows the training / validation / test partition — confirm in plot_helpers
from plot_helpers import plot_threefold_split
plot_threefold_split()

In [None]:
from sklearn.svm import SVC

# split data into train+validation set and test set
X_trainval, X_test, y_trainval, y_test = train_test_split(iris.data, iris.target, random_state=0)

# split train+validation set into training and validation sets
X_train, X_valid, y_train, y_valid = train_test_split(X_trainval, y_trainval, random_state=1)


print(f"Size of training set: {X_train.shape[0]} size of validation set: {X_valid.shape[0]} size of test set: {X_test.shape[0]}\n")

# reset both trackers together so parameters left over from an earlier search
# in the notebook namespace can never be picked up by accident
best_score = 0
best_parameters = None

for gamma in [0.001, 0.01, 0.1, 1, 10, 100]:
    for C in [0.001, 0.01, 0.1, 1, 10, 100]:
        # for each combination of parameters, train an SVC on the training set only
        svm = SVC(gamma=gamma, C=C)
        svm.fit(X_train, y_train)

        # evaluate the SVC on the validation set (the test set stays untouched
        # until the parameters have been chosen)
        score = svm.score(X_valid, y_valid)

        # if we got a better score, store the score and parameters
        if score > best_score:
            best_score = score
            best_parameters = {'C': C, 'gamma': gamma}

# rebuild a model on the combined training and validation set,
# and evaluate it on the test set
svm = SVC(**best_parameters)
svm.fit(X_trainval, y_trainval)
test_score = svm.score(X_test, y_test)

print(f"Best score on validation set: {best_score:.2f}")
print(f"Best parameters: {best_parameters}")
print(f"Test set score with best parameters: {test_score:.2f}")

### Grid Search with Cross-Validation

In [None]:
# start from a clean slate: without this reset the comparison below would run
# against whatever best_score an earlier cell left behind, and best_parameters
# could silently keep the result of that earlier search
best_score = 0
best_parameters = None

for gamma in [0.001, 0.01, 0.1, 1, 10, 100]:
    for C in [0.001, 0.01, 0.1, 1, 10, 100]:
        # for each combination of parameters,
        # train an SVC
        svm = SVC(gamma=gamma, C=C)
        # perform cross-validation on the combined training + validation data
        scores = cross_val_score(svm, X_trainval, y_trainval, cv=5)
        # compute mean cross-validation accuracy
        score = np.mean(scores)
        # if we got a better score, store the score and parameters
        if score > best_score:
            best_score = score
            best_parameters = {'C': C, 'gamma': gamma}

# rebuild a model on the combined training and validation set
svm = SVC(**best_parameters)
svm.fit(X_trainval, y_trainval)

In [None]:
# illustration helper from the local plot_helpers module
# NOTE(review): presumably draws the overall grid-search workflow — confirm in plot_helpers
from plot_helpers import plot_grid_search_overview
plot_grid_search_overview()

In [None]:
# candidate values to search, keyed by parameter name (6 values each)
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100],
              'gamma': [0.001, 0.01, 0.1, 1, 10, 100]}

print(f"Parameter grid:\n{param_grid}")

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# search object over param_grid with a fresh SVC and cv=5;
# it is only constructed here — fitting happens in a later cell
grid_search = GridSearchCV(SVC(), param_grid, cv=5)

In [None]:
# fresh train/test split of iris with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, random_state=0)

In [None]:
# run the search on the training data only
grid_search.fit(X_train, y_train)

In [None]:
# score the fitted search object on the held-out test data
print(f"Test set score: {grid_search.score(X_test, y_test):.2f}")

In [None]:
# parameters selected by the search and the cross-validation score they achieved
print(f"Best parameters: {grid_search.best_params_}")
print(f"Best cross-validation score: {grid_search.best_score_:.2f}")

In [None]:
# estimator object stored by the search as its best model
print(f"Best estimator:\n{grid_search.best_estimator_}")

#### Analyzing the result of cross-validation

In [None]:
import pandas as pd

# convert the search's cv_results_ to a DataFrame
results = pd.DataFrame(grid_search.cv_results_)

# show the first 5 rows (displayed as the cell's output)
results.head()

In [None]:
# Alternative rendering with the local plot_helpers.heatmap helper, kept for reference:
# from plot_helpers import heatmap

# scores = np.array(results['mean_test_score']).reshape(6, 6)

# # plot the mean cross-validation scores
# heatmap(scores, xlabel='gamma', xticklabels=param_grid['gamma'], ylabel='C', yticklabels=param_grid['C'], cmap="viridis")
# plt.show()


import seaborn as sns

# arrange the mean test scores as a (number of C values) x (number of gamma values)
# table; the shape is derived from param_grid so the cell keeps working when the
# grid is resized
# NOTE(review): assumes `param_grid` is still the single C/gamma dict and that
# cv_results_ lists the combinations with C varying slowest — confirm
scores = np.array(results['mean_test_score']).reshape(
    len(param_grid['C']), len(param_grid['gamma']))

# annotated heatmap on a fixed 0..1 color scale; rows are C, columns are gamma
sns.heatmap(scores, vmin=0, vmax=1, cmap="viridis", annot=True, xticklabels=param_grid['gamma'], yticklabels=param_grid['C'])
plt.xlabel("gamma")
plt.ylabel("C")
plt.show()

In [None]:
from plot_helpers import heatmap

# one subplot per candidate grid
fig, axes = plt.subplots(1, 3, figsize=(13, 5))

# three differently scaled search grids (6 values per parameter)
param_grid_linear = {'C': np.linspace(1, 2, 6), 'gamma': np.linspace(1, 2, 6)}
param_grid_one_log = {'C': np.linspace(1, 2, 6), 'gamma': np.logspace(-3, 2, 6)}
param_grid_range = {'C': np.logspace(-3, 2, 6), 'gamma': np.logspace(-7, -2, 6)}

# dedicated loop names are used so that the notebook-level `param_grid` and the
# already fitted `grid_search` are not overwritten by this comparison
for candidate_grid, ax in zip([param_grid_linear, param_grid_one_log,
                               param_grid_range], axes):
    candidate_search = GridSearchCV(SVC(), candidate_grid, cv=5)
    candidate_search.fit(X_train, y_train)
    # table of mean scores with one row per C value and one column per gamma
    # value; the shape follows the grid instead of being hard-coded
    scores = candidate_search.cv_results_['mean_test_score'].reshape(
        len(candidate_grid['C']), len(candidate_grid['gamma']))
    # plot the mean cross-validation scores
    scores_image = heatmap(scores,
                           xlabel='gamma',
                           ylabel='C',
                           xticklabels=candidate_grid['gamma'],
                           yticklabels=candidate_grid['C'],
                           cmap="viridis", ax=ax)

#### Search over spaces that are not grids

In [None]:
# a list of two separate grids: the 'rbf' entry varies both C and gamma,
# while the 'linear' entry varies only C
param_grid = [{'kernel': ['rbf'],
               'C': [0.001, 0.01, 0.1, 1, 10, 100],
               'gamma': [0.001, 0.01, 0.1, 1, 10, 100]},
              {'kernel': ['linear'],
               'C': [0.001, 0.01, 0.1, 1, 10, 100]}]

print(f"List of grids:\n{param_grid}")

In [None]:
# new search over the list of grids, fitted on the training data only
grid_search = GridSearchCV(SVC(), param_grid, cv=5)
grid_search.fit(X_train, y_train)

# parameters selected by the search and the cross-validation score they achieved
print(f"Best parameters: {grid_search.best_params_}")
print(f"Best cross-validation score: {grid_search.best_score_:.2f}")

### Parallelizing cross-validation and grid search

## Evaluation Metrics and Scoring

### Metrics for Binary Classification

#### Kinds of errors

#### Imbalanced datasets

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits

digits = load_digits()

# boolean target: True where the original label equals 9, False everywhere else
y = digits.target == 9

# train/test split of the digit features against the boolean target
X_train, X_test, y_train, y_test = train_test_split(digits.data, y, random_state=0)

In [None]:
from sklearn.dummy import DummyClassifier
import numpy as np

# baseline classifier configured with the 'most_frequent' strategy
dummy_majority = DummyClassifier(strategy='most_frequent').fit(X_train, y_train)

# baseline predictions for the test data (reused by later cells)
pred_most_frequent = dummy_majority.predict(X_test)

# distinct labels the baseline actually predicts, followed by its test score
print(f"Unique predicted labels: {np.unique(pred_most_frequent)}")
print(f"Test score: {dummy_majority.score(X_test, y_test):.2f}")

In [None]:
from sklearn.tree import DecisionTreeClassifier

# decision tree limited to a depth of 2, fitted on the training data
tree = DecisionTreeClassifier(max_depth=2).fit(X_train, y_train)

# test-set predictions (reused by later cells) and the tree's test score
pred_tree = tree.predict(X_test)
print(f"Test score: {tree.score(X_test, y_test):.2f}")

In [None]:
from sklearn.linear_model import LogisticRegression

# baseline with the 'stratified' strategy, which predicts at random; the seed is
# fixed so that pred_dummy — and every metric computed from it in later cells —
# is the same on each run
dummy = DummyClassifier(strategy='stratified', random_state=0).fit(X_train, y_train)
pred_dummy = dummy.predict(X_test)
print(f"dummy score: {dummy.score(X_test, y_test):.2f}")

# logistic regression with C=0.1 and an iteration limit of 1000
logreg = LogisticRegression(C=0.1, max_iter=1000).fit(X_train, y_train)
pred_logreg = logreg.predict(X_test)
print(f"logreg score: {logreg.score(X_test, y_test):.2f}")

#### Confusion matrices

In [None]:
from sklearn.metrics import confusion_matrix

# confusion matrix of the true test labels against the logistic regression predictions
confusion = confusion_matrix(y_test, pred_logreg)
print(f"Confusion matrix:\n{confusion}")

<img src="https://static.packt-cdn.com/products/9781838555078/graphics/C13314_06_05.jpg">

In [None]:
# (heading, predictions) pairs in display order; the headings are printed
# verbatim, so every heading except the first starts with a blank line
_confusion_reports = [
    ("Most frequent class:", pred_most_frequent),
    ("\nDummy model:", pred_dummy),
    ("\nDecision tree:", pred_tree),
    ("\nLogistic Regression", pred_logreg),
]

# print each heading followed by that model's confusion matrix on the test labels
for _heading, _predictions in _confusion_reports:
    print(_heading)
    print(confusion_matrix(y_test, _predictions))

In [None]:
from sklearn.metrics import f1_score

# f1 score of each model's test-set predictions, rounded to two decimals
# NOTE(review): pred_most_frequent may contain no positive predictions, in which
# case f1_score is expected to emit a warning — confirm
print(f"f1 score most frequent: {f1_score(y_test, pred_most_frequent):.2f}")

print(f"f1 score dummy: {f1_score(y_test, pred_dummy):.2f}")
print(f"f1 score tree: {f1_score(y_test, pred_tree):.2f}")
print(f"f1 score logistic regression: {f1_score(y_test, pred_logreg):.2f}")

In [None]:
from sklearn.metrics import classification_report

# text report for the most-frequent baseline
# NOTE(review): target_names presumably map to False/True in that order — confirm
print(classification_report(y_test, pred_most_frequent, target_names=["not nine", "nine"]))

In [None]:
# same text report for the stratified dummy baseline
print(classification_report(y_test, pred_dummy, target_names=["not nine", "nine"]))

In [None]:
# same text report for the logistic regression model
print(classification_report(y_test, pred_logreg, target_names=["not nine", "nine"]))

### Metrics for Multiclass Classification

In [None]:
from sklearn.metrics import accuracy_score

# multiclass setup: split the digit features against the original digit labels
X_train, X_test, y_train, y_test = train_test_split(digits.data, digits.target, random_state=0)

# logistic regression with an iteration limit of 3000, fitted on the training data
lr = LogisticRegression(max_iter=3000).fit(X_train, y_train)
pred = lr.predict(X_test)

# overall accuracy (three decimals) and the full confusion matrix on the test data
print(f"Accuracy: {accuracy_score(y_test, pred):.3f}")
print(f"Confusion matrix:\n{confusion_matrix(y_test, pred)}")

In [None]:
# draw the multiclass confusion matrix with the plot_helpers heatmap;
# both axes are labelled with the digit target names
scores_image = heatmap(
    confusion_matrix(y_test, pred),
    xlabel='Predicted label',
    ylabel='True label',
    xticklabels=digits.target_names,
    yticklabels=digits.target_names,
    cmap=plt.cm.gray_r,
    fmt="%d",
)
plt.title("Confusion matrix")
# flip the direction of the y axis before showing the figure
plt.gca().invert_yaxis()
plt.show()

In [None]:
# text report for the multiclass predictions
print(classification_report(y_test, pred))

In [None]:
# f1 score of the multiclass predictions under the "micro" and "macro"
# averaging modes, rounded to three decimals
print(f"Micro average f1 score: {f1_score(y_test, pred, average='micro'):.3f}")
print(f"Macro average f1 score: {f1_score(y_test, pred, average='macro'):.3f}")

### Regression Metrics