# Chapter 11
## Model evaluation

### 11.1 Cross-validating models

In [None]:
from sklearn import datasets
from sklearn import metrics
from sklearn.model_selection import KFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the handwritten-digits dataset and separate inputs from labels.
digits = datasets.load_digits()
features, target = digits.data, digits.target

# Chain scaling and classification into a single estimator.
standardizer = StandardScaler()
logit = LogisticRegression()
pipeline = make_pipeline(standardizer, logit)

# Ten shuffled folds with a fixed seed so the splits are reproducible.
kf = KFold(n_splits=10, shuffle=True, random_state=1)

# Score the pipeline on every fold (n_jobs=-1 requests all CPU cores).
cv_results = cross_val_score(
    pipeline, features, target, cv=kf, scoring='accuracy', n_jobs=-1
)

# Average accuracy over the folds.
cv_results.mean()

In [None]:
# Display the individual per-fold scores returned by cross_val_score.
cv_results

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 10% of the observations as a test set.
features_train, features_test, target_train, target_test = train_test_split(
    features,
    target,
    test_size=0.1,
    random_state=1,
)

# Learn the scaling parameters from the training data only, then apply that
# same fitted transformation to both partitions.
standardizer.fit(features_train)
features_train_std, features_test_std = (
    standardizer.transform(partition)
    for partition in (features_train, features_test)
)

In [None]:
# Rebuild the scale-then-classify pipeline and cross-validate it with the
# previously defined KFold splitter.
pipeline = make_pipeline(standardizer, logit)
cv_results = cross_val_score(
    pipeline, features, target, cv=kf, scoring='accuracy', n_jobs=-1
)

### 11.2 Creating a baseline regression model

In [None]:
from sklearn.datasets import load_boston
from sklearn.dummy import DummyRegressor
from sklearn.model_selection import train_test_split

In [None]:
# Load the Boston housing data and split it into training and test sets.
# NOTE(review): load_boston appears to have been removed from recent
# scikit-learn releases (1.2+) — confirm the installed version provides it,
# or switch this recipe to another regression dataset.
boston = load_boston()
features, target = boston.data, boston.target
# No test_size is passed, so train_test_split's default proportion applies.
features_train, features_test, target_train, target_test = train_test_split(
    features, target, random_state=0
)

In [None]:
# Baseline regressor that uses the 'mean' strategy, fitted on the training
# split and scored on the held-out split.
dummy = DummyRegressor(strategy='mean').fit(features_train, target_train)
dummy.score(features_test, target_test)

In [None]:
from sklearn.linear_model import LinearRegression

# Ordinary least squares fitted on the same split, for comparison with the
# baseline score.
ols = LinearRegression().fit(features_train, target_train)
ols.score(features_test, target_test)

In [None]:
# Alternative baseline: a regressor configured with the 'constant' strategy
# and the constant 20.
clf = DummyRegressor(strategy='constant', constant=20).fit(
    features_train, target_train
)
clf.score(features_test, target_test)

### 11.3 Creating a baseline classification model

In [None]:
from sklearn.datasets import load_iris
from sklearn.dummy import DummyClassifier
from sklearn.model_selection import train_test_split

In [None]:
# Load the iris data and split it into training and test sets.
iris = load_iris()
features = iris.data
target = iris.target
features_train, features_test, target_train, target_test = train_test_split(
    features,
    target,
    random_state=0,
)

# Baseline classifier using the 'uniform' strategy with a fixed seed.
dummy = DummyClassifier(strategy='uniform', random_state=1).fit(
    features_train, target_train
)
dummy.score(features_test, target_test)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Fit a random forest on the same split so its score can be compared with
# the dummy baseline.
classifier = RandomForestClassifier().fit(features_train, target_train)
classifier.score(features_test, target_test)

### 11.4 Evaluating binary classifier predictions

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_classification

In [None]:
# Synthetic two-class problem: 10,000 samples with three features, all
# informative and none redundant.
X, y = make_classification(
    n_samples=10000,
    n_features=3,
    n_informative=3,
    n_redundant=0,
    n_classes=2,
    random_state=1,
)

# Cross-validated accuracy; no cv argument is given, so the default
# splitting is used.
logit = LogisticRegression()
cross_val_score(logit, X, y, scoring='accuracy')

In [None]:
# Same cross-validation, scored with the 'precision' metric.
cross_val_score(logit, X, y, scoring = 'precision')

In [None]:
# Same cross-validation, scored with the 'recall' metric.
cross_val_score(logit, X, y, scoring = 'recall')

In [None]:
# Same cross-validation, scored with the 'f1' metric.
cross_val_score(logit, X, y, scoring = 'f1')

### 11.5 Evaluating binary classifier thresholds

In [None]:
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.model_selection import train_test_split

In [None]:
# Synthetic two-class problem: 10,000 samples, 10 features, 3 informative.
features, target = make_classification(
    n_samples=10000,
    n_features=10,
    n_classes=2,
    n_informative=3,
    random_state=3,
)

# Keep 10% of the observations aside for evaluating thresholds.
features_train, features_test, target_train, target_test = train_test_split(
    features, target, test_size=.1, random_state=1
)

logit = LogisticRegression()
logit.fit(features_train, target_train)

# Scores for the second class (column 1). predict_proba is used instead of
# predict_log_proba so that the thresholds returned by roc_curve are on the
# probability scale rather than being log-probabilities.
target_probabilities = logit.predict_proba(features_test)[:, 1]
false_positive_rate, true_positive_rate, threshold = roc_curve(
    target_test, target_probabilities
)

In [None]:
# ROC curve for the fitted model.
plt.title('Receiver operating characteristic')
plt.plot(false_positive_rate, true_positive_rate)

# Dashed diagonal reference line.
plt.plot([0, 1], ls='--')

# Light-grey guide lines.
plt.plot([0, 0], [1, 0], c='.7')
plt.plot([1, 1], c='.7')

plt.ylabel('True positive rate')
plt.xlabel('False positive rate')
plt.show()

In [None]:
# Inspect a single point on the ROC curve: the threshold at index 116 and
# the true/false positive rates at that same index.
print("Threshold:", threshold[116])
print("True Positive Rate:", true_positive_rate[116]) 
print("False Positive Rate:", false_positive_rate[116])

In [None]:
# Same inspection at index 45, for comparison with the point at index 116.
print("Threshold:", threshold[45])
print("True Positive Rate:", true_positive_rate[45]) 
print("False Positive Rate:", false_positive_rate[45])


In [None]:
# Area under the ROC curve for the held-out scores.
roc_auc_score(target_test, target_probabilities)

### 11.6 Evaluating multiclass classifier predictions

In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_classification

# Synthetic three-class problem: 10,000 samples with three features, all
# informative and none redundant.
features, target = make_classification(
    n_samples=10000,
    n_features=3,
    n_informative=3,
    n_redundant=0,
    n_classes=3,
    random_state=1,
)

# Cross-validated accuracy of a logistic regression on the three classes.
logit = LogisticRegression()
cross_val_score(logit, features, target, scoring='accuracy')

In [None]:
# Same cross-validation, scored with the 'f1_macro' metric.
cross_val_score(logit, features, target, scoring = 'f1_macro')

### 11.7 Visualizing a classifier's performance

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from sklearn.linear_model import LogisticRegression 
from sklearn.model_selection import train_test_split 
from sklearn.metrics import confusion_matrix
import pandas as pd

In [None]:
# Load iris along with its class names (used to label the matrix).
iris = datasets.load_iris()
features, target, class_names = iris.data, iris.target, iris.target_names

features_train, features_test, target_train, target_test = train_test_split(
    features,
    target,
    random_state=1,
)

# Train, predict on the held-out split, and tabulate true vs. predicted.
classifier = LogisticRegression()
classifier.fit(features_train, target_train)
target_predicted = classifier.predict(features_test)
matrix = confusion_matrix(target_test, target_predicted)

# Label rows and columns with the class names for plotting.
dataframe = pd.DataFrame(matrix, index=class_names, columns=class_names)

In [None]:
# Draw the confusion matrix as an annotated heatmap.
sns.heatmap(dataframe, annot=True, cbar=None, cmap="Blues")
plt.title("Confusion Matrix")
plt.tight_layout()
plt.ylabel("True Class")
plt.xlabel("Predicted Class")
plt.show()

### 11.8 Evaluating regression models

In [None]:
from sklearn.datasets import make_regression
from sklearn.model_selection import cross_val_score 
from sklearn.linear_model import LinearRegression

In [None]:
# Synthetic regression problem: 100 samples, three informative features,
# a single target, and noise=50.
features, target = make_regression(
    n_samples=100,
    n_features=3,
    n_informative=3,
    n_targets=1,
    noise=50,
    coef=False,
    random_state=1,
)

ols = LinearRegression()

# Ten-fold scores: first negative mean squared error, then R^2.
print(
    cross_val_score(ols, features, target, scoring='neg_mean_squared_error', cv=10)
)
print(
    cross_val_score(ols, features, target, scoring='r2', cv=10)
)

### 11.9 Evaluating clustering models

In [None]:
import numpy as np
from sklearn.metrics import silhouette_score 
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

In [None]:
# Two blobs in ten dimensions; the generated labels are discarded.
features, _ = make_blobs(
    n_samples=1000,
    n_features=10,
    centers=2,
    cluster_std=0.5,
    shuffle=True,
    random_state=1,
)

# Cluster with k-means, then score the resulting label assignment.
model = KMeans(n_clusters=2, random_state=1)
model.fit(features)
target_predicted = model.labels_
silhouette_score(features, target_predicted)

### 11.10 Creating a custom evaluation metric

In [None]:
from sklearn.metrics import make_scorer, r2_score 
from sklearn.model_selection import train_test_split 
from sklearn.linear_model import Ridge
from sklearn.datasets import make_regression

In [None]:
# Synthetic regression data: 100 samples with three features.
features, target = make_regression(n_samples=100, n_features=3, random_state=1)

# Hold out 10% of the observations for scoring.
features_train, features_test, target_train, target_test = train_test_split(
    features,
    target,
    test_size=0.1,
    random_state=1,
)

def custom_metric(target_test, target_predicted):
    """Return the R^2 score of the predictions against the true targets.

    Args:
        target_test: True target values.
        target_predicted: Predicted target values.

    Returns:
        The value computed by ``r2_score(target_test, target_predicted)``.
    """
    return r2_score(target_test, target_predicted)

# Wrap the metric function as a scorer; larger values count as better.
score = make_scorer(custom_metric, greater_is_better=True)

# Fit a ridge regression on the training split.
classifier = Ridge()
model = classifier.fit(features_train, target_train)

# Apply the scorer to the fitted model on the held-out split.
score(model, features_test, target_test)

### 11.11 Visualizing the effect of training set size

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier 
from sklearn.datasets import load_digits
from sklearn.model_selection import learning_curve

In [None]:
digits = load_digits()
features, target = digits.data, digits.target

# Cross-validated training and test accuracy at 50 training-set sizes.
# The sizes run from 1% to 100% of the available training data so the
# curve covers the full range; the previous upper bound of 0.1 stopped at
# 10% and never showed the effect of using the whole training set.
train_sizes, train_scores, test_scores = learning_curve(
    RandomForestClassifier(),
    features,
    target,
    cv=10,
    scoring='accuracy',
    n_jobs=-1,
    train_sizes=np.linspace(0.01, 1.0, 50),
)

In [None]:
# Per-size mean and standard deviation across folds (axis 1) of the
# training scores and the cross-validation scores.
train_mean, train_std = np.mean(train_scores, axis=1), np.std(train_scores, axis=1)
test_mean, test_std = np.mean(test_scores, axis=1), np.std(test_scores, axis=1)

In [None]:
# Mean curves: dashed for training, solid for cross-validation.
plt.plot(train_sizes, train_mean, '--', color="#111111", label="Training score")
plt.plot(train_sizes, test_mean, color="#111111", label="Cross-validation score")

# Shade one standard deviation either side of each mean curve.
plt.fill_between(
    train_sizes, train_mean - train_std, train_mean + train_std, color="#DDDDDD"
)
plt.fill_between(
    train_sizes, test_mean - test_std, test_mean + test_std, color="#DDDDDD"
)

# Titles, axis labels, legend, then render.
plt.title("Learning Curve")
plt.xlabel("Training Set Size")
plt.ylabel("Accuracy Score")
plt.legend(loc="best")
plt.tight_layout()
plt.show()

### 11.12 Creating a text report of evaluation metrics

In [None]:
from sklearn import datasets
from sklearn.linear_model import LogisticRegression 
from sklearn.model_selection import train_test_split 
from sklearn.metrics import classification_report

In [None]:
# Load iris together with its class names for labelling the report.
iris = datasets.load_iris()
features = iris.data
target = iris.target
class_names = iris.target_names

features_train, features_test, target_train, target_test = train_test_split(
    features,
    target,
    random_state=1,
)

# Fit a logistic regression and predict the held-out split.
classifier = LogisticRegression()
model = classifier.fit(features_train, target_train)
target_predicted = model.predict(features_test)

# Print the text report, labelled with the class names.
print(classification_report(target_test, target_predicted, target_names=class_names))

### 11.13 Visualizing the effect of hyperparameter values

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_digits
from sklearn.ensemble import RandomForestClassifier 
from sklearn.model_selection import validation_curve

In [None]:
digits = load_digits()
features = digits.data
target = digits.target

# Candidate values for the hyperparameter: 1, 3, 5, ..., 249.
param_range = np.arange(1, 250, 2)

# Three-fold training and test accuracy for each n_estimators value.
train_scores, test_scores = validation_curve(
    RandomForestClassifier(),
    features,
    target,
    param_name='n_estimators',
    param_range=param_range,
    cv=3,
    scoring='accuracy',
    n_jobs=-1,
)

In [None]:
# Per-parameter-value mean and standard deviation across folds (axis 1) of
# the training scores and the cross-validation scores.
train_mean, train_std = np.mean(train_scores, axis=1), np.std(train_scores, axis=1)
test_mean, test_std = np.mean(test_scores, axis=1), np.std(test_scores, axis=1)

In [None]:
# Mean accuracy for the training and cross-validation sets.
plt.plot(param_range, train_mean, label="Training score", color="black")
plt.plot(param_range, test_mean, label="Cross-validation score", color="dimgrey")

# Bands of one standard deviation around each mean curve.
plt.fill_between(
    param_range, train_mean - train_std, train_mean + train_std, color="gray"
)
plt.fill_between(
    param_range, test_mean - test_std, test_mean + test_std, color="gainsboro"
)

# Titles, axis labels, legend, then render.
plt.title("Validation Curve With Random Forest")
plt.xlabel("Number Of Trees")
plt.ylabel("Accuracy Score")
plt.tight_layout()
plt.legend(loc="best")
plt.show()