---

_You are currently using **version 1.1** of this notebook. To download notebooks and datafiles, as well as get help on Jupyter notebooks in the Coursera platform, visit the [Jupyter Notebook FAQ](https://www.coursera.org/learn/python-machine-learning/resources/bANLa) course resource._

---

## SIADS 542: Supervised Learning, Week 2:  Classification and Regression

## Preamble and Review

In [None]:
# Neither of the following magics is needed any longer
# for matplotlib in notebooks.
# Importing matplotlib.pyplot is sufficient.

# %matplotlib notebook
# %matplotlib inline

In [None]:
# Suppress all warnings only when absolutely necessary
# Warnings are in place for a reason!
import warnings

# warnings.filterwarnings('ignore')
# warnings.simplefilter('ignore')

In [None]:
import numpy as np
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# Show NumPy arrays with three decimal places in printed output.
np.set_printoptions(precision=3)

## Additional imports can be included here

In [None]:
# Load the fruit measurements and name the classes and feature columns.
fruits = pd.read_table("fruit_data_with_colors.txt")
feature_names_fruits = ["height", "width", "mass", "color_score"]
target_names_fruits = ["apple", "mandarin", "orange", "lemon"]

# Four-feature view, plus a two-feature (height, width) view reused by later cells.
X_fruits, y_fruits = fruits[feature_names_fruits], fruits["fruit_label"]
X_fruits_2d, y_fruits_2d = fruits[["height", "width"]], fruits["fruit_label"]

X_train, X_test, y_train, y_test = train_test_split(
    X_fruits, y_fruits, random_state=0
)

# Fit the scaler on the training split only; the test split reuses
# the scaling that was computed for the training set.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_scaled, y_train)

train_accuracy = knn.score(X_train_scaled, y_train)
test_accuracy = knn.score(X_test_scaled, y_test)
print(f"Accuracy of K-NN classifier on training set: {train_accuracy:.3f}")
print(f"Accuracy of K-NN classifier on test set: {test_accuracy:.3f}")

# Classify one hand-written example, scaled with the same fitted scaler.
example_fruit = {"height": [5.5], "width": [2.2], "mass": [10.0], "color_score": [0.7]}
df_example_fruit = pd.DataFrame(data=example_fruit)
example_fruit_scaled = scaler.transform(df_example_fruit)

# Labels start at 1, so shift by one to index into the names list.
predicted_label = knn.predict(example_fruit_scaled)[0]
print(
    "Predicted fruit type for ",
    example_fruit,
    " is ",
    target_names_fruits[predicted_label - 1],
)

In [None]:
# Read the comma-separated crime table; "?" entries are treated as missing values.
crime = pd.read_table("CommViolPredUnnormalizedData.txt", sep=",", na_values="?")

# remove features with poor coverage or lower relevance, and keep ViolentCrimesPerPop target column
columns_to_keep = [5, 6, *range(11, 26), *range(32, 103), 145]
crime = crime.iloc[:, columns_to_keep]
crime = crime.dropna()  # drop every row that still has a missing value

crime.head()

## Datasets

In [None]:
# Build every dataset used by the rest of the notebook and preview the
# synthetic ones as scatter plots.
from sklearn.datasets import make_classification, make_blobs
from matplotlib.colors import ListedColormap
from sklearn.datasets import load_breast_cancer
from adspy_shared_utilities import load_crime_dataset

# Four-colour palette passed as cmap to the classification scatter plots below.
cmap_bold = ListedColormap(["#FFA500", "#00FF00", "#0000FF", "#000000"])


# synthetic dataset for simple regression
from sklearn.datasets import make_regression

plt.figure(figsize=(8, 6))
plt.title("Sample regression problem with one input variable")
# A fixed random_state is passed so the generated sample is the same on every run.
X_R1, y_R1 = make_regression(
    n_samples=100, n_features=1, n_informative=1, bias=150.0, noise=30, random_state=0
)
plt.scatter(X_R1, y_R1, marker="o", s=50)
plt.show()


# synthetic dataset for more complex regression
from sklearn.datasets import make_friedman1

plt.figure(figsize=(8, 6))
plt.title("Complex regression problem with one input variable")
# Seven features are generated, but only column index 2 is plotted against the target.
X_F1, y_F1 = make_friedman1(n_samples=100, n_features=7, random_state=0)

plt.scatter(X_F1[:, 2], y_F1, marker="o", s=50)
plt.show()

# synthetic dataset for classification (binary)
plt.figure(figsize=(8, 6))
plt.title("Sample binary classification problem with two informative features")
X_C2, y_C2 = make_classification(
    n_samples=100,
    n_features=2,
    n_redundant=0,
    n_informative=2,
    n_clusters_per_class=1,
    flip_y=0.1,
    class_sep=0.5,
    random_state=0,
)
plt.scatter(X_C2[:, 0], X_C2[:, 1], c=y_C2, marker="o", s=50, cmap=cmap_bold)
plt.show()


# more difficult synthetic dataset for classification (binary)
# with classes that are not linearly separable
X_D2, y_D2 = make_blobs(
    n_samples=100, n_features=2, centers=8, cluster_std=1.3, random_state=4
)
# Eight blob ids are folded into two classes by taking the id modulo 2.
y_D2 = y_D2 % 2

plt.figure(figsize=(8, 6))
plt.title("Sample binary classification problem with non-linearly separable classes")
plt.scatter(X_D2[:, 0], X_D2[:, 1], c=y_D2, marker="o", s=50, cmap=cmap_bold)
plt.show()


# Breast cancer dataset for classification
# `cancer` keeps the full object returned by the loader; a second call with
# return_X_y=True yields the feature/target pair used by later cells.
cancer = load_breast_cancer()
(X_cancer, y_cancer) = load_breast_cancer(return_X_y=True)


# Communities and Crime dataset
# Loaded through the adspy_shared_utilities helper; its result is unpacked as a
# (features, target) pair.
(X_crime, y_crime) = load_crime_dataset()

## K-Nearest Neighbors

### Classification

In [None]:
from adspy_shared_utilities import plot_two_class_knn

X_train, X_test, y_train, y_test = train_test_split(X_C2, y_C2, random_state=0)

# One figure per neighbourhood size, all with uniform neighbour weighting.
for n_neighbors in (1, 3, 11):
    plt.figure(figsize=(8, 6))
    plot_two_class_knn(X_train, y_train, n_neighbors, "uniform", X_test, y_test)
    plt.tight_layout()

### Regression

In [None]:
from sklearn.neighbors import KNeighborsRegressor

X_train, X_test, y_train, y_test = train_test_split(X_R1, y_R1, random_state=0)

# Fit a 5-nearest-neighbour regressor on the training split.
knnreg = KNeighborsRegressor(n_neighbors=5)
knnreg.fit(X_train, y_train)

# Show the raw test predictions, then the R-squared score on the same split.
test_predictions = knnreg.predict(X_test)
print(test_predictions)
print(f"R-squared test score: {knnreg.score(X_test, y_test):.3f}")

In [None]:
# Compare k-NN regression predictions for K=1 and K=3, side by side.

# Evenly spaced query points (as a column vector) at which each model is evaluated.
X_predict_input = np.linspace(-3, 3, 50).reshape(-1, 1)

# Only every fifth sample is used here (presumably to keep the plot uncluttered).
X_train, X_test, y_train, y_test = train_test_split(
    X_R1[0::5], y_R1[0::5], random_state=0
)

fig, subaxes = plt.subplots(1, 2, figsize=(12, 6))

for thisaxis, K in zip(subaxes, [1, 3]):
    knnreg = KNeighborsRegressor(n_neighbors=K).fit(X_train, y_train)
    y_predict_output = knnreg.predict(X_predict_input)

    thisaxis.set_title("KNN regression (K={})".format(K))
    thisaxis.set_xlabel("Input feature")
    thisaxis.set_ylabel("Target value")
    # The query grid spans [-3, 3]; the view is narrowed to this x-range.
    thisaxis.set_xlim([-2.5, 0.75])

    thisaxis.plot(
        X_predict_input,
        y_predict_output,
        "^",
        markersize=10,
        alpha=0.8,
        label="Predicted",
    )

    thisaxis.plot(X_train, y_train, "o", alpha=0.8, label="True Value")
    thisaxis.legend()

# Lay the figure out once, after every panel has been populated, instead of
# recomputing the layout on each loop iteration.
plt.tight_layout()

### Regression model complexity as a function of K

In [None]:
# plot k-NN regression on sample dataset for different values of K
# Evenly spaced query points (as a column vector) used to draw each fitted curve.
X_predict_input = np.linspace(-3, 3, 500).reshape(-1, 1)
X_train, X_test, y_train, y_test = train_test_split(X_R1, y_R1, random_state=0)

# One stacked panel per value of K.
fig, subaxes = plt.subplots(5, 1, figsize=(8, 32))

for thisaxis, K in zip(subaxes, [1, 3, 7, 15, 55]):
    knnreg = KNeighborsRegressor(n_neighbors=K).fit(X_train, y_train)
    y_predict_output = knnreg.predict(X_predict_input)

    # R-squared on both splits goes into the panel title for comparison.
    train_score = knnreg.score(X_train, y_train)
    test_score = knnreg.score(X_test, y_test)

    thisaxis.set_xlabel("Input feature")
    thisaxis.set_ylabel("Target value")
    thisaxis.set_title(
        f"KNN Regression (K={K})\nTrain $R^2 = {train_score:.3f}$,  Test $R^2 = {test_score:.3f}$"
    )

    thisaxis.plot(X_predict_input, y_predict_output)
    thisaxis.plot(X_train, y_train, "o", alpha=0.9, label="Train")
    thisaxis.plot(X_test, y_test, "^", alpha=0.9, label="Test")
    thisaxis.legend(loc="best")

# Lay the figure out once, after all five panels are drawn, instead of
# recomputing the layout on each loop iteration.
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)

## Linear models for regression

### Linear regression

In [None]:
from sklearn.linear_model import LinearRegression

X_train, X_test, y_train, y_test = train_test_split(X_R1, y_R1, random_state=0)

# Ordinary least-squares fit on the single-feature regression sample.
linreg = LinearRegression()
linreg.fit(X_train, y_train)

print(f"linear model coeff (w): {linreg.coef_}")
print(f"linear model intercept (b): {linreg.intercept_:.3f}")
print(f"R-squared score (training): {linreg.score(X_train, y_train):.3f}")
print(f"R-squared score (test): {linreg.score(X_test, y_test):.3f}")

### Linear regression: example plot 

In [None]:
# Evaluate the fitted line w * x + b at every sample; `linreg` is the model
# fitted in the preceding cell.
y_fitted_line = linreg.coef_ * X_R1 + linreg.intercept_

plt.figure(figsize=(8, 6))
plt.title("Least-squares linear regression")
plt.xlabel("Feature value (x)")
plt.ylabel("Target value (y)")

# Raw samples first, then the fitted line drawn in red on top.
plt.scatter(X_R1, y_R1, marker="o", s=50, alpha=0.8)
plt.plot(X_R1, y_fitted_line, "r-")

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X_crime, y_crime, random_state=0)

# Refit the linear model, this time on the crime dataset.
linreg = LinearRegression()
linreg.fit(X_train, y_train)

print("Crime dataset")
print(f"linear model intercept: {linreg.intercept_}")
print(f"linear model coeff:\n{linreg.coef_}")
print(f"R-squared score (training): {linreg.score(X_train, y_train):.3f}")
print(f"R-squared score (test): {linreg.score(X_test, y_test):.3f}")

### Ridge regression

In [None]:
from sklearn.linear_model import Ridge

X_train, X_test, y_train, y_test = train_test_split(X_crime, y_crime, random_state=0)

# Ridge regression on the unscaled crime features.
linridge = Ridge(alpha=20.0)
linridge.fit(X_train, y_train)

nonzero_count = np.sum(linridge.coef_ != 0)

print("Crime dataset")
print(f"ridge regression linear model intercept: {linridge.intercept_}")
print(f"ridge regression linear model coeff:\n{linridge.coef_}")
print(f"R-squared score (training): {linridge.score(X_train, y_train):.3f}")
print(f"R-squared score (test): {linridge.score(X_test, y_test):.3f}")
print(f"Number of non-zero features: {nonzero_count}")

#### Ridge regression with feature normalization

In [None]:
from sklearn.linear_model import Ridge
from sklearn.preprocessing import MinMaxScaler

X_train, X_test, y_train, y_test = train_test_split(X_crime, y_crime, random_state=0)

# Scale with statistics learned from the training split only.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

linridge = Ridge(alpha=20.0)
linridge.fit(X_train_scaled, y_train)

train_r2 = linridge.score(X_train_scaled, y_train)
test_r2 = linridge.score(X_test_scaled, y_test)

print("Crime dataset")
print(f"ridge regression linear model intercept: {linridge.intercept_}")
print(f"ridge regression linear model coeff:\n{linridge.coef_}")
print(f"R-squared score (training): {train_r2:.3f}")
print(f"R-squared score (test): {test_r2:.3f}")
print(f"Number of non-zero features: {np.sum(linridge.coef_ != 0)}")

#### Ridge regression with regularization parameter: alpha

In [None]:
print("Ridge regression: effect of alpha regularization parameter\n")

# Refit on the scaled crime features (from the preceding cell) for each alpha.
for this_alpha in (0, 1, 10, 20, 50, 100, 1000):
    linridge = Ridge(alpha=this_alpha)
    linridge.fit(X_train_scaled, y_train)

    r2_train = linridge.score(X_train_scaled, y_train)
    r2_test = linridge.score(X_test_scaled, y_test)
    # Count the coefficients whose magnitude exceeds 1.0.
    num_coeff_bigger = np.sum(np.abs(linridge.coef_) > 1.0)

    print(
        f"Alpha = {this_alpha:.3f}\n"
        f"num abs(coeff) > 1.0: {num_coeff_bigger}, "
        f"r-squared training: {r2_train:.3f}, r-squared test: {r2_test:.3f}\n"
    )

### Lasso regression

In [None]:
from sklearn.linear_model import Lasso
from sklearn.preprocessing import MinMaxScaler

X_train, X_test, y_train, y_test = train_test_split(X_crime, y_crime, random_state=0)

# Scale with statistics learned from the training split only.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

linlasso = Lasso(alpha=2.0, max_iter=10000)
linlasso.fit(X_train_scaled, y_train)

print("Crime dataset\n")
print(f"lasso regression linear model intercept: {linlasso.intercept_}")
print(f"lasso regression linear model coeff:\n{linlasso.coef_}\n")
print(f"Non-zero features: {np.sum(linlasso.coef_ != 0)}")
print(f"R-squared score (training): {linlasso.score(X_train_scaled, y_train):.3f}")
print(f"R-squared score (test): {linlasso.score(X_test_scaled, y_test):.3f}\n")
print("Features with non-zero weight (sorted by absolute magnitude):")

# Pair each entry yielded by iterating X_crime with its coefficient, largest
# magnitude first; entries whose coefficient is exactly zero are skipped.
weights_by_feature = zip(list(X_crime), linlasso.coef_)
for feature, weight in sorted(weights_by_feature, key=lambda pair: -abs(pair[1])):
    if weight == 0:
        continue
    print(f"\t{feature}, {weight:.3f}")

#### Lasso regression with regularization parameter: alpha

In [None]:
print(
    "Lasso regression: effect of alpha regularization\n"
    "parameter on number of features kept in final model\n"
)

# Refit on the scaled crime features (from the preceding cell) for each alpha.
for alpha in (0.5, 1, 2, 3, 5, 10, 20, 50):
    linlasso = Lasso(alpha=alpha, max_iter=10000)
    linlasso.fit(X_train_scaled, y_train)

    r2_train = linlasso.score(X_train_scaled, y_train)
    r2_test = linlasso.score(X_test_scaled, y_test)
    features_kept = np.sum(linlasso.coef_ != 0)

    print(
        f"Alpha = {alpha:.3f}, Features kept: {features_kept},\n"
        f"r-squared training: {r2_train:.3f}, r-squared test: {r2_test:.3f}\n"
    )

### Polynomial regression

In [None]:
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.preprocessing import PolynomialFeatures


# Stage 1: plain least squares on the original features.
X_train, X_test, y_train, y_test = train_test_split(X_F1, y_F1, random_state=0)
linreg = LinearRegression()
linreg.fit(X_train, y_train)

train_r2 = linreg.score(X_train, y_train)
test_r2 = linreg.score(X_test, y_test)
print(f"linear model coeff (w): {linreg.coef_}")
print(f"linear model intercept (b): {linreg.intercept_:.3f}\n")
print(f"R-squared score (training): {train_r2:.3f}")
print(f"R-squared score (test): {test_r2:.3f}")

print(
    "\nNow we transform the original input data to add\n"
    "polynomial features up to degree 2 (quadratic)\n"
)

# Stage 2: the same least-squares model on degree-2 polynomial features.
poly = PolynomialFeatures(degree=2)
X_F1_poly = poly.fit_transform(X_F1)

X_train, X_test, y_train, y_test = train_test_split(X_F1_poly, y_F1, random_state=0)
linreg = LinearRegression()
linreg.fit(X_train, y_train)

train_r2 = linreg.score(X_train, y_train)
test_r2 = linreg.score(X_test, y_test)
print(f"(poly deg 2) linear model coeff (w):\n{linreg.coef_}\n")
print(f"(poly deg 2) linear model intercept (b): {linreg.intercept_:.3f}")
print(f"(poly deg 2) R-squared score (training): {train_r2:.3f}")
print(f"(poly deg 2) R-squared score (test): {test_r2:.3f}\n")

print(
    "\nAddition of many polynomial features often leads to\n"
    "overfitting, so we often use polynomial features in combination\n"
    "with regression that has a regularization penalty, like ridge\n"
    "regression.\n"
)

# Stage 3: ridge regression (default settings) on the polynomial features.
X_train, X_test, y_train, y_test = train_test_split(X_F1_poly, y_F1, random_state=0)
linreg = Ridge()
linreg.fit(X_train, y_train)

train_r2 = linreg.score(X_train, y_train)
test_r2 = linreg.score(X_test, y_test)
print(f"(poly deg 2 + ridge) linear model coeff (w):\n{linreg.coef_}\n")
print(f"(poly deg 2 + ridge) linear model intercept (b): {linreg.intercept_:.3f}")
print(f"(poly deg 2 + ridge) R-squared score (training): {train_r2:.3f}")
print(f"(poly deg 2 + ridge) R-squared score (test): {test_r2:.3f}")

## Linear models for classification

### Logistic regression

#### Logistic regression for binary classification on fruits dataset using height, width features (positive class: apple, negative class: others)

In [None]:
from sklearn.linear_model import LogisticRegression
from adspy_shared_utilities import plot_class_regions_for_classifier_subplot

# make into a binary problem: apples vs everything else
y_fruits_apple = y_fruits_2d == 1

X_train, X_test, y_train, y_test = train_test_split(
    X_fruits_2d.values, y_fruits_apple.values, random_state=0
)

clf = LogisticRegression(C=100)
clf.fit(X_train, y_train)

fig, subaxes = plt.subplots(1, 1, figsize=(8, 6))
subaxes.set_xlabel("height")
subaxes.set_ylabel("width")

# Only the training points are passed; the two test arguments are None.
plot_title = (
    "Logistic regression for binary classification\nFruit dataset: Apple vs others"
)
plot_class_regions_for_classifier_subplot(
    clf, X_train, y_train, None, None, plot_title, subaxes
)

plt.tight_layout()

# The boolean prediction, cast to int, indexes into this pair of labels.
apple_labels = ["not an apple", "an apple"]

h, w = 6, 8
resultA = apple_labels[int(clf.predict([[h, w]])[0])]
print(f"A fruit with height {h} and width {w} is predicted to be: {resultA}")

h, w = 10, 7
resultB = apple_labels[int(clf.predict([[h, w]])[0])]
print(f"A fruit with height {h} and width {w} is predicted to be: {resultB}\n")

train_accuracy = clf.score(X_train, y_train)
test_accuracy = clf.score(X_test, y_test)
print(
    f"Accuracy of Logistic regression classifier on training set: {train_accuracy:.3f}"
)
print(f"Accuracy of Logistic regression classifier on test set: {test_accuracy:.3f}")

#### Logistic regression on simple synthetic dataset

In [None]:
from sklearn.linear_model import LogisticRegression
from adspy_shared_utilities import plot_class_regions_for_classifier_subplot


# The value of C is kept in a variable so it can also be shown in the plot title.
C = 1.0
X_train, X_test, y_train, y_test = train_test_split(X_C2, y_C2, random_state=0)

clf = LogisticRegression(C=C)
clf.fit(X_train, y_train)

title = f"Logistic regression, simple synthetic dataset C = {C:.3f}"
fig, subaxes = plt.subplots(1, 1, figsize=(8, 6))

# Only the training points are passed; the two test arguments are None.
plot_class_regions_for_classifier_subplot(
    clf, X_train, y_train, None, None, title, subaxes
)

plt.tight_layout()

train_accuracy = clf.score(X_train, y_train)
test_accuracy = clf.score(X_test, y_test)
print(
    f"Accuracy of Logistic regression classifier on training set: {train_accuracy:.3f}"
)
print(f"Accuracy of Logistic regression classifier on test set: {test_accuracy:.3f}")

#### Logistic regression regularization: C parameter

In [None]:
# Show how the logistic-regression regions for apple-vs-rest change with C.
# `y_fruits_apple` is the boolean apple indicator built in an earlier cell.
X_train, X_test, y_train, y_test = train_test_split(
    X_fruits_2d.values, y_fruits_apple.values, random_state=0
)

# One stacked panel per value of C.
fig, subaxes = plt.subplots(3, 1, figsize=(8, 16))

for this_C, subplot in zip([0.1, 1, 100], subaxes):
    clf = LogisticRegression(C=this_C).fit(X_train, y_train)

    title = f"Logistic regression (apple vs rest)\nC = {this_C:.3f}"
    plot_class_regions_for_classifier_subplot(
        clf, X_train, y_train, X_test, y_test, title, subplot
    )

# Lay the figure out once, after every panel is drawn, instead of
# recomputing the layout on each loop iteration.
plt.tight_layout()

#### Application to real dataset

In [None]:
from sklearn.linear_model import LogisticRegression

# default: max_iter=100
# try increasing max_iter to eliminate the warning
max_iter = 100

X_train, X_test, y_train, y_test = train_test_split(X_cancer, y_cancer, random_state=0)

clf = LogisticRegression(max_iter=max_iter)
clf.fit(X_train, y_train)

train_accuracy = clf.score(X_train, y_train)
test_accuracy = clf.score(X_test, y_test)

print("Breast cancer dataset")
print(
    f"Accuracy of Logistic regression classifier on training set: {train_accuracy:.3f}"
)
print(f"Accuracy of Logistic regression classifier on test set: {test_accuracy:.3f}")

### Support Vector Machines

#### Linear Support Vector Machine

In [None]:
from sklearn.svm import SVC
from adspy_shared_utilities import plot_class_regions_for_classifier_subplot


# The value of C is kept in a variable so it can also be shown in the plot title.
this_C = 1.0
X_train, X_test, y_train, y_test = train_test_split(X_C2, y_C2, random_state=0)

clf = SVC(kernel="linear", C=this_C)
clf.fit(X_train, y_train)

title = f"Linear SVC\nC = {this_C:.3f}"
fig, subaxes = plt.subplots(1, 1, figsize=(8, 6))

# Only the training points are passed; the two test arguments are None.
plot_class_regions_for_classifier_subplot(
    clf, X_train, y_train, None, None, title, subaxes
)

plt.tight_layout()

#### Linear Support Vector Machine: C parameter

In [None]:
from sklearn.svm import LinearSVC

# Import the subplot helper that this cell actually calls. The original
# import is kept alongside it so nothing that relied on it changes.
from adspy_shared_utilities import (
    plot_class_regions_for_classifier,
    plot_class_regions_for_classifier_subplot,
)


# default: max_iter=1000
# how few iterations are necessary for convergence?
max_iter = 100

X_train, X_test, y_train, y_test = train_test_split(X_C2, y_C2, random_state=0)

# One panel per value of C, side by side.
fig, subaxes = plt.subplots(1, 2, figsize=(12, 6))

for this_C, subplot in zip([0.00001, 100], subaxes):
    clf = LinearSVC(C=this_C, random_state=42, dual=False, max_iter=max_iter).fit(
        X_train, y_train
    )
    title = f"Linear SVC\nC = {this_C:.5f}"
    plot_class_regions_for_classifier_subplot(
        clf, X_train, y_train, None, None, title, subplot
    )

# Lay the figure out once, after both panels are drawn, instead of
# recomputing the layout on each loop iteration.
plt.tight_layout()

#### Application to real dataset
[Read the docs](https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html#sklearn.svm.LinearSVC)

In [None]:
from sklearn.svm import LinearSVC

# default: max_iter=1000
# try incrementally increasing max_iter to eliminate the warning
max_iter = 1000

X_train, X_test, y_train, y_test = train_test_split(X_cancer, y_cancer, random_state=0)

clf = LinearSVC(max_iter=max_iter)
clf.fit(X_train, y_train)

train_accuracy = clf.score(X_train, y_train)
test_accuracy = clf.score(X_test, y_test)

print("Breast cancer dataset")
print(f"Accuracy of Linear SVC classifier on training set: {train_accuracy:.3f}")
print(f"Accuracy of Linear SVC classifier on test set: {test_accuracy:.3f}")

### Multi-class classification with linear models

#### LinearSVC with M classes generates M one vs rest classifiers.

In [None]:
from sklearn.svm import LinearSVC

# default: max_iter=1000
# try incrementally increasing max_iter to eliminate the warning
max_iter, random_state, dual = 1000, None, True

X_train, X_test, y_train, y_test = train_test_split(
    X_fruits_2d, y_fruits_2d, random_state=42
)

# Fit on the two-feature fruit data; the coefficient and intercept arrays
# are printed below and reused by the boundary plot later in the notebook.
clf = LinearSVC(C=5, random_state=random_state, max_iter=max_iter, dual=dual)
clf.fit(X_train, y_train)

print("Coefficients:", clf.coef_, sep="\n")
print("Intercepts:", clf.intercept_, sep="\n")

#### Multi-class results on the fruit dataset

In [None]:
# Display the fruit class names; they label the decision boundaries in the next plot.
target_names_fruits

In [None]:
# One line colour per class, plus a matching colormap for the scatter points.
colors = ["r", "g", "b", "y"]
cmap_fruits = ListedColormap(["#FF0000", "#00FF00", "#0000FF", "#FFFF00"])

# x-values over which each boundary line is drawn.
x_0_range = np.linspace(-10, 15)

plt.figure(figsize=(8, 6))
plt.xlabel("height")
plt.ylabel("width")
plt.xlim(-2, 12)
plt.ylim(-2, 15)

heights = X_fruits_2d["height"]
widths = X_fruits_2d["width"]
plt.scatter(
    heights,
    widths,
    c=y_fruits_2d,
    cmap=cmap_fruits,
    edgecolor="lightgrey",
    alpha=0.7,
    label="fruits",
)

for w, b, color, name in zip(clf.coef_, clf.intercept_, colors, target_names_fruits):
    # A linear model scores a point as y = w_0 x_0 + w_1 x_1 + b and the decision
    # boundary is where y = 0, so solving w_0 x_0 + w_1 x_1 + b = 0 for x_1 gives
    # the line to draw as a function of x_0.
    boundary_x_1 = -(x_0_range * w[0] + b) / w[1]
    plt.plot(x_0_range, boundary_x_1, c=color, alpha=0.8, label=name)

plt.legend()
plt.tight_layout()

## Kernelized Support Vector Machines

### Classification

In [None]:
from sklearn.svm import SVC
from adspy_shared_utilities import plot_class_regions_for_classifier

X_train, X_test, y_train, y_test = train_test_split(X_D2, y_D2, random_state=0)

# The default SVC kernel is radial basis function (RBF)
clf = SVC()
clf.fit(X_train, y_train)

rbf_title = "Support Vector Classifier: RBF kernel"
plt.figure(figsize=(8, 6))
plot_class_regions_for_classifier(clf, X_train, y_train, None, None, rbf_title)

# Compare decision boundaries with polynomial kernel, degree = 3
deg = 3
clf = SVC(kernel="poly", degree=deg)
clf.fit(X_train, y_train)

poly_title = f"Support Vector Classifier: Polynomial kernel, degree = {deg}"
plt.figure(figsize=(8, 6))
plot_class_regions_for_classifier(clf, X_train, y_train, None, None, poly_title)

#### Support Vector Machine with RBF kernel: gamma parameter

In [None]:
# Import the names this cell actually uses: SVC and the subplot plotting
# helper. The original import is kept alongside them so nothing that relied
# on it changes.
from sklearn.svm import SVC
from adspy_shared_utilities import (
    plot_class_regions_for_classifier,
    plot_class_regions_for_classifier_subplot,
)

X_train, X_test, y_train, y_test = train_test_split(X_D2, y_D2, random_state=0)

# One stacked panel per value of gamma.
fig, subaxes = plt.subplots(3, 1, figsize=(8, 16))

for this_gamma, subplot in zip([0.01, 1.0, 10.0], subaxes):
    clf = SVC(kernel="rbf", gamma=this_gamma).fit(X_train, y_train)
    title = f"Support Vector Classifier: \nRBF kernel, gamma = {this_gamma:.3f}"
    # Only the training points are passed; the two test arguments are None.
    plot_class_regions_for_classifier_subplot(
        clf, X_train, y_train, None, None, title, subplot
    )

plt.tight_layout()

#### Support Vector Machine with RBF kernel: using both C and gamma parameters

In [None]:
from sklearn.svm import SVC
from adspy_shared_utilities import plot_class_regions_for_classifier_subplot

from sklearn.model_selection import train_test_split


X_train, X_test, y_train, y_test = train_test_split(X_D2, y_D2, random_state=0)

# 3 x 4 grid: each row is one gamma value, each column one C value.
fig, subaxes = plt.subplots(3, 4, figsize=(15, 12))

for this_gamma, this_axis in zip([0.01, 1, 5], subaxes):

    for this_C, subplot in zip([0.1, 1, 15, 250], this_axis):
        title = f"gamma = {this_gamma:.3f}, C = {this_C:.3f}"
        clf = SVC(kernel="rbf", gamma=this_gamma, C=this_C).fit(X_train, y_train)
        plot_class_regions_for_classifier_subplot(
            clf, X_train, y_train, X_test, y_test, title, subplot
        )

# Lay the figure out once, after all twelve panels are drawn, instead of
# recomputing the layout inside the nested loop.
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)

### Application of SVMs to a real dataset: unnormalized data

In [None]:
from sklearn.svm import SVC

X_train, X_test, y_train, y_test = train_test_split(X_cancer, y_cancer, random_state=0)

# Fit directly on the raw (unscaled) features.
clf = SVC(C=10)
clf.fit(X_train, y_train)

train_accuracy = clf.score(X_train, y_train)
test_accuracy = clf.score(X_test, y_test)

print("Breast cancer dataset (unnormalized features)")
print(f"Accuracy of RBF-kernel SVC on training set: {train_accuracy:.3f}")
print(f"Accuracy of RBF-kernel SVC on test set: {test_accuracy:.3f}")

### Application of SVMs to a real dataset: normalized data with feature preprocessing using minmax scaling

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Rescale the breast-cancer split from the preceding cell; the scaler is fitted
# on the training split only and then applied to the test split.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

clf = SVC(C=10)
clf.fit(X_train_scaled, y_train)

train_accuracy = clf.score(X_train_scaled, y_train)
test_accuracy = clf.score(X_test_scaled, y_test)

print("Breast cancer dataset (normalized with MinMax scaling)")
print(
    f"RBF-kernel SVC (with MinMax scaling) training set accuracy: {train_accuracy:.3f}"
)
print(f"RBF-kernel SVC (with MinMax scaling) test set accuracy: {test_accuracy:.3f}")

## Cross-validation

### Example based on k-NN classifier with fruit dataset (2 features)

In [None]:
from sklearn.model_selection import cross_val_score

# X and y are also reused by the validation-curve cell further down.
folds = 3
X = X_fruits_2d.values
y = y_fruits_2d.values

clf = KNeighborsClassifier(n_neighbors=5)
cv_scores = cross_val_score(clf, X, y, cv=folds)
mean_cv_score = np.mean(cv_scores)

print(f"Cross-validation scores ({folds}-fold): {cv_scores}")
print(f"Mean cross-validation score ({folds}-fold): {mean_cv_score:.3f}")

### A note on performing cross-validation for more advanced scenarios.

In some cases (e.g. when feature values have very different ranges), we've seen the need to scale or normalize the training and test sets before use with a classifier. The proper way to do cross-validation when you need to scale the data is *not* to scale the entire dataset with a single transform, since this will indirectly leak information into the training data about the whole dataset, including the test data (see the lecture on data leakage later in the course).  Instead, scaling/normalizing must be computed and applied for each cross-validation fold separately.  To do this, the easiest way in scikit-learn is to use *pipelines*.  While these are beyond the scope of this course, further information is available in the scikit-learn documentation here:

http://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html

or the Pipeline section in the recommended textbook: Introduction to Machine Learning with Python by Andreas C. Müller and Sarah Guido (O'Reilly Media).

## Validation curve example

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import validation_curve

folds = 3

# Four gamma values, log-spaced from 1e-3 to 1e3.
param_range = np.logspace(-3, 3, num=4)

# X and y come from the cross-validation cell above.
svc_estimator = SVC()
train_scores, test_scores = validation_curve(
    svc_estimator, X, y, param_name="gamma", param_range=param_range, cv=folds
)

In [None]:
# Raw training scores, then their mean along axis 1, then the overall mean.
train_row_means = train_scores.mean(axis=1)
print(train_scores, "\n")
print(train_row_means, "\n")
print(train_scores.mean())

In [None]:
# Raw cross-validation scores, then their mean along axis 1, then the overall mean.
test_row_means = test_scores.mean(axis=1)
print(test_scores, "\n")
print(test_row_means, "\n")
print(test_scores.mean())

### The following code is based on scikit-learn validation_plot example.
[Read the docs](https://scikit-learn.org/stable/auto_examples/model_selection/plot_validation_curve.html)

In [None]:
# Plot mean training and cross-validation scores against gamma, with a
# +/- one standard deviation band around each curve.

# Mean and standard deviation along axis 1 of each score array.
train_scores_mean = np.mean(train_scores, axis=1)
train_scores_std = np.std(train_scores, axis=1)
test_scores_mean = np.mean(test_scores, axis=1)
test_scores_std = np.std(test_scores, axis=1)
lw = 2

plt.figure(figsize=(8, 6))
plt.title("Validation Curve with SVM")
# Raw string: "\g" is not a valid Python escape sequence, so a plain literal
# triggers an invalid-escape warning; the resulting text is unchanged.
plt.xlabel(r"$\gamma$ (gamma)")
plt.ylabel("Score")
plt.ylim(0.0, 1.1)
plt.grid(alpha=0.2)

# Training curve on a logarithmic x-axis.
plt.semilogx(
    param_range,
    train_scores_mean,
    label="Training score",
    color="darkorange",
    lw=lw,
)

# Shaded band: mean +/- one standard deviation of the training scores.
plt.fill_between(
    param_range,
    train_scores_mean - train_scores_std,
    train_scores_mean + train_scores_std,
    alpha=0.2,
    color="darkorange",
    lw=lw,
)

# Cross-validation curve on the same axes.
plt.semilogx(
    param_range,
    test_scores_mean,
    label="Cross-validation score",
    color="navy",
    lw=lw,
)

# Shaded band: mean +/- one standard deviation of the cross-validation scores.
plt.fill_between(
    param_range,
    test_scores_mean - test_scores_std,
    test_scores_mean + test_scores_std,
    alpha=0.2,
    color="navy",
    lw=lw,
)

plt.legend(loc="best")
plt.tight_layout()