In [1]:
import numpy as np
import pandas as pd
import scipy
import matplotlib.pyplot as plt
import seaborn as sns

# Cap DataFrame rendering at 50 rows and 50 columns.
# Fully qualified option names are used on purpose: the bare pattern
# 'max_columns' matches more than one option on recent pandas versions
# and makes set_option raise OptionError.
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 50)
# Configure seaborn in a single call. Every sns.set() invocation re-applies
# its own defaults for any argument that is not passed, so a second call with
# only `rc` would silently discard the 'ticks' style and 'talk' context.
sns.set(style='ticks', context='talk', rc={"figure.figsize": (8, 8)})

from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last
# one. Cells in this notebook that list several bare expressions rely on it.
InteractiveShell.ast_node_interactivity = "all"

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

More magic:
  
`%alias_magic t time`<br>
`?str.replace`<br>
`%run ./two-histograms.ipynb`<br>
`!ls`<br>
`%lsmagic`<br>

See also: [Jupyter Notebook tips, tricks, and shortcuts](https://www.dataquest.io/blog/jupyter-notebook-tips-tricks-shortcuts/)

# `fetch_mldata`

In [2]:
%env SCIKIT_LEARN_DATA="/home/maxim/bin/scikit_learn_data"

env: SCIKIT_LEARN_DATA="/home/maxim/bin/scikit_learn_data"


In [3]:
# NOTE(review): fetch_mldata is deprecated in newer scikit-learn releases in
# favour of fetch_openml - confirm against the installed version before re-running.
from sklearn.datasets import fetch_mldata
# Download (or load from the local cache) the iris dataset as a dict-like bunch.
iris = fetch_mldata('iris')
# Inspect the available fields and the feature / label array shapes.
iris.keys()
iris.data.shape
iris.target.shape

dict_keys(['DESCR', 'COL_NAMES', 'target', 'data'])

(150, 4)

(150,)

# `SGDClassifier`

In [4]:
# Load MNIST and separate the feature matrix from the digit labels.
mnist = fetch_mldata('MNIST original')
X, y = mnist["data"], mnist["target"]

# Treat the first n_train rows as the training set and the rest as the test set.
n_train = 60000
X_train, X_test, y_train, y_test = X[:n_train], X[n_train:], y[:n_train], y[n_train:]

# Shuffle the training rows with a fixed seed so that every downstream result
# (cross-validation folds, scores, confusion matrix) is reproducible on a
# fresh kernel. A local RandomState leaves the global NumPy RNG untouched.
shuffle_index = np.random.RandomState(42).permutation(n_train)
X_train, y_train = X_train[shuffle_index], y_train[shuffle_index]

In [5]:
# Binary targets for the "is this digit a 5?" task: True where the label is 5.
y_train_5, y_test_5 = (y_train == 5), (y_test == 5)

# Quick look at the training mask: its shape and the first 18 entries.
y_train_5.shape
y_train_5[:18]

(60000,)

array([False, False, False, False, False, False,  True, False, False,
       False, False, False, False, False, False, False,  True, False])

In [6]:
from sklearn.linear_model import SGDClassifier

# Binary "5 vs. not-5" classifier; random_state is pinned so repeated fits
# on the same data give the same model.
sgd = SGDClassifier(max_iter=5, random_state=42)
# fit() returns the estimator, so this bare expression also displays its parameters.
sgd.fit(X_train, y_train_5)

SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
       eta0=0.0, fit_intercept=True, l1_ratio=0.15,
       learning_rate='optimal', loss='hinge', max_iter=5, n_iter=None,
       n_jobs=1, penalty='l2', power_t=0.5, random_state=42, shuffle=True,
       tol=None, verbose=0, warm_start=False)

In [7]:
# It's a linear model: one weight per input feature plus a single intercept,
# trained with the loss reported by `sgd.loss`.
sgd.coef_.shape
sgd.intercept_.shape
sgd.loss

(1, 784)

(1,)

'hinge'

# `cross_val_score`


In [8]:
# Sanity check: the feature matrix and the binary labels must have the same number of rows.
X_train.shape
y_train_5.shape

(60000, 784)

(60000,)

In [9]:
from sklearn.model_selection import cross_val_score
# Accuracy of the classifier on each of 3 cross-validation folds of the training set.
cross_val_score(sgd, X_train, y_train_5, cv=3, scoring="accuracy")

array([0.9662 , 0.9391 , 0.95755])

In [10]:
from sklearn.model_selection import StratifiedKFold
from sklearn.base import clone

def my_cross_val_score(sgd, X=None, y=None, n_splits=3):
    """Hand-rolled stratified k-fold accuracy, mirroring ``cross_val_score``.

    Parameters
    ----------
    sgd : estimator
        Unfitted (or fitted) scikit-learn estimator. It is never modified:
        a fresh clone is fitted for every fold.
    X : array-like, optional
        Feature matrix. Defaults to the notebook-level ``X_train``.
    y : array-like, optional
        Target vector. Defaults to the notebook-level ``y_train_5``.
    n_splits : int, default 3
        Number of stratified folds.

    Returns
    -------
    list of float
        Accuracy on the held-out part of each fold, in fold order.
    """
    X = X_train if X is None else X
    y = y_train_5 if y is None else y

    # No random_state here: without shuffle=True the splits are already
    # deterministic, and recent scikit-learn versions raise ValueError when
    # random_state is set while shuffle is False.
    skfolds = StratifiedKFold(n_splits=n_splits)

    result = []
    for train_index, test_index in skfolds.split(X, y):
        # Fresh unfitted copy so that folds never share learned state.
        clone_clf = clone(sgd)
        clone_clf.fit(X[train_index], y[train_index])
        y_pred = clone_clf.predict(X[test_index])
        # The mean of the boolean "prediction is correct" array is the accuracy.
        result.append(np.mean(y_pred == y[test_index]))
    return result

# Should reproduce the per-fold accuracies returned by cross_val_score above.
my_cross_val_score(sgd)

[0.9662, 0.9391, 0.95755]

Link: [Stratification on CV.SE](https://stats.stackexchange.com/questions/49540/understanding-stratified-cross-validation)

# `cross_val_predict`, `confusion_matrix`

In [11]:
from sklearn.model_selection import cross_val_predict
# Cross-validated predictions for the training set (3 folds): one prediction
# per training sample, used below to build the confusion matrix.
y_train_pred = cross_val_predict(sgd, X_train, y_train_5, cv=3)

In [12]:
# Predictions and true labels side by side: both are boolean vectors of the same length.
y_train_pred.shape
y_train_pred
y_train_5.shape
y_train_5

(60000,)

array([False, False, False, ...,  True,  True,  True])

(60000,)

array([False, False, False, ...,  True,  True,  True])

In [13]:
from sklearn.metrics import confusion_matrix
# Rows are the actual class, columns the predicted class (False first, then True),
# so the layout is [[TN, FP], [FN, TP]].
confusion = confusion_matrix(y_true=y_train_5, y_pred=y_train_pred)
confusion

array([[54298,   281],
       [ 2462,  2959]])

In [14]:
# A good example for 3 classes: entry [i, j] counts the samples whose true
# class is i and whose predicted class is j.
confusion_matrix(y_true=[2, 0, 2, 2, 0, 1], y_pred=[0, 0, 2, 2, 0, 2])

array([[2, 0, 0],
       [0, 0, 1],
       [1, 0, 2]])

# `precision_score`, `recall_score`

In [15]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Name the four cells of the 2x2 confusion matrix
# (rows: actual class, columns: predicted class).
(tn, fp), (fn, tp) = confusion

# Precision: share of predicted positives that are real positives.
precision_score(y_train_5, y_train_pred)
tp / (tp + fp)

# Recall: share of real positives that were found.
recall_score(y_train_5, y_train_pred)
tp / (tp + fn)

# F1: harmonic mean of precision and recall, written in terms of the counts.
f1_score(y_train_5, y_train_pred)
tp / (tp + (fn + fp) / 2.0)

0.9132716049382716

0.9132716049382716

0.5458402508762221

0.5458402508762221

0.683292922295347

0.683292922295347