## Chapter 3 -  Classification
### Multioutput Classification

In [1]:
import pickle

import pandas as pd
import numpy as np
import numpy.random as rnd
import matplotlib
import matplotlib.pyplot as plt

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict
from sklearn.metrics import (precision_score, 
                             recall_score, 
                             classification_report, 
                             confusion_matrix, f1_score, 
                             precision_recall_curve, roc_curve, roc_auc_score)

def load(fname):
    """Return the MNIST dataset, using ``fname`` as a local pickle cache.

    If ``fname`` exists it is unpickled and returned. Otherwise the dataset
    is downloaded with ``fetch_openml('mnist_784', version=1)``, pickled to
    ``fname`` for subsequent runs, and returned.

    Parameters
    ----------
    fname : str or path-like
        Location of the pickle cache file.

    Returns
    -------
    object
        The object produced by ``fetch_openml`` (or its unpickled copy).
    """
    try:
        with open(fname, 'rb') as f:
            # NOTE(security): pickle.load can execute arbitrary code; only
            # point this at cache files written by this notebook.
            return pickle.load(f)
    except FileNotFoundError:
        # Imported lazily: only needed when the cache is missing.
        from sklearn.datasets import fetch_openml
        mnist = fetch_openml('mnist_784', version=1, cache=True)
        with open(fname, 'wb') as f:
            # pickle.dump returns None, so its result must NOT be assigned
            # back to `mnist` (doing so made the first call return None).
            pickle.dump(mnist, f)
        return mnist

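Another type of problem is multioutput classification (more precisely, multioutput-multiclass classification): a generalization of multilabel classification in which each label can itself be multiclass, i.e. take more than two possible values.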

In [3]:
# Ingest: read the (cached) MNIST dataset and separate features from labels.
mnsit = load('mnist.data.pkl')
mnsit_X = mnsit['data']
mnsit_y = mnsit['target']

# Hold out 15% of the samples for testing; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    mnsit_X,
    mnsit_y,
    test_size=0.15,
    random_state=0,
)

# Convert the label arrays to int.
y_train = y_train.astype(int)
y_test = y_test.astype(int)

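In this case, the task is to predict the clean image from its noisy version. The classifier's output is multilabel (one label per pixel), and each label can take multiple values (the pixel intensity).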

In [18]:
# Draw the noise from a dedicated, seeded generator so the noisy images (and
# every result derived from them) are identical on each Restart & Run All,
# without touching NumPy's global random state.
noise_rng = rnd.RandomState(42)

# Integer noise drawn uniformly from [0, 100), one value per pixel
# (784 pixels per image).
noise_train = noise_rng.randint(0, 100, (len(X_train), 784))
noise_test = noise_rng.randint(0, 100, (len(X_test), 784))

# Inputs: the original images with the noise added.
X_train_mod = X_train + noise_train
X_test_mod = X_test + noise_test

# Targets: the original, noise-free images.
y_train_mod = X_train
y_test_mod = X_test

In [19]:
# Work with only the first 100 (noisy input, clean target) training pairs.
X_train_mod_sample, y_train_mod_sample = X_train_mod[:100], y_train_mod[:100]

In [20]:
# Fit a k-nearest-neighbours classifier (default hyperparameters) that maps
# each noisy image to its clean counterpart.
knn_clf = KNeighborsClassifier()
# Kept as the cell's last expression so the fitted estimator is displayed.
knn_clf.fit(X=X_train_mod_sample, y=y_train_mod_sample)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                     weights='uniform')

In [21]:
# Index of the noisy test image to clean up; change it to try another digit.
i = 4
# predict expects a 2-D batch, hence the single image is wrapped in a list.
# The index now uses `i` (it was hard-coded to 4, leaving `i` unused).
clean_digit = knn_clf.predict([X_test_mod[i]])
print(clean_digit)

[[  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
    0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   