# Introduction to Machine Learning with scikit-learn

### This is mostly just a notebook for the sklearn tutorial available at:
http://scikit-learn.org/stable/tutorial/basic/tutorial.html

## Example 1: Classifying Handwritten Digits

### Load the data

In [None]:
from sklearn import datasets
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the handwritten-digits dataset via scikit-learn's datasets module.
digits = datasets.load_digits()

In [None]:
# Check what kind of container load_digits() returned.
type(digits)

In [None]:
# Feature array; indexed further down as [sample, feature].
digits.data

In [None]:
# Label array; it is sliced in step with the rows of digits.data below.
digits.target

In [None]:
# Show the first nine digit images on a 3x3 grid, filled row by row.
fig, ax = plt.subplots(nrows=3, ncols=3)
# ax.flat walks the grid in row-major order; zip stops after its nine panels.
for panel, image in zip(ax.flat, digits.images):
    panel.imshow(image, cmap=plt.cm.gray)

In [None]:
# Labels of the first nine samples, i.e. the nine images plotted above
# (same row-by-row order).
digits.target[:9]

### Learning and prediction

In [None]:
from sklearn import svm
# Support vector classifier; gamma and C are fixed by hand here, not tuned.
clf = svm.SVC(gamma=0.001, C=100.)

In [None]:
# Dimensions of the feature array.
digits.data.shape

In [None]:
# Dimensions of the label array; its length should match the number of
# rows in digits.data.
digits.target.shape

In [None]:
# Train on the first 1700 samples; everything from index 1700 onward is kept
# back for evaluation in the following cells.
clf.fit(digits.data[:1700, :], digits.target[:1700])

In [None]:
# Predict labels for the held-out samples (index 1700 onward).
prediction = clf.predict(digits.data[1700:, :])

In [None]:
# Predicted labels.
prediction

In [None]:
# True labels for the same held-out samples, for side-by-side comparison.
digits.target[1700:]

In [None]:
# True only if every held-out prediction equals its label. Class labels are
# integers, so an exact comparison (array_equal) is used instead of a
# floating-point tolerance check (allclose).
np.array_equal(prediction, digits.target[1700:])

In [None]:
# Number of held-out samples whose predicted label matches the true label
# (summing a boolean array counts its True entries).
np.sum(prediction == digits.target[1700:])

In [None]:
# Accuracy by hand: fraction of held-out samples predicted correctly.
# Computed from the arrays instead of typing in the counts from one run, so
# the value stays right if the model, the split, or the library version changes.
np.mean(prediction == digits.target[1700:])

In [None]:
from sklearn.metrics import accuracy_score
# Same accuracy, obtained from scikit-learn's helper; arguments are passed
# as (true labels, predicted labels).
accuracy_score(digits.target[1700:], prediction)

### Predicting a class that was not seen during training

In [None]:
# Hold out every sample of digit 0: train on digits 1-9 only, test on the 0s.
is_zero = digits.target == 0
x_train = digits.data[~is_zero, :]
y_train = digits.target[~is_zero]
x_test = digits.data[is_zero, :]
y_test = digits.target[is_zero]

In [None]:
# Refit the same classifier on data that contains no examples of digit 0.
clf.fit(x_train, y_train)

In [None]:
# Ask it to label samples that are all digit 0.
prediction = clf.predict(x_test)

In [None]:
# Every true label here is 0, a label that was absent from y_train, so this
# score is expected to be 0.0 (the cells below check which labels come out).
accuracy_score(y_test, prediction)

In [None]:
# Predicted labels for the digit-0 samples.
prediction

In [None]:
# Does the label 0 appear anywhere in the predictions?
0 in prediction

In [None]:
# The distinct labels the classifier assigned to the digit-0 samples.
np.unique(prediction)

### Unsupervised Learning

In [None]:
from sklearn.cluster import KMeans
# Ten clusters, one per digit class (0-9).
# random_state pins the centroid initialisation so re-running the notebook
# reproduces the same clusters; n_init is spelled out because its default
# value differs between scikit-learn releases.
km = KMeans(n_clusters=10, n_init=10, random_state=0)

In [None]:
# Positional split: the first 1700 samples for training, the rest for testing.
train_rows = slice(None, 1700)
test_rows = slice(1700, None)
x_train = digits.data[train_rows, :]
y_train = digits.target[train_rows]
x_test = digits.data[test_rows, :]
y_test = digits.target[test_rows]

In [None]:
# Cluster the training features only; the labels (y_train) are not passed in.
km.fit(x_train)

### Dimensionality Reduction

In [None]:
from sklearn.decomposition import PCA
# Reduce the features to two principal components so they can be plotted.
pca = PCA(n_components=2)
# Fit the projection on the training features and project them in one step.
x_red = pca.fit_transform(x_train)
# Project the k-means cluster centres with the same fitted PCA, so they are
# expressed in the same 2-D coordinates as x_red.
cc_red = pca.transform(km.cluster_centers_)

In [None]:
# Scatter the 2-D projection of the training samples, coloured by the k-means
# cluster index assigned to each sample.
plt.figure(figsize=(10, 8))
plt.scatter(x_red[:, 0], x_red[:, 1], c=km.predict(x_train))