# Machine Learning with scikit-learn

## Hand-written digit recognition
Load hand-written digit images

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn import datasets, svm, metrics

In [None]:
# Load the digits dataset through scikit-learn's datasets module.
digits = datasets.load_digits()

# Print the dimensions of the image array, then show the raw pixel
# matrix stored at index 100 as this cell's output.
image_stack = digits.images
print(image_stack.shape)
image_stack[100]

1797개의 이미지 샘플이 들어 있다. Digit 이미지는 8x8 matrix로 표현되고 값들은 gray level이다.

인덱스 100의 이미지(0부터 세므로 101번째 샘플)를 보면,

In [None]:
# Draw the sample at index 100 with the reversed gray colormap, then
# print the label the dataset stores for that same sample.
sample_index = 100
fig, ax = plt.subplots()
ax.imshow(digits.images[sample_index], cmap=plt.cm.gray_r)
plt.show()
print('labeled:', digits.target[sample_index])

##  Prepare data

In [None]:
# One row per image: collapse each image's pixel grid into a flat
# feature vector so the array is (samples, features).
n_samples = digits.images.shape[0]
data = digits.images.reshape(n_samples, -1)
print('(samples, features):', data.shape)

In [None]:
# Sequential (unshuffled) split: the leading 80% of the rows are used for
# training and the remaining rows are held out for testing.
n_train = round(0.8 * n_samples)
train_rows, test_rows = slice(None, n_train), slice(n_train, None)

train_data, train_target = data[train_rows], digits.target[train_rows]
test_data, test_target = data[test_rows], digits.target[test_rows]

print('train set:', train_data.shape)
print('test set:', test_data.shape)

## Training

In [None]:
# Build a support vector classifier with gamma fixed at 0.001.
classifier = svm.SVC(gamma=1e-3)

# Fit it on the training rows; the call is left as the last expression so
# its return value is what the cell displays.
classifier.fit(train_data, train_target)

## Prediction

In [None]:
# Run the trained classifier on the held-out test rows and keep the true
# labels alongside the predictions for comparison.
predicted = classifier.predict(test_data)
expected = test_target

In [None]:
# Put the true and predicted labels side by side, one row per test sample.
df = pd.DataFrame({'expected': expected, 'predicted': predicted})

# Keep only the rows where the prediction disagrees with the true label;
# as the last expression, this table is rendered as the cell's output.
df.loc[df['expected'] != df['predicted']]

In [None]:
# Header naming the estimator whose results follow. The trailing comma makes
# the right-hand operand a real one-element tuple, so the %-format always
# receives exactly one argument regardless of the operand's type.
print("Classification report for classifier:\n%s\n" % (classifier,))

# Text report produced by metrics.classification_report for the test labels.
print(metrics.classification_report(expected, predicted))

# Confusion matrix of expected vs. predicted labels; wrapped in a tuple for
# the same reason as above (the value is an array-like, not a plain scalar).
print("Confusion matrix:\n%s" % (metrics.confusion_matrix(expected, predicted),))