In [1]:
# Imports
from sklearn import datasets, svm, metrics
from sklearn.decomposition import RandomizedPCA
import matplotlib.pyplot as plt

In [2]:
# Fetch scikit-learn's bundled handwritten-digits dataset; later cells read
# its `images`, `data`, `target` and `target_names` attributes.
digits = datasets.load_digits()

In [3]:
# Preview the first eight digit images, each titled with its label.
# Every image is paired with its target so both can be iterated together.
images_and_labels = list(zip(digits.images, digits.target))

# Lay the previews out on a 2-row by 4-column grid; subplot positions are
# 1-based, so the enumeration starts counting at 1.
for position, (image, label) in enumerate(images_and_labels[:8], start=1):
    plt.subplot(2, 4, position)
    plt.axis('off')
    plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
    plt.title('Training: %i' % label)

plt.show()

In [4]:
# To apply a classifier to this data, we need to flatten each image,
# turning the data into a (samples, features) matrix.
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))

# Create a classifier: a support vector classifier
classifier = svm.SVC(gamma=0.001)

# We use the first half of the digits for training, the second for testing.
# Floor division keeps the split index an integer: with true division
# (Python 3, or `from __future__ import division`) `n_samples / 2` is a
# float and cannot be used as a slice bound.
n_train = n_samples // 2

train_data = data[:n_train]
test_data = data[n_train:]

train_labels = digits.target[:n_train]
test_labels = digits.target[n_train:]

# Training
classifier.fit(train_data, train_labels)

# Testing
predicted = classifier.predict(test_data)

print("Classification report for classifier %s:\n%s\n"
      % (classifier, metrics.classification_report(test_labels, predicted)))
print("Confusion matrix:\n%s" % metrics.confusion_matrix(test_labels, predicted))

Classification report for classifier SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False):
             precision    recall  f1-score   support

          0       1.00      0.99      0.99        88
          1       0.99      0.97      0.98        91
          2       0.99      0.99      0.99        86
          3       0.98      0.87      0.92        91
          4       0.99      0.96      0.97        92
          5       0.95      0.97      0.96        91
          6       0.99      0.99      0.99        91
          7       0.96      0.99      0.97        89
          8       0.94      1.00      0.97        88
          9       0.93      0.98      0.95        92

avg / total       0.97      0.97      0.97       899


Confusion matrix:
[[87  0  0  0  1  0  0  0  0  0]
 [ 0 88  1  0  0  0  0  0  1  1]
 [ 0  0 85  1 

  if np.rank(self.data) != 1 or np.rank(self.row) != 1 or np.rank(self.col) != 1:


In [5]:
# Create a Randomized PCA model that keeps two components.
# The solver is stochastic, so the seed is fixed to make the projection
# (and everything derived from it below) reproducible across runs.
# NOTE: RandomizedPCA was deprecated in scikit-learn 0.18 and removed in 0.20;
# on newer versions the equivalent is
# PCA(n_components=2, svd_solver='randomized', random_state=0).
randomized_pca = RandomizedPCA(n_components=2, random_state=0)

# Shape of the original feature matrix, before the reduction.
# print(...) with a single argument behaves the same on Python 2 and 3.
print(digits.data.shape)

# Fit the model to the data and project the data onto the two components
reduced_data_rpca = randomized_pca.fit_transform(digits.data)

# Shape after the reduction: same number of rows, two columns
print(reduced_data_rpca.shape)

# First sample, before and after the projection
print(digits.data[0])
print(reduced_data_rpca[0])

(1797, 64)
(1797, 2)
[  0.   0.   5.  13.   9.   1.   0.   0.   0.   0.  13.  15.  10.  15.   5.
   0.   0.   3.  15.   2.   0.  11.   8.   0.   0.   4.  12.   0.   0.   8.
   8.   0.   0.   5.   8.   0.   0.   9.   8.   0.   0.   4.  11.   0.   1.
  12.   7.   0.   0.   2.  14.   5.  10.  12.   0.   0.   0.   0.   6.  13.
  10.   0.   0.   0.]
[ -1.26204266  21.26913173]




In [6]:
# Scatter plot of the two principal components, one colour per digit class.
# colors[i] is used for the samples whose target equals i.
colors = ['black', 'blue', 'purple', 'yellow', 'white', 'red', 'lime', 'cyan', 'orange', 'gray']
for i, color in enumerate(colors):
    # Select the rows belonging to class i once and reuse the mask for both axes.
    in_class = digits.target == i
    x = reduced_data_rpca[in_class, 0]
    y = reduced_data_rpca[in_class, 1]
    # A black edge keeps the 'white' markers visible on a white background
    # when the marker edge would otherwise default to the face colour.
    # The label ties each legend entry to its own scatter rather than
    # relying on the order in which the artists were added.
    plt.scatter(x, y, c=color, edgecolors='black', label=str(digits.target_names[i]))
# Place the legend outside the axes, anchored to the top-right corner.
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.xlabel('First Principal Component')
plt.ylabel('Second Principal Component')
plt.title("PCA Scatter Plot")
plt.show()

  if self._edgecolors == 'face':


In [7]:
# Train a second support vector classifier, this time on the 2-component
# PCA projection instead of the full feature matrix.
classifier2 = svm.SVC(gamma=0.001)

# We use the first half of the digits for training, the second for testing.
# Floor division keeps the split index an integer: under true division
# `n_samples / 2` is a float and cannot be used as a slice bound.
n_train = n_samples // 2
red_train_data = reduced_data_rpca[:n_train]
red_test_data = reduced_data_rpca[n_train:]

# Training (labels are the same halves of `digits.target` defined earlier)
classifier2.fit(red_train_data, train_labels)

# Testing
predicted = classifier2.predict(red_test_data)

# Report on the model that was actually evaluated here (classifier2),
# not on the full-feature classifier from the earlier cell.
print("Classification report for classifier %s:\n%s\n"
      % (classifier2, metrics.classification_report(test_labels, predicted)))
print("Confusion matrix:\n%s" % metrics.confusion_matrix(test_labels, predicted))

Classification report for classifier SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False):
             precision    recall  f1-score   support

          0       0.86      0.89      0.87        88
          1       0.65      0.53      0.58        91
          2       0.55      0.65      0.60        86
          3       0.64      0.62      0.63        91
          4       0.83      0.71      0.76        92
          5       0.00      0.00      0.00        91
          6       0.70      0.84      0.76        91
          7       0.58      0.78      0.67        89
          8       0.32      0.36      0.34        88
          9       0.51      0.77      0.61        92

avg / total       0.57      0.61      0.58       899


Confusion matrix:
[[78  0  0  0  0  0  6  0  1  3]
 [ 0 48  2  0  5  0  0  8 21  7]
 [ 0  0 56 19 