In [20]:
import numpy as np
import matplotlib.pyplot as plt 

from scipy import stats
from sklearn import datasets
from sklearn.semi_supervised import label_propagation

from sklearn.metrics import confusion_matrix, classification_report 


In [21]:
digits = datasets.load_digits()
rng = np.random.RandomState(0) # sets the seed()
indices = np.arange(len(digits.data))  # provides equally spaced intervals between the length of the data digits
rng.shuffle(indices)

print(rng)
print(indices)
print(rng.shuffle(indices))


<mtrand.RandomState object at 0x7f969816a918>
[1081 1707  927 ..., 1653  559  684]
None


In [27]:
X = digits.data[indices[:80]]
y = digits.target[indices[:80]]
images = digits.images[indices[:80]]

n_total_samples = len(y)
n_labeled_points = 30

indices = np.arange(n_total_samples)

unlabeled_set = indices[n_labeled_points:]

# shuffle everything around
y_train = np.copy(y)
y_train[unlabeled_set] = -1

In [28]:
label_propogation_model = label_propagation.LabelSpreading(gamma=0.25, max_iter=5)
label_propogation_model.fit(X,y_train)

predicted_models = label_propogation_model.transduction_[unlabeled_set] 
true_labels = y[unlabeled_set]

confusion_matrix_data = confusion_matrix(true_labels, predicted_models, labels = label_propogation_model.classes_) 

print("Label Spreading model: %d labeled & %d unlabeled points (%d total)" % (n_labeled_points, n_total_samples - 
                                                                              n_labeled_points, n_total_samples)) 
print(classification_report(true_labels, predicted_models))

                                                            


Label Spreading model: 30 labeled & 70 unlabeled points (100 total)
             precision    recall  f1-score   support

          0       1.00      1.00      1.00         8
          1       1.00      1.00      1.00         9
          2       1.00      1.00      1.00         7
          3       1.00      1.00      1.00         9
          4       1.00      1.00      1.00         5
          5       1.00      0.83      0.91         6
          6       1.00      1.00      1.00         8
          7       1.00      1.00      1.00         7
          8       0.83      1.00      0.91         5
          9       0.83      0.83      0.83         6

avg / total       0.97      0.97      0.97        70



In [29]:
print "Confusion Matrix:"
print confusion_matrix_data

Confusion Matrix:
[[8 0 0 0 0 0 0 0 0 0]
 [0 9 0 0 0 0 0 0 0 0]
 [0 0 7 0 0 0 0 0 0 0]
 [0 0 0 9 0 0 0 0 0 0]
 [0 0 0 0 5 0 0 0 0 0]
 [0 0 0 0 0 5 0 0 0 1]
 [0 0 0 0 0 0 8 0 0 0]
 [0 0 0 0 0 0 0 7 0 0]
 [0 0 0 0 0 0 0 0 5 0]
 [0 0 0 0 0 0 0 0 1 5]]


In [30]:
prediction_entropies = stats.distributions.entropy(label_propogation_model.label_distributions_.T) 
uncertainity = np.argsort(prediction_entropies)[-10:]

In [None]:
figure = plt.figure(figsize=(7,5))
for index, image_index in enumerate(uncertainity):
    image = images[image_index]
    sub = figure.add_subplot(2,5,index+1) 
    sub.imshow(image,cmap=plt.cm.gray_r)
    plt.xticks([])
    plt.yticks([])
    sub.set_title('predict: %i\ntrue: %i' % (label_propogation_model.transduction_[image_index], y[image_index]))
figure.suptitle('Semi supervised learning')
plt.show() 