In [2]:
import numpy as np
import matplotlib.pyplot as plt 

from scipy import stats
from sklearn import datasets
from sklearn.semi_supervised import label_propagation

from sklearn.metrics import confusion_matrix, classification_report 


In [3]:
digits = datasets.load_digits()
rng = np.random.RandomState(0) # sets the seed()
indices = np.arange(len(digits.data))  # provides equally spaced intervals between the length of the data digits
rng.shuffle(indices)

print(rng)
print(indices)
print(rng.shuffle(indices))


<mtrand.RandomState object at 0x7fdadb954d10>
[1081 1707  927 ..., 1653  559  684]
None


In [4]:
X = digits.data[indices[:330]]
y = digits.target[indices[:330]]
images = digits.images[indices[:330]]

n_total_samples = len(y)
n_labeled_points = 30

indices = np.arange(n_total_samples)

unlabeled_set = indices[n_labeled_points:]

# shuffle everything around
y_train = np.copy(y)
y_train[unlabeled_set] = -1

In [7]:
label_propogation_model = label_propagation.LabelSpreading(gamma=0.25, max_iter=5)
label_propogation_model.fit(X,y_train)

predicted_models = label_propogation_model.transduction_[unlabeled_set] 
true_labels = y[unlabeled_set]

confusion_matrix_data = confusion_matrix(true_labels, predicted_models, labels = label_propogation_model.classes_) 

print("Label Spreading model: %d labeled & %d unlabeled points (%d total)" % (n_labeled_points, n_total_samples - 
                                                                              n_labeled_points, n_total_samples)) 
print(classification_report(true_labels, predicted_models))

                                                            


Label Spreading model: 30 labeled & 300 unlabeled points (330 total)
             precision    recall  f1-score   support

          0       0.89      0.96      0.93        26
          1       0.79      0.84      0.81        31
          2       0.89      0.89      0.89        28
          3       1.00      0.65      0.78        31
          4       1.00      0.53      0.69        38
          5       0.67      1.00      0.81        31
          6       0.97      1.00      0.98        28
          7       0.84      0.81      0.82        26
          8       0.78      0.91      0.84        32
          9       0.56      0.66      0.60        29

avg / total       0.84      0.81      0.81       300



In [11]:
print "Confusion Matrix:"
print confusion_matrix_data

Confusion Matrix:
[[25  0  0  0  0  0  0  0  0  1]
 [ 0 26  3  0  0  1  1  0  0  0]
 [ 0  0 25  0  0  0  0  0  3  0]
 [ 0  0  0 20  0  6  0  1  4  0]
 [ 2  3  0  0 20  0  0  3  0 10]
 [ 0  0  0  0  0 31  0  0  0  0]
 [ 0  0  0  0  0  0 28  0  0  0]
 [ 0  0  0  0  0  0  0 21  1  4]
 [ 0  2  0  0  0  1  0  0 29  0]
 [ 1  2  0  0  0  7  0  0  0 19]]


In [16]:
prediction_entropies = stats.distributions.entropy(label_propogation_model.label_distributions_.T) 
uncertainity = np.argsort(prediction_entropies)[-10:]

In [19]:
figure = plt.figure(figsize=(7,5))
for index, image_index in enumerate(uncertainity):
    image = images[image_index]
    sub = figure.add_subplot(2,5,index+1) 
    sub.imshow(image,cmap=plt.cm.gray_r)
    plt.xticks([])
    plt.yticks([])
    sub.set_title('predict: %i\ntrue: %i' % (label_propogation_model.transduction_[image_index], y[image_index]))
figure.suptitle('Semi supervised learning')
plt.show() 