In [35]:
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from sklearn import datasets
from sklearn.semi_supervised import label_propagation
from sklearn.metrics import classification_report, confusion_matrix 

In [36]:
# Load the digits dataset that ships with scikit-learn.
problem_data = datasets.load_digits()

# Seeded generator so the shuffle below is reproducible from run to run.
rng = np.random.RandomState(0)

# NOTE: despite its name, `threshold` is a shuffled array of sample indices;
# the next cell slices it to draw a random subset of the data.
threshold = np.arange(problem_data.data.shape[0])
rng.shuffle(threshold)

In [37]:
# Keep the first 200 shuffled indices: features, labels and the matching images.
x = problem_data.data[threshold[:200]]
y = problem_data.target[threshold[:200]]
images = problem_data.images[threshold[:200]]

# Features and labels must line up one-to-one. Fail loudly here instead of
# assigning the undefined name `null` (a NameError) and breaking later cells.
if len(x) != len(y):
    raise ValueError(
        "feature/label length mismatch: %d samples vs %d labels"
        % (len(x), len(y)))

total_samples = len(x)
# Number of leading samples whose labels are kept; the rest start out unlabeled.
total_points = 10

In [38]:
# Indices of the samples treated as unlabeled: everything after the first
# `total_points` entries.
unlabeled_points = np.arange(total_points, total_samples)

# Figure that the later plotting cells draw into.
figure = plt.figure()

In [46]:
# NOTE(review): nothing inside this loop modifies `unlabeled_points` or
# `total_points`, so all five passes fit on the same labels and print the same
# report. The labeled set is only grown by a later cell, outside the loop.
for i in range(5):
    # Work on a copy of the true labels and mark the unlabeled samples with -1.
    train_y = y.copy()
    train_y[unlabeled_points] = -1

    label_propogation_model = label_propagation.LabelSpreading(
        gamma=0.25, max_iter=5)
    label_propogation_model.fit(x, train_y)

    # Compare the labels inferred for the masked samples with the true ones.
    accurate_labels = y[unlabeled_points]
    predicted_labels = label_propogation_model.transduction_[unlabeled_points]
    confusion = confusion_matrix(
        accurate_labels,
        predicted_labels,
        labels=label_propogation_model.classes_,
    )

    print('Iteration %i %s' % (i, 70 * '_'))
    print("Label Spreading model: %d labeled & %d unlabeled (%d total)"
          % (total_points, total_samples - total_points, total_samples))
    print(classification_report(accurate_labels, predicted_labels))
    print("Confusion Matrix")
    print(confusion)

Iteration 0 ______________________________________________________________________
Label Spreading model: 10 labeled & 190 unlabeled (200 total)
             precision    recall  f1-score   support

          0       0.00      0.00      0.00        14
          1       0.62      0.53      0.57        19
          2       0.52      0.93      0.67        14
          3       0.00      0.00      0.00        14
          4       0.00      0.00      0.00        12
          5       1.00      0.32      0.48        22
          6       0.83      0.96      0.89        25
          7       0.66      1.00      0.79        23
          8       0.46      0.74      0.57        23
          9       0.44      0.75      0.55        24

avg / total       0.52      0.59      0.51       190

Confusion Matrix
[[10  5  0  0  0  2  2]
 [ 0 13  0  0  1  0  0]
 [ 0  0  7  5  0  4  6]
 [ 1  0  0 24  0  0  0]
 [ 0  0  0  0 23  0  0]
 [ 3  3  0  0  0 17  0]
 [ 0  0  0  0  1  5 18]]
Iteration 1 __________________

In [40]:
# Entropy of each sample's predicted label distribution. The distributions are
# transposed because entropy() reduces along the first axis by default.
prediction_entropies = stats.entropy(
    label_propogation_model.label_distributions_.T)
print(prediction_entropies)

[  7.28669513e-060   3.46650821e-059   2.77786898e-059   4.47663068e-113
   5.33217165e-095   7.31565538e-060   1.50822540e-064   2.13843484e-088
   2.66892424e-076   3.60445299e-111   4.81753868e-040   4.30783476e-025
   5.15536719e-074   4.25624784e-042   4.63445243e-017   3.04485734e-052
   5.72546936e-015   2.53475170e-043   2.70696961e-021   4.24094185e-003
   2.13600535e-019   6.61986992e-016   4.97390362e-066   2.44769451e-031
   1.03697655e-066   4.86593001e-006   3.04978893e-029   1.73218264e-021
   1.54231967e-062   1.26138640e-019   2.13340848e-033   7.00792986e-060
   6.66482573e-095   8.82525188e-039   4.79057827e-025   5.10159688e-053
   2.62880171e-026   1.73244246e-004   2.54952979e-043   9.66557907e-042
   8.23273411e-021   1.67469405e-087   1.81119965e-064   8.65983907e-060
   1.60994854e-043   2.05820285e-043   9.48987357e-064   5.27373904e-019
   7.91133984e-041   2.26889313e-033   4.88791281e-023   2.16757389e-031
   8.08512786e-067   5.54877842e-002   2.54952969e-

In [55]:
# Rank samples by ascending prediction entropy, keep only those that are still
# unlabeled, and take the five most uncertain. Without the filter an already
# labeled sample could be selected, which would add nothing to the labeled set.
uncertainty_index = np.argsort(prediction_entropies)
uncertainty_index = uncertainty_index[
    np.isin(uncertainty_index, unlabeled_points)][-5:]

# Integer dtype matters: this array is later concatenated with np.where()
# results and passed to np.delete, which rejects non-integer index arrays.
delete_indices = np.array([], dtype=np.intp)

In [56]:
# Caption placed at the left edge of the figure; the vertical position and the
# label count are both derived from the current value of `i`.
figure.text(
    0.05,
    1 - (i + 1) * .183,
    "model %d\n\nfit with \n%d labels" % (i + 1, i * 5 + 10),
    size=10,
)

<matplotlib.text.Text at 0x7f778001fcd0>

In [59]:
# Draw each selected sample in row `i` of the 5x5 grid, titled with its
# predicted and true label, and record where it sits in `unlabeled_points`.
for index, image_index in enumerate(uncertainty_index):
    image = images[image_index]
    subtitle = figure.add_subplot(5, 5, index + 1 + (5 * i))
    subtitle.imshow(image, cmap=plt.cm.gray_r)
    subtitle.set_title('predict: %i\ntrue: %i' % (
        label_propogation_model.transduction_[image_index],
        y[image_index]), size=10)
    subtitle.axis('off')
    # Empty when the sample is not in `unlabeled_points` (already labeled).
    delete_index, = np.where(unlabeled_points == image_index)
    delete_indices = np.concatenate((delete_indices, delete_index))

# np.delete requires integer indices; `delete_indices` may have been started
# from a float array (np.array([])), so normalise it before use.
delete_indices = np.unique(delete_indices).astype(np.intp)
unlabeled_points = np.delete(unlabeled_points, delete_indices)

# Count only the samples that actually moved to the labeled set, rather than
# assuming all five selected samples were previously unlabeled.
total_points += len(delete_indices)

In [60]:
# Figure title. The adjacent string literals are concatenated, so the first
# fragment needs a trailing space to keep "most" and "uncertain" apart.
figure.suptitle("Active Learning with Label Propagation. \nRows show 5 most "
                "uncertain labels to learn with the next model.")

# Spell out the layout parameters so each value is tied to its meaning.
plt.subplots_adjust(left=0.12, bottom=0.03, right=0.9, top=0.8,
                    wspace=0.2, hspace=0.45)
plt.show()