In [5]:
import numpy as np
from sklearn import datasets

digits = datasets.load_digits()
rng = np.random.RandomState(0)
indices = np.arange(len(digits.data))
rng.shuffle(indices)

X=digits.data[indices[:330]]
y= digits.target[indices[:330]]
images =digits.images[indices[:330]]
n_total_samples =len(y)
n_labeled_points =10

# Corrected implementation of self-training with label propagation

# Function to perform self-training
def self_training_label_propagation(X, y, n_total_samples, n_labeled_points, iterations=4):
    accuracies = []
    confusion_matrices = []

    # Initially, only a few points are labeled
    train_indices = np.arange(n_labeled_points)
    test_indices = np.arange(n_labeled_points, n_total_samples)

    # Label array for training, unlabeled points are marked with -1
    labels = np.full(n_total_samples, -1.)
    labels[train_indices] = y[train_indices]

    for i in range(iterations):
        # Fit the label propagation model with the available labels
        label_prop_model = LabelSpreading(kernel='rbf', gamma=0.25, max_iter=5)
        label_prop_model.fit(X, labels)

        # Get the predictions on the test data and update the training labels
        test_labels_pred = label_prop_model.predict(X[test_indices])
        test_labels_probs = label_prop_model.predict_proba(X[test_indices])

        # Select five most confident predictions to add to the train set
        n_new_labels = 5
        # We use entropy as a measure of uncertainty
        entropy = -np.sum(test_labels_probs * np.log(test_labels_probs + 1e-5), axis=1)
        most_confident_indices = test_indices[np.argsort(entropy)[:n_new_labels]]

        # Add these to the labeled set
        labels[most_confident_indices] = y[most_confident_indices]

        # Evaluate the model
        train_accuracy = accuracy_score(y[train_indices], labels[train_indices])
        test_accuracy = accuracy_score(y[test_indices], test_labels_pred)
        cm = confusion_matrix(y[test_indices], test_labels_pred, labels=label_prop_model.classes_)

        # Save the accuracy and confusion matrix
        accuracies.append((train_accuracy, test_accuracy))
        confusion_matrices.append(cm)

        # Update train and test indices, "move" the confident points from test to train
        train_indices = np.concatenate((train_indices, most_confident_indices))
        test_indices = np.setdiff1d(test_indices, most_confident_indices)

        # Print current iteration data
        print(f"Iteration {i + 1}:")
        print(f"Train accuracy: {train_accuracy}")
        print(f"Test accuracy: {test_accuracy}")
        print("Confusion Matrix:")
        print(cm)
        print("\n")

    # Return the last label array and the recorded accuracies
    return labels, accuracies, confusion_matrices

# Perform self-training
final_labels, all_accuracies, all_confusion_matrices = self_training_label_propagation(
    X, y, n_total_samples, n_labeled_points
)

# Displaying the accuracies of the last iteration
print("Final iteration accuracies and confusion matrix:")
all_accuracies[-1], all_confusion_matrices[-1]



Iteration 1:
Train accuracy: 1.0
Test accuracy: 0.625
Confusion Matrix:
[[25  3  0  0  0  0  1]
 [ 1 30  0  0  0  0  0]
 [ 0  0 17  7  0  1 10]
 [ 2  0  0 38  0  0  0]
 [ 0  3  0  0 33  0  0]
 [ 8  0  0  0  0 25  0]
 [ 0  0  3  0  0  2 32]]


Iteration 2:
Train accuracy: 1.0
Test accuracy: 0.6761904761904762
Confusion Matrix:
[[23  0  0  0  0  0  0  0]
 [ 0 15  3  0  0  0 10  1]
 [ 0  1 29  0  0  0  0  0]
 [ 0  0  0 17  7  0  1 10]
 [ 0  2  0  0 38  0  0  0]
 [ 0  0  3  0  0 31  0  0]
 [ 0  4  0  0  0  0 28  0]
 [ 0  0  0  3  0  0  2 32]]


Iteration 3:
Train accuracy: 1.0
Test accuracy: 0.6709677419354839
Confusion Matrix:
[[22  0  0  0  0  0  0  0]
 [ 0 14  3  0  0  0 11  1]
 [ 0  1 27  0  0  2  0  0]
 [ 0  0  0 17  7  0  1 10]
 [ 0  2  0  0 37  0  0  0]
 [ 0  0  0  0  0 33  0  0]
 [ 0  4  0  0  0  0 26  0]
 [ 0  0  0  3  0  0  2 32]]


Iteration 4:
Train accuracy: 1.0
Test accuracy: 0.6524590163934426
Confusion Matrix:
[[20  0  0  0  0  0  0  0]
 [ 0 14  3  0  0  0 11  1]
 [ 0  1 27



((1.0, 0.6524590163934426),
 array([[20,  0,  0,  0,  0,  0,  0,  0],
        [ 0, 14,  3,  0,  0,  0, 11,  1],
        [ 0,  1, 27,  0,  0,  2,  0,  0],
        [ 0,  0,  0, 16,  7,  0,  1, 10],
        [ 0,  1,  0,  0, 37,  0,  0,  0],
        [ 0,  0,  0,  0,  0, 32,  0,  0],
        [ 0,  4,  0,  0,  0,  3, 23,  0],
        [ 0,  0,  0,  3,  0,  2,  2, 30]]))