In [1]:
from tensorflow.keras.datasets import mnist
from tensorflow import keras

import random 
from random import seed

import time
import sys
from pathlib import Path
import numpy as np

# The following line is to avoid showing output of the plots
# Must be imported before matplotlib
#%matplotlib inline

#%matplotlib Qt5Agg

# Absolute path is needed to load libraries.
# The current working directory is resolved through pathlib (already imported
# above) so that this cell does not depend on an `os` import.
ROOT_PATH = str(Path.cwd())
sys.path.append(ROOT_PATH + '/lib')

# from lib.Kmeans_lib import *
# from lib.EvalMetrics import *

from lib.simulation_lib import save_plots
from lib.CustomLayer_lib import Custom_Layer, TrainSettings #, RunOneEpoch

2022-10-29 23:55:28.240974: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
(data_train, label_train), (data_test, label_test) = mnist.load_data()  # Load data

SEED = 42     # Fixed seed so the same random subset is drawn on every run
n_train = 40  # Training samples
n_test = 5    # Test samples
img_rows, img_cols = 28, 28

random.seed(SEED)

# Select random images from the dataset (sampling with replacement:
# the same image may be drawn more than once).
train_idx = np.array(
    [random.randint(0, len(data_train) - 1) for _ in range(n_train)], dtype=int
)
test_idx = np.array(
    [random.randint(0, len(data_test) - 1) for _ in range(n_test)], dtype=int
)

# Indexing with an integer array returns copies, so the original dataset
# arrays are not modified. Labels are stored as floats, as before.
label_digits_train = label_train[train_idx].astype(np.float64)
label_digits_test = label_test[test_idx].astype(np.float64)

# Add the trailing channel axis and rescale pixel values from [0, 255] to [0, 1].
digits_train = data_train[train_idx].reshape(n_train, img_rows, img_cols, 1).astype(np.float32) / 255.0
digits_test = data_test[test_idx].reshape(n_test, img_rows, img_cols, 1).astype(np.float32) / 255.0

In [3]:
from lib.Kmeans_lib import *
from lib.CustomLayer_lib import *

def RunOneEpoch(model, images, labels, features_saved, labels_saved, settings):
    """Run one pass over `images`: feature extraction, clustering, last-layer update.

    The data is split into batches with `np.array_split`. For every batch the
    frozen part of the model extracts the features, `k_mean_clustering`
    produces pseudo-labels, and each sample is then used to update the last
    layer with `update_ll_OL` or `update_ll_CWR`, depending on
    `model.ll_method`.

    Parameters
    ----------
    model : object
        Must expose `ll_method` ('OL' or 'CWR') and `ML_frozen.predict`.
        `batch_size` is read in 'CWR' mode; `label`, `std_label` and
        `conf_matr` are read/updated when `settings.fill_cmtx` is True.
    images : array-like
        Samples; each batch is reshaped to (-1, 28, 28, 1) before prediction.
    labels : array-like
        True labels, one per sample.
    features_saved, labels_saved
        Forwarded unchanged to `k_mean_clustering`.
    settings : object
        Reads `cluster_batch_size`, `mode`, `fill_cmtx`, `verbosity`,
        `save_output` and `save_path`. NOTE: `cluster_batch_size` is
        overwritten in place with `min(cluster_batch_size, len(labels))`.

    Returns
    -------
    None
        Results are printed and, when `settings.save_output` is True, appended
        to `settings.save_path + 'output_log.txt'`.

    Raises
    ------
    ValueError
        If `model.ll_method` is neither 'OL' nor 'CWR'.
    """
    # Fail early with a clear message; otherwise `prediction` would never be
    # assigned in the update loop below.
    if model.ll_method not in ('OL', 'CWR'):
        raise ValueError(
            "Unsupported ll_method {!r}: expected 'OL' or 'CWR'".format(model.ll_method)
        )

    n_samples = images.shape[0]
    settings.cluster_batch_size = min(settings.cluster_batch_size, len(labels))
    clust_err_array = []
    model_err_array = []

    # BATCH PROCESSING OF DATA
    n_batch = int(np.ceil(n_samples / settings.cluster_batch_size))
    images_batch = np.array_split(images, n_batch)
    labels_batch = np.array_split(labels, n_batch)

    err_clu = 0 # Clustering error (entire epoch)
    err_mod = 0 # Model error (entire epoch)
    pseudolabels = []

    if model.ll_method == 'CWR':
        model_cntr = 0
        found_digit = np.zeros(10)

    for i in range(0, n_batch):
        # np.array_split may return batches shorter than cluster_batch_size
        # when n_samples is not a multiple of it, so use the real length.
        batch_len = len(labels_batch[i])

        print("Starting {} batch: {}/{}".format(settings.mode, i+1, n_batch))
        # Features extraction
        start1 = time.time()
        features_batch = model.ML_frozen.predict(images_batch[i].reshape((-1, 28, 28, 1)), verbose = False)
        end1 = time.time()

        # Kmean clustering
        start2 = time.time()
        pseudolabels_batch, err_clu_batch = k_mean_clustering(features_batch, features_saved, labels_batch[i], labels_saved, settings)
        end2 = time.time()
        pseudolabels.extend(pseudolabels_batch)
        err_clu += err_clu_batch
        clust_err_array.append(err_clu_batch)

        # Last Layer update
        err_mod_batch = 0
        for j in range(len(pseudolabels_batch)):
            if model.ll_method == 'OL':
                prediction = update_ll_OL(model, features_batch[j,:], pseudolabels_batch[j])

            if model.ll_method == 'CWR':
                # The last argument is True only when the counter has reached
                # model.batch_size; the counter is then restarted.
                if(model_cntr == model.batch_size):
                    prediction, found_digit = update_ll_CWR(model, features_batch[j,:], pseudolabels_batch[j], found_digit, True)
                    model_cntr = 0
                else:
                    prediction, found_digit = update_ll_CWR(model, features_batch[j,:], pseudolabels_batch[j], found_digit, False)
                model_cntr += 1

            if(prediction != labels_batch[i][j]):
                err_mod_batch += 1

            # Update confusion matrix - could be turned into a function in Custom_layer
            if settings.fill_cmtx == True:
                # Reset the indices for every sample so that a label missing
                # from model.std_label cannot reuse the previous sample's cell.
                true_idx = None
                pred_idx = None
                for k in range(0, len(model.label)):
                    if(prediction == model.std_label[k]):
                        pred_idx = k
                    if(labels_batch[i][j] == model.std_label[k]):
                        true_idx = k
                if true_idx is not None and pred_idx is not None:
                    model.conf_matr[true_idx, pred_idx] += 1

        model_err_array.append(err_mod_batch)
        err_mod += err_mod_batch

        if settings.verbosity == 'EOBINFO':
            print("Features extraction took {:.3f} seconds and Kmean clustering took {:.3f} seconds, with {:.1%} accuracy ({} errors)".format(end1-start1, end2-start2, 1-err_clu_batch/batch_len, err_clu_batch))
            print("Batch Model errors {} ({:.1%} accuracy)".format(err_mod_batch, 1-err_mod_batch/batch_len))

    if settings.verbosity == 'EOEINFO' or settings.verbosity == 'EOBINFO':
        print("Total clustering error: {:.1%} ({}/{} errors, {:.1%} accuracy)".format(err_clu/n_samples, err_clu, n_samples, 1-err_clu/n_samples))
        print("Total model error: {:.1%} ({}/{} errors, {:.1%} accuracy)".format(err_mod/n_samples, err_mod, n_samples, 1-err_mod/n_samples))

    # return clust_err_array, model_err_array
    if settings.save_output == True:
        # The `with` block closes the file on exit; no explicit close() needed.
        with open(settings.save_path + 'output_log.txt', 'a') as new_file:
            # Clustering_errors, Model_errors, Clustering error array, Model error array
            new_file.write("{} ; {} ; {} ; {}".format(err_clu, clust_err_array, err_mod, model_err_array))

In [4]:
# Define settings
def _with_attrs(target, **attrs):
    """Set every keyword as an attribute on `target`, in the order given, and return it."""
    for attr_name, attr_value in attrs.items():
        setattr(target, attr_name, attr_value)
    return target


# Training pass: silent, nothing is saved.
settings_train = _with_attrs(
    TrainSettings(),
    verbosity='NONE',
    clustering_labels=list(range(0, 10)),
    fill_cmtx=False,
    save_output=False,
    save_plots=False,
    mode='Train',
)

# Test pass: end-of-epoch report, confusion matrix, log and plots are saved.
settings_test = _with_attrs(
    TrainSettings(),
    verbosity='EOEINFO',
    clustering_labels=list(range(0, 10)),
    fill_cmtx=True,
    save_output=True,
    save_plots=True,
    mode='Test',
)


# Start from an empty Results folder: delete it, then create it again
import shutil
shutil.rmtree('Results/', ignore_errors = True)
Path('Results').mkdir(exist_ok=True)


# Sweep over the saved models (one per feature count) and the clustering batch sizes
for n_feat in [10, 30, 50, 100]:
    # Saved features, their labels and the original Keras model for this feature count
    MODEL_PATH = 'Models/{}/'.format(n_feat)
    features_saved = np.loadtxt(MODEL_PATH + 'll_features.txt')
    labels_features_saved = np.loadtxt(MODEL_PATH + 'll_labels_features.txt').astype(int)
    keras_model = keras.models.load_model(MODEL_PATH + 'original_mnist_cnn.h5') # Original model

    # Results of this feature count go to their own sub-folder
    RESULTS_PATH = 'Results/{}/'.format(n_feat)
    settings_test.save_path = RESULTS_PATH
    Path(RESULTS_PATH).mkdir(exist_ok=True) # Create directory if not exists

    for batch_size in [5, 10, 20, 40, 100]:

        # A new custom layer is built on top of the loaded model for every batch size
        Model = _with_attrs(
            Custom_Layer(keras_model),
            title='OL',
            filename='OL',
            l_rate=0.01,
            batch_size=8,
            ll_method='OL',
        )

        # The measured time covers both the training and the test pass
        startTime = time.time()

        settings_train.cluster_batch_size = batch_size
        RunOneEpoch(Model, digits_train, label_digits_train, features_saved, labels_features_saved, settings_train)

        settings_test.cluster_batch_size = batch_size
        RunOneEpoch(Model, digits_test, label_digits_test, features_saved, labels_features_saved, settings_test)

        endTime = time.time()

        # Complete the log line started by RunOneEpoch with the elapsed time
        if settings_test.save_output:
            with open(settings_test.save_path + 'output_log.txt', 'a') as new_file:
                new_file.write(" ; {:.3f} \n".format(endTime - startTime))

        # Save plots
        if settings_test.save_plots:
            save_plots(Model, RESULTS_PATH + '/Plots', batch_size)

2022-10-29 23:55:37.183134: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Starting Train batch: 1/8
New digit detected -> 6
New digit detected -> 8
Starting Train batch: 2/8
Starting Train batch: 3/8
New digit detected -> 7
Starting Train batch: 4/8
New digit detected -> 9
Starting Train batch: 5/8
Starting Train batch: 6/8
Starting Train batch: 7/8
Starting Train batch: 8/8
Starting Test batch: 1/1
Total clustering error: 20.0% (1/5 errors, 80.0% accuracy)
Total model error: 0.0% (0/5 errors, 100.0% accuracy)
Starting Train batch: 1/4
New digit detected -> 6
New digit detected -> 8
Starting Train batch: 2/4
New digit detected -> 7
Starting Train batch: 3/4
Starting Train batch: 4/4
New digit detected -> 9
Starting Test batch: 1/1
Total clustering error: 20.0% (1/5 errors, 80.0% accuracy)
Total model error: 0.0% (0/5 errors, 100.0% accuracy)
Starting Train batch: 1/2
New digit detected -> 6
New digit detected -> 8
New digit detected -> 9
New digit detected -> 7
Starting Train batch: 2/2
Starting Test batch: 1/1
Total clustering error: 20.0% (1/5 errors, 80.0