In [1]:
from tensorflow import keras

import time
import sys
from pathlib import Path
import numpy as np
import os

# Resolve the current working directory to an absolute path and append its
# 'lib' subfolder to the module search path.
ROOT_PATH = os.path.abspath('')
sys.path.append('{}/lib'.format(ROOT_PATH))

# from lib.Kmeans_lib import *
# from lib.EvalMetrics import *

from lib.PlotUtils import save_plots
from lib.CustomLayer_lib import Custom_Layer
from lib.utils import create_dataset, RunOneEpoch, TrainSettings
from lib.EvalMetrics import ComputeEvalMetrics2
from lib.Kmeans_lib2 import RunOneEpoch_V2

2022-11-07 22:17:29.531160: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Define settings
#
# Two shared TrainSettings objects are prepared here: one is attached to the
# model for the training pass, the other for the test pass.

def _configured_settings(**options):
    """Return a new TrainSettings with every keyword applied as an attribute.

    Attributes are assigned in the order the keywords are given.
    """
    configured = TrainSettings()
    for option_name, option_value in options.items():
        setattr(configured, option_name, option_value)
    return configured


# Training pass: verbosity 'NONE', confusion matrix / output / plots / extra log all off.
settings_train = _configured_settings(
    verbosity='NONE',
    fill_cmtx=False,
    save_output=False,
    save_plots=False,
    save_extralog=False,
    mode='Train',
)

# Test pass: verbosity 'EOEINFO', confusion matrix / output / plots / extra log all on.
settings_test = _configured_settings(
    verbosity='EOEINFO',
    fill_cmtx=True,
    save_output=True,
    save_plots=True,
    save_extralog=True,
    mode='Test',
)

In [3]:
import shutil

# Delete the 'Results/' directory tree; with ignore_errors=True a missing
# directory (or any removal failure) is silently ignored.
# NOTE(review): the experiment cells in this notebook write to 'ResultsV2_OL/'
# and 'ResultsV2_CWR/', which this call does not touch -- confirm 'Results/'
# is still the intended target.
shutil.rmtree(path='Results/', ignore_errors=True)

In [5]:
# Experiment sweep for the 'OL' last-layer method.
#
# For every dataset repetition, every feature count and every clustering batch
# size: build a Custom_Layer model, run one pass on the training digits, run one
# pass on the test digits, then log the results.
#
# Output layout (per repetition): ResultsV2_OL/ds<idx>_<n_train>_<n_test>/
#   output_log.txt              one row per (n_feat, batch_size): error counts,
#                               1 - errors/n_test, elapsed time, error arrays
#   clustering_metrics_log.txt  ComputeEvalMetrics2(true_labels, pseudo_labels)
#   model_metrics_log.txt       ComputeEvalMetrics2(true_labels, pred_labels)
#   /Plots                      directory handed to save_plots

list_datasets = [[3500, 500]]# Format (n_train, n_test)
list_features = [10, 30, 50, 100]
list_batches = [5, 10, 20, 40, 100]
iterations = 3

# Column header lines written at the top of the log files.
OUTPUT_LOG_COLUMNS = "# FEATURES\t; BATCH_SZ\t; CST_ERRS\t; CST_ACC\t; MDL_ERRS\t; MDL_ACC\t;\t TIME\t; CST_ERR_ARRAY\t; MDL_ERR_ARRAY\n"
METRICS_LOG_COLUMNS = "# FEATURES\t; BATCH_SZ\t; ACCURACY\t; MIC_PREC\t; MIC_RCLL\t;\t MIC_F1\t; MAC_PREC\t; MAC_RCLL\t; MAC_F1\t; WGT_PREC\t; WGT_RCLL\t; WGT_F1\n"

for n_train, n_test in list_datasets:
    for idx in range(1, iterations+1):

        print("############# STARTING DATASET {} ############".format(idx))

        # Always define the output folder: the plot and extra-log branches below
        # use it even when save_output is off, so it must not depend on that flag
        # (otherwise it is undefined, or stale from a previously executed cell).
        RESULTS_PATH = 'ResultsV2_OL/ds{}_{}_{}/'.format(idx,n_train, n_test)
        if settings_test.save_output or settings_test.save_plots or settings_test.save_extralog:
            os.makedirs(RESULTS_PATH, exist_ok = True)
            settings_test.save_path = RESULTS_PATH

        # Create the log files with their headers ('w' overwrites existing files).
        # The `with` statement closes each file; no explicit close() is needed.
        if settings_test.save_output:
            for log_name, column_header in (
                ('output_log.txt', OUTPUT_LOG_COLUMNS),
                ('clustering_metrics_log.txt', METRICS_LOG_COLUMNS),
                ('model_metrics_log.txt', METRICS_LOG_COLUMNS),
            ):
                with open(RESULTS_PATH + log_name, 'w') as log_file:
                    log_file.write("# Dataset: {}, n_train = {}, n_test = {} \n".format(idx,n_train, n_test))
                    log_file.write("# \n")
                    log_file.write(column_header)

        # Create dataset
        digits_train, label_digits_train, digits_test, label_digits_test = create_dataset(n_train, n_test)

        for n_feat in list_features:

            print("Starting with n_features: ", n_feat)

            #  Set model and features (one saved model directory per feature count)
            MODEL_PATH = 'Models/{}/'.format(n_feat)
            features_saved = np.loadtxt(MODEL_PATH + 'll_features.txt')
            labels_features_saved = np.loadtxt(MODEL_PATH + 'll_labels_features.txt').astype(int)
            keras_model = keras.models.load_model(MODEL_PATH + 'original_mnist_cnn.h5')  # Original model

            for batch_size in list_batches:

                print("Starting batch size: ", batch_size)

                # Initialize model
                # NOTE(review): the same keras_model and the same settings objects are
                # reused for every batch size -- confirm Custom_Layer / RunOneEpoch_V2
                # do not carry state over between iterations.
                Model = Custom_Layer(keras_model)
                Model.title = 'OL'
                Model.filename = 'OL'
                Model.l_rate = 0.01
                Model.update_batch_size = 10
                Model.ll_method = 'OL'
                Model.clustering_batch_size = batch_size

                # The measured time covers the train pass and the test pass together.
                startTime = time.time()

                # Train the model
                Model.settings = settings_train
                RunOneEpoch_V2(Model, digits_train, label_digits_train,features_saved, labels_features_saved)

                # Test the model
                Model.settings = settings_test
                RunOneEpoch_V2(Model, digits_test, label_digits_test,features_saved, labels_features_saved)

                endTime = time.time()

                # Save log
                # Unpack datalog output
                clust_err = Model.settings.datalog[0]
                model_err = Model.settings.datalog[1]
                clust_err_array = Model.settings.datalog[2]
                model_err_array = Model.settings.datalog[3]

                if Model.settings.save_output:
                    with open(RESULTS_PATH + 'output_log.txt', 'a') as log_file:
                        log_file.write("\t{:3d} \t;\t {:3d} \t;\t".format(n_feat,batch_size))
                        log_file.write("{:3d} \t;\t {:.1%} \t;\t{:3d} \t;\t {:.1%} \t;\t".format(clust_err, 1-clust_err/n_test, model_err, 1-model_err/n_test))
                        log_file.write("{:.3f}\t;\t".format(endTime-startTime))
                        log_file.write("{}\t;\t{}\n".format(clust_err_array, model_err_array))

                #  Save plots
                if Model.settings.save_plots:
                    plot_tag = 'Features_{}_Batch_{}'.format(n_feat, batch_size)
                    save_plots(Model, RESULTS_PATH + '/Plots', 'clust', plot_tag)
                    save_plots(Model, RESULTS_PATH + '/Plots', 'model', plot_tag)

                if Model.settings.save_extralog:

                    # Extract output
                    true_labels = Model.settings.extralog[0]
                    pseudo_labels = Model.settings.extralog[1]
                    pred_labels = Model.settings.extralog[2]

                    # Clustering metrics compare the true labels with the pseudo labels;
                    # model metrics compare them with the predicted labels.
                    for log_name, compared_labels in (
                        ('clustering_metrics_log.txt', pseudo_labels),
                        ('model_metrics_log.txt', pred_labels),
                    ):
                        metrics = ComputeEvalMetrics2(true_labels, compared_labels)
                        with open(RESULTS_PATH + log_name, 'a') as log_file:
                            log_file.write("\t{:3d} \t;\t {:3d}".format(n_feat,batch_size))
                            for metric_value in metrics:
                                log_file.write(" \t;\t  {:.2f}".format(metric_value))
                            log_file.write("\n")

                    


                


############# STARTING DATASET 1 ############
Starting with n_features:  10
Starting batch size:  5
Starting Train batch: 1/700
New digit detected -> 7
New digit detected -> 9
Starting Train batch: 2/700
New digit detected -> 6
Starting Train batch: 3/700
Starting Train batch: 4/700
New digit detected -> 8
Starting Train batch: 5/700
Starting Train batch: 6/700
Starting Train batch: 7/700
Starting Train batch: 8/700
Starting Train batch: 9/700
Starting Train batch: 10/700
Starting Train batch: 11/700
Starting Train batch: 12/700
Starting Train batch: 13/700
Starting Train batch: 14/700
Starting Train batch: 15/700
Starting Train batch: 16/700
Starting Train batch: 17/700
Starting Train batch: 18/700
Starting Train batch: 19/700
Starting Train batch: 20/700
Starting Train batch: 21/700
Starting Train batch: 22/700
Starting Train batch: 23/700
Starting Train batch: 24/700
Starting Train batch: 25/700
Starting Train batch: 26/700
Starting Train batch: 27/700
Starting Train batch: 28/700
S

In [6]:
# Experiment sweep for the 'CWR' last-layer method.
#
# For every dataset repetition, every feature count and every clustering batch
# size: build a Custom_Layer model, run one pass on the training digits, run one
# pass on the test digits, then log the results.
#
# Output layout (per repetition): ResultsV2_CWR/ds<idx>_<n_train>_<n_test>/
#   output_log.txt              one row per (n_feat, batch_size): error counts,
#                               1 - errors/n_test, elapsed time, error arrays
#   clustering_metrics_log.txt  ComputeEvalMetrics2(true_labels, pseudo_labels)
#   model_metrics_log.txt       ComputeEvalMetrics2(true_labels, pred_labels)
#   /Plots                      directory handed to save_plots

list_datasets = [[3500, 500]]# Format (n_train, n_test)
list_features = [10, 30, 50, 100]
list_batches = [5, 10, 20, 40, 100]
iterations = 3

# Column header lines written at the top of the log files.
OUTPUT_LOG_COLUMNS = "# FEATURES\t; BATCH_SZ\t; CST_ERRS\t; CST_ACC\t; MDL_ERRS\t; MDL_ACC\t;\t TIME\t; CST_ERR_ARRAY\t; MDL_ERR_ARRAY\n"
METRICS_LOG_COLUMNS = "# FEATURES\t; BATCH_SZ\t; ACCURACY\t; MIC_PREC\t; MIC_RCLL\t;\t MIC_F1\t; MAC_PREC\t; MAC_RCLL\t; MAC_F1\t; WGT_PREC\t; WGT_RCLL\t; WGT_F1\n"

for n_train, n_test in list_datasets:
    for idx in range(1, iterations+1):

        print("############# STARTING DATASET {} ############".format(idx))

        # Always define the output folder: the plot and extra-log branches below
        # use it even when save_output is off, so it must not depend on that flag
        # (otherwise it is undefined, or stale from a previously executed cell).
        RESULTS_PATH = 'ResultsV2_CWR/ds{}_{}_{}/'.format(idx,n_train, n_test)
        if settings_test.save_output or settings_test.save_plots or settings_test.save_extralog:
            os.makedirs(RESULTS_PATH, exist_ok = True)
            settings_test.save_path = RESULTS_PATH

        # Create the log files with their headers ('w' overwrites existing files).
        # The `with` statement closes each file; no explicit close() is needed.
        if settings_test.save_output:
            for log_name, column_header in (
                ('output_log.txt', OUTPUT_LOG_COLUMNS),
                ('clustering_metrics_log.txt', METRICS_LOG_COLUMNS),
                ('model_metrics_log.txt', METRICS_LOG_COLUMNS),
            ):
                with open(RESULTS_PATH + log_name, 'w') as log_file:
                    log_file.write("# Dataset: {}, n_train = {}, n_test = {} \n".format(idx,n_train, n_test))
                    log_file.write("# \n")
                    log_file.write(column_header)

        # Create dataset
        digits_train, label_digits_train, digits_test, label_digits_test = create_dataset(n_train, n_test)

        for n_feat in list_features:

            print("Starting with n_features: ", n_feat)

            #  Set model and features (one saved model directory per feature count)
            MODEL_PATH = 'Models/{}/'.format(n_feat)
            features_saved = np.loadtxt(MODEL_PATH + 'll_features.txt')
            labels_features_saved = np.loadtxt(MODEL_PATH + 'll_labels_features.txt').astype(int)
            keras_model = keras.models.load_model(MODEL_PATH + 'original_mnist_cnn.h5')  # Original model

            for batch_size in list_batches:

                print("Starting batch size: ", batch_size)

                # Initialize model
                # NOTE(review): the same keras_model and the same settings objects are
                # reused for every batch size -- confirm Custom_Layer / RunOneEpoch_V2
                # do not carry state over between iterations.
                Model = Custom_Layer(keras_model)
                Model.title = 'CWR'
                Model.filename = 'CWR'
                Model.l_rate = 0.01
                Model.update_batch_size = 10
                Model.ll_method = 'CWR'
                Model.clustering_batch_size = batch_size

                # The measured time covers the train pass and the test pass together.
                startTime = time.time()

                # Train the model
                Model.settings = settings_train
                RunOneEpoch_V2(Model, digits_train, label_digits_train,features_saved, labels_features_saved)

                # Test the model
                Model.settings = settings_test
                RunOneEpoch_V2(Model, digits_test, label_digits_test,features_saved, labels_features_saved)

                endTime = time.time()

                # Save log
                # Unpack datalog output
                clust_err = Model.settings.datalog[0]
                model_err = Model.settings.datalog[1]
                clust_err_array = Model.settings.datalog[2]
                model_err_array = Model.settings.datalog[3]

                if Model.settings.save_output:
                    with open(RESULTS_PATH + 'output_log.txt', 'a') as log_file:
                        log_file.write("\t{:3d} \t;\t {:3d} \t;\t".format(n_feat,batch_size))
                        log_file.write("{:3d} \t;\t {:.1%} \t;\t{:3d} \t;\t {:.1%} \t;\t".format(clust_err, 1-clust_err/n_test, model_err, 1-model_err/n_test))
                        log_file.write("{:.3f}\t;\t".format(endTime-startTime))
                        log_file.write("{}\t;\t{}\n".format(clust_err_array, model_err_array))

                #  Save plots
                if Model.settings.save_plots:
                    plot_tag = 'Features_{}_Batch_{}'.format(n_feat, batch_size)
                    save_plots(Model, RESULTS_PATH + '/Plots', 'clust', plot_tag)
                    save_plots(Model, RESULTS_PATH + '/Plots', 'model', plot_tag)

                if Model.settings.save_extralog:

                    # Extract output
                    true_labels = Model.settings.extralog[0]
                    pseudo_labels = Model.settings.extralog[1]
                    pred_labels = Model.settings.extralog[2]

                    # Clustering metrics compare the true labels with the pseudo labels;
                    # model metrics compare them with the predicted labels.
                    for log_name, compared_labels in (
                        ('clustering_metrics_log.txt', pseudo_labels),
                        ('model_metrics_log.txt', pred_labels),
                    ):
                        metrics = ComputeEvalMetrics2(true_labels, compared_labels)
                        with open(RESULTS_PATH + log_name, 'a') as log_file:
                            log_file.write("\t{:3d} \t;\t {:3d}".format(n_feat,batch_size))
                            for metric_value in metrics:
                                log_file.write(" \t;\t  {:.2f}".format(metric_value))
                            log_file.write("\n")

                    


                


############# STARTING DATASET 1 ############
Starting with n_features:  10
Starting batch size:  5
Starting Train batch: 1/700
New digit detected -> 9
New digit detected -> 8
Starting Train batch: 2/700
Starting Train batch: 3/700
New digit detected -> 6
Starting Train batch: 4/700
Starting Train batch: 5/700
Starting Train batch: 6/700
Starting Train batch: 7/700
Starting Train batch: 8/700
New digit detected -> 7
Starting Train batch: 9/700
Starting Train batch: 10/700
Starting Train batch: 11/700
Starting Train batch: 12/700
Starting Train batch: 13/700
Starting Train batch: 14/700
Starting Train batch: 15/700
Starting Train batch: 16/700
Starting Train batch: 17/700
Starting Train batch: 18/700
Starting Train batch: 19/700
Starting Train batch: 20/700
Starting Train batch: 21/700
Starting Train batch: 22/700
Starting Train batch: 23/700
Starting Train batch: 24/700
Starting Train batch: 25/700
Starting Train batch: 26/700
Starting Train batch: 27/700
Starting Train batch: 28/700
S