In [86]:
import os
# demonstration of calculating metrics for a neural network model using sklearn
from sklearn.datasets import make_circles
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix

# example of random oversampling to balance the class distribution
from collections import Counter
from sklearn.datasets import make_classification
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from numpy import mean
from numpy import std
from numpy import dstack
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers import LSTM
from keras.layers import TimeDistributed
from keras.layers import ConvLSTM2D
from keras.utils import to_categorical
from matplotlib import pyplot
from tensorflow import keras
from tensorflow.keras.callbacks import ModelCheckpoint

import tensorflow as tf

from scipy import stats

import datetime;
import warnings
warnings.filterwarnings("ignore")

In [87]:
# configs
#EPOCHS = 5
BATCH_SIZE = 32

VERBOSE = 0

WEEKS_TEST= [3]
WEEKS_TRAIN = [0,1,2]


baseFolder = "../data_2019_processed/"
baseFolderWeek = baseFolder+"w"

fileSufixTrain = "_transformed" # unbalanced file sufix is empty -- _transformed
outputFileSufixTrain = "unb" # unbalanced file sufix is unb

# selected features
inputFeatures = ["activity","location","day_of_week",
                 "light","phone_lock","proximity",
                 "sound","time_to_next_alarm", "minutes_day"]
outputClasses = ["awake","asleep"]
#outputClasses = ["class"]

TIME_SERIES_SIZE = -1
TIME_STEP_SHIFT =  -1

NN_type = 'MLP'
UNITS_NUMBER = "16-8"

EPOCHS_ARRAY_TEST = [5,15,30,50,80,100,120,150,200]
EPOCHS_ARRAY_TEST = [1,5]
EPOCHS_ARRAY_TEST = [5,15,30,50,80,100]

generalName = "result_week_tested_"+str(NN_type)+"_"+outputFileSufixTrain+"_batch_size_"+str(BATCH_SIZE)+"_"+UNITS_NUMBER


outputMetricFile = generalName+".csv"
outputMetricFilePartials = generalName+"_partial.csv"
outputCheckpointFolder = generalName+"_checkpoints"
checkpointName_prefix = "checkpoint_epoch_{epoch}.hdf5"

In [88]:
print("Checking whether the checkpoint folder exists or not")
isExist = os.path.exists(outputCheckpointFolder)
if not isExist:
    # Create a new directory because it does not exist
    os.makedirs(outputCheckpointFolder)
    print("The new checkpoint directory is created!")
else:
    print("The checkpoint directory exists!")

Checking whether the checkpoint folder exists or not
The checkpoint directory exists!


In [89]:
outputMetricFile

'result_week_tested_MLP_unb_batch_size_32_16-8.csv'

In [90]:
# y_test     = Array with real values
# yhat_probs = Array with predicted values
def printMetrics(y_test,yhat_probs):
    # predict crisp classes for test set deprecated
    #yhat_classes = model.predict_classes(X_test, verbose=0)
    #yhat_classes = np.argmax(yhat_probs,axis=1)
    yhat_classes = yhat_probs.round()
    # accuracy: (tp + tn) / (p + n)
    accuracy = accuracy_score(y_test, yhat_classes)
    print('Accuracy: %f' % accuracy)
    # precision tp / (tp + fp)
    precision = precision_score(y_test, yhat_classes)
    print('Precision: %f' % precision)
    # recall: tp / (tp + fn)
    recall = recall_score(y_test, yhat_classes)
    print('Recall: %f' % recall)
    # f1: 2 tp / (2 tp + fp + fn)
    f1 = f1_score(y_test, yhat_classes)
    print('F1 score: %f' % f1)
    # kappa
    kappa = cohen_kappa_score(y_test, yhat_classes)
    print('Cohens kappa: %f' % kappa)
    # ROC AUC
    auc = roc_auc_score(y_test, yhat_probs)
    print('ROC AUC: %f' % auc)
    # confusion matrix
    print("\Confusion Matrix")
    matrix = confusion_matrix(y_test, yhat_classes)
    print(matrix)
    
    array = []
    results = dict()
    results['accuracy'] = accuracy
    results['precision'] = precision
    results['recall'] = recall
    results['f1_score'] = f1
    results['cohen_kappa_score'] = kappa
    results['roc_auc_score'] = auc
    #results['matrix'] = np.array(matrix,dtype=object)
    results['matrix'] = 0
    results['TP'] = matrix[0][0]
    results['FP'] = matrix[0][1]
    results['FN'] = matrix[1][0]
    results['TN'] = matrix[1][1]
    
    array.append(accuracy)
    array.append(precision)
    array.append(recall)
    array.append(f1)
    array.append(kappa)
    array.append(auc)
    #array.append(np.array(matrix,dtype=object)))
    array.append(0)
    array.append(matrix[0][0]) # TP
    array.append(matrix[0][1]) # FP
    array.append(matrix[1][0]) # FN
    array.append(matrix[1][1]) # TN
    
    return results, array

def showGlobalMetrics(metrics):
    accuracy,precision,recall,f1_score,cohen_kappa_score,roc_auc_score = 0,0,0,0,0,0
    for metric in metrics:
        accuracy = accuracy + metric['accuracy']
        precision = precision + metric['precision']
        recall = recall + metric['recall']
        f1_score = f1_score + metric['f1_score']
        cohen_kappa_score = cohen_kappa_score + metric['cohen_kappa_score']
        roc_auc_score = roc_auc_score + metric['roc_auc_score']
        
    # mean
    size = len(metrics)
    print(size)
    accuracy = accuracy / size
    precision = precision / size
    recall = recall / size
    f1_score = f1_score / size
    cohen_kappa_score = cohen_kappa_score / size
    roc_auc_score = roc_auc_score / size
    
    #show:\
    print("accuracy: ",accuracy)
    print("precision: ",precision)
    print("recall: ",recall)
    print("f1_score: ",f1_score)
    print("cohen_kappa_score: ",cohen_kappa_score)
    print("roc_auc_score: ",roc_auc_score)
    
    return [accuracy,precision,recall,f1_score,cohen_kappa_score,roc_auc_score]
    
def transform_data_type(dataframe):
    
    # transform inputs
    for column in inputFeatures:
        dataframe[column] = dataframe[column].astype('float32')
    
    # transform outputs
    for column in outputClasses:
        dataframe[column] = dataframe[column].astype('float32')
    
    return dataframe

# one-hot encoding function
def transform_output_nominal_class_into_one_hot_encoding(dataset):
    # create two classes based on the single class
    one_hot_encoded_data = pd.get_dummies(dataset['class'])
    #print(one_hot_encoded_data)
    dataset['awake'] = one_hot_encoded_data['awake']
    dataset['asleep'] = one_hot_encoded_data['asleep']
    
    return dataset

# one-hot encoding function
def transform_output_numerical_class_into_one_hot_encoding(dataset):
    # create two classes based on the single class
    one_hot_encoded_data = pd.get_dummies(dataset['class'])
    #print(one_hot_encoded_data)
    dataset['awake'] = one_hot_encoded_data[0]
    dataset['asleep'] = one_hot_encoded_data[1]
    
    return dataset


def create_dataset_time_series_with_one_output(X, y, window_time_steps=1, shift_step=1):
    Xs, ys = [], []
    for i in range(0, len(X) - window_time_steps, shift_step):
        v = X.iloc[i:(i + window_time_steps)].values
        labels = y.iloc[i: i + window_time_steps]
        Xs.append(v)        
        ys.append(stats.mode(labels)[0][0])
        
    if len(y.columns) == 1:
        return np.array(Xs), np.array(ys).reshape(-1, 1)
    else:
        return np.array(Xs), np.array(ys).reshape(-1, len(y.columns))
    
def create_dataset_time_series_with_one_output_foward(X, y, window_time_steps=1, shift_step=1):
    Xs, ys = [], []
    for i in range(0, len(X) - window_time_steps, shift_step):
        valuesX = X.iloc[i:(i + window_time_steps)].values # values
        valuesY = y.iloc[i: i + window_time_steps].values # labels
        
        Xs.append(valuesX)
        ys.append(valuesY[window_time_steps-1]) # append only the last value
        
    if len(y.columns) == 1:
        return np.array(Xs), np.array(ys).reshape(-1, 1)
    else:
        return np.array(Xs), np.array(ys).reshape(-1, len(y.columns))

In [91]:
# client datasets used on the training process (75% of data)
trainFolders =  ['0Jf4TH9Zzse0Z1Jjh7SnTOe2MMzeSnFi7feTnkG6vgs',
                '0tdmm6rwW3KquQ73ATYYJ5JkpMtvbppJ0VzA2GExdA', 
                '2cyV53lVyUtlMj0BRwilEWtYJwUiviYoL48cZBPBq0', 
                '2J22RukYnEbKTk7t+iUVDBkorcyL5NKN6TrLe89ys', 
                #['5FLZBTVAPwdq9QezHE2sVCJIs7p+r6mCemA2gp9jATk'], #does not have the file
                '7EYF5I04EVqisUJCVNHlqn77UAuOmwL2Dahxd3cA', 
                'a9Qgj8ENWrHvl9QqlXcIPKmyGMKgbfHk9Dbqon1HQP4', 
                'ae4JJBZDycEcY8McJF+3BxyvZ1619y03BNdCxzpZTc', 
                'Ch3u5Oaz96VSrQbf0z31X6jEIbeIekkC0mwPzCdeJ1U', 
                'CH8f0yZkZL13zWuE9ks1CkVJRVrr+jsGdUXHrZ6YeA', 
                'DHO1K4jgiwZJOfQTrxvKE2vn7hkjamigroGD5IaeRc', 
                #'DHPqzSqSttiba1L3BD1cptNJPjSxZ8rXxF9mY3za6WA', # does not have asleep data
                'dQEFscjqnIlug8Tgq97JohhSQPG2DEOWJqS86wCrcY', 
                'HFvs2CohmhHte+AaCzFasjzegGzxZKPhkrX23iI6Xo', 
                'jgB9E8v3Z6PKdTRTCMAijBllA9YEMtrmHbe4qsbmJWw', 
                'JkY++R7E8myldLN3on6iQ78Ee78zCbrLuggfwGju3I', 
                'K4SLohf+TN1Ak8Dn8iE3Lme7rEMPISfppB2sXfHX8', 
                'oGaWetJJJEWHuvYdWYo826SQxfhCExVVQ2da8LE1Y7Q', 
                'pyt24oiDAHsmgWMvkFKz2fn2pwcHiXchd6KchLM', 
                #'PZCf1nfvhR+6fk+7+sPNMYOgb8BAMmtQtfoRS83Suc', # does not have asleep data
                'QUNCATForxzK0HHw46LrGOMWh0eVA8Y5XWEiUXX+cQ', 
                #'rIl2UK9+bQ+tzpFdbJAdbBxEa5GbgrgC030yEaENLw', 
                #'RoBW3cDOO9wWRMPO2twQff83MPc+OXn6gJ+a1DafreI', 
                'SH3kQeyd5volraxw8vOyhlowNqWBPr1IJ9URNXUL4']
                #'VVpwFNMrEglveh6MDN8lrRzTy5OwzglD4FURfM4A2is', 
                #'Wa1mcNmbh66S7VS6GIzyfCFMD3SGhbtDQyFP1ywJEsw', 
                #'XCKRE0BWRHxfP1kZIihgtT+jUjSp2GE8v5ZlhcIhVmA', 
                #'YI5Y79K6GXqAUoGP6PNyII8WKlAoel4urDxWSVVOvBw', 
                #'ypklj+8GJ15rOIH1lpKQtFJOuK+VdvyCuBPqhY3aoM', 
                #'ZSsAZ0Pq+MCqFrnjsRFn5Ua09pMCVaOV9c8ZuYb7XQY']
                
# client datasets used on the training process (75% of data)
trainFolders =  ['0Jf4TH9Zzse0Z1Jjh7SnTOe2MMzeSnFi7feTnkG6vgs',
                '0tdmm6rwW3KquQ73ATYYJ5JkpMtvbppJ0VzA2GExdA', 
                '2cyV53lVyUtlMj0BRwilEWtYJwUiviYoL48cZBPBq0', 
                '2J22RukYnEbKTk7t+iUVDBkorcyL5NKN6TrLe89ys', 
                #['5FLZBTVAPwdq9QezHE2sVCJIs7p+r6mCemA2gp9jATk'], #does not have the file
                '7EYF5I04EVqisUJCVNHlqn77UAuOmwL2Dahxd3cA', 
                'a9Qgj8ENWrHvl9QqlXcIPKmyGMKgbfHk9Dbqon1HQP4', 
                'ae4JJBZDycEcY8McJF+3BxyvZ1619y03BNdCxzpZTc', 
                'Ch3u5Oaz96VSrQbf0z31X6jEIbeIekkC0mwPzCdeJ1U', 
                'CH8f0yZkZL13zWuE9ks1CkVJRVrr+jsGdUXHrZ6YeA', 
                'DHO1K4jgiwZJOfQTrxvKE2vn7hkjamigroGD5IaeRc', 
                #'DHPqzSqSttiba1L3BD1cptNJPjSxZ8rXxF9mY3za6WA', # does not have asleep data
                'dQEFscjqnIlug8Tgq97JohhSQPG2DEOWJqS86wCrcY', 
                'HFvs2CohmhHte+AaCzFasjzegGzxZKPhkrX23iI6Xo', 
                'jgB9E8v3Z6PKdTRTCMAijBllA9YEMtrmHbe4qsbmJWw', 
                'JkY++R7E8myldLN3on6iQ78Ee78zCbrLuggfwGju3I', 
                'K4SLohf+TN1Ak8Dn8iE3Lme7rEMPISfppB2sXfHX8', 
                'oGaWetJJJEWHuvYdWYo826SQxfhCExVVQ2da8LE1Y7Q', 
                'pyt24oiDAHsmgWMvkFKz2fn2pwcHiXchd6KchLM', 
                #'PZCf1nfvhR+6fk+7+sPNMYOgb8BAMmtQtfoRS83Suc', # does not have asleep data
                'QUNCATForxzK0HHw46LrGOMWh0eVA8Y5XWEiUXX+cQ', 
                'rIl2UK9+bQ+tzpFdbJAdbBxEa5GbgrgC030yEaENLw', 
                'RoBW3cDOO9wWRMPO2twQff83MPc+OXn6gJ+a1DafreI', 
                'SH3kQeyd5volraxw8vOyhlowNqWBPr1IJ9URNXUL4',
                'VVpwFNMrEglveh6MDN8lrRzTy5OwzglD4FURfM4A2is', 
                'Wa1mcNmbh66S7VS6GIzyfCFMD3SGhbtDQyFP1ywJEsw', 
                'XCKRE0BWRHxfP1kZIihgtT+jUjSp2GE8v5ZlhcIhVmA', 
                'YI5Y79K6GXqAUoGP6PNyII8WKlAoel4urDxWSVVOvBw', 
                'ypklj+8GJ15rOIH1lpKQtFJOuK+VdvyCuBPqhY3aoM', 
                'ZSsAZ0Pq+MCqFrnjsRFn5Ua09pMCVaOV9c8ZuYb7XQY']
            
# client datasets used on the training process (25% of data)
testFolders =  [#'0Jf4TH9Zzse0Z1Jjh7SnTOe2MMzeSnFi7feTnkG6vgs',
                #'0tdmm6rwW3KquQ73ATYYJ5JkpMtvbppJ0VzA2GExdA', 
                #'2cyV53lVyUtlMj0BRwilEWtYJwUiviYoL48cZBPBq0', 
                #'2J22RukYnEbKTk7t+iUVDBkorcyL5NKN6TrLe89ys', 
                #['5FLZBTVAPwdq9QezHE2sVCJIs7p+r6mCemA2gp9jATk'], #does not have the file
                #'7EYF5I04EVqisUJCVNHlqn77UAuOmwL2Dahxd3cA', 
                #'a9Qgj8ENWrHvl9QqlXcIPKmyGMKgbfHk9Dbqon1HQP4', 
                #'ae4JJBZDycEcY8McJF+3BxyvZ1619y03BNdCxzpZTc', 
                #'Ch3u5Oaz96VSrQbf0z31X6jEIbeIekkC0mwPzCdeJ1U', 
                #'CH8f0yZkZL13zWuE9ks1CkVJRVrr+jsGdUXHrZ6YeA', 
                #'DHO1K4jgiwZJOfQTrxvKE2vn7hkjamigroGD5IaeRc', 
                #'DHPqzSqSttiba1L3BD1cptNJPjSxZ8rXxF9mY3za6WA', # does not have asleep data
                #'dQEFscjqnIlug8Tgq97JohhSQPG2DEOWJqS86wCrcY', 
                #'HFvs2CohmhHte+AaCzFasjzegGzxZKPhkrX23iI6Xo', 
                #'jgB9E8v3Z6PKdTRTCMAijBllA9YEMtrmHbe4qsbmJWw', 
                #'JkY++R7E8myldLN3on6iQ78Ee78zCbrLuggfwGju3I', 
                #'K4SLohf+TN1Ak8Dn8iE3Lme7rEMPISfppB2sXfHX8', 
                #'oGaWetJJJEWHuvYdWYo826SQxfhCExVVQ2da8LE1Y7Q', 
                #'pyt24oiDAHsmgWMvkFKz2fn2pwcHiXchd6KchLM', 
                #'PZCf1nfvhR+6fk+7+sPNMYOgb8BAMmtQtfoRS83Suc', # does not have asleep data
                #'QUNCATForxzK0HHw46LrGOMWh0eVA8Y5XWEiUXX+cQ', 
                'rIl2UK9+bQ+tzpFdbJAdbBxEa5GbgrgC030yEaENLw', 
                'RoBW3cDOO9wWRMPO2twQff83MPc+OXn6gJ+a1DafreI', 
                #'SH3kQeyd5volraxw8vOyhlowNqWBPr1IJ9URNXUL4'] 
                'VVpwFNMrEglveh6MDN8lrRzTy5OwzglD4FURfM4A2is', 
                'Wa1mcNmbh66S7VS6GIzyfCFMD3SGhbtDQyFP1ywJEsw', 
                'XCKRE0BWRHxfP1kZIihgtT+jUjSp2GE8v5ZlhcIhVmA', 
                'YI5Y79K6GXqAUoGP6PNyII8WKlAoel4urDxWSVVOvBw', 
                'ypklj+8GJ15rOIH1lpKQtFJOuK+VdvyCuBPqhY3aoM', 
                'ZSsAZ0Pq+MCqFrnjsRFn5Ua09pMCVaOV9c8ZuYb7XQY']

# take the list of directories and concat them
def loadDataFromFolders(foldersToLoad,inputFolders,fileType = "_transformed"):
    print(len(foldersToLoad), "datasets")
    for i in range(0,len(foldersToLoad)):
        currentFolder = foldersToLoad[i]
        print(i , "-", currentFolder,inputFolders+"/student_"+currentFolder+""+fileType+".csv")
        #print(trainingDataSet[i])
        if(i == 0):
            temp_data = pd.read_csv(inputFolders+"/student_"+currentFolder+""+fileType+".csv")
        else:
            dataset = pd.read_csv(inputFolders+"/student_"+currentFolder+""+fileType+".csv")
            temp_data = pd.concat([temp_data, dataset])
    # return the dataset        
    return temp_data

In [92]:
print("Preparing test data")

# WEEKS_TEST= [3]
# WEEKS_TRAIN = [0,1,2]
# initialize
print("Initializing with first week data")

folderToLoad = baseFolderWeek + str(WEEKS_TEST[0]) + "/"
X_test = loadDataFromFolders(trainFolders,folderToLoad,"_transformed")

X_test2 = loadDataFromFolders(testFolders,baseFolder,"_transformed")

for week_value in WEEKS_TEST:

    if week_value != WEEKS_TEST[0]:
        
        
        print("Loading week:",week_value)
        
        folderToLoad = baseFolderWeek + str(week_value) + "/"

        # test data comprising 25% of the data. It must be fixed to all models being evaluated
        #X_test  = pd.read_csv(inputFolders+"test/allData-classification-numeric-normalized.csv")
        temp_data = loadDataFromFolders(trainFolders,folderToLoad,"_transformed")

        X_test = pd.concat([temp_data, X_test])
        print()
        # undestand the dataset by looking on their infos
        print(X_test.info())

X_test

Preparing test data
Initializing with first week data
27 datasets
0 - 0Jf4TH9Zzse0Z1Jjh7SnTOe2MMzeSnFi7feTnkG6vgs ../data_2019_processed/w3//student_0Jf4TH9Zzse0Z1Jjh7SnTOe2MMzeSnFi7feTnkG6vgs_transformed.csv
1 - 0tdmm6rwW3KquQ73ATYYJ5JkpMtvbppJ0VzA2GExdA ../data_2019_processed/w3//student_0tdmm6rwW3KquQ73ATYYJ5JkpMtvbppJ0VzA2GExdA_transformed.csv
2 - 2cyV53lVyUtlMj0BRwilEWtYJwUiviYoL48cZBPBq0 ../data_2019_processed/w3//student_2cyV53lVyUtlMj0BRwilEWtYJwUiviYoL48cZBPBq0_transformed.csv
3 - 2J22RukYnEbKTk7t+iUVDBkorcyL5NKN6TrLe89ys ../data_2019_processed/w3//student_2J22RukYnEbKTk7t+iUVDBkorcyL5NKN6TrLe89ys_transformed.csv
4 - 7EYF5I04EVqisUJCVNHlqn77UAuOmwL2Dahxd3cA ../data_2019_processed/w3//student_7EYF5I04EVqisUJCVNHlqn77UAuOmwL2Dahxd3cA_transformed.csv
5 - a9Qgj8ENWrHvl9QqlXcIPKmyGMKgbfHk9Dbqon1HQP4 ../data_2019_processed/w3//student_a9Qgj8ENWrHvl9QqlXcIPKmyGMKgbfHk9Dbqon1HQP4_transformed.csv
6 - ae4JJBZDycEcY8McJF+3BxyvZ1619y03BNdCxzpZTc ../data_2019_processed/w3//student_ae4JJBZD

Unnamed: 0,activity,location,timestamp,time_to_next_alarm,sound,proximity,phone_lock,light,day_of_week,minutes_day,timestamp_text,class
0,0.00,0.0,0.000000e+00,0.987401,0.000000,1.0,0.0,0.206366,0.0,0.591383,2018-06-02 14:11:34+00:00,awake
1,0.25,0.5,0.000000e+00,0.987302,0.629973,1.0,0.0,0.497139,0.0,0.592078,2018-06-02 14:12:04+00:00,awake
2,0.00,0.5,3.224502e-07,0.987302,0.629973,1.0,0.0,0.079399,0.0,0.592078,2018-06-02 14:12:34+00:00,awake
3,0.00,0.5,3.224502e-07,0.987202,0.569047,1.0,0.0,0.145207,0.0,0.592773,2018-06-02 14:13:04+00:00,awake
4,0.00,0.5,3.224502e-07,0.987202,0.569047,1.0,0.0,0.151288,0.0,0.592773,2018-06-02 14:13:34+00:00,awake
...,...,...,...,...,...,...,...,...,...,...,...,...
1034,1.00,0.0,5.320428e-04,0.923909,0.577338,1.0,0.0,0.003219,1.0,0.035441,2018-06-05 00:51:21+00:00,awake
1035,1.00,0.0,5.320428e-04,0.923809,0.612109,1.0,0.0,0.002861,1.0,0.036136,2018-06-05 00:52:21+00:00,awake
1036,0.00,0.0,5.323653e-04,0.923611,0.573415,1.0,0.0,0.001431,1.0,0.037526,2018-06-05 00:54:11+00:00,awake
1037,0.75,0.0,5.326878e-04,0.923611,0.538685,1.0,0.0,0.001073,1.0,0.037526,2018-06-05 00:54:41+00:00,awake


In [113]:
X_test2

Unnamed: 0,activity,location,timestamp,time_to_next_alarm,sound,proximity,phone_lock,light,day_of_week,minutes_day,timestamp_text,class
0,0.75,1.0,0.000000e+00,0.000000,0.515992,1.0,0.0,0.000000,1.0,0.678249,2018-05-14 16:16:08+00:00,awake
1,0.25,1.0,3.211282e-07,0.000000,0.542171,0.0,1.0,0.000007,1.0,0.678944,2018-05-14 16:17:39+00:00,awake
2,0.25,1.0,6.422564e-07,0.000000,0.515992,0.0,1.0,0.000000,1.0,0.679639,2018-05-14 16:18:39+00:00,awake
3,0.00,1.0,6.422564e-07,0.000000,0.515992,0.0,1.0,0.000000,1.0,0.680334,2018-05-14 16:19:09+00:00,awake
4,0.25,1.0,6.422564e-07,0.000000,0.531341,0.0,1.0,0.000000,1.0,0.681028,2018-05-14 16:20:09+00:00,awake
...,...,...,...,...,...,...,...,...,...,...,...,...
13208,1.00,0.0,3.981050e-03,0.923909,0.577338,1.0,0.0,0.000266,1.0,0.035441,2018-06-05 00:51:21+00:00,awake
13209,1.00,0.0,3.981050e-03,0.923809,0.612109,1.0,0.0,0.000236,1.0,0.036136,2018-06-05 00:52:21+00:00,awake
13210,0.00,0.0,3.981371e-03,0.923611,0.573415,1.0,0.0,0.000118,1.0,0.037526,2018-06-05 00:54:11+00:00,awake
13211,0.75,0.0,3.981693e-03,0.923611,0.538685,1.0,0.0,0.000089,1.0,0.037526,2018-06-05 00:54:41+00:00,awake


In [93]:
print("Preparing X_train data")
# WEEKS_TEST= [3]
# WEEKS_TRAIN = [0,1,2]
# initialize
print("Initializing with first week data")

folderToLoad = baseFolderWeek + str(WEEKS_TRAIN[0]) + "/"
X_train = loadDataFromFolders(testFolders,folderToLoad,"_transformed")

for week_value in WEEKS_TRAIN:

    if week_value != WEEKS_TRAIN[0]:
        
        
        print("Loading week:",week_value)
        
        folderToLoad = baseFolderWeek + str(week_value) + "/"

        # test data comprising 25% of the data. It must be fixed to all models being evaluated
        #X_test  = pd.read_csv(inputFolders+"test/allData-classification-numeric-normalized.csv")
        temp_data = loadDataFromFolders(trainFolders,folderToLoad,"_transformed")

        X_train = pd.concat([temp_data, X_test])
        print()
        # undestand the dataset by looking on their infos
        print(X_train.info())

X_train

Preparing X_train data
Initializing with first week data
8 datasets
0 - rIl2UK9+bQ+tzpFdbJAdbBxEa5GbgrgC030yEaENLw ../data_2019_processed/w0//student_rIl2UK9+bQ+tzpFdbJAdbBxEa5GbgrgC030yEaENLw_transformed.csv
1 - RoBW3cDOO9wWRMPO2twQff83MPc+OXn6gJ+a1DafreI ../data_2019_processed/w0//student_RoBW3cDOO9wWRMPO2twQff83MPc+OXn6gJ+a1DafreI_transformed.csv
2 - VVpwFNMrEglveh6MDN8lrRzTy5OwzglD4FURfM4A2is ../data_2019_processed/w0//student_VVpwFNMrEglveh6MDN8lrRzTy5OwzglD4FURfM4A2is_transformed.csv
3 - Wa1mcNmbh66S7VS6GIzyfCFMD3SGhbtDQyFP1ywJEsw ../data_2019_processed/w0//student_Wa1mcNmbh66S7VS6GIzyfCFMD3SGhbtDQyFP1ywJEsw_transformed.csv
4 - XCKRE0BWRHxfP1kZIihgtT+jUjSp2GE8v5ZlhcIhVmA ../data_2019_processed/w0//student_XCKRE0BWRHxfP1kZIihgtT+jUjSp2GE8v5ZlhcIhVmA_transformed.csv
5 - YI5Y79K6GXqAUoGP6PNyII8WKlAoel4urDxWSVVOvBw ../data_2019_processed/w0//student_YI5Y79K6GXqAUoGP6PNyII8WKlAoel4urDxWSVVOvBw_transformed.csv
6 - ypklj+8GJ15rOIH1lpKQtFJOuK+VdvyCuBPqhY3aoM ../data_2019_processed/w0//st

16 - pyt24oiDAHsmgWMvkFKz2fn2pwcHiXchd6KchLM ../data_2019_processed/w2//student_pyt24oiDAHsmgWMvkFKz2fn2pwcHiXchd6KchLM_transformed.csv
17 - QUNCATForxzK0HHw46LrGOMWh0eVA8Y5XWEiUXX+cQ ../data_2019_processed/w2//student_QUNCATForxzK0HHw46LrGOMWh0eVA8Y5XWEiUXX+cQ_transformed.csv
18 - rIl2UK9+bQ+tzpFdbJAdbBxEa5GbgrgC030yEaENLw ../data_2019_processed/w2//student_rIl2UK9+bQ+tzpFdbJAdbBxEa5GbgrgC030yEaENLw_transformed.csv
19 - RoBW3cDOO9wWRMPO2twQff83MPc+OXn6gJ+a1DafreI ../data_2019_processed/w2//student_RoBW3cDOO9wWRMPO2twQff83MPc+OXn6gJ+a1DafreI_transformed.csv
20 - SH3kQeyd5volraxw8vOyhlowNqWBPr1IJ9URNXUL4 ../data_2019_processed/w2//student_SH3kQeyd5volraxw8vOyhlowNqWBPr1IJ9URNXUL4_transformed.csv
21 - VVpwFNMrEglveh6MDN8lrRzTy5OwzglD4FURfM4A2is ../data_2019_processed/w2//student_VVpwFNMrEglveh6MDN8lrRzTy5OwzglD4FURfM4A2is_transformed.csv
22 - Wa1mcNmbh66S7VS6GIzyfCFMD3SGhbtDQyFP1ywJEsw ../data_2019_processed/w2//student_Wa1mcNmbh66S7VS6GIzyfCFMD3SGhbtDQyFP1ywJEsw_transformed.csv
23 - XCK

Unnamed: 0,activity,location,timestamp,time_to_next_alarm,sound,proximity,phone_lock,light,day_of_week,minutes_day,timestamp_text,class
0,0.00,0.0,0.000000e+00,0.969841,0.000000,1.0,0.0,0.000207,0.666667,0.714385,2018-05-26 17:08:23+00:00,awake
1,0.25,1.0,0.000000e+00,0.969841,0.616077,1.0,1.0,0.000384,0.666667,0.714385,2018-05-26 17:08:53+00:00,awake
2,0.25,1.0,0.000000e+00,0.969742,0.616077,1.0,1.0,0.000384,0.666667,0.715080,2018-05-26 17:09:23+00:00,awake
3,0.25,1.0,3.219683e-07,0.969643,0.723862,1.0,1.0,0.000384,0.666667,0.715775,2018-05-26 17:10:54+00:00,awake
4,0.25,1.0,3.219683e-07,0.969544,0.723862,1.0,1.0,0.000384,0.666667,0.716470,2018-05-26 17:11:24+00:00,awake
...,...,...,...,...,...,...,...,...,...,...,...,...
1034,1.00,0.0,5.320428e-04,0.923909,0.577338,1.0,0.0,0.003219,1.000000,0.035441,2018-06-05 00:51:21+00:00,awake
1035,1.00,0.0,5.320428e-04,0.923809,0.612109,1.0,0.0,0.002861,1.000000,0.036136,2018-06-05 00:52:21+00:00,awake
1036,0.00,0.0,5.323653e-04,0.923611,0.573415,1.0,0.0,0.001431,1.000000,0.037526,2018-06-05 00:54:11+00:00,awake
1037,0.75,0.0,5.326878e-04,0.923611,0.538685,1.0,0.0,0.001073,1.000000,0.037526,2018-06-05 00:54:41+00:00,awake


In [94]:
#X_train = pd.read_csv(baseFolder+"train/allData-classification-numeric-normalized.csv")
#X_test  = pd.read_csv(baseFolder+"test/allData-classification-numeric-normalized.csv")
#X_train = pd.read_csv(baseFolder+"train/allData-classification-numeric-normalized_balanced_undersample.csv")
#X_test  = pd.read_csv(baseFolder+"test/allData-classification-numeric-normalized_balanced_oversample.csv")

#AA = pd.read_csv(baseFolder+"allData-classification-numeric-normalized.csv")
#X_train, X_test = train_test_split(AA,test_size=0.25)

In [95]:
print(X_train.info())
X_train

<class 'pandas.core.frame.DataFrame'>
Index: 169101 entries, 0 to 1038
Data columns (total 12 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   activity            169101 non-null  float64
 1   location            169101 non-null  float64
 2   timestamp           169101 non-null  float64
 3   time_to_next_alarm  169101 non-null  float64
 4   sound               169101 non-null  float64
 5   proximity           169101 non-null  float64
 6   phone_lock          169101 non-null  float64
 7   light               169101 non-null  float64
 8   day_of_week         169101 non-null  float64
 9   minutes_day         169101 non-null  float64
 10  timestamp_text      169101 non-null  object 
 11  class               169101 non-null  object 
dtypes: float64(10), object(2)
memory usage: 16.8+ MB
None


Unnamed: 0,activity,location,timestamp,time_to_next_alarm,sound,proximity,phone_lock,light,day_of_week,minutes_day,timestamp_text,class
0,0.00,0.0,0.000000e+00,0.969841,0.000000,1.0,0.0,0.000207,0.666667,0.714385,2018-05-26 17:08:23+00:00,awake
1,0.25,1.0,0.000000e+00,0.969841,0.616077,1.0,1.0,0.000384,0.666667,0.714385,2018-05-26 17:08:53+00:00,awake
2,0.25,1.0,0.000000e+00,0.969742,0.616077,1.0,1.0,0.000384,0.666667,0.715080,2018-05-26 17:09:23+00:00,awake
3,0.25,1.0,3.219683e-07,0.969643,0.723862,1.0,1.0,0.000384,0.666667,0.715775,2018-05-26 17:10:54+00:00,awake
4,0.25,1.0,3.219683e-07,0.969544,0.723862,1.0,1.0,0.000384,0.666667,0.716470,2018-05-26 17:11:24+00:00,awake
...,...,...,...,...,...,...,...,...,...,...,...,...
1034,1.00,0.0,5.320428e-04,0.923909,0.577338,1.0,0.0,0.003219,1.000000,0.035441,2018-06-05 00:51:21+00:00,awake
1035,1.00,0.0,5.320428e-04,0.923809,0.612109,1.0,0.0,0.002861,1.000000,0.036136,2018-06-05 00:52:21+00:00,awake
1036,0.00,0.0,5.323653e-04,0.923611,0.573415,1.0,0.0,0.001431,1.000000,0.037526,2018-06-05 00:54:11+00:00,awake
1037,0.75,0.0,5.326878e-04,0.923611,0.538685,1.0,0.0,0.001073,1.000000,0.037526,2018-06-05 00:54:41+00:00,awake


In [114]:
# transform output to one_hot_encoding for the testing dataset
X_test = transform_output_nominal_class_into_one_hot_encoding(X_test)

# transform output to one_hot_encoding for the testing dataset
X_test2 = transform_output_nominal_class_into_one_hot_encoding(X_test2)

# transform output to one_hot_encoding for the testing dataset
X_train = transform_output_nominal_class_into_one_hot_encoding(X_train)


# transforms the input data to float32
X_test = transform_data_type(X_test)

# transforms the input data to float32
X_test2 = transform_data_type(X_test2)

# transforms the input data to float32
X_train = transform_data_type(X_train)

In [97]:
X_train

Unnamed: 0,activity,location,timestamp,time_to_next_alarm,sound,proximity,phone_lock,light,day_of_week,minutes_day,timestamp_text,class,awake,asleep
0,0.00,0.0,0.000000e+00,0.969841,0.000000,1.0,0.0,0.000207,0.666667,0.714385,2018-05-26 17:08:23+00:00,awake,1.0,0.0
1,0.25,1.0,0.000000e+00,0.969841,0.616077,1.0,1.0,0.000384,0.666667,0.714385,2018-05-26 17:08:53+00:00,awake,1.0,0.0
2,0.25,1.0,0.000000e+00,0.969742,0.616077,1.0,1.0,0.000384,0.666667,0.715080,2018-05-26 17:09:23+00:00,awake,1.0,0.0
3,0.25,1.0,3.219683e-07,0.969643,0.723862,1.0,1.0,0.000384,0.666667,0.715775,2018-05-26 17:10:54+00:00,awake,1.0,0.0
4,0.25,1.0,3.219683e-07,0.969544,0.723862,1.0,1.0,0.000384,0.666667,0.716470,2018-05-26 17:11:24+00:00,awake,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1034,1.00,0.0,5.320428e-04,0.923909,0.577338,1.0,0.0,0.003219,1.000000,0.035441,2018-06-05 00:51:21+00:00,awake,1.0,0.0
1035,1.00,0.0,5.320428e-04,0.923810,0.612109,1.0,0.0,0.002861,1.000000,0.036136,2018-06-05 00:52:21+00:00,awake,1.0,0.0
1036,0.00,0.0,5.323653e-04,0.923611,0.573415,1.0,0.0,0.001431,1.000000,0.037526,2018-06-05 00:54:11+00:00,awake,1.0,0.0
1037,0.75,0.0,5.326878e-04,0.923611,0.538685,1.0,0.0,0.001073,1.000000,0.037526,2018-06-05 00:54:41+00:00,awake,1.0,0.0


In [116]:
# selects the data to train and test
X_train_data = pd.DataFrame(data=X_train,columns=inputFeatures)
y_train_data = pd.DataFrame(data=X_train,columns=outputClasses)
# selec test dataset (fixed to all)
X_test_data = pd.DataFrame(data=X_test,columns=inputFeatures)
y_test_data = pd.DataFrame(data=X_test,columns=outputClasses)

# selec test dataset (fixed to all)
X_test2_data = pd.DataFrame(data=X_test2,columns=inputFeatures)
y_test2_data = pd.DataFrame(data=X_test2,columns=outputClasses)

In [99]:
X_test_data

Unnamed: 0,activity,location,day_of_week,light,phone_lock,proximity,sound,time_to_next_alarm,minutes_day
0,0.00,0.0,0.0,0.206366,0.0,1.0,0.000000,0.987401,0.591383
1,0.25,0.5,0.0,0.497139,0.0,1.0,0.629973,0.987302,0.592078
2,0.00,0.5,0.0,0.079399,0.0,1.0,0.629973,0.987302,0.592078
3,0.00,0.5,0.0,0.145207,0.0,1.0,0.569047,0.987202,0.592773
4,0.00,0.5,0.0,0.151288,0.0,1.0,0.569047,0.987202,0.592773
...,...,...,...,...,...,...,...,...,...
1034,1.00,0.0,1.0,0.003219,0.0,1.0,0.577338,0.923909,0.035441
1035,1.00,0.0,1.0,0.002861,0.0,1.0,0.612109,0.923810,0.036136
1036,0.00,0.0,1.0,0.001431,0.0,1.0,0.573415,0.923611,0.037526
1037,0.75,0.0,1.0,0.001073,0.0,1.0,0.538685,0.923611,0.037526


In [112]:
y_test2_data

Unnamed: 0,awake,asleep
0,,
1,,
2,,
3,,
4,,
...,...,...
13208,,
13209,,
13210,,
13211,,


In [100]:
# transtorm data to tensor slices
test_dataset_slice = tf.data.Dataset.from_tensor_slices((X_test_data, y_test_data))
test2_dataset_slice = tf.data.Dataset.from_tensor_slices((X_test2_data, y_test2_data))
train_dataset_slice = tf.data.Dataset.from_tensor_slices((X_train_data, y_train_data))

In [101]:
# batch_size
train_dataset_batch = train_dataset_slice.batch(BATCH_SIZE)

train_dataset_batch

<BatchDataset element_spec=(TensorSpec(shape=(None, 9), dtype=tf.float32, name=None), TensorSpec(shape=(None, 2), dtype=tf.float32, name=None))>

In [102]:
# batch_size
validation_dataset_batch = test_dataset_slice.batch(BATCH_SIZE)

validation_dataset_batch

<BatchDataset element_spec=(TensorSpec(shape=(None, 9), dtype=tf.float32, name=None), TensorSpec(shape=(None, 2), dtype=tf.float32, name=None))>

In [103]:
MAX_EPOCH = max(EPOCHS_ARRAY_TEST)

MAX_EPOCH

100

In [104]:
print("configure checkpoint")
filepath = outputCheckpointFolder + "/" + checkpointName_prefix
checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=VERBOSE, save_best_only=False, mode='auto')
#WARNING:tensorflow:`period` argument is deprecated. Please use `save_freq` to specify the frequency in number of batches seen.


configure checkpoint


In [105]:
#generate model
model = keras.Sequential()
model.add(tf.keras.layers.InputLayer(input_shape=(len(inputFeatures),)))
model.add(tf.keras.layers.Dense(len(inputFeatures)))
model.add(tf.keras.layers.Dense(16, activation=tf.keras.activations.relu))
model.add(tf.keras.layers.Dense(8, activation=tf.keras.activations.relu),)
model.add(tf.keras.layers.Dense(len(outputClasses), activation='softmax'))

# compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=[tf.keras.metrics.CategoricalAccuracy()])
          #loss='binary_crossentropy',loss='categorical_crossentropy',
          #loss='binary_crossentropy',  sparse_categorical_crossentropy 
        
print("input_shape=[", X_train_data.shape[0], X_train_data.shape[1],"]")
print("output shape:",len(outputClasses))
print("Epochs:",MAX_EPOCH)

current_time = datetime.datetime.now()
time_stamp = current_time.timestamp()
print("Start timestamp:", time_stamp,current_time)
print()

verbose, epochs, batch_size = VERBOSE, MAX_EPOCH, BATCH_SIZE  
# fit network
history = model.fit(train_dataset_batch, 
                    epochs=epochs, 
                    verbose=verbose, 
                    batch_size=batch_size, 
                    callbacks=[checkpoint],
                    validation_data=validation_dataset_batch) #, batch_size=batch_size, validation_split=0.1

# generate time metrics
current_time2 = datetime.datetime.now()
time_stamp2 = current_time2.timestamp()
processing_time_s = (time_stamp2-time_stamp)
print("End timestamp:", time_stamp2,current_time2)
print("Processing time (s):", (processing_time_s))
print("Processing time (m):", (processing_time_s/60))
print("Processing time (h):", ((processing_time_s/60)/60))

input_shape=[ 169101 9 ]
output shape: 2
Epochs: 100
Start timestamp: 1709648502.387677 2024-03-05 11:21:42.387677

End timestamp: 1709649195.673509 2024-03-05 11:33:15.673509
Processing time (s): 693.2858319282532
Processing time (m): 11.554763865470886
Processing time (h): 0.1925793977578481


In [106]:
dfhistory = pd.DataFrame(data=history.history) 

print("print all loss, ",len(dfhistory))
history

outputHistoryFilepath = outputCheckpointFolder+"/train_metrics_history.csv"
print("save all loss on ",outputHistoryFilepath)
dfhistory.to_csv(outputHistoryFilepath, sep=',', encoding='utf-8', index=False)

# outputCheckpointFolder
#history.history.to_csv(outputMetricFile, sep=',', encoding='utf-8', index=False)
# loss and val_loss
# acc and val_acc
history.history["loss"]
history.history["categorical_accuracy"]

dfhistory

print all loss,  100
save all loss on  result_week_tested_MLP_unb_batch_size_32_16-8_checkpoints/train_metrics_history.csv


Unnamed: 0,loss,categorical_accuracy,val_loss,val_categorical_accuracy
0,0.375636,0.823366,0.290085,0.878730
1,0.247694,0.884672,0.108220,0.970164
2,0.230282,0.889628,0.080134,0.975938
3,0.207920,0.904903,0.070812,0.977863
4,0.192016,0.916866,0.066241,0.979788
...,...,...,...,...
95,0.052037,0.980858,0.004604,0.999038
96,0.058837,0.980189,0.004291,0.999038
97,0.058390,0.979598,0.004365,1.000000
98,0.057393,0.979793,0.005070,0.999038


In [107]:
columnsOutputMetrics = ['NN_type','units','epochs','batch_size','window_size','time_step_shift',
           'start_time','end_time','time_s','time_m', "train_accuracy", "train_loss", "val_accuracy", "val_loss",
           'class','accuracy','precision','recall','f1_score','cohen_kappa_score','roc_auc_score','confusion_matrix',
           'TP','FP','FN','TN']

allMetrics = []

In [108]:
#generate model
model = keras.Sequential()
model.add(tf.keras.layers.InputLayer(input_shape=(len(inputFeatures),)))
model.add(tf.keras.layers.Dense(len(inputFeatures)))
model.add(tf.keras.layers.Dense(16, activation=tf.keras.activations.relu))
model.add(tf.keras.layers.Dense(8, activation=tf.keras.activations.relu),)
model.add(tf.keras.layers.Dense(len(outputClasses), activation='softmax'))

# compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=[tf.keras.metrics.CategoricalAccuracy()])
          #loss='binary_crossentropy',loss='categorical_crossentropy',
          #loss='binary_crossentropy',  sparse_categorical_crossentropy 

for TEST_EPOCHS in EPOCHS_ARRAY_TEST:
    # general data from the run
    generalData = [NN_type,UNITS_NUMBER,TEST_EPOCHS,BATCH_SIZE,TIME_SERIES_SIZE,TIME_STEP_SHIFT]
    
    print("input_shape=[", X_train_data.shape,"]")
    print("output shape:",len(outputClasses))
    print("Epochs:",TEST_EPOCHS)
    
    current_time = datetime.datetime.now()
    time_stamp = current_time.timestamp()
    print("Start timestamp:", time_stamp,current_time)
    print()
    
    checkPpath = filepath.replace("{epoch}",str(TEST_EPOCHS))
    print("Loading checkpoint: ", checkPpath)
    print('')
    
    model.load_weights(checkPpath)
    # evaluate model
    #accuracy = model.evaluate(test_dataset_series1) # , batch_size=batch_size, verbose=0
    # predict
    yhat_probs = model.predict(X_test_data,verbose=VERBOSE)
    # predict crisp classes for test set deprecated
    
    # generate time metrics
    current_time2 = datetime.datetime.now()
    time_stamp2 = current_time2.timestamp()
    processing_time_s = (time_stamp2-time_stamp)
    
    train_accuracy = history.history["loss"][TEST_EPOCHS-1]
    train_loss     = history.history["categorical_accuracy"][TEST_EPOCHS-1]
    val_acc = history.history["val_loss"][TEST_EPOCHS-1]
    val_loss     = history.history["val_categorical_accuracy"][TEST_EPOCHS-1]
    # generate general metrics
    rowData = [current_time,current_time2,processing_time_s,(processing_time_s)/60,train_accuracy,train_loss,val_acc,val_loss]

    y_pred_labels = pd.DataFrame(data=yhat_probs,columns=['awake','asleep'])
    y_test_labels = pd.DataFrame(data=y_test_data,columns=['awake','asleep'])

    feature_metrics_gathered = []
    # print('')
    print('awake')    
    res,resA = printMetrics(y_test_labels['awake'],y_pred_labels['awake'])
    feature_metrics_gathered.append(res)
   
    #columns = ['NN_type','units','epochs','batch_size','max_iterations',''Users',
    #            round_iteration','start_time','end_time','round_time_s','round_time_m',
    #           'class','accuracy','precision','recall','f1_score','cohen_kappa_score','roc_auc_score','confusion_matrix',
    #           'TP','FP','FN','TN']
    # new data
    classData = np.concatenate((['awake'], resA))
    classData = np.concatenate((rowData, classData))
    classData = np.concatenate((generalData, classData))
    allMetrics.append(classData)
    
    print('')
    print('asleep')
    res,resA = printMetrics(y_test_labels['asleep'],y_pred_labels['asleep'])
    feature_metrics_gathered.append(res)
    # new data
    classData = np.concatenate((['asleep'], resA))
    classData = np.concatenate((rowData, classData))
    classData = np.concatenate((generalData, classData))
    allMetrics.append(classData)
    print('')
    print('Global')
    resA = showGlobalMetrics(feature_metrics_gathered) #return [accuracy,precision,recall,f1_score,cohen_kappa_score,roc_auc_score
    # new data
    classData = np.concatenate((['avg'], resA))
    classData = np.concatenate((rowData, classData))
    classData = np.concatenate((generalData, classData))
    allMetrics.append(classData)
    print('')
    print("End timestamp:", time_stamp2,current_time2)
    print('')
    print('')
    print('-----------------------------------------------------------------------')
    print('')
    print('')

input_shape=[ (169101, 9) ]
output shape: 2
Epochs: 5
Start timestamp: 1709649195.75048 2024-03-05 11:33:15.750480

Loading checkpoint:  result_week_tested_MLP_unb_batch_size_32_16-8_checkpoints/checkpoint_epoch_5.hdf5

awake
Accuracy: 0.979788
Precision: 0.992126
Recall: 0.975232
F1 score: 0.983607
Cohens kappa: 0.957263
ROC AUC: 0.997802
\Confusion Matrix
[[10476   135]
 [  432 17010]]

asleep
Accuracy: 0.979788
Precision: 0.960396
Recall: 0.987277
F1 score: 0.973651
Cohens kappa: 0.957263
ROC AUC: 0.997802
\Confusion Matrix
[[17010   432]
 [  135 10476]]

Global
2
accuracy:  0.9797882579403272
precision:  0.9762610119279644
recall:  0.9812547759159912
f1_score:  0.9786288746734682
cohen_kappa_score:  0.9572628143981166
roc_auc_score:  0.9978020939191266

End timestamp: 1709649196.633136 2024-03-05 11:33:16.633136


-----------------------------------------------------------------------


input_shape=[ (169101, 9) ]
output shape: 2
Epochs: 15
Start timestamp: 1709649196.746924 2024-0

In [109]:
dataMetrics = pd.DataFrame(data=allMetrics,columns=columnsOutputMetrics) 

dataMetrics

Unnamed: 0,NN_type,units,epochs,batch_size,window_size,time_step_shift,start_time,end_time,time_s,time_m,...,precision,recall,f1_score,cohen_kappa_score,roc_auc_score,confusion_matrix,TP,FP,FN,TN
0,MLP,16-8,5,32,-1,-1,2024-03-05 11:33:15.750480,2024-03-05 11:33:16.633136,0.882656,0.014711,...,0.9921259842519684,0.9752321981424148,0.9836065573770492,0.9572628143981166,0.9978020939191266,0.0,10476.0,135.0,432.0,17010.0
1,MLP,16-8,5,32,-1,-1,2024-03-05 11:33:15.750480,2024-03-05 11:33:16.633136,0.882656,0.014711,...,0.9603960396039604,0.9872773536895676,0.973651191969887,0.9572628143981166,0.9978020939191266,0.0,17010.0,432.0,135.0,10476.0
2,MLP,16-8,5,32,-1,-1,2024-03-05 11:33:15.750480,2024-03-05 11:33:16.633136,0.882656,0.014711,...,0.9762610119279644,0.9812547759159912,0.9786288746734682,0.9572628143981166,0.9978020939191266,,,,,
3,MLP,16-8,15,32,-1,-1,2024-03-05 11:33:16.746924,2024-03-05 11:33:17.487717,0.740793,0.012347,...,0.9953271028037384,0.9891640866873064,0.9922360248447204,0.9795781181206366,0.9997557882132364,0.0,10530.0,81.0,189.0,17253.0
4,MLP,16-8,15,32,-1,-1,2024-03-05 11:33:16.746924,2024-03-05 11:33:17.487717,0.740793,0.012347,...,0.982367758186398,0.9923664122137404,0.9873417721518988,0.9795781181206366,0.9997557882132364,0.0,17253.0,189.0,81.0,10530.0
5,MLP,16-8,15,32,-1,-1,2024-03-05 11:33:16.746924,2024-03-05 11:33:17.487717,0.740793,0.012347,...,0.9888474304950682,0.9907652494505236,0.9897888984983096,0.9795781181206366,0.9997557882132364,,,,,
6,MLP,16-8,30,32,-1,-1,2024-03-05 11:33:17.598204,2024-03-05 11:33:18.337473,0.739269,0.012321,...,0.9953703703703703,0.9984520123839008,0.9969088098918084,0.99180680137999,0.9998857719061912,0.0,10530.0,81.0,27.0,17415.0
7,MLP,16-8,30,32,-1,-1,2024-03-05 11:33:17.598204,2024-03-05 11:33:18.337473,0.739269,0.012321,...,0.9974424552429668,0.9923664122137404,0.9948979591836734,0.99180680137999,0.9998857719061912,0.0,17415.0,27.0,81.0,10530.0
8,MLP,16-8,30,32,-1,-1,2024-03-05 11:33:17.598204,2024-03-05 11:33:18.337473,0.739269,0.012321,...,0.9964064128066684,0.9954092122988208,0.9959033845377409,0.99180680137999,0.9998857719061912,,,,,
9,MLP,16-8,50,32,-1,-1,2024-03-05 11:33:18.444257,2024-03-05 11:33:19.185512,0.741255,0.012354,...,1.0,1.0,1.0,1.0,1.0,0.0,10611.0,0.0,0.0,17442.0


In [110]:
dataMetrics.to_csv(outputMetricFile, sep=',', encoding='utf-8', index=False)

In [117]:
#generate model
model = keras.Sequential()
model.add(tf.keras.layers.InputLayer(input_shape=(len(inputFeatures),)))
model.add(tf.keras.layers.Dense(len(inputFeatures)))
model.add(tf.keras.layers.Dense(16, activation=tf.keras.activations.relu))
model.add(tf.keras.layers.Dense(8, activation=tf.keras.activations.relu),)
model.add(tf.keras.layers.Dense(len(outputClasses), activation='softmax'))

# compile model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=[tf.keras.metrics.CategoricalAccuracy()])
          #loss='binary_crossentropy',loss='categorical_crossentropy',
          #loss='binary_crossentropy',  sparse_categorical_crossentropy 

for TEST_EPOCHS in EPOCHS_ARRAY_TEST:
    # general data from the run
    generalData = [NN_type,UNITS_NUMBER,TEST_EPOCHS,BATCH_SIZE,TIME_SERIES_SIZE,TIME_STEP_SHIFT]
    
    print("input_shape=[", X_train_data.shape,"]")
    print("output shape:",len(outputClasses))
    print("Epochs:",TEST_EPOCHS)
    
    current_time = datetime.datetime.now()
    time_stamp = current_time.timestamp()
    print("Start timestamp:", time_stamp,current_time)
    print()
    
    checkPpath = filepath.replace("{epoch}",str(TEST_EPOCHS))
    print("Loading checkpoint: ", checkPpath)
    print('')
    
    model.load_weights(checkPpath)
    # evaluate model
    #accuracy = model.evaluate(test_dataset_series1) # , batch_size=batch_size, verbose=0
    # predict
    yhat_probs = model.predict(X_test2_data,verbose=VERBOSE)
    # predict crisp classes for test set deprecated
    
    # generate time metrics
    current_time2 = datetime.datetime.now()
    time_stamp2 = current_time2.timestamp()
    processing_time_s = (time_stamp2-time_stamp)
    
    train_accuracy = history.history["loss"][TEST_EPOCHS-1]
    train_loss     = history.history["categorical_accuracy"][TEST_EPOCHS-1]
    val_acc = history.history["val_loss"][TEST_EPOCHS-1]
    val_loss     = history.history["val_categorical_accuracy"][TEST_EPOCHS-1]
    # generate general metrics
    rowData = [current_time,current_time2,processing_time_s,(processing_time_s)/60,train_accuracy,train_loss,val_acc,val_loss]

    y_pred_labels = pd.DataFrame(data=yhat_probs,columns=['awake','asleep'])
    y_test_labels = pd.DataFrame(data=y_test2_data,columns=['awake','asleep'])

    feature_metrics_gathered = []
    # print('')
    print('awake')    
    res,resA = printMetrics(y_test_labels['awake'],y_pred_labels['awake'])
    feature_metrics_gathered.append(res)
   
    #columns = ['NN_type','units','epochs','batch_size','max_iterations',''Users',
    #            round_iteration','start_time','end_time','round_time_s','round_time_m',
    #           'class','accuracy','precision','recall','f1_score','cohen_kappa_score','roc_auc_score','confusion_matrix',
    #           'TP','FP','FN','TN']
    # new data
    classData = np.concatenate((['awake'], resA))
    classData = np.concatenate((rowData, classData))
    classData = np.concatenate((generalData, classData))
    allMetrics.append(classData)
    
    print('')
    print('asleep')
    res,resA = printMetrics(y_test_labels['asleep'],y_pred_labels['asleep'])
    feature_metrics_gathered.append(res)
    # new data
    classData = np.concatenate((['asleep'], resA))
    classData = np.concatenate((rowData, classData))
    classData = np.concatenate((generalData, classData))
    allMetrics.append(classData)
    print('')
    print('Global')
    resA = showGlobalMetrics(feature_metrics_gathered) #return [accuracy,precision,recall,f1_score,cohen_kappa_score,roc_auc_score
    # new data
    classData = np.concatenate((['avg'], resA))
    classData = np.concatenate((rowData, classData))
    classData = np.concatenate((generalData, classData))
    allMetrics.append(classData)
    print('')
    print("End timestamp:", time_stamp2,current_time2)
    print('')
    print('')
    print('-----------------------------------------------------------------------')
    print('')
    print('')

input_shape=[ (169101, 9) ]
output shape: 2
Epochs: 5
Start timestamp: 1709650236.09589 2024-03-05 11:50:36.095890

Loading checkpoint:  result_week_tested_MLP_unb_batch_size_32_16-8_checkpoints/checkpoint_epoch_5.hdf5

awake
Accuracy: 0.568990
Precision: 0.830853
Recall: 0.556377
F1 score: 0.666461
Cohens kappa: 0.120362
ROC AUC: 0.602845
\Confusion Matrix
[[15305  9696]
 [37975 47627]]

asleep
Accuracy: 0.568990
Precision: 0.287256
Recall: 0.612176
F1 score: 0.391027
Cohens kappa: 0.120362
ROC AUC: 0.602863
\Confusion Matrix
[[47627 37975]
 [ 9696 15305]]

Global
2
accuracy:  0.5689899912298944
precision:  0.5590546205910567
recall:  0.5842763502141861
f1_score:  0.528744313856802
cohen_kappa_score:  0.12036210236030198
roc_auc_score:  0.6028541857554688

End timestamp: 1709650238.958575 2024-03-05 11:50:38.958575


-----------------------------------------------------------------------


input_shape=[ (169101, 9) ]
output shape: 2
Epochs: 15
Start timestamp: 1709650239.357919 2024-0

In [None]:
dataMetrics = pd.DataFrame(data=allMetrics,columns=columnsOutputMetrics) 

dataMetrics

In [None]:
dataMetrics.to_csv(outputMetricFile+"2", sep=',', encoding='utf-8', index=False)

In [None]:
model.get_weights()

In [None]:
model_json = model.to_json()
model_json

In [None]:
def save_weights_only(weights, filename):
    np.savez(filename, *weights)

def load_weights_only(filename):
    return np.load(filename, allow_pickle=True)

# Save only the weights
weights = model.get_weights()
save_weights_only(weights, 'model_weights.npz')

In [None]:
# Load the weights
loaded_weights = load_weights_only('model_weights.npz')
loaded_weights


In [None]:
weights

In [None]:
loaded_weights

In [None]:
len(loaded_weights)

In [None]:
keys = loaded_weights.keys()
keys
arrays = []
for x in keys:
    print(x)
    
    arrays.append(loaded_weights[x])

arrays

In [None]:
model2 = keras.Sequential()
model2.add(tf.keras.layers.InputLayer(input_shape=(len(inputFeatures),)))
model2.add(tf.keras.layers.Dense(len(inputFeatures)))
model2.add(tf.keras.layers.Dense(16, activation=tf.keras.activations.relu))
model2.add(tf.keras.layers.Dense(8, activation=tf.keras.activations.relu),)
model2.add(tf.keras.layers.Dense(len(outputClasses), activation='softmax'))

# compile model
model2.compile(loss='categorical_crossentropy', optimizer='adam', metrics=[tf.keras.metrics.CategoricalAccuracy()])
          #loss='binary_crossentropy',loss='categorical_crossentropy',
          #loss='binary_crossentropy',  sparse_categorical_crossentropy 

In [None]:
model2.set_weights(arrays)

In [None]:
model2