In [11]:
# demonstration of calculating metrics for a neural network model using sklearn
from sklearn.datasets import make_circles
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import confusion_matrix

# TensorFlow and tf.keras
import tensorflow as tf
from tensorflow import keras
import tensorflow_addons as tfa
import pandas as pd
# Helper libraries
import numpy as np
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter

from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler
from collections import Counter

print(tf.__version__)

2.10.0


In [12]:
# configs
EPOCHS = 30        # passed as `epochs` to model.fit in the training cell
BATCH_SIZE = 32    # passed as `batch_size` to model.fit in the training cell
VERBOSE = 0        # NOTE(review): not referenced by any cell visible in this notebook
SEED = 42          # single seed shared by Python, NumPy and TensorFlow

# Training is stochastic (weight initialisation, batch shuffling); seeding all
# three random number generators up front makes a Restart & Run All repeatable.
tf.keras.utils.set_random_seed(SEED)

# Root folder of the processed dataset; the train/ and test/ CSV paths are appended to it.
baseFolder = "../data_2019_processed/"

In [13]:
def printMetrics(y_test,yhat_probs):
    """Print the evaluation metrics for one output class and return them.

    Parameters
    ----------
    y_test : array-like
        Array with the real (ground-truth) values.
    yhat_probs : array-like exposing ``.round()``
        Array with the predicted values. They are rounded to obtain crisp
        class labels for every metric except ROC AUC, which receives the
        unrounded values.

    Returns
    -------
    dict
        Keys ``accuracy``, ``precision``, ``recall``, ``f1_score``,
        ``cohen_kappa_score`` and ``roc_auc_score``. The confusion matrix is
        printed but not included in the returned dict.
    """
    # Crisp class labels: round each predicted value to the nearest integer.
    yhat_classes = yhat_probs.round()

    # accuracy: (tp + tn) / (p + n)
    accuracy = accuracy_score(y_test, yhat_classes)
    print('Accuracy: %f' % accuracy)
    # precision: tp / (tp + fp)
    precision = precision_score(y_test, yhat_classes)
    print('Precision: %f' % precision)
    # recall: tp / (tp + fn)
    recall = recall_score(y_test, yhat_classes)
    print('Recall: %f' % recall)
    # f1: 2 tp / (2 tp + fp + fn)
    f1 = f1_score(y_test, yhat_classes)
    print('F1 score: %f' % f1)
    # kappa
    kappa = cohen_kappa_score(y_test, yhat_classes)
    print('Cohens kappa: %f' % kappa)
    # ROC AUC is computed from the unrounded predictions, not the crisp labels.
    auc = roc_auc_score(y_test, yhat_probs)
    print('ROC AUC: %f' % auc)

    # confusion matrix
    # The heading previously used "\C", which is not a valid escape sequence:
    # it printed a literal backslash and triggers an invalid-escape warning.
    # A leading newline is what the other separators in this notebook use.
    print("\nConfusion Matrix")
    matrix = confusion_matrix(y_test, yhat_classes)
    print(matrix)

    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'cohen_kappa_score': kappa,
        'roc_auc_score': auc,
    }

def showGlobalMetrics(metrics):
    """Print the arithmetic mean of each metric over a list of result dicts.

    Parameters
    ----------
    metrics : list of dict
        Each dict must provide the keys ``accuracy``, ``precision``,
        ``recall``, ``f1_score``, ``cohen_kappa_score`` and ``roc_auc_score``
        (the shape returned by ``printMetrics``).

    Prints the number of dicts first, then one ``name:  mean`` line per
    metric. Returns nothing. An empty list raises ``ZeroDivisionError`` after
    the count has been printed.
    """
    metric_names = (
        'accuracy',
        'precision',
        'recall',
        'f1_score',
        'cohen_kappa_score',
        'roc_auc_score',
    )

    # Running totals, accumulated in the order the entries are given.
    totals = {name: 0 for name in metric_names}
    for entry in metrics:
        for name in metric_names:
            totals[name] = totals[name] + entry[name]

    # mean
    count = len(metrics)
    print(count)
    averages = {name: totals[name] / count for name in metric_names}

    # show: every label is the metric key followed by ": "
    for name in metric_names:
        print(name + ": ", averages[name])
    
def transform_data_type(dataframe, input_columns=None, output_columns=None):
    """Cast the feature and target columns of ``dataframe`` to ``float32``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame to convert. It is modified in place and also returned.
    input_columns : iterable of str, optional
        Feature columns to cast. Defaults to the notebook-level
        ``inputFeatures`` list, which is defined in a later cell; pass the
        columns explicitly to avoid depending on that cell having been run.
    output_columns : iterable of str, optional
        Target columns to cast. Defaults to the notebook-level
        ``outputClasses`` list (same caveat as above).

    Returns
    -------
    pandas.DataFrame
        The same ``dataframe`` object, with the selected columns as float32.
    """
    # Fall back to the notebook globals only when the caller gave nothing,
    # which keeps the original one-argument call working unchanged.
    if input_columns is None:
        input_columns = inputFeatures
    if output_columns is None:
        output_columns = outputClasses

    # transform inputs, then outputs (same order as before)
    for column in [*input_columns, *output_columns]:
        dataframe[column] = dataframe[column].astype('float32')

    return dataframe

def transform_output_nominal_class_into_one_hot_encoding(dataset):
    """Add one-hot ``awake`` / ``asleep`` columns derived from a nominal ``class`` column.

    The ``class`` column is expected to hold the labels ``'awake'`` and
    ``'asleep'``; a label that never occurs in the data raises ``KeyError``.
    ``dataset`` is modified in place and returned.
    """
    # One indicator column per distinct label found in 'class'.
    indicators = pd.get_dummies(dataset['class'])

    # Copy the two indicators of interest back under the same names.
    for label in ('awake', 'asleep'):
        dataset[label] = indicators[label]

    return dataset

def transform_output_numerical_class_into_one_hot_encoding(dataset):
    """Add one-hot ``awake`` / ``asleep`` columns derived from a numeric ``class`` column.

    Class code ``0`` is written to ``awake`` and class code ``1`` to
    ``asleep``; a code that never occurs in the data raises ``KeyError``.
    ``dataset`` is modified in place and returned.
    """
    # One indicator column per distinct code found in 'class'.
    indicators = pd.get_dummies(dataset['class'])

    # Map each numeric class code to its named target column.
    for target_column, class_code in (('awake', 0), ('asleep', 1)):
        dataset[target_column] = indicators[class_code]

    return dataset

In [14]:
# Load the train and test CSV files from baseFolder.
# The commented-out reads below are alternative training/test files that are
# currently disabled; only one training variant is active at a time.
#X_train = pd.read_csv(baseFolder+"train/allData-classification-numeric-normalized.csv")
X_test  = pd.read_csv(baseFolder+"test/allData-classification-numeric-normalized.csv")
#X_train_under = pd.read_csv(baseFolder+"train/allData-classification-numeric-normalized_balanced_undersample.csv")
#X_train_over = pd.read_csv(baseFolder+"train/allData-classification-numeric-normalized_balanced_oversample.csv")
# Active training file (file name indicates SMOTE oversampling).
X_train_over = pd.read_csv(baseFolder+"train/allData-classification-numeric-normalized_balanced_oversample_smote.csv")
#X_test  = pd.read_csv(baseFolder+"test/allData-classification-numeric-normalized_balanced_undersample.csv")
# X_train is another name for the same DataFrame object as X_train_over (no copy is made).
X_train = X_train_over
#AA = pd.read_csv(baseFolder+"allData-classification-numeric-normalized.csv")

# Disabled alternatives: random split of a single file, or previously saved temporary splits.
#X_train, X_test = train_test_split(AA,test_size=0.25)
#X_train = pd.read_csv("train_temp.csv")
#X_test = pd.read_csv("test_temp.csv")

In [15]:
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() appended a stray "None" line.
X_train.info()
# Last expression of the cell: rich (truncated) display of the training frame.
X_train

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 578534 entries, 0 to 578533
Data columns (total 10 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   activity            578534 non-null  float64
 1   location            578534 non-null  float64
 2   day_of_week         578534 non-null  float64
 3   light               578534 non-null  float64
 4   phone_lock          578534 non-null  float64
 5   proximity           578534 non-null  float64
 6   sound               578534 non-null  float64
 7   time_to_next_alarm  578534 non-null  float64
 8   minutes_day         578534 non-null  float64
 9   class               578534 non-null  object 
dtypes: float64(9), object(1)
memory usage: 44.1+ MB
None


Unnamed: 0,activity,location,day_of_week,light,phone_lock,proximity,sound,time_to_next_alarm,minutes_day,class
0,0.00,0.0,0.000000,0.000175,0.0,1.0,0.000000,0.982143,0.597637,awake
1,0.00,0.0,0.000000,0.000175,0.0,1.0,0.000000,0.982143,0.597637,awake
2,0.25,0.5,0.000000,0.000165,0.0,1.0,0.604408,0.982044,0.598332,awake
3,0.25,0.5,0.000000,0.001449,0.0,1.0,0.604408,0.982044,0.598332,awake
4,0.25,0.5,0.000000,0.000198,0.0,1.0,0.601849,0.981944,0.599027,awake
...,...,...,...,...,...,...,...,...,...,...
578529,0.25,0.0,0.333333,0.000000,0.0,1.0,0.745277,0.029066,0.213355,asleep
578530,0.25,1.0,1.000000,0.000015,1.0,0.0,0.651539,0.000269,0.521304,asleep
578531,0.25,1.0,0.833333,0.000003,0.0,1.0,0.568262,0.962894,0.649083,asleep
578532,0.25,1.0,0.500000,0.000000,0.0,0.0,0.520989,0.000000,0.667252,asleep


In [16]:
# Create the two one-hot target columns ('awake', 'asleep') from the single 'class' column.
# The helper adds the columns to the frame it is given and returns that same frame.

# transform output to one_hot_encoding for the testing dataset
X_test = transform_output_nominal_class_into_one_hot_encoding(X_test)

# transform output to one_hot_encoding for the training dataset
X_train = transform_output_nominal_class_into_one_hot_encoding(X_train)

# (disabled) transform output to one_hot_encoding for the undersampled training dataset
#X_train_under = transform_output_nominal_class_into_one_hot_encoding(X_train_under)

# (disabled) transform output to one_hot_encoding for the oversampled training dataset
#X_train_over = transform_output_nominal_class_into_one_hot_encoding(X_train_over)

In [20]:
# Column names noted by the author: activity, location, timestamp, day_of_week, light,
# phone_lock, proximity, sound, time_to_next_alarm, minutes_day.
# NOTE(review): 'timestamp' is listed here but is neither in the loaded frame shown above
# nor in inputFeatures.
# Feature columns fed to the network; their count sets the input layer width.
inputFeatures = ["activity","location","day_of_week","light","phone_lock","proximity","sound","time_to_next_alarm", "minutes_day"]
# Disabled alternative feature set without "time_to_next_alarm".
#inputFeatures = ["activity","location","day_of_week","light","phone_lock","proximity","sound", "minutes_day"]

# One-hot target columns; their count sets the output layer width.
outputClasses = ["awake","asleep"]



def create_keras_model(inputFeatures,outputClasses):
    """Build (without compiling) a small fully connected softmax classifier.

    Parameters
    ----------
    inputFeatures : sized collection
        Only its length is used: it sets the input width and the width of the
        first Dense layer.
    outputClasses : sized collection
        Only its length is used: it sets the number of softmax outputs.

    Returns
    -------
    tf.keras.models.Sequential
        Input -> Dense(n_inputs) -> Dense(16, relu) -> Dense(12, relu)
        -> Dense(n_outputs, softmax).
    """
    n_inputs = len(inputFeatures)
    n_outputs = len(outputClasses)
    relu = tf.keras.activations.relu
    dense = tf.keras.layers.Dense

    # Layers are created in the same order they are stacked.
    stack = [
        tf.keras.layers.InputLayer(input_shape=(n_inputs,)),
        dense(n_inputs),  # no activation argument is passed to this layer
        dense(16, activation=relu),
        dense(12, activation=relu),
        dense(n_outputs, activation='softmax'),
    ]
    return tf.keras.models.Sequential(stack)

In [21]:
print("--------------")
print("--------------")
print("Initializing Oversample SMOTE test")
print("--------------")
print("Batch Size: ",BATCH_SIZE)
print("Epochs: ",EPOCHS)
print("Input/Features:",len(inputFeatures))
print("Outputs:",len(outputClasses))
print("--------------")

# select the feature columns and the one-hot target columns for training
X_train_data = pd.DataFrame(data=X_train,columns=inputFeatures)
y_train_data = pd.DataFrame(data=X_train,columns=outputClasses)
# select the same columns from the test dataset (the whole test set is used)
X_test_data = pd.DataFrame(data=X_test,columns=inputFeatures)
y_test_data = pd.DataFrame(data=X_test,columns=outputClasses)

# instantiate the model
model = create_keras_model(inputFeatures,outputClasses)

# compile the model: two one-hot outputs with a softmax head, hence categorical cross-entropy
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=[tf.keras.metrics.CategoricalAccuracy()])

# Keep the History object returned by fit(). It was previously taken from
# compile(), which returns None, so the later `history_.history` access failed
# with AttributeError.
history_ = model.fit(X_train_data, y_train_data, epochs=EPOCHS,batch_size=BATCH_SIZE)

# loss and metric values on the held-out test set, as a dict
results = model.evaluate(X_test_data, y_test_data, return_dict=True)

print(results)

--------------
--------------
Initializing Oversample SMOTE test
--------------
Batch Size:  32
Epochs:  30
Input/Features: 9
Outputs: 2
--------------
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30

KeyboardInterrupt: 

In [None]:
# Build a table from the values stored in history_.history.
# assumes history_ is the History object returned by model.fit — TODO confirm
loss_dataframe = pd.DataFrame(history_.history)
print("\n--------------")

# Last expression of the cell: display the table.
loss_dataframe

In [None]:
# Predicted values for the test set, one column per output class.
yhat_probs = model.predict(X_test_data)
# dataset: wrap the predictions so each class can be addressed by name
probss = pd.DataFrame(data=yhat_probs,columns=['awake','asleep'])

print("\n--------------\n")

# Collect one metrics dict per class, in the order awake, asleep.
test = []
for class_name in ('awake', 'asleep'):
    print('')
    print(class_name)
    res = printMetrics(y_test_data[class_name], probss[class_name])
    test.append(res)

# Mean of every metric over the two per-class results.
print('')
print('Global')
showGlobalMetrics(test)