<a href="https://colab.research.google.com/github/basolu/Basolu-Machine-Learning/blob/main/main_pynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [28]:
from numpy import mean
from numpy import std
from numpy import dstack
from pandas import read_csv
from matplotlib import pyplot
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from tensorflow.keras.utils import to_categorical
from keras.callbacks import TensorBoard
from keras.models import model_from_json
import numpy as np
import os
from numpy import save
from math import*
from sklearn.metrics import classification_report
from keras.models import Model

train = int(input("Run training? (1/0): "))
print(train)

def euclidean_distance(x,y):
    return sqrt(sum(pow(a-b,2) for a, b in zip(x, y)))

def manhattan_distance(x,y):
    return sum(abs(a-b) for a,b in zip(x,y))

def nth_root(value, n_root): 
    root_value = 1/float(n_root)
    return round (Decimal(value) ** Decimal(root_value),3)
 
def minkowski_distance(x,y,p_value):
    return nth_root(sum(pow(abs(a-b),p_value) for a,b in zip(x, y)),p_value)

def square_rooted(x):
    return round(sqrt(sum([a*a for a in x])),3)
 
def cosine_similarity(x,y):
    numerator = sum(a*b for a,b in zip(x,y))
    denominator = square_rooted(x)*square_rooted(y)
    return round(numerator/float(denominator),3)

# load a single file as a numpy array
def load_file(filepath):
	dataframe = read_csv(filepath, header=None, delim_whitespace=True)
	return dataframe.values

# load a list of files and return as a 3d numpy array
def load_group(filenames, prefix=''):
	loaded = list()
	for name in filenames:
		data = load_file(prefix + name)
		loaded.append(data)
	# stack group so that features are the 3rd dimension
	loaded = dstack(loaded)
	return loaded

# load a dataset group, such as train or test
def load_dataset_group(group, prefix=''):
  filepath = prefix + group + '/Inertial Signals/'
  # load all 9 files as a single array
  filenames = list()
  # total acceleration
  filenames += ['total_acc_x_'+group+'.txt', 'total_acc_y_'+group+'.txt', 'total_acc_z_'+group+'.txt']
  # body acceleration
  filenames += ['body_acc_x_'+group+'.txt', 'body_acc_y_'+group+'.txt', 'body_acc_z_'+group+'.txt']
  # body gyroscope
  filenames += ['body_gyro_x_'+group+'.txt', 'body_gyro_y_'+group+'.txt', 'body_gyro_z_'+group+'.txt']
  # load input data
  X = load_group(filenames, filepath)
  # load class output
  y = load_file(prefix + group + '/y_'+group+'.txt')
  return X, y

# load the dataset, returns train and test X and y elements
def load_dataset(prefix=''):
  # load all train
  trainX, trainy = load_dataset_group('train', prefix + 'drive/MyDrive/HARDataset/')
  print(trainX.shape, trainy.shape)
  # load all test
  testX, testy = load_dataset_group('test', prefix + 'drive/MyDrive/HARDataset/')
  subjects = read_csv('drive/MyDrive/HARDataset/train/subject_train.txt', header=None, delim_whitespace=True)
  print(testX.shape, testy.shape)
  # zero-offset class values
  trainy = trainy - 1
  testy = testy - 1
  # one hot encode y
  trainy = to_categorical(trainy)
  testy = to_categorical(testy)
  print(trainX.shape, trainy.shape, testX.shape, testy.shape)
  return trainX, trainy, testX, testy

# fit and evaluate a model
def evaluate_model(trainX, trainy, testX, testy):
  verbose, epochs, batch_size = 1, 15, 32
  n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], trainy.shape[1]
  model = Sequential()
  model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps,n_features)))
  model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
  model.add(Dropout(0.5))
  model.add(MaxPooling1D(pool_size=2))
  #classificazione delle features estratte: 
  model.add(Flatten())
  #numero di neuroni (100) = numero di classi delle attività da individuare
  model.add(Dense(100, activation='relu')) 
  extract = Model(model.inputs, model.layers[-3].output)
  features = extract.predict(testX)
  save('features_marco.txt', features)
  model.add(Dense(n_outputs, activation='softmax')) 
  tensorboard = TensorBoard(log_dir='./logs', histogram_freq=0, write_graph=True, write_images=False)
  model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
  # fit network
  model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose, callbacks=[tensorboard], validation_data=(testX, testy))
  model_json = model.to_json()
  with open("model.json", "w") as json_file:
    json_file.write(model_json)
  # serialize weights to HDF5
  model.save_weights("model.h5")
  print("Saved model to disk")
  # evaluate model
  _, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
  return accuracy

# summarize scores
def summarize_results(scores):
  print(scores)
  m, s = mean(scores), std(scores)
  print('Accuracy: %.3f%% (+/-%.3f)' % (m, s))

#-----------------------------------------------------------------------------

def new_classification_report(loaded_model, testX, testy, features):
  print("Start classification report")
  features_print = read_csv('drive/MyDrive/HARDataset/activity_labels.txt', header=None, delim_whitespace=True)
  testX = testX.reshape(len(testX),128,9)
  result = (loaded_model.predict(testX))
  list_result = list()
  list_test = list()
  columns = 6
  for r in range(0,len(testy)): 
    index = 0
    max = 0
    for t in range(0,columns):
      prov = float(result[r][t])
      if(prov > max):
        max = prov
        index = t
      if(testy[r][t] == 1):
        list_test.append(t+1)
    list_result.append(index+1)
  print(classification_report(list_test, list_result, target_names=features_print[1]))
  print("End classification_report")

def new_predict(loaded_model, testX, testy):
  print("Start predict")
  features_print = read_csv('drive/MyDrive/HARDataset/activity_labels.txt', header=None, delim_whitespace=True)
  request = 1
  while(request > 0):
    request = int(input("Inserire l'indice della misurazione da riconoscere (0 per uscire): "))
    target = testX[request]
    target = target.reshape(1,128,9)
    testX = testX.reshape(len(testX),128,9)
    result = (loaded_model.predict(target))
    #result = result.reshape(len(testX),6,1)
    columns = 6
    max = 0
    index = 0
    for t in range(0,columns):
      prov = result[0][t]
      if(prov > max):
        max = prov
        index = t
    print(request, end=' ')
    print(features_print[1][index], end=' ')
    print("({})" .format(index+1))      
    print("Correct: ", testy[request])
  print("Fine prediction")

def new_evaluate_fdistance(loaded_model, testX, testy, features):
  print("Start evaluation of the features distances")
  features_print = read_csv('drive/MyDrive/HARDataset/activity_labels.txt', header=None, delim_whitespace=True)
 # print("Summary", loaded_model.summary())
  extract = Model(loaded_model.inputs, loaded_model.layers[-2].output)
  features = extract.predict(testX)
  print(features.shape)
  #features = features.reshape(features.shape[0], (features.shape[1] * features.shape[2]))
  #print("Features: ", features[r])
  #print("Features shape: ", features.shape) #(2947, 3968) --> 3968=128x31 con 31 n° di soggeti
  '''print("Start distance calculation (input -1 to end)")
  x = ddd
  while(1):
    x = int(input("Firts features: "))
    if (x == -1): return
    y = int(input("Second features: "))
    print("Euclide:",euclidean_distance(features[x], features[y]))
    print("Manhattan:",manhattan_distance(features[x], features[y]))
    print("Cosine:", cosine_similarity(features[x], features[y]))
  '''
  tot_corrects = 0
  find_corrects = 0
  cos_prov = 0
  for r in range(0, (features.shape[0]-1)):
    for t in range(0, (features.shape[0]-1)):
      if (sum(testy[r]*testy[t])):
        tot_corrects += 1
        cos_prov = cosine_similarity(features[r], features[t])
        if (cos_prov >= 0.8): find_corrects += 1
  print("Total corrects:", tot_corrects)
  print("Corrects found:", find_corrects)
  print("% accuracy (with cos>0.8):", (find_corrects/tot_corrects)*100)

  #print(sum(testy[6]*testy[80]))

  '''layers[-2] sembra essere quello più accurato, un po troppo "ottimista",
    attività completamente diverse di soggetti diversi restituiscono una 
    distanza cosenica mai inferiore allo 0,3
    Anche il layer[-5] risulta abbastanza accurato, diminuendo la precisone
    quando si analizzano le features di livello più alto
  '''

def predict_prova(): 
  mode = int(input("Classification_report(2), Evaluate_fdistance(1) or predict(0)? "))
  trainX, trainy, testX, testy = load_dataset()
  features_print = read_csv('drive/MyDrive/HARDataset/activity_labels.txt', header=None, delim_whitespace=True)
  json_file = open('drive/MyDrive/HARDataset/model.json', 'r')
  loaded_model_json = json_file.read()
  json_file.close()
  loaded_model = model_from_json(loaded_model_json)
  # load weights into new model
  loaded_model.load_weights("drive/MyDrive/HARDataset/model.h5")
  print("Loaded model from disk")
  loaded_model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
  _, accuracy = loaded_model.evaluate(testX, testy, batch_size=32, verbose=1)
  print("Accuracy: ",accuracy)
  extract = Model(loaded_model.inputs, loaded_model.layers[-3].output)
  features = extract.predict(testX)
  if (mode == 2): 
    new_classification_report(loaded_model, testX, testy, features)  
  elif (mode == 1):
    print("Start ")
    new_evaluate_fdistance(loaded_model, testX, testy, features)
  elif (mode == 0):
    #effetua la predict delle misurazioni, riconoscendo l'azione relativa, 
    #tramite gli indici di tali misurazioni
    new_predict(loaded_model, testX, testy)
  
#------------------------------------------------------------------------------

# run an experiment
def run_experiment(repeats=10):
  print("Start experiment")
  # load data
  trainX, trainy, testX, testy = load_dataset()
  # repeat experiment
  scores = list()
  for r in range(repeats):
    score = evaluate_model(trainX, trainy, testX, testy)
    score = score * 100.0
    print('>#%d: %.3f' % (r+1, score))
    scores.append(score)
	  # summarize results
    summarize_results(scores)

# run the experiment
if(train == 1):
   run_experiment()
predict_prova()



Run training? (1/0): 0
0
Classification_report(2), Evaluate_fdistance(1) or predict(0)? 1
(7352, 128, 9) (7352, 1)
(2947, 128, 9) (2947, 1)
(7352, 128, 9) (7352, 6) (2947, 128, 9) (2947, 6)
Loaded model from disk
Accuracy:  0.8995589017868042
Start 
Start evaluation of the features distances
(2947, 100)
Total corrects: 1455790
Corrects found: 1250190
% accuracy (with cos>0.8): 85.87708392007089


In [None]:

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
!pip install --upgrade tensorflow
!pip install --upgrade tensorflow-gpu

Collecting tensorflow
[?25l  Downloading https://files.pythonhosted.org/packages/aa/fd/993aa1333eb54d9f000863fe8ec61e41d12eb833dea51484c76c038718b5/tensorflow-2.5.0-cp37-cp37m-manylinux2010_x86_64.whl (454.3MB)
[K     |████████████████████████████████| 454.3MB 26kB/s 
Collecting tensorflow-estimator<2.6.0,>=2.5.0rc0
[?25l  Downloading https://files.pythonhosted.org/packages/ec/78/b27f73e923becc6e79e18fe112cf75e3200d1ee35b0dba8fa46181bce56c/tensorflow_estimator-2.5.0-py2.py3-none-any.whl (462kB)
[K     |████████████████████████████████| 471kB 27.3MB/s 
[?25hCollecting gast==0.4.0
  Downloading https://files.pythonhosted.org/packages/b6/48/583c032b79ae5b3daa02225a675aeb673e58d2cb698e78510feceb11958c/gast-0.4.0-py3-none-any.whl
Collecting tensorboard~=2.5
[?25l  Downloading https://files.pythonhosted.org/packages/44/f5/7feea02a3fb54d5db827ac4b822a7ba8933826b36de21880518250b8733a/tensorboard-2.5.0-py3-none-any.whl (6.0MB)
[K     |████████████████████████████████| 6.0MB 31.1MB/s 
Ins

Collecting tensorflow-gpu
[?25l  Downloading https://files.pythonhosted.org/packages/1d/a2/5ccf0a418eb22e0a2ae9edc1e7f5456d0a4b8b49524572897564b4030a9b/tensorflow_gpu-2.5.0-cp37-cp37m-manylinux2010_x86_64.whl (454.3MB)
[K     |████████████████████████████████| 454.3MB 36kB/s 
Installing collected packages: tensorflow-gpu
Successfully installed tensorflow-gpu-2.5.0


In [None]:
%load_ext tensorboard
%tensorboard --logdir logs