In [15]:
import numpy as np
import matplotlib.pyplot as plt
#import time
import json
import tensorflow as tf
#tf.compat.v1.disable_eager_execution()
#tf.compat.v1.Session()
import keras
import keras_metrics
import random

import import_ipynb
from reading_splitting_dataset_functions import *
from __future__ import print_function
from tensorflow.keras.optimizers import SGD, RMSprop, Adam
from Kreuzval import StratifiedGroupKFold

In [16]:
### Hier AUSWAHL TREFFEN!

n_zuschnitt = 1500  # von den Über 30000 Messreihen werden nur n_zuschnitt -viele benutzt
skip_zeit = 2  # In der Zeitreihe wird nur jede skip_zeit+1 -te Messung benutzt.

# Parameter für Kreuzvalidierung
n_splits = 2

# Parameter, die durchgetestet werden sollen
liste_n_epochs = [5, 20, 60]
liste_learning_rates = [0.01, 0.3, 1]
liste_dropout_rates = [0, 0.5]
liste_batch_sizes = [8, 32]
liste_units_vecs = [[50,20],[50,40,30,20]]

In [17]:
data_roi=open_js_file('data_preprocessed_roi.JSON')
print('length ROI:', len(data_roi))

length ROI: 33676


In [18]:
df_roi, fid_roi, v_roi, lva_roi, lha_roi = get_acceleration_fid_v_labels(data_roi)

In [19]:
# Hier wird der Datensatz kleiner gemacht, damit es noch in vernünftiger Zeit läuft

zuschnitt = random.sample(range(len(data_roi)), n_zuschnitt)
zeiten = range(0, df_roi.shape[1], skip_zeit+1)
df_roi = df_roi[zuschnitt, :, :]
df_roi = df_roi[:, zeiten, :]
fid_roi = fid_roi[zuschnitt]
v_roi = v_roi[zuschnitt]
lva_roi = lva_roi[zuschnitt]
lha_roi = lha_roi[zuschnitt]
l_roi = labels_roi(lva_roi, lha_roi)

In [20]:
#amount splits into test and trainingsdata such every fid was exactly once in testdata
#with n_splits->
cv = StratifiedGroupKFold(n_splits)

In [21]:
# Für die Gittersuche muss das Modellerstellen abstrahiert werden:

def create_model(units_vec, input_dim, dropout_rate, learning_rate):
    ### units_vec = [4,6,2,6] heißt, dass es 4 versteckte Schichten mit der jeweiligen Neuronenanzahl gibt
    ### input_dim ist die Länge einer Messreihe (also z.B. 512*3 = ...)
    
    model = tf.keras.models.Sequential()
    
    # Versteckte Schichten (inkl. Dropouts) anhängen
    for i in range(len(units_vec)):
        n_units = units_vec[i]
        # Für die erste Schicht wird Eingabegröße festgelegt, für die anderen nicht:
        if i == 0:
            model.add(tf.keras.layers.Dense(input_dim = input_dim, units = n_units))
        else:
            model.add(tf.keras.layers.Dense(n_units))
            
        # nach jeder versteckten Schicht kommt ein Dropout
        model.add(tf.keras.layers.Dropout(dropout_rate))
    
    # Ausgabeschicht
    model.add(tf.keras.layers.Dense(4))
    
    # Hyperparameter einfügen
    loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
    model.compile(optimizer = tf.keras.optimizers.RMSprop(learning_rate=learning_rate),
        loss=loss_fn,
        metrics=['accuracy'])
    
    return model

In [22]:
# Das trainieren und Testen wird auch abstrahiert

def test_mean_acc(units_vec, splits, liste_n_epochs = [30], dropout_rate = 0.0, learning_rate = 0.001, batch_size = 32):
    ### Gibt errechnete durchschnittliche Validation-Accuracy aus.
    ### Und zwar zu den Epochen aus liste_n_epochs
    ### Der Durchschnitt wird über die unterschiedlichen splits genommen
    n_splits = len(splits)
    accuracies = np.zeros([len(liste_n_epochs), n_splits])
    i_split = 0
    for train_idxs, test_idxs in splits:
        x_train, y_train, x_test, y_test = bring_in_right_shape_self(df_roi[train_idxs], l_roi[train_idxs], df_roi[test_idxs], l_roi[test_idxs])
        model = create_model(units_vec,
                            input_dim = x_train.shape[1],
                            dropout_rate = dropout_rate,
                            learning_rate = learning_rate)
        
        H = model.fit(x_train, y_train,
                      validation_data = (x_test, y_test),
                      epochs = np.max(liste_n_epochs),
                      batch_size = batch_size,
                      callbacks = None
                     )
        
        accs = np.asarray(H.history["val_accuracy"])
        accuracies[:, i_split] = accs[(np.asarray(liste_n_epochs)-1).astype(int)]
        i_split = i_split + 1
        
    # Über unterschiedliche Splits das Mittel bilden
    return np.mean(accuracies, 1) 

    

In [23]:
splits =  cv.split(df_roi, l_roi, fid_roi)
splits = list(splits) # Generatoren sind schlecht wiederverwendbar. Deshalb Liste draus machen.

# Abkürzungen
len_n_ep = len(liste_n_epochs)
len_lr = len(liste_learning_rates)
len_dr = len(liste_dropout_rates)
len_bs = len(liste_batch_sizes)
len_uv = len(liste_units_vecs)


mean_accuracies = np.zeros([len_n_ep, len_lr, len_dr, len_bs, len_uv])

# Schleife über die Epochen hinweg muss nicht gemacht werden.
# Es reicht, einmal bis zum maximalen zu gehen und die Zwischenzeiten rauszulesen.
for i_lr in range(len_lr):
    for i_dr in range(len_dr):
        for i_bs in range(len_bs):
            for i_uv in range(len_uv):
                print("i_lr: ", i_lr, "/", len_lr-1)
                print("   i_dr:", i_dr, "/", len_dr-1)
                print("        i_dr:", i_bs, "/", len_bs-1)
                print("             i_uv:", i_uv, "/", len_uv-1)
                mean_accuracies[:, i_lr, i_dr, i_bs, i_uv] = test_mean_acc(units_vec = liste_units_vecs[i_uv],
                                                                     splits = splits,
                                                                     liste_n_epochs = liste_n_epochs,
                                                                     learning_rate = liste_learning_rates[i_lr],
                                                                     dropout_rate = liste_dropout_rates[i_dr],
                                                                     batch_size = liste_batch_sizes[i_bs]
                                                                    )

i_lr:  0 / 2
   i_dr: 0 / 1
        i_dr: 0 / 1
             i_uv: 0 / 1
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/

In [24]:
print("Im Folgenden werden für die Epochen ", liste_n_epochs, " und die angegebenen Hyperparameter die Accuracies ausgegeben")
print(" ")

print("Die Accuracies dafür sind:")
for i_lr in range(len_lr):
    for i_dr in range(len_dr):
        for i_bs in range(len_bs):
            for i_uv in range(len_uv):
                print("learning_rate = ", liste_learning_rates[i_lr],
                      "  dropout_rate = ", liste_dropout_rates[i_dr],
                     "  batch_size = ", liste_batch_sizes[i_bs],
                     "  units-vec = ", liste_units_vecs[i_uv])
                print(mean_accuracies[:, i_lr, i_dr, i_bs, i_uv])

Im Folgenden werden für die Epochen  [5, 20, 60]  und die angegebenen Hyperparameter die Accuracies ausgegeben
 
Die Accuracies dafür sind:
learning_rate =  0.01   dropout_rate =  0   batch_size =  8   units-vec =  [50, 20]
[0.5459474  0.51006415 0.54608074]
learning_rate =  0.01   dropout_rate =  0   batch_size =  8   units-vec =  [50, 40, 30, 20]
[0.49048784 0.50715744 0.55527022]
learning_rate =  0.01   dropout_rate =  0   batch_size =  32   units-vec =  [50, 20]
[0.4767783  0.56330237 0.56398767]
learning_rate =  0.01   dropout_rate =  0   batch_size =  32   units-vec =  [50, 40, 30, 20]
[0.51542427 0.56661707 0.5368166 ]
learning_rate =  0.01   dropout_rate =  0.5   batch_size =  8   units-vec =  [50, 20]
[0.52461371 0.53800061 0.54935813]
learning_rate =  0.01   dropout_rate =  0.5   batch_size =  8   units-vec =  [50, 40, 30, 20]
[0.51870695 0.48118103 0.47910899]
learning_rate =  0.01   dropout_rate =  0.5   batch_size =  32   units-vec =  [50, 20]
[0.54665408 0.55522221 0.5566