In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Conv1D, GlobalAveragePooling1D, Dropout, Dense, BatchNormalization, ReLU
from tensorflow.keras.models import Model
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, auc, precision_recall_curve
from tensorflow.keras.layers import Input, Permute
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.utils.class_weight import compute_class_weight
#from tensorflow.keras import backend as K

#####
from utils.data_extraction import download_zip, extract_zip
from utils.read_data import tsv_to_csv
from utils.train_model import train_lstm_fcn
from utils.save_best import save_best_result
from utils.finetune import fine_tune_model
from utils.move import move_files
#from load_data import load_ucr_dataset
#from build_model import build_lstm_fcn,build_alstm_fcn
#from ALSTM import AttentionLSTM, AttentionLSTMCell
#####

In [2]:
##########
# You can skip the FOLLOWING steps (download, extract, convert) if you already have
### a "data" folder containing the csv files ###
# and proceed directly to loading the dataset names and training.
#########

In [2]:
# Download the UCR Time Series Archive (2018) zip via the project helper.
# To skip this step, download the zip manually from
# https://www.cs.ucr.edu/~eamonn/time_series_data_2018/UCRArchive_2018.zip
# and save it as UCRArchive_2018.zip (the file name the extraction step expects).
download_zip(URL = "https://www.cs.ucr.edu/%7Eeamonn/time_series_data_2018/UCRArchive_2018.zip")

File downloaded successfully as UCRArchive_2018.zip


In [2]:
# Extract the password-protected archive; this may take 5-10 minutes.
# To skip this step, extract the zip manually into a folder named "UCRArchive_2018"
# under the current location.
# The password to the zip is "someone".
# NOTE(review): the password is hard-coded here; presumably it is the publicly
# documented archive password rather than a secret -- confirm.
extract_zip(zip_file_path = "UCRArchive_2018.zip", pwd = "someone")

Files extracted successfully!


In [5]:
# Convert every .tsv file found under "UCRArchive_2018" into a .csv file and
# store the results in a folder named "data" (e.g. data/ECG5000_TRAIN.csv).
tsv_to_csv(input_folder = "UCRArchive_2018", output_folder = "data")

Converted: UCRArchive_2018/ECG5000/ECG5000_TRAIN.tsv to data/ECG5000_TRAIN.csv
Converted: UCRArchive_2018/ECG5000/ECG5000_TEST.tsv to data/ECG5000_TEST.csv
Converted: UCRArchive_2018/SonyAIBORobotSurface2/SonyAIBORobotSurface2_TEST.tsv to data/SonyAIBORobotSurface2_TEST.csv
Converted: UCRArchive_2018/SonyAIBORobotSurface2/SonyAIBORobotSurface2_TRAIN.tsv to data/SonyAIBORobotSurface2_TRAIN.csv
Converted: UCRArchive_2018/InlineSkate/InlineSkate_TRAIN.tsv to data/InlineSkate_TRAIN.csv
Converted: UCRArchive_2018/InlineSkate/InlineSkate_TEST.tsv to data/InlineSkate_TEST.csv
Converted: UCRArchive_2018/OliveOil/OliveOil_TEST.tsv to data/OliveOil_TEST.csv
Converted: UCRArchive_2018/OliveOil/OliveOil_TRAIN.tsv to data/OliveOil_TRAIN.csv
Converted: UCRArchive_2018/Earthquakes/Earthquakes_TEST.tsv to data/Earthquakes_TEST.csv
Converted: UCRArchive_2018/Earthquakes/Earthquakes_TRAIN.tsv to data/Earthquakes_TRAIN.csv
Converted: UCRArchive_2018/ShapesAll/ShapesAll_TRAIN.tsv to data/ShapesAll_TRAIN.c

In [3]:
##########
# you can skip the ABOVE steps if you already have
# UCRArchive_2018.zip, UCRArchive_2018 folder, data folder containing csv 
# and directly proceed to load dataset name and train data
#########

In [2]:
# Collect the names of all datasets (128 in the full archive) that have a
# "<name>_TRAIN.csv" file in the data folder, sorted alphabetically ignoring case.
ucr_dir = './data'  # Path to UCR dataset directory
# rsplit('_', 1) removes only the trailing "_TRAIN.csv", so a dataset whose own
# name contains an underscore is kept intact instead of being truncated at the
# first "_" (which is what split('_')[0] would do).
dataset_names = sorted(
    (f.rsplit('_', 1)[0] for f in os.listdir(ucr_dir) if f.endswith('_TRAIN.csv')),
    key=str.lower,
)
print(dataset_names)

['ACSF1', 'Adiac', 'AllGestureWiimoteX', 'AllGestureWiimoteY', 'AllGestureWiimoteZ', 'ArrowHead', 'Beef', 'BeetleFly', 'BirdChicken', 'BME', 'Car', 'CBF', 'Chinatown', 'ChlorineConcentration', 'CinCECGTorso', 'Coffee', 'Computers', 'CricketX', 'CricketY', 'CricketZ', 'Crop', 'DiatomSizeReduction', 'DistalPhalanxOutlineAgeGroup', 'DistalPhalanxOutlineCorrect', 'DistalPhalanxTW', 'DodgerLoopDay', 'DodgerLoopGame', 'DodgerLoopWeekend', 'Earthquakes', 'ECG200', 'ECG5000', 'ECGFiveDays', 'ElectricDevices', 'EOGHorizontalSignal', 'EOGVerticalSignal', 'EthanolLevel', 'FaceAll', 'FaceFour', 'FacesUCR', 'FiftyWords', 'Fish', 'FordA', 'FordB', 'FreezerRegularTrain', 'FreezerSmallTrain', 'Fungi', 'GestureMidAirD1', 'GestureMidAirD2', 'GestureMidAirD3', 'GesturePebbleZ1', 'GesturePebbleZ2', 'GunPoint', 'GunPointAgeSpan', 'GunPointMaleVersusFemale', 'GunPointOldVersusYoung', 'Ham', 'HandOutlines', 'Haptics', 'Herring', 'HouseTwenty', 'InlineSkate', 'InsectEPGRegularTrain', 'InsectEPGSmallTrain', 'I

In [None]:
# 1st train: hyperparameter search -- for each dataset, train one model per
# candidate number of cells for 200 epochs and keep the best cell count.
# NOTE (from the original author): train_lstm_fcn uses load_ucr_dataset(),
# build_lstm_fcn() and build_alstm_fcn() in it.

# parameters used for training
num_cells_range = [8, 64, 128]  # candidate numbers of cells to try per dataset
model_name = 'LSTM' #LSTM or ALSTM
ucr_dir = "./data"  #data folder location
epochs = 200  # short run: this round only selects the cell count
batch_size = 128
file_path = f"{model_name}_train_best_cell.csv"  # csv that collects the best result per dataset

In [3]:
##### NOTE: for your convenience, our output csv is already saved in a safe location.
##### This code also writes .keras models, but they are not useful on their own
##### because this round only searches hyperparameters, so we did not upload them.
##### Running the cell creates a csv and a models folder to hold the output.
##### A keras model is saved as soon as one dataset is done training;
##### the csv is only complete once all 128 datasets have been processed.
##### Expect a long wait for the csv: about 90 mins for LSTM and 120 mins for ALSTM :<

# Per-dataset summary of the search, keyed by dataset name.
all_results = {}
for dataset_name in dataset_names:
    print(f"Training on dataset: {dataset_name}")

    # Try every candidate in num_cells_range on this dataset.
    (best_num_cells, best_accuracy, best_loss,
     results, best_model_paths) = train_lstm_fcn(
        model_name,
        dataset_name,
        ucr_dir,
        num_cells_range,
        epochs=epochs,
        batch_size=batch_size,
        verbose=0,
    )

    all_results[dataset_name] = dict(
        dataset_name=dataset_name,
        best_num_cells=best_num_cells,
        accuracy=best_accuracy,
        loss=best_loss,
        best_model_paths=best_model_paths,
    )

    # Persist this dataset's best result right away so a partial run still leaves a usable csv.
    save_best_result(file_path, all_results[dataset_name])

Training on dataset: ACSF1
Training ACSF1 with num_cells = 8
dname = ACSF1, num_cells = 8, Test Accuracy = 0.3300, Test Loss = 2.0267
Training ACSF1 with num_cells = 64
dname = ACSF1, num_cells = 64, Test Accuracy = 0.3500, Test Loss = 1.7491
Training ACSF1 with num_cells = 128
dname = ACSF1, num_cells = 128, Test Accuracy = 0.1700, Test Loss = 2.4931
BEST: dname = ACSF1, Best num_cells: 64, Test Accuracy: 0.3500, Test Loss: 1.7491
ACSF1 best result appended successfully to LSTM_train_best_cell.csv
Training on dataset: Adiac
Training Adiac with num_cells = 8
dname = Adiac, num_cells = 8, Test Accuracy = 0.2353, Test Loss = 4.5505
Training Adiac with num_cells = 64
dname = Adiac, num_cells = 64, Test Accuracy = 0.1688, Test Loss = 7.7038
Training Adiac with num_cells = 128
dname = Adiac, num_cells = 128, Test Accuracy = 0.2916, Test Loss = 3.8689
BEST: dname = Adiac, Best num_cells: 128, Test Accuracy: 0.2916, Test Loss: 3.8689
Adiac best result appended successfully to LSTM_train_best_

In [42]:
# Move the models produced by this round of training (the hyperparameter search)
# into their own folder to make space for the next round of training.
current_path = "./models"
to_path = "./models/hyper_search"

move_files(current_path,to_path)

Moved: LSTM__TwoLeadECG__8.keras
Moved: LSTM__FiftyWords__8.keras
Moved: LSTM__Haptics__64.keras
Moved: LSTM__DiatomSizeReduction__128.keras
Moved: LSTM__PLAID__128.keras
Moved: LSTM__MixedShapesRegularTrain__128.keras
Moved: LSTM__Worms__64.keras
Moved: LSTM__EOGVerticalSignal__128.keras
Moved: LSTM__GunPoint__64.keras
Moved: LSTM__ShapesAll__8.keras
Moved: LSTM__InlineSkate__8.keras
Moved: LSTM__CinCECGTorso__128.keras
Moved: LSTM__EOGHorizontalSignal__8.keras
Moved: LSTM__SonyAIBORobotSurface2__64.keras
Moved: LSTM__UMD__8.keras
Moved: LSTM__GestureMidAirD3__64.keras
Moved: LSTM__Yoga__8.keras
Moved: LSTM__ToeSegmentation2__128.keras
Moved: LSTM__Fungi__8.keras
Moved: LSTM__BirdChicken__64.keras
Moved: LSTM__Adiac__8.keras
Moved: LSTM__Mallat__64.keras
Moved: LSTM__Symbols__8.keras
Moved: LSTM__PigAirwayPressure__8.keras
Moved: LSTM__MixedShapesSmallTrain__128.keras
Moved: LSTM__Plane__8.keras
Moved: LSTM__InsectEPGSmallTrain__8.keras
Moved: LSTM__ProximalPhalanxOutlineCorrect__8.ke

In [None]:
# 2nd train: for each dataset, train the model with its best cell number for 2000 epochs

# training parameters
model_name = 'LSTM' #LSTM or ALSTM
epochs = 2000
batch_size = 128
ucr_dir = "./data" #data folder location
# csv to save accuracy after this round of training; the "epoches" spelling in the
# file name is kept on purpose because the fine-tuning cell reads this exact name
file_path = f"{model_name}_train_2000_epoches.csv"

In [None]:
##### ATTENTION!!! The 128 models trained in the step below ↓ are too large to upload to GitHub,
##### ATTENTION!!! so we only provide the model for the Beef dataset for your reference.
##### ATTENTION!!! You can try training on any of the 128 datasets;
##### ATTENTION!!! we believe you will get results similar to those shown in our report!

In [44]:
##### NOTE: for your convenience, our output csv and models are already saved in a safe location.
##### Running the cell creates a csv and a models folder to hold the output (different models and csv!).
##### A keras model is saved as soon as one dataset is done training;
##### the csv is only complete once all 128 datasets have been processed.
##### Expect a long wait for the csv: about 180 mins for LSTM and 300 mins for ALSTM :<

# Load the record of the best number of cells per dataset from the 200-epoch search.
# You need to finish the 1st train above to get this csv,
# or you can point the path at our saved csv for the 1st train.
cell_record = pd.read_csv(f"{model_name}_train_best_cell.csv",encoding="UTF-8")

# Per-dataset summary of the long training run, keyed by dataset name.
all_results_2000 = {}
# iterate through the recorded datasets and train each with its best cell count
for i in range(cell_record.shape[0]):
    # Only the first two columns (dataset name, best cell count) are needed here;
    # the remaining columns of the search record are not used by this round.
    dataset_name, num_cell = cell_record.iloc[i, :2].values
    print(dataset_name, num_cell)
    print(f"Training on dataset: {dataset_name}")
    # Pass the single cell count as a one-element list directly instead of
    # overwriting the notebook-level `num_cells_range`: clobbering it would make
    # a later re-run of the 1st search cell silently try only one cell count.
    best_num_cells, best_accuracy, best_loss, results, best_model_paths = train_lstm_fcn(
        model_name,
        dataset_name,
        ucr_dir,
        [num_cell],
        epochs=epochs,
        batch_size=batch_size,
        verbose=0,
    )

    all_results_2000[dataset_name] = {
        "dataset_name" : dataset_name,
        "best_num_cells" : best_num_cells,
        "accuracy": best_accuracy,
        "loss": best_loss,
        "best_model_paths":best_model_paths
    }

    # append best result to a csv once the current dataset is done
    save_best_result(file_path, all_results_2000[dataset_name])

ShapesAll 128
Training on dataset: ShapesAll
Training ShapesAll with num_cells = 128
dname = ShapesAll, num_cells = 128, Test Accuracy = 0.7683, Test Loss = 1.0846
BEST: dname = ShapesAll, Best num_cells: 128, Test Accuracy: 0.7683, Test Loss: 1.0846
ShapesAll best result appended successfully to LSTM_train_2000_epoches.csv
SmallKitchenAppliances 8
Training on dataset: SmallKitchenAppliances
Training SmallKitchenAppliances with num_cells = 8
dname = SmallKitchenAppliances, num_cells = 8, Test Accuracy = 0.3520, Test Loss = 1.0929
BEST: dname = SmallKitchenAppliances, Best num_cells: 8, Test Accuracy: 0.3520, Test Loss: 1.0929
SmallKitchenAppliances best result appended successfully to LSTM_train_2000_epoches.csv
SmoothSubspace 8
Training on dataset: SmoothSubspace
Training SmoothSubspace with num_cells = 8
dname = SmoothSubspace, num_cells = 8, Test Accuracy = 0.9000, Test Loss = 0.2320
BEST: dname = SmoothSubspace, Best num_cells: 8, Test Accuracy: 0.9000, Test Loss: 0.2320
SmoothSubs

In [None]:
# 3rd training: fine-tuning according to the paper
# NOTE (from the original author): the fine-tuning helper (imported as
# fine_tune_model) uses load_ucr_dataset() in it.


# parameters used for training
model_name = "LSTM" #LSTM or ALSTM
ucr_dir = "./data"
ini_lr = 1e-3  # initial learning rate handed to fine_tune_model
epochs = 50  # passed as epochs_per_iteration in the fine-tuning cell
ini_batch_size = 128  # initial batch size handed to fine_tune_model
k = 5 # number of fine-tuning iterations per dataset
file_path = f"{model_name}__finetune.csv" # csv to save accuracy after this round of training

In [None]:
##### ATTENTION!!! The 128 models trained in the step below ↓ are too large to upload to GitHub,
##### ATTENTION!!! so we only provide the model for the Beef dataset for your reference.
##### ATTENTION!!! You can try training on any of the 128 datasets;
##### ATTENTION!!! we believe you will get results similar to those shown in our report!

In [3]:
##### NOTE: for your convenience, our output csv and models are already saved in a safe location.
##### Running the cell creates a csv and a models folder to hold the output (different models and csv!).
##### A keras model is saved as soon as one dataset is done training;
##### the csv is only complete once all 128 datasets have been processed.
##### Expect a long wait for the csv: about 60 mins for LSTM and 90 mins for ALSTM :<

# Record of the trained model for each dataset.
# You need to finish the 2nd train above to get this csv,
# or you can point the path at our saved csv for the 2nd train.
trained_model = pd.read_csv( f"{model_name}_train_2000_epoches.csv",encoding = "UTF-8")

# Per-dataset summary of the fine-tuning round, keyed by dataset name.
all_results_finetune = {}
for i in range(len(trained_model)):
    # Each csv row carries: dataset name, cell count, accuracy, loss, saved model path.
    (dataset_name, num_cell, accuracy, loss, model_path) = trained_model.iloc[i].values
    print(f"Training on dataset: {dataset_name}")

    # Do not move the 2nd-train .keras models before fine-tuning has finished,
    # otherwise the helper loses its input model.
    # Per the original author: the fine-tuned models are saved directly under
    # ./models/finetune, one per iteration (k per dataset); only one of them is
    # the best, and the csv records which iteration that was.
    (best_iteration, best_accuracy,
     best_loss, best_model_paths) = fine_tune_model(
        model_name,
        dataset_name,
        model_path,
        ucr_dir,
        initial_lr=ini_lr,
        initial_batch_size=ini_batch_size,
        k=k,
        epochs_per_iteration=epochs,
        verbose=0,
    )

    all_results_finetune[dataset_name] = dict(
        dataset_name=dataset_name,
        best_iteration=best_iteration,
        accuracy=best_accuracy,
        loss=best_loss,
        best_model_paths=best_model_paths,
    )

    # Persist this dataset's best result as soon as it is available.
    save_best_result(file_path, all_results_finetune[dataset_name])

Training on dataset: ACSF1
Loaded model from ./models/LSTM__ACSF1__64.keras
Fine-tuning iteration 1/5 for dataset ACSF1
Iteration 1 - Validation Loss: 2.4199, Validation Accuracy: 0.2100
Fine-tuning iteration 2/5 for dataset ACSF1
Iteration 2 - Validation Loss: 2.8996, Validation Accuracy: 0.1800
Fine-tuning iteration 3/5 for dataset ACSF1
Iteration 3 - Validation Loss: 2.3052, Validation Accuracy: 0.2800
Fine-tuning iteration 4/5 for dataset ACSF1
Iteration 4 - Validation Loss: 3.4439, Validation Accuracy: 0.3700
Fine-tuning iteration 5/5 for dataset ACSF1
Iteration 5 - Validation Loss: 6.1096, Validation Accuracy: 0.3400
BEST: dname = ACSF1, Best model appears at iteration: 4, Val Accuracy: 0.3700, Val Loss: 3.4439
ACSF1 best result appended successfully to LSTM__finetune.csv
Training on dataset: Adiac
Loaded model from ./models/LSTM__Adiac__128.keras
Fine-tuning iteration 1/5 for dataset Adiac
Iteration 1 - Validation Loss: 7.8620, Validation Accuracy: 0.1100
Fine-tuning iteration 2

In [None]:
# After fine-tuning has finished, move the 2nd-train models into their own
# folder to keep the models directory tidy.
current_path = "./models"
to_path = "./models/train"

move_files(current_path,to_path)

In [None]:
# You may be looking for plots!
# We don't have plots for this project :D
# Since we train on 128 datasets, we cannot output a training-accuracy curve for every dataset.
# For tables of the best number of cells and the accuracies, please see the README or the reports.
# Alternatively, run pd.read_csv(csv_path, encoding = "UTF-8") to look at the raw data.
# Good luck, goodbye, and happy holidays / New Year!