In [1]:
# Importing necessary python packages

import pandas as pd
import numpy as np
import random
import pickle
import os

# Reproducibility: seed Python's built-in `random` module and NumPy's global
# RNG with the same fixed value.
seed_value = 42
random.seed(seed_value)
np.random.seed(seed_value)

In [2]:
# Notebook configuration: input files, split ratio and dataset metadata

# Input files (tab-separated); bare file names, so they are resolved relative
# to the current working directory.
TRAIN_DATA = "FordA_TRAIN.tsv"
TEST_DATA = "FordA_TEST.tsv"
DATASET_NAME = "Keras_Ford"

# Fraction of the training rows held out for validation,
# according to https://keras.io/examples/timeseries/timeseries_classification_from_scratch/
VAL_RATIO = 0.2

# Saved under the "fs" key of the output dictionary.
# NOTE(review): presumably the sampling frequency of the signals -- confirm.
FS = 1

In [3]:
# Loading dataset
# Both files are read as header-less, tab-separated tables. Column 0 is taken
# as the label vector and all remaining columns as the feature matrix.
FordA_TRAIN = pd.read_csv(TRAIN_DATA, sep="\t", header=None)
FordA_TEST = pd.read_csv(TEST_DATA, sep="\t", header=None)

y_train_original, X_train_original = (
    np.array(FordA_TRAIN[0]),
    np.array(FordA_TRAIN)[:, 1:],
)
y_test, X_test = (
    np.array(FordA_TEST[0]),
    np.array(FordA_TEST)[:, 1:],
)

# Shuffling data, creating val samples
#
# A dedicated generator seeded with `seed_value` is used instead of NumPy's
# global RNG. The split therefore no longer depends on how many random draws
# happened earlier in the kernel: re-running this cell always reproduces the
# same indices. A RandomState seeded with `seed_value` yields the same stream
# as the global RNG right after np.random.seed(seed_value), so the result of a
# fresh "Restart & Run All" is unchanged as long as nothing else consumed the
# global RNG before this point.
split_rng = np.random.RandomState(seed_value)

idx = split_rng.permutation(len(X_train_original))

# Number of rows held out for validation (fraction truncated towards zero).
num_val_samples = int(VAL_RATIO * len(idx))

# The first `num_val_samples` shuffled indices form the validation set, the
# remainder the training set, so the two sets are disjoint.
idx_val = idx[:num_val_samples]
idx_train = idx[num_val_samples:]

# Splitting data into training (which will be data augmented) and
# validation (which will be kept for model validation)
X_train = X_train_original[idx_train]
y_train = y_train_original[idx_train]

X_val = X_train_original[idx_val]
y_val = y_train_original[idx_val]

# The test set is only re-ordered; samples and labels share one permutation so
# they stay aligned.
idx_test = split_rng.permutation(len(X_test))
X_test = X_test[idx_test]
y_test = y_test[idx_test]



# Processing labels
# Remap the -1 class to 0 in place for every split; all other label values
# are left untouched.
for label_array in (y_train, y_val, y_test):
    label_array[label_array == -1] = 0



# Create output dict

# Short free-text notes about the dataset. They are defined here but are not
# stored in dict_train_test_db by this cell.
dataset_description = "Keras Ford A dataset"
dataset_preprocessing = """Check https://www.cs.ucr.edu/~eamonn/time_series_data/ and https://keras.io/examples/timeseries/timeseries_classification_from_scratch/"""

# Everything that gets serialized: the three data splits with their labels,
# the validation ratio used for the split, the FS constant and the
# human-readable provenance texts.
dict_train_test_db = {
    "X_train": X_train,
    "y_train": y_train,
    "X_test": X_test,
    "y_test": y_test,
    "X_val": X_val,
    "y_val": y_val,
    "val_ratio": VAL_RATIO,
    "fs": FS,
    "dataset_description": """
This dataset [1] was used by Keras in tutorial [2] for time series classification. As per the Keras tutorial page, the dataset 
has measurements of sensors installed in a Ford car engine and was collected by Ford in a competition to diagnose and
predict engine failures. The signals have 500 measurements and are divided into two classes. The negative class (here
transformed to 0) represent observations corresponding to normal engine behavior. The positive (here, 1) class are 
from an engine presenting failure. 
""",
    "links": """
[1] Chen, Y., Keogh, E., Hu, B., Begum, N., Bagnall, A., Mueen, A., & Batista, G. (n.d.). The UCR Time Series Classification Archive. Retrieved from https://www.cs.ucr.edu/~eamonn/time_series_data/
[2] https://keras.io/examples/timeseries/timeseries_classification_from_scratch/
""",
}





In [4]:
# Saving output file
# The whole dictionary is serialized into a single pickle file using pickle
# protocol 3.

output_filename = "train_test_data_keras_ford.pkl"
with open(output_filename, mode="wb") as pkl_file:
    pickle.dump(dict_train_test_db, file=pkl_file, protocol=3)
