In [1]:
import logging
import math
import time

import numpy as np
import pandas as pd
# Configure the root logger so INFO-level (and higher) records are emitted.
logging.basicConfig(level=logging.INFO)
# Logger named after the current module; read_dataset uses it to report data shapes.
logger = logging.getLogger(__name__)

In [6]:
import os
import sys

In [11]:
from sktime.transformations.panel.rocket import Rocket, MultiRocketMultivariate
from sklearn.linear_model import RidgeClassifierCV
from sktime.datasets import load_from_tsfile_to_dataframe
from sktime.transformations.panel import catch22

In [8]:
# File-name templates for the dataset files. read_dataset fills the two "{}"
# placeholders with the split name ("TRAIN"/"TEST"/"VAL") and the data type,
# then appends the file extension (".ts" or ".npy").
FILE_NAME_X = '{}_{}_X'
# NOTE(review): FILE_NAME_Y is not referenced by any cell visible in this notebook.
FILE_NAME_Y = '{}_{}_Y'
FILE_NAME_PID = '{}_{}_pid'

In [9]:
def read_dataset(path, data_type):
    """Load the train/test splits (and an optional validation split) from ``path``.

    File names are built from the module-level templates:
    ``<SPLIT>_<data_type>_X.ts`` is read with ``load_from_tsfile_to_dataframe``
    (which yields both the series and the labels) and
    ``<SPLIT>_<data_type>_pid.npy`` is read with ``np.load``.

    Parameters
    ----------
    path : str
        Directory that contains the split files.
    data_type : str
        Second component of the file names (e.g. ``"default"``).

    Returns
    -------
    tuple
        ``(x_train, y_train, x_test, y_test, x_val, y_val, train_pid, test_pid)``.
        ``x_val`` and ``y_val`` are ``None`` when the validation ``.ts`` file
        does not exist.

    Notes
    -----
    Only a missing validation file is tolerated; any error raised while
    loading the train or test files propagates to the caller.
    """

    def _split_file(template, split, extension):
        # "<path>/<SPLIT>_<data_type>_<suffix><extension>"
        return os.path.join(path, template.format(split, data_type) + extension)

    x_train, y_train = load_from_tsfile_to_dataframe(_split_file(FILE_NAME_X, "TRAIN", ".ts"))
    # The middle value is the length of the first cell of the first row.
    logger.info("Training data shape %s %s %s", x_train.shape, len(x_train.iloc[0, 0]), y_train.shape)

    x_test, y_test = load_from_tsfile_to_dataframe(_split_file(FILE_NAME_X, "TEST", ".ts"))
    # Logged once; the original emitted this identical message twice in a row.
    logger.info("Testing data shape: %s %s", x_test.shape, y_test.shape)

    # NOTE(security): allow_pickle=True lets np.load unpickle arbitrary objects,
    # which can execute code. Only point this at trusted .npy files.
    test_pid = np.load(_split_file(FILE_NAME_PID, "TEST", ".npy"), allow_pickle=True)
    train_pid = np.load(_split_file(FILE_NAME_PID, "TRAIN", ".npy"), allow_pickle=True)

    try:
        x_val, y_val = load_from_tsfile_to_dataframe(_split_file(FILE_NAME_X, "VAL", ".ts"))
        logger.info("Validation data shape: %s %s", x_val.shape, y_val.shape)
    except FileNotFoundError:
        # The validation split is optional; callers receive None for both parts.
        logger.info("Validation data is empty:")
        x_val, y_val = None, None

    return x_train, y_train, x_test, y_test, x_val, y_val, train_pid, test_pid

In [None]:
# Catch22 feature extraction: for every value in the list below, load the
# train/test splits, transform both with a Catch22 transformer fitted on the
# training split, and write the features (CSV) and labels (.npy) to disk.
data_type = "default"
# NOTE(review): absolute, machine-specific locations; "{}" is filled with `sv`.
path = "/home/ashish/Results/Datasets/HPE3/MP/TrainTestDataSktime/{}/MulticlassSplit/"
output_path = "/home/ashish/Results/Datasets/HPE3/MP/Catch22/{}/"
for sv in [103007,1899797,191099]:
    split_dir = path.format(sv)
    out_dir = output_path.format(sv)
    # Show the directory actually being read (the original printed the raw template).
    print(split_dir)
    # Make sure the destination exists before anything is written to it.
    os.makedirs(out_dir, exist_ok=True)

    x_train, y_train, x_test, y_test, x_val, y_val, train_pid, test_pid = read_dataset(split_dir, data_type)
    catch = catch22.Catch22()

    # Start the clock before fitting; the original read `ts` here without ever
    # assigning it, which raises NameError on a fresh kernel. The reported
    # training time therefore covers fit + transform.
    ts = time.perf_counter()
    catch.fit(x_train)
    x_training_transform = catch.transform(x_train)
    te = time.perf_counter()
    total_time = (te - ts)

    print("Training transform took {} ".format(total_time))

    # The test timing covers the transform only (the transformer is already fitted).
    ts = time.perf_counter()
    x_test_transform = catch.transform(x_test)
    te = time.perf_counter()
    total_time = (te - ts)
    print("Testing transform took {} ".format(total_time))

    x_training_transform.to_csv(os.path.join(out_dir, "x_train_{}.csv".format(data_type)), index=False)
    x_test_transform.to_csv(os.path.join(out_dir, "x_test_{}.csv".format(data_type)), index=False)

    np.save(os.path.join(out_dir, "y_test_{}.npy".format(data_type)), y_test)
    np.save(os.path.join(out_dir, "y_train_{}.npy".format(data_type)), y_train)

In [None]:

# tsfresh feature extraction: for every value in the list below, stack the
# train and test data, extract features for the combined set, split the result
# back into train/test rows, and write features and labels to disk.
data_type = "default"
# NOTE(review): absolute, machine-specific locations; "{}" is filled with `sv`.
path = "/home/ashish/Results/Datasets/Shimmer/MP/TrainTestData/{}/MulticlassSplit/"
output_path = "/home/ashish/Results/Datasets/Shimmer/MP/tsfresh/{}/"

# NOTE(review): get_id_col and extract_relevant_features are neither defined
# nor imported in the visible part of this notebook; they must be in scope
# before this cell can run.
for sv in [103007,1899797,191099]:
    out_dir = output_path.format(sv)
    # Make sure the destination exists before anything is written to it.
    os.makedirs(out_dir, exist_ok=True)

    # read_dataset returns eight values (validation split and pid arrays come
    # last); only the first four are needed here. The original unpacked exactly
    # four names, which raises ValueError against that return value.
    x_train, y_train, x_test, y_test, *_ = read_dataset(path.format(sv), data_type)
    train_shape = x_train.shape[0]

    # NOTE(review): the reshape below indexes three axes, so the stacked data
    # must be a 3-D array — presumably (cases, timesteps, channels); confirm
    # that what read_dataset returns for this dataset satisfies that.
    full_data = np.vstack((x_train, x_test))
    full_data_long = pd.DataFrame(full_data.reshape(full_data.shape[0]*full_data.shape[1], full_data.shape[2]))

    full_data_long["id"] = get_id_col(full_data)

    # Labels for the combined set, indexed 1..N.
    # presumably this matches the ids produced by get_id_col — TODO confirm.
    y = pd.Series(np.concatenate((y_train, y_test)))
    y.index = np.arange(1, y.shape[0]+1)

    full_data_transform = extract_relevant_features(full_data_long, y, column_id='id')

    # Rows are split positionally: the first `train_shape` rows are taken as
    # training cases, the remainder as test cases.
    x_training_transform = full_data_transform.iloc[0:train_shape, :]
    x_test_transform = full_data_transform.iloc[train_shape:, :]

    x_training_transform.to_csv(os.path.join(out_dir, "x_train_{}.csv".format(data_type)), index=False)
    x_test_transform.to_csv(os.path.join(out_dir, "x_test_{}.csv".format(data_type)), index=False)
    # Wrap the labels in a Series so this works whether they arrive as a NumPy
    # array (which has no .to_csv) or as a pandas Series.
    pd.Series(y_train).to_csv(os.path.join(out_dir, "y_train_{}.csv".format(data_type)), index=False)
    pd.Series(y_test).to_csv(os.path.join(out_dir, "y_test_{}.csv".format(data_type)), index=False)

    # Saved inside the loop so every `sv` gets its own label files; the
    # original ran these after the loop and so only wrote them for the last value.
    np.save(os.path.join(out_dir, "y_test_{}.npy".format(data_type)), y_test)
    np.save(os.path.join(out_dir, "y_train_{}.npy".format(data_type)), y_train)