# Data Load - 전처리 width, height, input_channel 정의

In [1]:
import os
import time
import numpy as np
from numpy import dstack
import pandas as pd
from pandas import read_csv
from pandas import DataFrame
import matplotlib.pyplot as plt 
from scipy.io import arff, loadmat
import pandas as pd
from sklearn.model_selection import train_test_split

#### Load Multivariate Time Series Data

In [96]:
# Full list of dataset names, kept for reference:
# fnames = ['ArabicDigits','CMUsubject16','ECG','JapaneseVowels','KickVsPunch', 'Libras','Outflow','UWave','Wafer','WalkVsRun']
# Datasets actually used by this notebook; the load cell reads fnames[0].
fnames = ['Libras','UWave']

In [98]:
# Pick the first configured dataset and read ./data/<name>/<name>.mat.
fname = fnames[0]
data_path = './data/' + fname
mat = loadmat('{}/{}.mat'.format(data_path, fname))
print('Load ' + fname + ' dataset')

Load Libras dataset


In [99]:
# Pull the series and label fields for both splits out of the loaded 'mts' entry.
trainx, trainy = mat['mts']['train'], mat['mts']['trainlabels']
testx, testy = mat['mts']['test'], mat['mts']['testlabels']

# Sample counts are read from the second axis of the unwrapped series arrays.
nb_trains, nb_tests = trainx[0][0].shape[1], testx[0][0].shape[1]
# NOTE(review): the class count is taken from the last training label, which
# presumably relies on labels being sorted and numbered 1..K — confirm.
nb_classes = trainy[0][0][-1][0]
print('nb_trains:',nb_trains,'\tnb_tests:',nb_tests, '\tnb_classes:', nb_classes)

# Per-sample dimensions come from the first training sample: (nb_features, length).
nb_features, length = trainx[0][0][0][0].shape[:2]
print('nb_features:',nb_features, '\tlength:', length)

nb_trains: 180 	nb_tests: 180 	nb_classes: 15
nb_features: 2 	length: 45


#### make (nb_trains, length, nb_features)
Libras와 UWave를 제외한 나머지 데이터셋은 샘플마다 시계열 길이(length)가 달라서 현재 모델의 입력으로 적합하지 않음.
따라서 일단 Libras와 UWave 데이터만 사용함.

In [100]:
def reshaping(X, Y):
    """Convert the nested 'mts' series and label fields into dense NumPy arrays.

    Parameters
    ----------
    X : nested array
        ``X[0][0][0]`` must be a sequence of per-sample 2-D arrays that all
        share one shape. This notebook reads ``nb_features`` from axis 0 and
        ``length`` from axis 1 of a sample, i.e. each sample is laid out as
        ``(nb_features, length)``.
    Y : nested array
        ``Y[0][0]`` must be a sequence whose items hold the label at index 0.

    Returns
    -------
    X : np.ndarray
        float64 array of shape ``(nb_samples, length, nb_features)``.
    Y : np.ndarray
        int64 column vector of shape ``(nb_samples, 1)``.
    """
    # Stack the per-sample arrays into (nb_samples, nb_features, length).
    samples = np.array(list(X[0][0][0]), dtype=np.float64)
    # Swap the last two axes with a transpose. A plain reshape to the same
    # target shape keeps the flat element order and therefore mixes values of
    # different features and time steps instead of moving whole axes.
    X = np.ascontiguousarray(samples.transpose(0, 2, 1))

    labels = np.array([row[0] for row in Y[0][0]], dtype=np.int64)
    # Hand back the converted int64 column vector rather than the raw nested
    # label array.
    Y = labels.reshape(labels.shape[0], 1)

    return X, Y

In [101]:
# Convert the raw training fields to dense arrays.
# This rebinds trainx/trainy, so re-running this cell without first re-running
# the cell that extracts them from `mat` feeds already-converted arrays back
# into reshaping.
trainx, trainy = reshaping(trainx, trainy)

In [102]:
# Convert the raw test fields to dense arrays.
# This rebinds testx/testy, so re-running this cell without first re-running
# the cell that extracts them from `mat` feeds already-converted arrays back
# into reshaping.
testx, testy = reshaping(testx, testy)

In [103]:
# Batch size: one tenth of the training set (rounded down), capped at 16.
batch_size = min(trainx.shape[0] // 10, 16)
print("batch size:{}".format(batch_size))
print(trainx.shape, trainy.shape)
print(testx.shape, testy.shape)

batch size:16
(180, 45, 2) (180, 1)
(180, 45, 2) (180, 1)


In [104]:
# One-hot encode the labels: one column per distinct label value in a split.
# NOTE(review): train and test are encoded independently, so their columns
# only line up if both splits contain the same set of labels — confirm.
hot_encoded_y_train = pd.get_dummies(trainy.flatten()).values
hot_encoded_y_test = pd.get_dummies(testy.flatten()).values
print("y train shape: {}".format(hot_encoded_y_train.shape))
print("y test shape: {}".format(hot_encoded_y_test.shape))

y train shape: (180, 15)
y test shape: (180, 15)


In [105]:
# Insert a singleton axis at position 1 so every sample becomes (1, d1, d2).
trainX = np.expand_dims(trainx, axis=1)
trainY = hot_encoded_y_train
# The first half of the test split is used for validation, the rest for testing.
validX, testX = np.split(np.expand_dims(testx, axis=1), [len(testx) // 2])
validY, testY = np.split(hot_encoded_y_test, [len(testx) // 2])
print("trainX shape:{}".format(trainX.shape))
print("trainY shape:{}".format(trainY.shape))
print("validX shape:{}".format(validX.shape))
print("validY shape:{}".format(validY.shape))
print("testX shape:{}".format(testX.shape))
print("testY shape:{}".format(testY.shape))

trainX shape:(180, 1, 45, 2)
trainY shape:(180, 15)
validX shape:(90, 1, 45, 2)
validY shape:(90, 15)
testX shape:(90, 1, 45, 2)
testY shape:(90, 15)


In [106]:
# Model input dimensions are axes 1..3 of the 4-D training array.
height, width, input_channel = trainX.shape[1:4]
print("height {}".format(height))
print("width {}".format(width))
print("input_channel {}".format(input_channel))

height 1
width 45
input_channel 2


# Another dataset

## FaceDetection
Classes: Face (Class 1) or Scramble (Class 0)

Dataset Link: http://www.timeseriesclassification.com/description.php?Dataset=FaceDetection

In [29]:
# Read the FaceDetection train and test splits from their ARFF files.
# The following cell indexes each result with [0] to reach the records.
trn = arff.loadarff('./data/Multivariate_arff/FaceDetection/FaceDetection_TRAIN.arff')
tst = arff.loadarff('./data/Multivariate_arff/FaceDetection/FaceDetection_TEST.arff')

In [33]:
# Separate the 'SERIES' field and the 'class' field of each loaded split.
trainx = trn[0]['SERIES']
trainy = trn[0]['class']
testx = tst[0]['SERIES']
testy = tst[0]['class']

In [34]:
def reshapingFaceDetection(X, Y):
    """Turn ARFF-loaded series and labels into dense NumPy arrays.

    Parameters
    ----------
    X : sequence
        Indexed as ``X[i][j]``; every ``X[i][j]`` must provide ``tolist()``.
    Y : sequence
        Labels convertible to int64.

    Returns
    -------
    reshapedX : np.ndarray
        float64 array built from the nested lists, with one leading entry per
        ``X[i]`` and one second-axis entry per ``X[i][j]``.
    reshapedY : np.ndarray
        int64 column vector of shape ``(len(Y), 1)``.

    NOTE(review): the nesting order of X is kept as-is (no axes are swapped),
    unlike ``reshaping`` — confirm this is the layout the model expects.
    """
    # Materialise every X[i][j] as a plain list so NumPy can stack them.
    series = [
        [X[i][j].tolist() for j in range(len(X[i]))]
        for i in range(len(X))
    ]
    reshapedX = np.array(series, dtype=np.float64)

    labels = np.array(Y, dtype=np.int64)
    reshapedY = labels.reshape(labels.shape[0], 1)

    return reshapedX, reshapedY

In [35]:
# Convert both FaceDetection splits to dense arrays.
# These calls rebind trainx/trainy/testx/testy, replacing the record fields
# extracted from the ARFF data (and any earlier arrays bound to these names).
trainx, trainy = reshapingFaceDetection(trainx, trainy)
testx, testy = reshapingFaceDetection(testx, testy)

In [36]:
# Show the array shapes of both splits after conversion.
print('train: ', trainx.shape, trainy.shape)
print('test: ', testx.shape, testy.shape)

train:  (5890, 144, 62) (5890, 1)
test:  (3524, 144, 62) (3524, 1)


In [37]:
# Batch size: one tenth of the training set (rounded down), capped at 16.
batch_size = min(trainx.shape[0] // 10, 16)
print("batch size:{}".format(batch_size))
print(trainx.shape, trainy.shape)
print(testx.shape, testy.shape)

batch size:16
(5890, 144, 62) (5890, 1)
(3524, 144, 62) (3524, 1)


In [38]:
# One-hot encode the labels: one column per distinct label value in a split.
# NOTE(review): train and test are encoded independently, so their columns
# only line up if both splits contain the same set of labels — confirm.
hot_encoded_y_train = pd.get_dummies(trainy.flatten()).values
hot_encoded_y_test = pd.get_dummies(testy.flatten()).values
print("y train shape: {}".format(hot_encoded_y_train.shape))
print("y test shape: {}".format(hot_encoded_y_test.shape))

y train shape: (5890, 2)
y test shape: (3524, 2)


In [39]:
# Insert a singleton axis at position 1 so every sample becomes (1, d1, d2).
trainX = np.expand_dims(trainx, axis=1)
trainY = hot_encoded_y_train
# The first half of the test split is used for validation, the rest for testing.
validX, testX = np.split(np.expand_dims(testx, axis=1), [len(testx) // 2])
validY, testY = np.split(hot_encoded_y_test, [len(testx) // 2])
print("trainX shape:{}".format(trainX.shape))
print("trainY shape:{}".format(trainY.shape))
print("validX shape:{}".format(validX.shape))
print("validY shape:{}".format(validY.shape))
print("testX shape:{}".format(testX.shape))
print("testY shape:{}".format(testY.shape))

trainX shape:(5890, 1, 144, 62)
trainY shape:(5890, 2)
validX shape:(1762, 1, 144, 62)
validY shape:(1762, 2)
testX shape:(1762, 1, 144, 62)
testY shape:(1762, 2)


In [40]:
# Model input dimensions are axes 1..3 of the 4-D training array.
height, width, input_channel = trainX.shape[1:4]
print("height {}".format(height))
print("width {}".format(width))
print("input_channel {}".format(input_channel))

height 1
width 144
input_channel 62
