## Importing stuff

In [30]:
import sys, os
import tsai
from tsai.all import *
# Widen the notebook's main container to 95% of the browser width via injected CSS.
display(HTML("<style>.container {width:95% !important; }</style>"))

In [31]:
import numpy as np
import pandas as pd
import pickle

In [34]:
# Make the directory three levels up importable so the project-local `utils`
# package can be resolved from this notebook's location.
project_root = os.path.abspath('../../../')
if project_root not in sys.path:  # avoid stacking duplicate entries when the cell is re-run
    sys.path.append(project_root)
from utils import utils

## Printing configs

In [4]:
# Report the versions of the main libraries, one aligned line each.
for label, version in (
    ('tsai       :', tsai.__version__),
    ('fastai2    :', fastai2.__version__),
    ('fastcore   :', fastcore.__version__),
    ('torch      :', torch.__version__),
    ('scipy      :', sp.__version__),
    ('numpy      :', np.__version__),
    ('pandas     :', pd.__version__),
):
    print(label, version)

# Report host memory (converted to GB) and the CPU count.
total_ram_gb = bytes2GB(psutil.virtual_memory().total)
used_ram_gb = bytes2GB(psutil.virtual_memory().used)
print(f'Total RAM  : {total_ram_gb:5.2f} GB')
print(f'Used RAM   : {used_ram_gb:5.2f} GB')
print('n_cpus     :', cpus)

# Report the compute device; include the GPU model name when CUDA is available.
iscuda = torch.cuda.is_available()
if iscuda:
    gpu_name = torch.cuda.get_device_name(0)
    print('device     : {} ({})'.format(device, gpu_name))
else:
    print('device     :', device)

tsai       : 0.1.0
fastai2    : 0.0.17
fastcore   : 0.1.17
torch      : 1.5.0a0+8f84ded
scipy      : 1.4.1
numpy      : 1.18.1
pandas     : 0.24.2
Total RAM  :  7.77 GB
Used RAM   :  2.77 GB
n_cpus     : 12
device     : cuda (GeForce RTX 2060 SUPER)


## Loading data

In [8]:
# Directory (relative to this notebook) holding the input arrays and labels;
# the cached feature pickles are written to and read from the same place.
# File names are appended by plain string concatenation, so keep the trailing slash.
path_to_data='../../../../'

In [None]:
# Load the raw signals, the label matrix and the per-record metadata table.
# NOTE: allow_pickle=True can execute arbitrary code while loading; only use it
# on trusted files.
data = np.load(os.path.join(path_to_data, 'data-002.npy'), allow_pickle=True)
Y = np.load(os.path.join(path_to_data, 'Y.npy'), allow_pickle=True)
labels = pd.read_csv(os.path.join(path_to_data, 'labels.csv'))

# Fold assignment: every fold up to and including `train_fold` is used for
# training, `val_fold` for validation and `test_fold` for testing.
train_fold = 8
val_fold = 9
test_fold = 10

# Compute each boolean row mask once and reuse it for both signals and targets.
fold_ids = labels.strat_fold.values
test_mask = fold_ids == test_fold
val_mask = fold_ids == val_fold
train_mask = fold_ids <= train_fold

# Held-out test split.
X_test = data[test_mask]
y_test = Y[test_mask]
# Validation split.
X_val = data[val_mask]
y_val = Y[val_mask]
# Training split (all remaining lower-numbered folds).
X_train = data[train_mask]
y_train = Y[train_mask]

In [None]:
# Preprocess signal data
X_train, X_val, X_test = utils.preprocess_signals(X_train, X_val, X_test,'/content/')
n_classes = y_train.shape[1]
X_train = np.reshape(X_train,[X_train.shape[0],X_train.shape[2],X_train.shape[1]])
X_val = np.reshape(X_val,[X_val.shape[0],X_val.shape[2],X_val.shape[1]])
X_test = np.reshape(X_test,[X_test.shape[0],X_test.shape[2],X_test.shape[1]])

In [None]:
# Show the training array's shape for a quick visual check.
display(X_train.shape)
# Unpack the 3-D shape; the first dimension is discarded, the other two are
# passed to ROCKET further down as `features` and `seq_len`.
_, features, seq_len = X_train.shape

In [None]:
# Convert the train/validation signals to float32 tensors on the target device.
X_train = torch.tensor(X_train, dtype=torch.float32, device=device)
X_val = torch.tensor(X_val, dtype=torch.float32, device=device)

# Map every distinct target value found in the training targets to a
# consecutive integer index. The distinct values get their own name so that
# `labels` (the metadata DataFrame used for the fold split) is not overwritten
# and the splitting cell stays re-runnable.
class_values = np.unique(y_train)
transform = {value: index for index, value in enumerate(class_values)}
y_train = np.vectorize(transform.get)(y_train)
# NOTE: a value present in y_val but absent from y_train maps to None here.
y_val = np.vectorize(transform.get)(y_val)
print(X_train.shape, X_val.shape, y_train.shape, y_val.shape)

Checking shapes

In [None]:
# Display the shapes of the training/validation inputs and targets side by side.
X_train.shape, y_train.shape, X_val.shape, y_val.shape

UCR:(torch.Size([160, 10, 400]), (160,), torch.Size([74, 10, 400]), (74,))

ECG:(torch.Size([17111, 12, 1000]),(17111, 5),torch.Size([2156, 12, 1000]),(2156, 5))

## Instantiating the model

In [None]:
# Number of kernels requested from ROCKET.
n_kernels=10_000
# Kernel sizes handed to ROCKET via its `kss` argument.
kss=[7, 9, 11]
# Build the ROCKET module for inputs with `features` channels of length
# `seq_len`, and move it to the compute device.
# NOTE(review): no random seed is set in the visible cells; if ROCKET draws its
# kernels randomly, the extracted features will differ between runs — confirm.
model = ROCKET(features, seq_len, n_kernels=n_kernels, kss=kss).to(device)

## Extracting features

In [None]:
# Run the model as a fixed feature extractor. Nothing is trained here, so
# gradient tracking is switched off to avoid building an autograd graph, and
# the results are brought back to the CPU as NumPy arrays.
with torch.no_grad():
    X_train_tfm = model(X_train).cpu().numpy()
    X_val_tfm = model(X_val).cpu().numpy()

Saving features

In [None]:
# Cache the extracted features and the target arrays as pickles in the data
# directory so the extraction step does not have to be repeated.
arrays_to_save = {
    'features_X_train.pkl': X_train_tfm,
    'features_X_val.pkl': X_val_tfm,
    'y_train.pkl': y_train,
    'y_val.pkl': y_val,
}

for file_name, array in arrays_to_save.items():
    with open(path_to_data + file_name, 'wb') as handle:
        pickle.dump(array, handle, protocol=pickle.HIGHEST_PROTOCOL)

Loading features

In [10]:
# Sanity check: display the path the validation-target pickle resolves to.
path_to_data + 'y_val.pkl'

'../../../../y_val.pkl'

In [15]:
from glob import glob

# List the contents of the data directory to confirm which files are available.
# Uses the shared `path_to_data` setting instead of repeating the literal path.
glob(path_to_data + '*')

['../../../../ecg-ml-dl',
 '../../../../data-002.npy',
 '../../../../Pipfile.lock',
 '../../../../Y.npy',
 '../../../../__init__.py',
 '../../../../timeseriesAI',
 '../../../../labels.csv',
 '../../../../Pipfile']

In [17]:
def _load_pickle(file_name):
    """Return the object unpickled from ``path_to_data + file_name``."""
    # NOTE: pickle.load can execute arbitrary code; only read files that were
    # produced by this notebook or another trusted source.
    with open(path_to_data + file_name, 'rb') as handle:
        return pickle.load(handle)


# Restore the cached features and targets written by the saving cell.
X_train_tfm = _load_pickle('features_X_train.pkl')
X_val_tfm = _load_pickle('features_X_val.pkl')
y_train = _load_pickle('y_train.pkl')
y_val = _load_pickle('y_val.pkl')

In [18]:
# Display the shapes of the restored training and validation targets.
y_train.shape, y_val.shape

((17111, 5), (2156, 5))

Converting the one-hot label vectors into a single class column

In [19]:
# Convert both target matrices with the project helper; the shape check in the
# next cell shows the results are 1-D (one entry per sample).
# NOTE(review): y_train / y_val are overwritten in place, so this cell is
# presumably not safe to run twice without reloading the pickles — confirm.
y_train = utils.dummies_to_categorical(y_train)
y_val = utils.dummies_to_categorical(y_val)

In [20]:
 # Display the target shapes after the conversion above.
 y_train.shape,y_val.shape

((17111,), (2156,))

## Actually running a classifier on the features

In [21]:
from sklearn.linear_model import RidgeClassifierCV

# Cross-validated ridge classifier over 17 log-spaced regularisation strengths
# (1e-8 ... 1e8), fitted on the extracted features.
# NOTE(review): the `normalize` argument is not accepted by newer scikit-learn
# releases — confirm against the installed version before upgrading.
candidate_alphas = np.logspace(-8, 8, 17)
ridge = RidgeClassifierCV(alphas=candidate_alphas, normalize=True)
ridge.fit(X_train_tfm, y_train)

# Report the selected alpha together with train/validation scores.
train_score = ridge.score(X_train_tfm, y_train)
valid_score = ridge.score(X_val_tfm, y_val)
summary_format = 'alpha: {:.2E}  train: {:.5f}  valid: {:.5f}'
print(summary_format.format(ridge.alpha_, train_score, valid_score))

alpha: 1.00E+00  train: 0.72322  valid: 0.60436


## Evaluating

In [22]:
# Predict a class for every validation sample from its extracted features,
# then display the resulting array.
pred = ridge.predict(X_val_tfm)
pred

<module 'utils.utils' from '/home/pedropva/Desktop/time_series/ecg-ml-dl/utils/utils.py'>

In [24]:
# Confirm that targets and predictions have matching shapes.
y_val.shape,pred.shape

((2156,), (2156,))

In [40]:
# Enable IPython's autoreload extension (mode 2) so edits to imported project
# modules are picked up without restarting the kernel.
# NOTE(review): configuration like this normally belongs in the first cells.
%load_ext autoreload
%autoreload 2

In [61]:
# One-hot encode the ground-truth classes (one indicator column per class).
y_val_df = pd.get_dummies(pd.Series(y_val))
# One-hot encode the predictions and align their columns with the ground truth.
# Without the reindex, a class that is never predicted would be missing from
# y_pred_df, leaving the two matrices with different widths / column meaning.
# Classes absent from the predictions get an all-zero column.
y_pred_df = pd.get_dummies(pd.Series(pred)).reindex(columns=y_val_df.columns, fill_value=0)

In [65]:
# Score the one-hot predictions against the one-hot ground truth with the
# project helper and display the returned result table.
# NOTE(review): `pred` holds hard class labels, so the helper receives 0/1
# indicators rather than continuous decision scores — confirm this is what the
# AUC-style metric is meant to be computed from.
df_result = utils.evaluate_experiment(y_val_df.values,y_pred_df.values)
df_result

Unnamed: 0,macro_auc,Fmax
0,0.646679,0.60436
