## Installing dependencies

In [13]:
stable = True # Set to True for latest pip version or False for main branch in GitHub
!pip install {"tsai -U" if stable else "git+https://github.com/timeseriesAI/tsai.git"} >> /dev/null

In [14]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.gaussian_process import GaussianProcessRegressor
from scipy.interpolate import interp1d
from sklearn.gaussian_process.kernels import RBF
import torch
from sklearn.preprocessing import StandardScaler
import warnings
from tsai.all import *
import sklearn.metrics as skm
from IPython.display import clear_output
import copy
# Print the environment summary (library versions, device, memory) shown below this cell.
my_setup()
# Suppress every Python warning for the rest of the session.
warnings.simplefilter('ignore')

os              : Linux-5.15.133+-x86_64-with-glibc2.31
python          : 3.10.13
tsai            : 0.3.8
fastai          : 2.7.13
fastcore        : 1.5.29
torch           : 2.1.2
device          : 1 gpu (['Tesla P100-PCIE-16GB'])
cpu cores       : 2
threads per cpu : 2
RAM             : 31.36 GB
GPU memory      : [16.0] GB


In [15]:
# Load the pre-sampled arrays, grouped as (features, labels) pairs.
# "wtd" pair
X_wtd = np.load("/kaggle/input/np-arrays/X_wtd_samp.npy")
y_wtd = np.load("/kaggle/input/np-arrays/y_wtd_samp.npy")
# "rand" pair
X_rand = np.load("/kaggle/input/np-arrays/X_rand_samp.npy")
y_rand = np.load("/kaggle/input/np-arrays/y_rand_samp.npy")


In [16]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the "wtd" samples for validation (80/20 train/validation split).
# A fixed random_state keeps the split, and every score computed on it,
# identical across kernel restarts.
X_train, X_val, y_train, y_val = train_test_split(
    X_wtd, y_wtd, test_size=0.2, random_state=42
)


### Initializing data

In [17]:
bs = 16  # batch size used for the dataloaders

# Stack the training and validation arrays produced by train_test_split into one
# dataset and get the index splits that tell tsai which rows belong to which part.
# (X_val / y_val are the held-out arrays; no X_test / y_test is ever created.)
X, y, splits = combine_split_data([X_train, X_val], [y_train, y_val])

# No transform on the inputs; labels are turned into categories.
tfms  = [None, [Categorize()]]
dsets = TSDatasets(X, y, tfms=tfms, splits=splits)
dls   = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=[bs])


### Finding a good initial learning rate for the model before tuning

In [19]:
# learn = Learner(dls, model_to_train(dls.vars, dls.c, dls.len), metrics=[F1Score(average='macro')])
# learn.lr_find()

NameError: name 'model_to_train' is not defined

### Hyperparam tuning of the MODEL

In [43]:
# Baseline training schedule: number of epochs and the peak learning rate
# handed to fit_one_cycle during tuning.
epochs, max_lr = 25, 1e-3

In [46]:
from fastai.callback.tracker import *

# Early-stopping callback passed to the learners in the tuning loop,
# configured with a patience of 3 and a minimum delta of 0.05.
early_stopping = EarlyStoppingCallback(min_delta=0.05, patience=3)

* Conclusions from 
* model1: RNNPlus
* model2: RNNAttention
* https://docs.google.com/document/d/1skeTIoSMsVwEUPVI6HAi7rVfM1dVMvA0BYECeTmwUvQ/edit?usp=sharing
* LSTM and GRU variants are not used, because the plain RNN showed better results


In [None]:
# Hyperparameter grid for RNNPlus.
#
# Only the recurrent-stack parameters are searched. Input channels, number of
# classes and sequence length are not part of the grid: the tuning loop builds
# models with create_model(..., dls=dls) and passes none of them for the other
# architectures either, so they are presumably taken from the DataLoaders.
model2 = RNNPlus
max_lr = 2.12e-4  # peak learning rate used when tuning this architecture

hidden_size = [1024]           # Experiment with different sizes
n_layers = [4]                 # Number of stacked recurrent layers
rnn_dropout = [0.1, 0.2, 0.3]  # Starting points for dropout

# One (architecture, kwargs) entry per combination, in nested-loop order:
# hidden_size -> n_layers -> rnn_dropout.
archs = [
    (model2, {
        'hidden_size': h_size,
        'n_layers': layers,
        'rnn_dropout': dropout,
    })
    for h_size in hidden_size
    for layers in n_layers
    for dropout in rnn_dropout
]


In [44]:
# {'d_model': 64, 'depth': 8, 'lstm_dropout': 0.1, 'dropout': 0.1, 'mlp_ratio': 4, 'n_layers': 4, 'hidden_size': 512, 'use_pe': True}	5970213	0.427612	0.422031	0.758592	23	25	0.001
# 1	TSSequencerPlus	{'d_model': 64, 'depth': 2, 'lstm_dropout': 0.3, 'dropout': 0.2, 'mlp_ratio': 4, 'n_layers': 4, 'hidden_size': 512, 'use_pe': True}	

# Hyperparameter grid for TSSequencerPlus.
model3 = TSSequencerPlus
d_model = [32, 64, 128]          # embedding dimension
depth = [2, 4, 8]                # number of blocks
lstm_dropout = [0.1, 0.2, 0.3]
dropout = [0.1, 0.2, 0.3]
mlp_ratio = [2, 4]

# Held constant for every configuration. The values are the ones recorded in
# this notebook's results table ('n_layers': 4, 'hidden_size': 512).
# NOTE(review): confirm that TSSequencerPlus actually consumes these two keys
# rather than silently ignoring them.
seq_n_layers = 4
seq_hidden_size = 512

# One (architecture, kwargs) entry per combination, in nested-loop order:
# d_model -> depth -> dropout -> lstm_dropout -> mlp_ratio.
archs = [
    (model3, {
        'd_model': dimension,
        'depth': transfblock,
        'lstm_dropout': lstmdropout,
        'dropout': tfr_dropout,
        'mlp_ratio': mlp_dim_ratio,
        'n_layers': seq_n_layers,
        'hidden_size': seq_hidden_size,
        'use_pe': True,  # positional encoding enabled for every configuration
    })
    for dimension in d_model
    for transfblock in depth
    for tfr_dropout in dropout
    for lstmdropout in lstm_dropout
    for mlp_dim_ratio in mlp_ratio
]

In [None]:

# Hyperparameter grid for InceptionTime.
model4 = InceptionTime
nf = [32, 64, 128]
nb_filters = [16, 32, 64]
ks = [3, 5, 7]
bottleneck = [True, False]  # Experiment with both options

# c_in / c_out are deliberately not listed: the tuning loop builds models with
# create_model(..., dls=dls) and passes neither for the other architectures, so
# channel and class counts presumably come from the DataLoaders rather than
# from hard-coded placeholders.
# NOTE(review): tsai's InceptionTime may treat nb_filters as a legacy alias of
# nf -- confirm whether searching both is meaningful.
#
# One (architecture, kwargs) entry per combination, in nested-loop order:
# nf -> nb_filters -> ks -> bottleneck.
archs = [
    (model4, {
        'nf': f,
        'nb_filters': num_filters,
        'ks': kernel_size,
        'bottleneck': use_bottleneck,
    })
    for f in nf
    for num_filters in nb_filters
    for kernel_size in ks
    for use_bottleneck in bottleneck
]


In [None]:
# Hyperparameter grid for TransformerRNNPlus.
model5 = TransformerRNNPlus
d_model = [32, 64, 128]
nhead = [4, 8, 16]
num_encoder_layers = [1, 2, 4]
dim_feedforward = [2 * d for d in d_model]  # Candidate widths derived from the d_model values
dropout = [0.1, 0.2, 0.3]
num_rnn_layers = [1, 2, 3]  # Adjust if handling longer sequences

# c_in / c_out are deliberately not listed: the tuning loop builds models with
# create_model(..., dls=dls) and passes neither for the other architectures, so
# they presumably come from the DataLoaders rather than from placeholders.
#
# One (architecture, kwargs) entry per combination, in nested-loop order:
# d_model -> nhead -> num_encoder_layers -> dim_feedforward -> dropout -> num_rnn_layers.
# Every dim_feedforward candidate is crossed with every d_model value.
archs = [
    (model5, {
        'd_model': d,
        'nhead': h,
        'num_encoder_layers': e,
        'dim_feedforward': ff,
        'dropout': dr,
        'num_rnn_layers': rnn,
    })
    for d in d_model
    for h in nhead
    for e in num_encoder_layers
    for ff in dim_feedforward
    for dr in dropout
    for rnn in num_rnn_layers
]


In [None]:
# Hyperparameter grid for TSTPlus.
model6 = TSTPlus
n_layers = [2, 4, 6]
d_model = [64, 128, 256]
n_heads = [4, 8, 16]
d_ff = [d * 2 for d in d_model]  # d_ff = 2 * d_model, paired index-by-index with d_model
attn_dropout = [0.1, 0.2, 0.3]
dropout = [0.1, 0.2, 0.3]
# Cap the sequence length at 512. dls.len is the sequence length this notebook
# already hands to its models (see the RNNAttention cell), so it replaces the
# undefined per-sample length placeholder.
max_seq_len = [min(512, dls.len)]
learn_pe = [True, False]  # Experiment with both options

# One (architecture, kwargs) entry per combination, in nested-loop order:
# n_layers -> d_model (with its matching d_ff) -> n_heads -> attn_dropout -> dropout.
archs = [
    (model6, {
        'n_layers': layers,
        'd_model': dim,
        'n_heads': heads,
        'd_ff': ff,
        'attn_dropout': attn_dr,
        'dropout': dr,
        'max_seq_len': max_seq_len[0],
        'learn_pe': learn_pe[0],  # Consider both True/False later
    })
    for layers in n_layers
    for dim, ff in zip(d_model, d_ff)
    for heads in n_heads
    for attn_dr in attn_dropout
    for dr in dropout
]


In [None]:
# Hyperparameter grid for PatchTST.
model7 = PatchTST
n_layers = [2, 4, 6]       # Experiment with more layers based on resources
d_model = [128, 256, 512]  # Adjust based on data complexity and memory
patch_len = [8, 16, 32]    # Consider activity duration and sequence length
stride = [1, 2, 4]         # Keep stride smaller than patch_len
n_heads = [4, 8, 16]       # Start with lower values and increase if needed
dropout = [0.1, 0.2, 0.3]  # Regularization for overfitting prevention

# Each entry names model7 directly, so this cell can run before any
# architecture has been selected for training.
# One (architecture, kwargs) entry per combination, in nested-loop order:
# n_layers -> d_model -> patch_len -> stride -> n_heads -> dropout.
archs = [
    (model7, {
        'n_layers': layers,
        'd_model': dim,
        'patch_len': p_len,
        'stride': p_stride,
        'n_heads': heads,
        'dropout': dr,
    })
    for layers in n_layers
    for dim in d_model
    for p_len in patch_len
    for p_stride in stride
    for heads in n_heads
    for dr in dropout
]


# Further candidate architectures; no hyperparameter grid is defined for them here.
model8, model9 = SequentialRNN, ResNet

In [None]:
# Select which candidate architecture to tune (model3 is TSSequencerPlus).
model_to_train=model3

In [None]:
# Create DataFrame to store results
results = pd.DataFrame(columns=['arch', 'hyperparams', 'total params', 'train loss', 'valid loss', 'f1_score', 'time', 'epochs', 'max_lr'])
model = create_model(model_to_train, dls=dls, **(archs[0][1]))
for i, (arch, k) in enumerate(archs):
    model_copy = copy.deepcopy(model)  

    print(model.__class__.__name__, '\n', k)
    learn = Learner(dls, model_copy, metrics=[F1Score(average='macro')], 
                   cbs=[early_stopping,]
                   )
    start = time.time()
    learn.fit_one_cycle(epochs, max_lr)  # Use correct epochs and lr for each iteration
    elapsed = time.time() - start
    
    vals = learn.recorder.values[-1]
    results.loc[i] = [arch.__name__, k, count_parameters(model), vals[0], vals[1], vals[2], int(elapsed), epochs, max_lr]
    results.sort_values(by='f1_score', ascending=False, kind='stable', ignore_index=True, inplace=True)
    clear_output()
    display(results.head(10))

# # Sort results
# results.sort_values(by='f1_macro', inplace=True, ascending=False)

# # Show top results
# display(results.head())


Unnamed: 0,arch,hyperparams,total params,train loss,valid loss,f1_score,time,epochs,max_lr
0,TSSequencerPlus,"{'d_model': 64, 'depth': 8, 'lstm_dropout': 0.1, 'dropout': 0.1, 'mlp_ratio': 4, 'n_layers': 4, 'hidden_size': 512, 'use_pe': True}",5970213,0.427612,0.422031,0.758592,23,25,0.001
1,TSSequencerPlus,"{'d_model': 64, 'depth': 2, 'lstm_dropout': 0.3, 'dropout': 0.2, 'mlp_ratio': 4, 'n_layers': 4, 'hidden_size': 512, 'use_pe': True}",5970213,0.427171,0.40442,0.753042,24,25,0.001
2,TSSequencerPlus,"{'d_model': 64, 'depth': 8, 'lstm_dropout': 0.2, 'dropout': 0.2, 'mlp_ratio': 2, 'n_layers': 4, 'hidden_size': 512, 'use_pe': True}",5970213,0.451165,0.418198,0.743834,25,25,0.001
3,TSSequencerPlus,"{'d_model': 32, 'depth': 2, 'lstm_dropout': 0.3, 'dropout': 0.1, 'mlp_ratio': 2, 'n_layers': 4, 'hidden_size': 512, 'use_pe': True}",5970213,0.451861,0.429594,0.739056,24,25,0.001
4,TSSequencerPlus,"{'d_model': 32, 'depth': 8, 'lstm_dropout': 0.3, 'dropout': 0.3, 'mlp_ratio': 2, 'n_layers': 4, 'hidden_size': 512, 'use_pe': True}",5970213,0.456144,0.441845,0.738355,23,25,0.001
5,TSSequencerPlus,"{'d_model': 64, 'depth': 4, 'lstm_dropout': 0.2, 'dropout': 0.3, 'mlp_ratio': 4, 'n_layers': 4, 'hidden_size': 512, 'use_pe': True}",5970213,0.460782,0.489009,0.73432,24,25,0.001
6,TSSequencerPlus,"{'d_model': 64, 'depth': 8, 'lstm_dropout': 0.1, 'dropout': 0.3, 'mlp_ratio': 2, 'n_layers': 4, 'hidden_size': 512, 'use_pe': True}",5970213,0.455294,0.45036,0.733868,24,25,0.001
7,TSSequencerPlus,"{'d_model': 128, 'depth': 2, 'lstm_dropout': 0.1, 'dropout': 0.3, 'mlp_ratio': 4, 'n_layers': 4, 'hidden_size': 512, 'use_pe': True}",5970213,0.462414,0.476504,0.724439,25,25,0.001
8,TSSequencerPlus,"{'d_model': 64, 'depth': 2, 'lstm_dropout': 0.1, 'dropout': 0.2, 'mlp_ratio': 2, 'n_layers': 4, 'hidden_size': 512, 'use_pe': True}",5970213,0.471159,0.466182,0.72272,24,25,0.001
9,TSSequencerPlus,"{'d_model': 64, 'depth': 8, 'lstm_dropout': 0.3, 'dropout': 0.3, 'mlp_ratio': 4, 'n_layers': 4, 'hidden_size': 512, 'use_pe': True}",5970213,0.454743,0.445658,0.722663,25,25,0.001


TSSequencerPlus 
 {'d_model': 128, 'depth': 4, 'lstm_dropout': 0.2, 'dropout': 0.3, 'mlp_ratio': 4, 'n_layers': 4, 'hidden_size': 512, 'use_pe': True}


epoch,train_loss,valid_loss,f1_score,time
0,2.540128,2.169359,0.085934,00:00
1,2.174905,1.774207,0.057692,00:00
2,1.866275,1.594628,0.160659,00:01
3,1.626725,1.366895,0.171355,00:00
4,1.450552,1.334503,0.153336,00:00
5,1.337895,1.189772,0.30082,00:00
6,1.248308,1.191933,0.190904,00:01
7,1.177796,1.074227,0.28825,00:00
8,1.09836,1.075785,0.333118,00:00
9,1.036152,1.061854,0.30127,00:00


Better model found at epoch 0 with valid_loss value: 2.169358968734741.
Better model found at epoch 1 with valid_loss value: 1.7742068767547607.
Better model found at epoch 2 with valid_loss value: 1.5946276187896729.
Better model found at epoch 3 with valid_loss value: 1.366895079612732.
Better model found at epoch 4 with valid_loss value: 1.334502935409546.
Better model found at epoch 5 with valid_loss value: 1.1897718906402588.
Better model found at epoch 7 with valid_loss value: 1.0742267370224.
Better model found at epoch 9 with valid_loss value: 1.0618541240692139.
Better model found at epoch 10 with valid_loss value: 0.9565954208374023.
Better model found at epoch 11 with valid_loss value: 0.8707296252250671.
Better model found at epoch 12 with valid_loss value: 0.8686229586601257.
Better model found at epoch 13 with valid_loss value: 0.7999101877212524.
Better model found at epoch 14 with valid_loss value: 0.6937692761421204.
Better model found at epoch 15 with valid_loss value

### Finding optimal starting LR for the tuned model

In [None]:
# Learning-rate range test for the tuned RNNAttention configuration.
tuned_model = RNNAttention(
    dls.vars, dls.c, dls.len,
    rnn_layers=3, hidden_size=1024, encoder_layers=3, rnn_dropout=0.2,
)
learn = Learner(dls, tuned_model, metrics=[F1Score(average='macro')])
learn.lr_find()

### Tuning n_epochs and max_learning_rate

In [None]:
# Fixed model hyperparameters
k = {'rnn_layers': 3, 'hidden_size': 1024, 'encoder_layers': 3, 'rnn_dropout': 0.2}

# Define options for epochs and max_lr
epochs_options = [75]
base_lr = 1.65e-5
del_lr = 5e-7
max_lrs = [base_lr]

results = pd.DataFrame(columns=['arch', 'hyperparams', 'total params', 'train loss', 'valid loss', 'f1_score', 'time', 'epochs', 'max_lr'])

for epochs in epochs_options:
   for max_lr in max_lrs:
       model = create_model(RNNAttention, dls=dls, **k)  # Create model within the loop
       learn = Learner(dls, model, metrics=[F1Score(average='macro')])

       print(model.__class__.__name__)
       start = time.time()
       learn.fit_one_cycle(epochs, max_lr)
       elapsed = time.time() - start
       vals = learn.recorder.values[-1]

       results.loc[len(results)] = [
           model.__class__.__name__,
           k,
           count_parameters(model),
           vals[0],
           vals[1],
           vals[2],
           int(elapsed),
           epochs,
           max_lr
       ]
       results.sort_values(by='f1_score', ascending=False, kind='stable', ignore_index=True, inplace=True)
       clear_output()
       display(results)

## Conclusion:


1.   RNNPlus: n_layers = 4, hidden_size = 1024, epochs=75, max_lr=2.12e-4
2.   RNNAttention: rnn_layers = 3, hidden_size = 1024, encoder_layers = 3, rnn_dropout = 0.2, epochs=75, max_lr=1.65e-5

7. InceptionTime
7. TSSequencerPlus
9. TransformerRNNPlus
10. TSTPlus
11. PatchTST
12. ResNet
13. xresnet1d34_deeperplus




