# Overview

In [1]:
library(reticulate)

In [2]:
sklearn <- import("sklearn")
datasets <- import("sktime.datasets")
np <- import("numpy")

# Prepare data

### Load dataset

In [3]:
arrow_head <- datasets$load_basic_motions(split="train", return_X_y=TRUE)

In [62]:
x_train <- arrow_head[[1]]
y_train <- arrow_head[[2]]

### Truncate some of the time series in order to complicate the problem

- 33.(3)% of sequences left at 50% length
- 33.(3)% of sequences truncated to 40% length
- 33.(3)% of sequences truncated to 30% length

#### In order to ensure that all classes are treated equally stratyfikacja

In [63]:
time_series_count = length(x_train[[1]])            # number of time series in the dataset
time_series_length = length(x_train[1,][[1]][[1]])  # number of observations in each time serie
time_series_dims = length(x_train[1,])              # number of dimensions of each time serie

first_index = 1                                     # index of first element of time series with 100% length
second_index = ceiling(time_series_count / 3)       # index of first element of time series with 75% length
third_index = ceiling(2 * time_series_count / 3)    # index of first element of time series with 50% length

mask = 0 : (time_series_length - 1)
for (i in first_index : (second_index - 1))
    for (j in 1 : time_series_dims)
        x_train[i,][[j]][[1]] = x_train[i,][[j]][[1]][mask]

mask = sort(sample(0 : (time_series_length - 1), ceiling(85 * time_series_length / 100))) 
for (i in second_index : (third_index - 1))
    for (j in 1 : time_series_dims)
        x_train[i,][[j]][[1]] = x_train[i,][[j]][[1]][mask]

    
mask = sort(sample(0 : (time_series_length - 1), ceiling(75 * time_series_length / 100)))
for (i in third_index : time_series_count)
    for (j in 1 : time_series_dims) 
        x_train[i,][[j]][[1]] = x_train[i,][[j]][[1]][mask]

# ROCKET

### Standarize time series length by repeating observations

In [64]:
fix_length <- function(x, expected_length) {
    current_length = length(x)
    
    if (current_length != expected_length) {
        x_args = strtoi(stringr::str_trim(names(x)), 10)
        y_args = as.vector(x)
        return(approx(x_args, y_args, 0:99)$y)
    }
    
    return(x)
}

In [65]:
max_time_series_length = max(lengths(x_train[[1]]))
time_series_count = length(x_train[[1]])
time_series_dims = length(x_train[1,])

for (i in 1 : time_series_count)
    for (j in 1 : time_series_dims)
        x_train[i,][[j]][[1]] = fix_length(x_train[i,][[j]][[1]], max_time_series_length)

In [66]:
utils <- import("sktime.utils.validation.panel")
x_train = utils$check_X(x_train, coerce_to_numpy=TRUE)

### Generate kernels

In [67]:
set.seed(5)
num_kernels = 50 #20000 # default value

num_columns = dim(x_train)[2]
num_timepoints = dim(x_train)[3]

lengths = array(as.integer(sample(c(7, 9, 11), num_kernels, replace = TRUE)))

limit = pmin(num_columns, lengths)

num_channel_indices = as.integer(2 ** np$random$uniform(0, log2(limit + 1)))

channel_indices = as.integer(rep(0, sum(num_channel_indices)))

weights = as.double(rep(0, sum(lengths * num_channel_indices)))

biases = array(as.double(rep(0, num_kernels)))
dilations = array(as.integer(rep(0, num_kernels)))
paddings = array(as.integer(rep(0, num_kernels)))

a1 = 1  # for weights
a2 = 1  # for channel_indices

for (i in 1 : num_kernels) {
    temp_length = lengths[i]
    temp_num_channel_indices = num_channel_indices[i]

    temp_weights = as.double(np$random$normal(0, 1, temp_num_channel_indices * temp_length))

    b1 = a1 + (temp_num_channel_indices * temp_length) - 1 
    b2 = a2 + temp_num_channel_indices - 1

    a3 = 1 # for weights (per channel)
    for (j in 1 : temp_num_channel_indices) {
        b3 = a3 + temp_length - 1
        temp_weights[a3 : b3] = temp_weights[a3 : b3] - mean(temp_weights[a3 : b3])
        a3 = b3 + 1
    }
        

    weights[a1 : b1] = temp_weights

    channel_indices[a2 : b2] = sample(0 : (num_columns - 1), temp_num_channel_indices)

    biases[i] = np$random$uniform(-1, 1)

    dilation = 2 ** np$random$uniform(0, log2((num_timepoints - 1) / (temp_length - 1)))
    dilation = as.integer(dilation)
    dilations[i] = dilation

    if (sample(0 : 1, 1) == 1)
        paddings[i] = as.integer(((temp_length - 1) * dilation) / 2)
    else paddings[i] = 0

    a1 = b1 + 1
    a2 = b2 + 1
}

    

### Transform x_train with created kernels

#### Normalize x_train

In [69]:
for(i in 1 : dim(x_train)[1])
    for (o in 1 : dim(x_train)[2]) {
        x_train[i,o,] = (x_train[i,o,] - mean(x_train[i,o,])) / (sd(x_train[i,o,]) + 1e-8) 
    }

, , 1

            [,1]         [,2]         [,3]         [,4]        [,5]
 [1,]  0.5230336  0.187996585  1.422752321  1.075813929  0.28947274
 [2,]  0.7552900 -0.495762729 -0.150667496  0.059601568 -0.94217658
 [3,] -2.3485478  0.692798648 -0.573974576  0.071376138  0.81206352
 [4,]  2.0549001  0.292038634  0.728407931 -1.572493580  0.56097983
 [5,]  1.0652823  0.664713526 -2.298083838 -1.187173725  1.05253278
 [6,] -1.6834047 -1.801550801 -4.051429076  0.699888934  2.21276078
 [7,]  3.5670970 -0.427216838  4.004308575  0.502274121  0.76391684
 [8,] -1.6494192  0.197582302 -3.425640293 -1.242836698 -1.24368238
 [9,] -1.0604761  0.521861543  0.383312666  3.104551769  0.70731416
[10,] -0.8890454  1.216350036  0.333099126 -0.091606086  1.83856645
[11,] -0.3275425  0.476991995  0.872534889  0.050887291 -0.14492288
[12,] -0.2182984  0.393750125  0.504261566 -0.042135967  0.02598923
[13,] -0.1048245  0.353808635  0.817163220 -0.041891425  0.03213141
[14,] -0.3255999  0.655215441  0.68515679

#### Transform x_train

In [None]:
num_instances = dim(x_train)[1]
num_columns _ = dim(x_train)[2]
num_kernels = lenght(lengths)

print(num_instaces)