# Load dependencies

#### Load rocket code

In [None]:
source("rocket.R")

#### Load libraries

In [None]:
library(reticulate)

In [None]:
data_io <- import("sktime.datasets._data_io")

# Define function to load the dataset

In [None]:
# Root directory that holds one sub-directory per dataset.
dataset_dir_path <- file.path(getwd(), "datasets")

# Load one split of a dataset stored in ARFF format.
#
# The file is looked up at <dataset_dir>/<dataset_name>/<dataset_name>_<type>.arff
# and read through sktime's `load_from_arff_to_dataframe` in "nested_univ" mode.
#
# Arguments:
#   dataset_name - name of the dataset (also the name of its sub-directory).
#   type         - split suffix used in the file name; the notebook passes
#                  "TRAIN" or "TEST".
#   dataset_dir  - root directory of the datasets; defaults to the global
#                  `dataset_dir_path`.
#
# Returns the value produced by `load_from_arff_to_dataframe`; the cells in
# this notebook read its first element as the series and its second element
# as the labels.
#
# Stops with an error when the expected file does not exist.
load_dataset <- function(dataset_name, type, dataset_dir = dataset_dir_path) {
    file_name <- paste0(dataset_name, "_", type, ".arff")
    dataset_path <- file.path(dataset_dir, dataset_name, file_name)

    # Fail early with a readable R error instead of a Python traceback.
    if (!file.exists(dataset_path)) {
        stop("Dataset file not found: ", dataset_path, call. = FALSE)
    }

    data_io$load_from_arff_to_dataframe(dataset_path, "nested_univ")
}

# Test solution

### Example 1 - [BasicMotions](https://timeseriesclassification.com/description.php?Dataset=BasicMotions) with some data removed 

### Data preparation

In [None]:
# Example 1 works on the BasicMotions dataset.
dataset_name <- "BasicMotions"

# Read the training split and unpack it: the first element holds the series,
# the second one the class labels.
data <- load_dataset(dataset_name, "TRAIN")
x_train <- data[[1]]
y_train <- data[[2]]

#### Truncate some of the time series in order to complicate the problem

- 33.(3)% of sequences truncated to 50% length
- 33.(3)% of sequences truncated to 40% length
- 33.(3)% of sequences truncated to 30% length

In [None]:
# Keep only a random subset of the observations of one (multi-dimensional)
# time series.
#
# Arguments:
#   time_serie  - a single series; every element `time_serie[[j]]` is one
#                 dimension whose observations sit in `[[1]]`.
#   coefficient - percentage of the series length that should remain.
#
# Returns `time_serie` with every dimension subset by the same mask.
#
# The length is read from the first dimension only, and the same mask is
# applied to all dimensions.
#
# NOTE(review): the mask contains 0 and `length - 1`, which reads like 0-based
# positions. If the observations are plain R vectors, index 0 selects nothing
# and the last position is never kept - confirm the element type that
# reticulate hands over.
truncate_time_serie <- function(time_serie, coefficient) {
    n_dims <- length(time_serie)
    n_points <- length(time_serie[[1]][[1]])

    # The two bounds are added by hand below, so two fewer positions are drawn.
    n_random <- ceiling(coefficient * n_points / 100) - 2

    interior <- sample(1 : (n_points - 2), n_random)
    mask <- sort(c(0, n_points - 1, interior))

    for (dim_index in seq_len(n_dims)) {
        time_serie[[dim_index]][[1]] <- time_serie[[dim_index]][[1]][mask]
    }

    time_serie
}

#### Stratify the rows chosen for truncation to ensure that every class has the same number of members truncated to each length

In [None]:
# Save the original time series length before any row is truncated.
# NOTE(review): this value is not referenced again in the visible cells -
# presumably it is consumed by code in rocket.R; confirm before removing.
original_length <- length(x_train[1, ][[1]][[1]])

# Within every class, the first third of the rows is truncated to 50%, the
# second third to 40% and the remaining rows to 30% of their length.
for (label in unique(y_train)) {
    class_rows <- which(y_train == label)
    n_class_rows <- length(class_rows)

    for (position in seq_along(class_rows)) {
        row_index <- class_rows[position]

        # Row 1 is deliberately left untouched.
        # NOTE(review): presumably it serves as the full-length reference for
        # the later length standardization - confirm against rocket.R.
        if (row_index == 1) next

        coefficient <- if (position <= n_class_rows / 3) {
            50
        } else if (position <= 2 * n_class_rows / 3) {
            40
        } else {
            30
        }

        x_train[row_index, ] <- truncate_time_serie(x_train[row_index, ], coefficient)
    }
}

### Solution

#### First, standardize all time series to have the same length

In [None]:
# Bring the truncated series back to a common length
# (fill_missing_data is not defined in this file - presumably it comes from rocket.R).
x_train <- fill_missing_data(x_train)

In [None]:
# Generate the kernels from the training data; the same kernels are reused for the test split.
kernels <- generate_kernels(x_train)

In [None]:
# Replace the training series by the result of applying the kernels to them.
x_train <- apply_kernels(x_train, kernels)

In [None]:
# Fit a cross-validated ridge classifier on the transformed training data,
# trying ten alphas produced by pracma::logspace(-3, 3, 10).
# NOTE(review): scikit-learn deprecated the `normalize` argument in 1.0 and
# removed it in 1.2 - confirm the installed version still accepts it.
linear_model <- import("sklearn.linear_model")

classifier <- linear_model$RidgeClassifierCV(
    alphas = pracma::logspace(-3, 3, 10),
    normalize = TRUE
)
classifier$fit(x_train, y_train)

In [None]:
# Read the test split of the current dataset and unpack series and labels.
data <- load_dataset(dataset_name, "TEST")
x_test <- data[[1]]
y_test <- data[[2]]

# Transform the test series with the kernels generated from the training data.
x_test <- apply_kernels(x_test, kernels)

In [None]:
# Score the fitted classifier on the transformed test split (reported below as accuracy).
classifier$score(x_test, y_test)

#### Result: approx. 92% accuracy

### Example 2 - [Cricket](https://timeseriesclassification.com/description.php?Dataset=Cricket)

In [None]:
# Example 2 works on the Cricket dataset.
dataset_name <- "Cricket"

# Read the training split and unpack it: the first element holds the series,
# the second one the class labels.
data <- load_dataset(dataset_name, "TRAIN")
x_train <- data[[1]]
y_train <- data[[2]]

In [None]:
# Generate the kernels from the training data; the same kernels are reused for the test split.
kernels <- generate_kernels(x_train)

In [None]:
# Replace the training series by the result of applying the kernels to them.
x_train <- apply_kernels(x_train, kernels)

In [None]:
# Fit a cross-validated ridge classifier on the transformed training data,
# trying ten alphas produced by pracma::logspace(-3, 3, 10).
# NOTE(review): scikit-learn deprecated the `normalize` argument in 1.0 and
# removed it in 1.2 - confirm the installed version still accepts it.
linear_model <- import("sklearn.linear_model")

classifier <- linear_model$RidgeClassifierCV(
    alphas = pracma::logspace(-3, 3, 10),
    normalize = TRUE
)
classifier$fit(x_train, y_train)

In [None]:
# Read the test split of the current dataset and unpack series and labels.
data <- load_dataset(dataset_name, "TEST")
x_test <- data[[1]]
y_test <- data[[2]]

# Transform the test series with the kernels generated from the training data.
x_test <- apply_kernels(x_test, kernels)

In [None]:
# Score the fitted classifier on the transformed test split (reported below as accuracy).
classifier$score(x_test, y_test)

#### Result: approx. 95% accuracy

### Example 3 - [RacketSports](https://timeseriesclassification.com/description.php?Dataset=RacketSports)

In [None]:
# Example 3 works on the RacketSports dataset.
dataset_name <- "RacketSports"

# Read the training split and unpack it: the first element holds the series,
# the second one the class labels.
data <- load_dataset(dataset_name, "TRAIN")
x_train <- data[[1]]
y_train <- data[[2]]

In [None]:
# Generate the kernels from the training data; the same kernels are reused for the test split.
kernels <- generate_kernels(x_train)

In [None]:
# Replace the training series by the result of applying the kernels to them.
x_train <- apply_kernels(x_train, kernels)

In [None]:
# Fit a cross-validated ridge classifier on the transformed training data,
# trying ten alphas produced by pracma::logspace(-3, 3, 10).
# NOTE(review): scikit-learn deprecated the `normalize` argument in 1.0 and
# removed it in 1.2 - confirm the installed version still accepts it.
linear_model <- import("sklearn.linear_model")

classifier <- linear_model$RidgeClassifierCV(
    alphas = pracma::logspace(-3, 3, 10),
    normalize = TRUE
)
classifier$fit(x_train, y_train)

In [None]:
# Read the test split of the current dataset and unpack series and labels.
data <- load_dataset(dataset_name, "TEST")
x_test <- data[[1]]
y_test <- data[[2]]

# Transform the test series with the kernels generated from the training data.
x_test <- apply_kernels(x_test, kernels)

In [None]:
# Score the fitted classifier on the transformed test split (reported below as accuracy).
classifier$score(x_test, y_test)

#### Result: approx. 80% accuracy