In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
from scipy import pi
from scipy.io import loadmat
from scipy.fft import fft, ifft, fftfreq

In [None]:
import pandas as pd

In [None]:
from ssvepcca.definitions import (
    NUM_BLOCKS,
    NUM_TARGETS,
    NUM_SAMPLES,
    NUM_ELECTRODES,
    SAMPLE_FREQ,
    SAMPLE_T,
    TARGET_FREQUENCY,
    TARGET_PHASE,
    ELECTRODE_INDEX
)

In [None]:
from ssvepcca.utils import (_get_time_column,
                   # transform_mat_to_pandas,
                  get_harmonic_columns)

# Exploring Sample

## Parsing input dataset for S1 (subject 1)

In [None]:
s1 = loadmat("dataset_chines/S1.mat")

In [None]:
s1.keys()

In [None]:
s1['__header__']

In [None]:
s1['__version__']

In [None]:
s1['__globals__']

In [None]:
m1 = s1['data']

In [None]:
m1.shape

In [None]:
m1[:, :, 0, 0]

In [None]:
# `transform_mat_to_pandas` is commented out of the `ssvepcca.utils` import near the
# top of this notebook, so on a fresh kernel the name would be undefined here.
# Import it explicitly so this cell survives Restart Kernel -> Run All.
# NOTE(review): assumes the helper still lives in ssvepcca.utils — confirm.
from ssvepcca.utils import transform_mat_to_pandas

# Convert the raw MATLAB array into a DataFrame (later indexed by block/target/time_ms).
df1 = transform_mat_to_pandas(m1)

In [None]:
df1

In [None]:
df = df1.set_index(["block", "target", "time_ms"])

In [None]:
df.loc[0,0].loc[0:1000]["electrode_0"].plot()

In [None]:
df.loc[0,0]["electrode_20"].plot()

In [None]:
df.loc[0,6]["electrode_20"].plot()

## Playing with FFT, just to see what happens

In [None]:
import matplotlib.pyplot as plt

# Sampling parameters taken from the project constants.
N = NUM_SAMPLES  # number of sample points
T = SAMPLE_T  # sample spacing

# Signal of column "electrode_50" for block 0, target 6.
y = df.loc[0, 6]["electrode_50"].values

# Full complex spectrum, and the first N // 8 frequency bins for the x axis.
yf = fft(y)
xf = fftfreq(N, T)[: N // 8]

# Magnitude scaled by 2/N, restricted to the same N // 8 bins.
amplitude = 2.0 / N * np.abs(yf[: N // 8])

plt.plot(xf, amplitude)
plt.grid()
plt.show()

In [None]:
import matplotlib.pyplot as plt
# Window functions live in scipy.signal.windows; the `scipy.signal.blackman`
# alias was deprecated and is not available in recent SciPy releases.
from scipy.signal.windows import blackman

# Frequency axis (first N // 2 bins) plus the spectra of the raw signal and of
# the signal multiplied by a Blackman window of the same length.
xf = fftfreq(N, T)[:N//2]
yf = fft(y)
w = blackman(N)
ywf = fft(y*w)

# Compare both spectra on a log scale; index 0 (the DC bin) is skipped and only
# bins 1 .. N // 8 - 1 are drawn.
plt.semilogy(xf[1:N//8], 2.0/N * np.abs(yf[1:N//8]), '-b')
plt.semilogy(xf[1:N//8], 2.0/N * np.abs(ywf[1:N//8]), '-r')
plt.legend(['FFT', 'FFT w. window'])
plt.grid()
plt.show()

## Exploring Phase Dataset

In [None]:
freqphase = loadmat("dataset_chines/Freq_Phase.mat")

In [None]:
freqphase

In [None]:
freqphase["phases"].shape

In [None]:
freqphase["freqs"].reshape(-1, 8)

In [None]:
freqphase["phases"].round(4).reshape(-1, 8)

# POC with CCA (original algorithm)

**Plan:**

- get sin and cos components (6 components/columns)
- for each observation, we are going to estimate the correlation with all possible frequencies.

In [None]:
# Sanity check: sin(2*pi) should be zero up to floating-point error.
# Use NumPy's own constant; NumPy names re-exported from the top-level `scipy`
# namespace (such as `scipy.pi`) are deprecated.
np.sin(2 * np.pi)

In [None]:
freqs = freqphase["freqs"].reshape(-1)
phases =  freqphase["phases"].reshape(-1)

In [None]:
BLOCK = 0
TARGET = 0

In [None]:
obs_dataframe = df.loc[BLOCK, TARGET]

In [None]:
obs_dataframe

In [None]:
# Divide the 'time_index' column by 250.
# NOTE(review): presumably converts a sample index to seconds at a 250 Hz
# sampling rate (cf. SAMPLE_FREQ) — confirm before replacing the literal.
obs_dataframe['time_index'] / 250

In [None]:
m1.shape

In [None]:
trial = m1[:, :, TARGET, BLOCK].T

In [None]:
trial.shape

In [None]:
get_harmonic_columns(2).shape

In [None]:
harmonics_8 = get_harmonic_columns(8)

In [None]:
harmonics_8.shape

In [None]:
from sklearn.cross_decomposition import CCA


In [None]:
from definitions import ELECTRODE_INDEX
from parameters import electrode_list_fbcca

In [None]:
electrode_index = [ELECTRODE_INDEX[electrode_name] for electrode_name in electrode_list_fbcca]

In [None]:
from learners import CCASingleComponent, CCACorrelation

In [None]:
trial[:, electrode_index].shape

In [None]:
m = CCASingleComponent()
m.predict_proba(trial[:, electrode_index])

In [None]:
m = CCASingleComponent()
m.predict(trial[:, electrode_index])

In [None]:
model = CCACorrelation(n_components=2)

model.fit(trial[:, electrode_index], get_harmonic_columns(8))

In [None]:
trial

In [None]:
model.fit_correlation(trial[:, electrode_index], get_harmonic_columns(8))

In [None]:
get_harmonic_columns?

In [None]:
model2 = CCACorrelation(n_components=2)
model2.fit_correlation(trial[:, electrode_index], get_harmonic_columns(8))

In [None]:
import statsmodels.multivariate.cancorr

In [None]:
cca_stats = statsmodels.multivariate.cancorr.CanCorr(
    endog = get_harmonic_columns(8),
    exog = trial[:, electrode_index],
)
cca_stats.cancorr

## Design experiments

We need to benchmark different algorithms.
To do so, we follow this workflow:

- For each subject (35)
    - For each key (40)
        - If the model needs training
            - For each BLOCK (6)
                - Train a model on the other 5 BLOCKS (leave-one-out scheme)
                - Predict the label of this trial
                - Store prediction in a response object
        - If the model doesn't need training
            - For each BLOCK (6)
                - Predict label of this BLOCK
                - Store prediction in a response object

*We need to filter the electrodes somewhere. It can be learned for each subject. If learned, it should be done within the leave-one-out scheme.*

Output: matrix with dimension (40, 6) for each subject

Create a function to calculate:

- accuracy
- confusion matrix

In [None]:
m1[0, 0, 0, :]

In [None]:
np.identity(6, dtype=bool)[0]

In [None]:
# Complement of the first row of a 6x6 identity matrix: the leading entry
# becomes (negative) zero and every other entry becomes 1.0.
first_row = np.identity(6)[0]
masked = -(first_row - 1)

In [None]:
masked

In [None]:
m1[:, :, :, ~np.identity(6, dtype=bool)[5]].shape

In [None]:
m1_key = m1[:, :, 0, :]

In [None]:
m1_key.shape

In [None]:
np.empty([5])

In [None]:
m1_key.shape == (64, 1500, 6)

**Learner**

The learner should comprise everything needed to produce a prediction. It should be a full pipeline.
If, for example, a filtering technique such as a filter bank is required, it should be learned and applied by the same object. In other words, the learner is a single object that learns everything it needs.

It can be either an object or a curried function. This is a good opportunity to learn how to use scikit-learn pipelines and custom transformers, which are a very useful feature.

If the learner needs labels to learn, it should be trained on other observations of the same target.

In [None]:
from pipelines import *

In [None]:
test_fit_predict

In [None]:
M1 = m1.T

In [None]:
M1.shape

In [None]:
M1[0, 0, :, :]

In [None]:
m = CCASingleComponent()

In [None]:
r = test_fit_predict(m, M1)

In [None]:
r.shape

In [None]:
r[:, 0]

In [None]:
r.T.shape

In [None]:
r.T[0, 0]

In [None]:
r.T

In [None]:
# Count the entries of r.T (rows 0..39, columns 0..5) whose value equals
# their own row index, i.e. the number of matching predictions.
count = sum(
    1
    for row in range(40)
    for col in range(6)
    if r.T[row, col] == row
)

# Display the raw hit count together with the fraction of the 40 * 6 entries.
count, count / (40 * 6)

In [None]:
np.array(TARGET_FREQUENCY).reshape(5, -1)

In [None]:
np.array(range(40)).reshape(5,-1)

In [None]:
np.array(TARGET_PHASE).round(3).reshape(5, -1)