In [1]:
import scipy.io as sio
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

In [2]:
def shift_stim(stims : pd.Series, shift : int):
    """Return a copy of ``stims`` with every non-zero entry increased by ``shift``.

    Entries equal to 0 are left as they are. The caller's object is never
    modified: the shift is applied to a copy, so the same input can be reused
    or the call repeated without the offsets accumulating.

    Args:
        stims: stimulus labels. Annotated as a Series; any object supporting
            ``.copy()`` and boolean-mask assignment (e.g. a DataFrame) is
            handled the same way.
        shift: amount added to each non-zero label.

    Returns:
        A new object of the same type as ``stims`` holding the shifted labels.
    """
    shifted = stims.copy()
    # Boolean-mask update: only the non-zero labels receive the offset.
    shifted[shifted != 0] += shift
    return shifted

In [3]:
# Load every recording file and stack the results into a single DataFrame:
# the columns of mat['emg'] plus a 'stimulus' and a 'repetition' label column.
data_paths = ['../signatures_data/s1/S1_E2_A1.mat', '../signatures_data/s1/S1_E3_A1.mat']
frames = []
tot_stim = 0  # count of distinct non-zero stimulus labels found in the files loaded so far
for data_path in data_paths:
    mat = sio.loadmat(data_path)
    restimulus = mat['restimulus'].ravel()
    # Count this file's labels before shifting; the -1 discounts label 0,
    # which shift_stim never shifts (assumes 0 occurs in every file - TODO confirm).
    n_stimuli = len(np.unique(restimulus)) - 1

    frame = pd.DataFrame(mat['emg'])
    # Offset the non-zero labels by the number of labels seen in earlier files.
    # For the first file the offset is 0, so its labels are unchanged.
    frame['stimulus'] = shift_stim(pd.Series(restimulus), tot_stim)
    frame['repetition'] = mat['repetition']
    frames.append(frame)

    tot_stim += n_stimuli

# Concatenate once at the end instead of growing the frame inside the loop.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame([])

df.head()
    

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,stimulus,repetition
0,-20.0,7.0,-11.0,2.0,-6.0,1.0,-1.0,3.0,-3.0,-6.0,-1.0,0.0,2.0,-4.0,-14.0,-1.0,0,0
1,-8.0,-20.0,-27.0,-6.0,-7.0,6.0,5.0,-3.0,-7.0,-3.0,-6.0,-5.0,0.0,-7.0,2.0,-8.0,0,0
2,8.0,18.0,30.0,6.0,8.0,37.0,-45.0,-12.0,-5.0,-10.0,-2.0,1.0,7.0,6.0,-9.0,0.0,0,0
3,-4.0,-13.0,-4.0,-6.0,-5.0,-40.0,21.0,-9.0,17.0,14.0,-24.0,0.0,0.0,-16.0,-4.0,10.0,0,0
4,-5.0,3.0,5.0,-4.0,1.0,-8.0,-22.0,-9.0,-3.0,16.0,38.0,-4.0,-17.0,-5.0,-15.0,-9.0,0,0


In [4]:
# Sanity check: list the distinct stimulus labels present in the combined frame (sorted).
np.unique(df['stimulus'])

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40], dtype=int8)

In [5]:
from normalizers import StandardScaler

In [6]:
# Two disjoint lists of integers used to split the data.
# NOTE(review): presumably these are values of the `repetition` column - confirm before use.
train_set = [1, 3, 4, 6]
validation_set = [2, 5]

In [16]:
# Inspect the rows labelled stimulus 1, repetition 6.
df[(df.stimulus==1) & (df.repetition==6)]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,stimulus,repetition
10922,1.0,0.0,4.0,1.0,0.0,-1.0,-1.0,-3.0,-2.0,-2.0,0.0,-1.0,-1.0,-1.0,-3.0,-1.0,1,6
10923,0.0,0.0,-1.0,0.0,-1.0,-3.0,-4.0,-24.0,-1.0,-2.0,-2.0,-1.0,0.0,1.0,-1.0,-2.0,1,6
10924,7.0,-16.0,-3.0,-3.0,-1.0,-2.0,3.0,30.0,-1.0,-1.0,-2.0,-1.0,-1.0,-1.0,1.0,0.0,1,6
10925,-9.0,10.0,-1.0,-2.0,-3.0,-2.0,-3.0,-20.0,2.0,-1.0,-3.0,-1.0,0.0,2.0,3.0,1.0,1,6
10926,-2.0,6.0,3.0,2.0,2.0,-1.0,0.0,-5.0,1.0,1.0,-1.0,-1.0,-1.0,-2.0,0.0,0.0,1,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11880,1.0,-12.0,3.0,-2.0,0.0,0.0,1.0,1.0,10.0,0.0,-1.0,0.0,-1.0,1.0,-3.0,-1.0,1,6
11881,16.0,48.0,4.0,3.0,-1.0,2.0,-2.0,-3.0,-5.0,-2.0,3.0,-1.0,4.0,1.0,0.0,-1.0,1,6
11882,-4.0,-9.0,-4.0,-1.0,-1.0,-2.0,-2.0,-1.0,3.0,-4.0,-3.0,-1.0,0.0,1.0,-2.0,-1.0,1,6
11883,14.0,-7.0,-1.0,-3.0,-3.0,-1.0,0.0,2.0,0.0,-3.0,-6.0,0.0,-1.0,-2.0,-1.0,1.0,1,6


In [55]:
class Path():
    """Base container for a path stored as a DataFrame with a fresh 0..n-1 index."""

    def __init__(self, vals):
        # Drop the incoming index so positions and labels coincide
        # (callers typically pass a filtered slice of a larger frame).
        self.vals = vals.reset_index(drop=True)

    def compute_signature(self, depth):
        """Placeholder: not implemented yet, returns None."""
        pass


class DB5Path(Path):
    """Path for one (exercise, repetition) pair, with windowing helpers.

    Attributes:
        vals: the rows of the path, re-indexed from 0.
        exercise: label written into the 'stimulus' column of padding rows.
        repetition: label written into the 'repetition' column of padding rows.
        dims: number of signal columns generated for padding rows.
    """

    def __init__(self, vals, exercise, repetition, dims=None):
        super().__init__(vals)
        self.exercise = exercise
        self.repetition = repetition
        if dims is None:
            # Every column except two is treated as signal; the two excluded
            # ones are expected to be 'stimulus' and 'repetition', matching
            # the columns produced by naive_padding.
            self.dims = vals.shape[1] - 2
        else:
            self.dims = dims

    def naive_padding(self, length) -> pd.DataFrame:
        """Build ``length`` rows of filler data shaped like the path's rows.

        The signal columns (labelled 0..dims-1) are drawn from a standard
        normal distribution, so the result differs between calls unless the
        global NumPy random state is seeded. The 'stimulus' and 'repetition'
        columns are filled with this path's labels as floats.
        """
        tempdf = pd.DataFrame(np.random.standard_normal(size=(length, self.dims)))
        tempdf['stimulus'] = pd.Series(np.ones(length) * self.exercise)
        tempdf['repetition'] = pd.Series(np.ones(length) * self.repetition)
        return tempdf

    def get_windows(self, window_size, overlap, use_padding=False) -> list:
        """Cut the path into windows of ``window_size`` rows.

        Consecutive windows start ``window_size - overlap`` rows apart.

        Args:
            window_size: number of rows in every returned window; must be positive.
            overlap: number of rows shared by consecutive windows; must be
                smaller than ``window_size`` so the start position advances.
            use_padding: when True, a trailing window with fewer than
                ``window_size`` rows is completed with ``naive_padding`` rows;
                when False, such windows are dropped.

        Returns:
            A list of DataFrames. Full windows are slices of ``self.vals`` and
            keep its index labels; padded windows are re-indexed from 0.

        Raises:
            ValueError: if ``window_size`` is not positive or ``overlap`` is
                not smaller than ``window_size``.
        """
        if window_size <= 0:
            raise ValueError("window_size must be positive")
        step = window_size - overlap
        if step <= 0:
            # A non-positive step would never advance past the first window.
            raise ValueError("overlap must be smaller than window_size")

        n = len(self.vals)
        ans = []
        for begin in range(0, n, step):
            remaining = n - begin
            if remaining >= window_size:
                # Includes the window that ends exactly on the last row.
                ans.append(self.vals[begin:begin + window_size])
            elif use_padding:
                # Short tail: top it up to exactly window_size rows.
                filler = self.naive_padding(window_size - remaining)
                ans.append(pd.concat([self.vals[begin:], filler], ignore_index=True))
            # Otherwise the short tail is dropped.
        return ans


In [56]:
# Build a DB5Path from the rows of a single (stimulus, repetition) pair.
stimulus = 1
repetition = 6
dbpth = DB5Path(df[(df.stimulus == stimulus) & (df.repetition == repetition)], stimulus, repetition)

In [57]:
# Quick checks on the path object: shape of the stored frame, a 5-row sample
# of generated padding, and the value of the `dims` attribute.
print(dbpth.vals.shape)
print(dbpth.naive_padding(5))
dbpth.dims

(963, 18)
          0         1         2         3         4         5         6  \
0  1.177818 -0.202362  0.657317  0.830659  0.249517  0.369939  1.263063   
1  1.239502 -0.393078  1.133874 -1.034681  0.977566  0.379843  0.805623   
2  0.498850 -1.267622 -0.195033 -1.620924  2.538698  1.621850 -1.471775   
3  1.223639  1.449185  1.790863 -0.372711 -0.563198 -0.920076  1.254354   
4 -0.046064  0.424841 -0.590879  0.864259 -0.277455  0.466648  0.850647   

          7         8         9        10        11        12        13  \
0 -0.120531  0.358684 -0.225444  0.961903  0.284919 -0.490263 -0.543437   
1 -1.911507 -1.149586 -0.468929 -0.215170  1.100312 -0.053008 -0.193147   
2  0.144497 -0.317846  0.857907 -0.408160 -0.269760 -0.640543 -1.918017   
3 -1.168006 -0.541364  0.289163 -1.991617 -2.018351 -0.014034 -0.520637   
4  0.466446 -0.556266 -2.117969  0.868655 -1.488336 -1.722726 -0.850623   

         14        15  stimulus  repetition  
0  0.020820 -2.141624       1.0         6.

16

In [58]:
# The stored frame is re-indexed from 0 by Path.__init__.
dbpth.vals.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,stimulus,repetition
0,1.0,0.0,4.0,1.0,0.0,-1.0,-1.0,-3.0,-2.0,-2.0,0.0,-1.0,-1.0,-1.0,-3.0,-1.0,1,6
1,0.0,0.0,-1.0,0.0,-1.0,-3.0,-4.0,-24.0,-1.0,-2.0,-2.0,-1.0,0.0,1.0,-1.0,-2.0,1,6
2,7.0,-16.0,-3.0,-3.0,-1.0,-2.0,3.0,30.0,-1.0,-1.0,-2.0,-1.0,-1.0,-1.0,1.0,0.0,1,6
3,-9.0,10.0,-1.0,-2.0,-3.0,-2.0,-3.0,-20.0,2.0,-1.0,-3.0,-1.0,0.0,2.0,3.0,1.0,1,6
4,-2.0,6.0,3.0,2.0,2.0,-1.0,0.0,-5.0,1.0,1.0,-1.0,-1.0,-1.0,-2.0,0.0,0.0,1,6


In [59]:
# Row slicing as used by get_windows: rows 0, 1 and 2.
dbpth.vals[0:3]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,stimulus,repetition
0,1.0,0.0,4.0,1.0,0.0,-1.0,-1.0,-3.0,-2.0,-2.0,0.0,-1.0,-1.0,-1.0,-3.0,-1.0,1,6
1,0.0,0.0,-1.0,0.0,-1.0,-3.0,-4.0,-24.0,-1.0,-2.0,-2.0,-1.0,0.0,1.0,-1.0,-2.0,1,6
2,7.0,-16.0,-3.0,-3.0,-1.0,-2.0,3.0,30.0,-1.0,-1.0,-2.0,-1.0,-1.0,-1.0,1.0,0.0,1,6


In [60]:
# Padding must be enabled here: the path has 963 rows, so with a 200-row
# window and a 100-row step the last two windows are short. Without padding
# they are dropped and only 8 windows are returned, which makes the
# `windows[9]` lookup in the following cell fail.
windows = dbpth.get_windows(window_size=200, overlap=100, use_padding=True)
len(windows)

10

In [61]:
# Show the last rows of the window at index 9.
windows[9].tail()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,stimulus,repetition
195,-0.499553,-0.517547,0.813111,1.184296,1.342754,0.212247,0.27526,0.354384,0.012836,-1.038091,-0.884455,0.786654,-0.975253,0.662505,0.161751,1.767869,1.0,6.0
196,0.306135,-0.34951,1.24122,1.071233,0.967366,0.231045,-1.23729,0.936516,-0.002978,1.379937,-0.332829,1.376563,0.658561,-0.411502,0.152009,1.350458,1.0,6.0
197,-0.86754,0.130078,-0.399698,0.434565,0.869342,-1.336722,1.240327,-0.256245,-1.03079,0.04968,0.011189,-1.406019,2.23061,1.089296,2.285845,1.679248,1.0,6.0
198,-0.887737,0.583284,-1.019931,0.421919,1.403481,-1.041725,1.478084,1.075591,-1.352997,1.143191,-0.742052,-0.367478,-0.324183,0.97807,2.110739,0.355531,1.0,6.0
199,0.637745,-0.117522,1.039866,0.631023,0.571277,0.514338,0.1155,-0.448425,-0.046305,-0.113858,-0.293361,0.617487,-0.931303,1.03776,-0.831383,0.110805,1.0,6.0
