In [None]:
from seqdata.core import *
from seqdata.model import *
from fastai2.basics import *
from fastai2.callback.progress import *
import h5py

In [None]:
#hide
%load_ext line_profiler

In [None]:
def DfHDFCreateWindows(win_sz,stp_sz, clm, fixed_start = False, fixed_end = False):
    '''Build a DataFrame transform that splits every HDF5 sequence into sliding windows.

    The returned function takes a DataFrame with a `path` column, opens each file
    with h5py, reads the sequence length from axis 1 of dataset `clm`, and repeats
    the row once per window, adding the columns `l_slc` (window start) and
    `r_slc` (window end, exclusive).

    Args:
        win_sz: number of samples per window.
        stp_sz: offset between the starts of two consecutive windows.
        clm: name of the dataset whose second axis gives the sequence length.
        fixed_start, fixed_end: currently only validated (they must not both be
            set); they do not influence the generated windows.

    Returns:
        A function `df -> DataFrame` with one row per window. Files that are
        shorter than `win_sz` contribute no rows.

    Raises:
        ValueError: when the returned function is called and both `fixed_start`
            and `fixed_end` are set.
    '''
    def _inner(df):
        if fixed_start and fixed_end:
            raise ValueError('fixed_start and fixed_end can not both be set')

        lst_df = [] #one dataframe of windows per source row
        # Iterate by position: `iloc` below needs positions, and the index labels
        # yielded by `iterrows` only match them for a default RangeIndex.
        for pos, path in enumerate(df['path']):
            with h5py.File(path,'r') as f:
                #TODO make clm optional
                f_len = f[clm].shape[1]

            n_win = ((f_len-win_sz)//stp_sz)+1
            if n_win <= 0: continue #sequence is shorter than one window

            l_slc = np.arange(n_win)*stp_sz
            # `assign` returns a new frame, so no chained-assignment warning has
            # to be silenced through the global pandas options.
            lst_df.append(df.iloc[[pos]*n_win].assign(l_slc=l_slc, r_slc=l_slc+win_sz))

        if not lst_df:
            # pd.concat raises on an empty list; return an empty frame with the same columns
            return df.iloc[:0].assign(l_slc=np.arange(0), r_slc=np.arange(0))
        return pd.concat(lst_df)

    return _inner

In [None]:
# NOTE(review): machine-specific absolute path; the notebook only runs where this directory exists.
hdf_files = get_hdf_files('/mnt/Data/Systemidentification/Orientation_Estimation/')
# Windows are 1000+1 samples long: the item transforms defined further down drop one
# sample from the input (SeqSlice(l_slc=1)) and one from the target (SeqSlice(r_slc=-1)),
# which leaves 1000 time steps per window. Consecutive windows start 10 samples apart.
src_df = df_source_items(hdf_files,[DfHDFCreateWindows(win_sz=1000+1,stp_sz=10,clm='acc')])
src_df.head()

Unnamed: 0,path,l_slc,r_slc
0,/mnt/Data/Systemidentification/Orientation_Estimation/experiment3_linear_fast_b0_results_myon.mat.hdf5,0,1001
0,/mnt/Data/Systemidentification/Orientation_Estimation/experiment3_linear_fast_b0_results_myon.mat.hdf5,10,1011
0,/mnt/Data/Systemidentification/Orientation_Estimation/experiment3_linear_fast_b0_results_myon.mat.hdf5,20,1021
0,/mnt/Data/Systemidentification/Orientation_Estimation/experiment3_linear_fast_b0_results_myon.mat.hdf5,30,1031
0,/mnt/Data/Systemidentification/Orientation_Estimation/experiment3_linear_fast_b0_results_myon.mat.hdf5,40,1041


In [None]:
# Split by file name: the predicate is True for every item whose file name contains 'experiment2'.
# NOTE(review): presumably FuncSplitter sends the True items to the validation set - confirm.
splitter = FuncSplitter(lambda o: 'experiment2' in Path(o).name)

In [None]:
from functools import lru_cache

def HDF2Sequence(c_names,cached=True):
    '''Build an item transform that loads the datasets `c_names` from an HDF5 file as one tensor.

    The datasets are sliced along axis 1 and stacked vertically (np.vstack) in the
    order given by `c_names`.

    Args:
        c_names: names of the datasets to read and stack.
        cached: if True, every (path, dataset) pair is read once in full and kept
            in an unbounded lru_cache; windows are then sliced from the cached
            tensor. If False, only the requested window is read on every call.

    Returns:
        A function accepting either a path-like item (whole sequence) or a
        pd.Series with a `path` entry and optional `dataset`, `l_slc` and `r_slc`
        entries.

    Note:
        With `cached=True` the result is a slice of the cached tensor, so it
        should not be modified in place.
    '''
    def _extract_sequence(hdf_path,dataset = None, l_slc = None, r_slc= None):
        with h5py.File(hdf_path,'r') as f:
            ds = f if dataset is None else f[dataset]
            l_array = [ds[n][:,l_slc:r_slc] for n in c_names]
            seq = np.vstack(l_array)
            return tensor(seq)

    _exseq = lru_cache(maxsize=None)(_extract_sequence) if cached else _extract_sequence

    def _extract_df_sequence(item):
        # Always call `_exseq` with a str path and an explicit dataset argument:
        # lru_cache keys on the exact call signature, so `(path,)`, `(path, None)`
        # and `(Path(path), None)` would otherwise be cached as separate entries
        # and the same file would be read and stored several times.
        if not isinstance(item,pd.Series):
            return _exseq(str(item),None)

        path = str(item.path)
        dataset = getattr(item,'dataset',None)
        l_slc = getattr(item,'l_slc',None)
        r_slc = getattr(item,'r_slc',None)

        if cached:
            # load (or reuse) the full sequence, then cut the window in memory
            return _exseq(path,dataset)[:,l_slc:r_slc]
        else:
            # read only the requested window from disk
            return _exseq(path,dataset,l_slc,r_slc)

    return _extract_df_sequence

In [None]:
class SeqSlice(Transform):
    '''Cut the range `l_slc:r_slc` out of the second axis of an array-like object.

    Handy for shifting input and output sequences against each other.
    A bound left at None keeps that side of the axis open.
    '''
    def __init__(self, l_slc=None,r_slc=None):
        self.l_slc = l_slc
        self.r_slc = r_slc

    def encodes(self, o):
        # first axis is kept in full, second axis is sliced
        return o[:, self.l_slc:self.r_slc]

In [None]:
# Two transform pipelines, one per element of the resulting item:
#   1. acc, gyr, mag and opt_quat stacked, with the first sample of the window dropped
#   2. opt_quat alone, with the last sample of the window dropped
# NOTE(review): opt_quat is part of the first pipeline as well as being the second one - confirm this is intended.
tfms=[  [HDF2Sequence(['acc','gyr','mag','opt_quat']),SeqSlice(l_slc=1),toTensorSequencesInput],
        [HDF2Sequence(['opt_quat']),SeqSlice(r_slc=-1),toTensorSequencesOutput]]
# the splits are computed from the path column of the window table
dsrc = DataSource(src_df,tfms=tfms,splits=splitter(src_df.path))

In [None]:
# Alternative batch pipeline with noise injection, currently disabled:
# db = dsrc.databunch(bs=128,after_batch=[SeqNoiseInjection(std=[1.1,0.01]),Normalize(axes=[0,1])])
# Batches of 128 windows; Cuda() and Normalize are applied after batching.
# NOTE(review): axes=[0,2] presumably means statistics over the batch and time axes,
# i.e. one value per channel - confirm against Normalize.
db = dsrc.databunch(bs=128,after_batch=[Cuda(),Normalize(axes=[0,2])])
# sanity check: shape of one input batch
db.one_batch()[0].shape

torch.Size([128, 13, 1000])

In [None]:
class SimpleGRU(nn.Module):
    '''GRU over the time axis followed by a 1x1 convolution onto the output channels.

    `forward` takes a (batch, input_size, time) tensor and returns a
    (batch, output_size, time) tensor.
    '''
    def __init__(self,input_size,output_size,num_layers=1,hidden_size=100):
        super().__init__()
        self.rnn = nn.GRU(input_size=input_size,hidden_size=hidden_size,
                          num_layers=num_layers,batch_first=True)
        self.final = nn.Conv1d(hidden_size,output_size,kernel_size=1)

    def forward(self, x):
        # the GRU is batch_first and wants (batch, time, channel)
        rnn_out,_ = self.rnn(x.transpose(1,2))
        # back to (batch, channel, time) for the convolution
        return self.final(rnn_out.transpose(1,2))

In [None]:
class DualGRU_StateEstimator(nn.Module):
    '''GRU followed by two 1x1 convolutions that emits `output_layer` groups of `output_size` channels.

    `forward` takes a (batch, input_size, time) tensor and returns a tensor of
    shape (output_layer, batch, output_size, time).
    '''
    def __init__(self,input_size,output_size=1,output_layer=1,hidden_size=100,hidden_layer=4):
        super().__init__()
        self.output_layer = output_layer
        self.output_size = output_size
        self.rnn = nn.GRU(input_size=input_size,hidden_size=hidden_size,
                          num_layers=hidden_layer,batch_first=True)
        self.multi = nn.Conv1d(hidden_size,hidden_size,kernel_size=1)
        self.final = nn.Conv1d(hidden_size,output_size*output_layer,kernel_size=1)

    def forward(self, x):
        # the GRU is batch_first and wants (batch, time, channel)
        rnn_out,_ = self.rnn(x.transpose(1,2))
        feats = self.multi(rnn_out.transpose(1,2)).relu()
        flat = self.final(feats)
        # split the channel axis into (output_size, output_layer) ...
        stacked = flat.view(flat.shape[0],self.output_size,self.output_layer,-1)
        # ... and move the layer axis to the front
        return stacked.permute(2,0,1,3)

class DualGRU_Predictor(nn.Module):
    '''GRU that can start from a given hidden state, followed by two 1x1 convolutions.

    `forward` returns a pair `(out, hidden)`:
      * `out` is the prediction, (batch, output_size, time); when there is a
        single output channel that axis is dropped, giving (batch, time).
      * `hidden` is the GRU output sequence as (batch, hidden_size, time). It is
        the per-step output of the GRU, not its final state tuple.

    The width of the intermediate convolution is fixed to 100 channels.
    '''
    def __init__(self,input_size,output_size=1,hidden_size=100,hidden_layer=1):
        super().__init__()
        self.rnn = nn.GRU(input_size=input_size,hidden_size=hidden_size,
                          num_layers=hidden_layer,batch_first=True)
        self.multi = nn.Conv1d(hidden_size,100,kernel_size=1)
        self.final = nn.Conv1d(100,output_size,kernel_size=1)

    def forward(self, x,init_state=None):
        # the GRU is batch_first and wants (batch, time, channel)
        rnn_out,_ = self.rnn(x.transpose(1,2),init_state)
        hidden = rnn_out.transpose(1,2)
        out = self.final(self.multi(hidden).relu())
        if out.shape[1] <= 1:
            # single output channel: drop the channel axis
            out = out[:,0]
        return out, hidden

class DualGRU(nn.Module):
    '''Two-stage recurrent model: a state estimator initialises a predictor GRU.

    The state estimator sees all `input_size` channels and produces one hidden
    state per predictor layer. The predictor sees every channel except the last
    four and outputs four channels (both counts are hard-coded).

    `forward(x, init_state=None)`:
      * `init_state is None`: the estimator output at time index `init_size-1`
        is used as the predictor's initial hidden state, and the predictor runs
        on the time steps from `init_size` onwards. The output therefore has
        `init_size` fewer time steps than `x`.
      * `init_state` given: the predictor runs on the whole of `x` starting from
        that state; the state estimator is not evaluated.
    '''
    def __init__(self,input_size,init_size,hidden_size=100,init_layer=3,pred_layer=1):
        super().__init__()
        self.init_size = init_size
        self.state_estimator = DualGRU_StateEstimator(input_size,output_size=hidden_size,output_layer=pred_layer,
                                              hidden_size=hidden_size,hidden_layer=init_layer)
        self.predictor = DualGRU_Predictor(input_size-4,output_size=4,
                                   hidden_size=hidden_size,hidden_layer=pred_layer)

    def forward(self, x,init_state = None):
        if init_state is None:
            # Only run the estimator when its result is needed; with a
            # caller-supplied state its output would be thrown away.
            est_states = self.state_estimator(x)
            # the GRU needs a contiguous initial state
            init_state = est_states[...,self.init_size-1].contiguous()
            pred_vals = x[:,:-4,self.init_size:]
        else:
            pred_vals = x[:,:-4]
        # the predictor also returns its hidden sequence, which is not needed here
        out,_ = self.predictor(pred_vals,init_state)
        return out
    

In [None]:
# Baseline learner: 3-layer SimpleGRU mapping the 13 input channels to 4 output channels, trained with MSE.
# NOTE(review): `lrn` is reassigned in a later cell, so this learner is never fitted in this notebook.
lrn = Learner(db,SimpleGRU(13,4,num_layers=3),loss_func=nn.MSELoss())

In [None]:
class DualQRNNTrainer(Callback):
    '''Drop the first `init_sz` time steps of the target before the loss is computed.

    DualGRU only predicts the time steps after its initialisation window, so the
    target has to be shortened by the same amount. `init_sz` must equal the
    `init_size` passed to the model.
    '''
    def __init__(self, init_sz=100):
        super().__init__()
        self.init_sz = init_sz

    def begin_batch(self):
        # Write through `self.learn`: attribute delegation from a callback to the
        # learner only works for reading. A plain `self.yb = ...` creates an
        # attribute on the callback and leaves the learner's target untouched,
        # so the loss would still see the full-length target.
        self.learn.yb = (self.learn.yb[0][...,self.init_sz:],)

In [None]:
# DualGRU(13,100,4) binds positionally to input_size=13, init_size=100, hidden_size=4.
# NOTE(review): a hidden size of 4 looks unintentionally small - confirm the 4 was not meant as the output size.
# DualQRNNTrainer is attached to cut the first 100 target steps, matching init_size.
lrn = Learner(db,DualGRU(13,100,4),loss_func=nn.MSELoss(),cbs=[DualQRNNTrainer()])

In [None]:
# NOTE(review): the recorded output of this cell is a RuntimeError in the MSE loss
# (time axis 900 for the prediction vs 1000 for the target), i.e. the target was not
# shortened by the 100 initialisation steps before the loss was computed.
lrn.fit(2)

epoch,train_loss,valid_loss,time
0,0.0,00:01,


  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: The size of tensor a (900) must match the size of tensor b (1000) at non-singleton dimension 2