# Preprocessing 
# Create a structured HDF5 file

In [3]:
import os
import sys
sys.path.append('../src')

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from mealib.utils import check_directory

In [27]:
# Sampling rate of the recording, in samples per unit time; used later to turn
# sample counts into durations (duration = (end - start) / sampling_rate).
sampling_rate = 20000
# Session identifier; it is interpolated into the raw-data, event-list, sync
# and output file paths used by the cells below.
experiment_name = '2018-01-25'
# Alternative session (uncomment to process it instead):
# experiment_name = '2018-04-18'



# HDF5

In [28]:
# Open the merged raw recording read-only. The handle is kept open on purpose:
# the spike-splitting cell further down reads from it.
source_file = '../data/raw_data/{0}/{0}_merged.hdf5'.format(experiment_name)
timestamps_raw = h5py.File(source_file, 'r')

# Table of stimulus events for this experiment (one row per presentation).
event_list_file = '../data/sync/{0}/event_list/event_list_{0}.csv'.format(experiment_name)
event_list = pd.read_csv(event_list_file)

# 'tail' is the closing edge of each event's window: the start of the next
# event, or, for the last row (which has no successor), its own end_event.
next_starts = event_list['start_event'].values[1:].tolist()
last_end = event_list['end_event'].values[-1:].tolist()
event_list['tail'] = next_starts + last_end

# Replace NaN with '' so that empty text fields such as repetition_name are
# falsy in the `if repetition:` checks of later cells (NaN itself is truthy).
event_list = event_list.replace(np.nan, '')
event_list




Unnamed: 0,file,presentation_number,n_frames,start_event,end_event,event_duration,inter_event_duration,protocol_name,repetition_name,repeated_frame,#repeated_frame,tail
0,,0,0,0,6038432,301.92160,0.00000,scotopic,,,,6038431
1,000.txt,1,18613,6038431,12268075,311.48220,0.00000,photopic,,,,12437221
2,001.txt,2,2100,12437221,13140116,35.14475,8.45730,chirp,rep_00,,,13220111
3,002.txt,2,2100,13220111,13923006,35.14475,3.99975,chirp,rep_01,,,14003000
4,003.txt,2,2100,14003000,14705895,35.14475,3.99970,chirp,rep_02,,,14785890
5,004.txt,2,2100,14785890,15488784,35.14470,3.99975,chirp,rep_03,,,15568779
6,005.txt,2,2100,15568779,16271674,35.14475,3.99975,chirp,rep_04,,,16351668
7,006.txt,2,2100,16351668,17054563,35.14475,3.99970,chirp,rep_05,,,17134558
8,007.txt,2,2100,17134558,17837453,35.14475,3.99975,chirp,rep_06,,,17917447
9,008.txt,2,2100,17917447,18620342,35.14475,3.99970,chirp,rep_07,,,18700337


In [29]:
output_file = '../data/structured_data/structured_data_'+experiment_name+'_.hdf5'
# Start from a clean output file on every run. Done in pure Python rather than
# with a shell escape so the cell is portable, does not interpolate a variable
# into a shell command, and stays silent when the file does not exist yet.
if os.path.exists(output_file):
    os.remove(output_file)
# Append mode creates the file, since any previous copy was removed above.
ts = h5py.File(output_file, 'a')

In [30]:
# Split every unit's spike times into one dataset per event window.
# The event windows are identical for all units, so extract them once instead
# of rebuilding the array on every pass of the outer loop.
event_windows = event_list[['start_event','tail','n_frames','protocol_name','repetition_name']].values

for kunit in timestamps_raw['spiketimes']:
    # Column 0 of the unit's dataset is taken as its timestamps
    # (assumed to be in samples, like start_event/tail -- TODO confirm).
    ts_unit_raw = timestamps_raw['spiketimes/'+kunit][:,0]
    for start, end, nframes, name, repetition in event_windows:
        # Timestamps inside the window (start, end]; `&` is the element-wise
        # AND for boolean masks.
        filter_ts = (ts_unit_raw > start) & (ts_unit_raw <= end)
        # Re-reference the selected timestamps to the start of the event.
        ds_data = ts_unit_raw[filter_ts] - start
        # Rows with a repetition name get one dataset per repetition under the
        # unit; rows without one get a single dataset per unit.
        if repetition:
            ds_name = '/response/{}/{}/{}'.format(name, kunit, repetition)
        else:
            ds_name = '/response/{}/{}'.format(name, kunit)
        response_ds = ts.create_dataset(ds_name, data=ds_data, dtype=np.int32)
        response_ds.attrs['nframes'] = nframes
        # Drop what has just been stored so later windows scan a shorter array.
        ts_unit_raw = ts_unit_raw[~filter_ts]

In [31]:
# Annotate each protocol group under /response with its timing metadata.
# NOTE(review): rows that share a protocol_name write to the same group, so
# for repeated protocols the values of the last matching row are the ones kept.
attr_columns = ['presentation_number','start_event','end_event','protocol_name','repetition_name']
for presentn, start, end, name, _repetition in event_list[attr_columns].values:
    protocol_attrs = ts['/response/'+name].attrs
    protocol_attrs['start_time'] = start
    protocol_attrs['end_time'] = end
    # Sample count divided by the sampling rate.
    protocol_attrs['duration'] = (end-start)/sampling_rate
    protocol_attrs['sample_rate'] = sampling_rate
    protocol_attrs['presentation_number'] = presentn
    

## Set Stimuli

In [32]:
# Store the chirp stimulus trace, read from a plain-text file, under /stimulus.
stim_file = '../data/stim/fullfiledchirp.txt'
stim_name = 'chirp'
stimulus = np.loadtxt(stim_file)
# Build the dataset path from stim_name (previously assigned but unused) so the
# name is defined in one place; the resulting path is still '/stimulus/chirp'.
ts.create_dataset('/stimulus/' + stim_name, data=stimulus, dtype=np.float32)


<HDF5 dataset "chirp": shape (2101,), type "<f4">

In [33]:
# Copy the checkerboard stimulus into the structured file.
# NOTE(review): the date in this path is hard-coded and is not derived from
# experiment_name -- confirm whether other sessions use the same stimulus file.
wn_file = '../data/stim/checkerboard/stim_2018-01-25.hdf5'
with h5py.File(wn_file, 'r') as wn:
    swn = ts.create_dataset(
        '/stimulus/checkerboard',
        data=wn['checkerboard'],
        dtype=np.uint8,
        chunks=(35, 35, 1, 1),
        compression="gzip",
    )

# The data was copied into the output file when the dataset was created, so
# the attributes can be written after the source file has been closed.
swn.attrs['sample_rate'] = 60.0
swn.attrs['title'] = 'White noise'

## Set sync time


In [34]:
# Store the per-event sync times, re-referenced to the start of each event.
# The times file is selected by the row's position in event_list (000.txt for
# the first row, 001.txt for the second, ...).
sync_rows = event_list[['start_event','protocol_name','repetition_name']].values
for kidx, (start, name, repetition) in enumerate(sync_rows):
    sync_file = '../data/sync/'+experiment_name+'/event_list/times/{:03d}.txt'.format(kidx)
    # Shift so that the values are relative to this event's start.
    ds_sync = np.loadtxt(sync_file) - start

    # Rows with a repetition name are stored one level deeper, per repetition.
    if repetition:
        ds_name = '/sync/{}/{}'.format(name, repetition)
    else:
        ds_name = '/sync/{}'.format(name)

    # np.loadtxt yields floats by default; they are cast to int32 on write.
    sync_ds = ts.create_dataset(ds_name, data=ds_sync, dtype=np.int32)
    # Keep the original offset so absolute times can be recovered.
    sync_ds.attrs['start'] = start
    sync_ds.attrs['col_name'] = 'start_frame,end_frame'

  app.launch_new_instance()


In [36]:
# Write any buffered data to disk, then release the structured output file.
# NOTE(review): the read-only input handle `timestamps_raw` opened earlier is
# not closed in the cells shown here -- confirm whether it should be.
ts.flush()
ts.close()