In [1]:
import pandas
import h5py

class espion_file:
    """Loader that maps one ESPION ERG CSV export to an HDF5 file.

    Constructing an instance reads ``<basedir><filepath>/<filename>.csv``,
    parses the parameter table, the data table and the traces of every step,
    and immediately writes ``<filename>.h5`` next to the CSV.

    Instance attributes set by ``__init__``:
        basedir, filepath, filename, species, genotype: the constructor inputs.
        savepath: folder holding the CSV (and receiving the .h5 file).
        fullpath: full path of the CSV file.
        metadata: dict produced by ``pull_metadata``.
        datatable: integer DataFrame produced by ``pull_datatable``.
        data: dict "StepNN" -> DataFrame of traces produced by ``pull_data``.
    """

    # Default root folder; kept for backward compatibility with existing calls.
    DEFAULT_BASEDIR = "/Users/angueyraaristjm/Documents/LiData/invivoERG/"

    def __init__(self, filepath, filename, species, genotype, basedir=None):
        """Load the CSV and write the HDF5 file.

        Args:
            filepath: folder of the recording, relative to ``basedir``.
            filename: CSV file name without the ".csv" extension.
            species: stored verbatim in the metadata under "Species".
            genotype: stored verbatim in the metadata under "genotype".
            basedir: root data folder; paths are built by plain string
                concatenation, so it must end with a path separator.
                Defaults to ``DEFAULT_BASEDIR``.
        """
        self.basedir = self.DEFAULT_BASEDIR if basedir is None else basedir
        self.filepath = filepath
        self.filename = filename
        self.savepath = self.basedir + self.filepath + "/"
        self.fullpath = self.savepath + self.filename + ".csv"
        self.species = species
        self.genotype = genotype
        self.metadata = self.pull_metadata()
        self.datatable = self.pull_datatable()
        self.data = self.pull_data()
        self.HDF5remap()

    def pull_metadata(self):
        """Parse the parameter table (first two CSV columns) into a dict.

        "Steps" and "Channels" are converted to int, "DOB" and
        "Date performed" to timestamps, and "Family Name" is stored under the
        key "ID". Species and genotype from the constructor are appended.

        Returns:
            dict mapping parameter name to parsed value.
        """
        csvparams = pandas.read_csv(self.fullpath, header=1, usecols=[0, 1], nrows=10, low_memory=False)
        csvparams = csvparams.dropna()
        metadata = dict()
        intfields = ["Steps", "Channels"]
        datefields = ["DOB", "Date performed"]
        # NOTE(review): the loop has always started at row label 1, so row 0 is
        # never read - confirm what that row holds in the ESPION export.
        for i in range(1, 10):
            if i not in csvparams.index:
                # dropna() removed this (blank) row; looking it up by label
                # would raise KeyError, so skip it instead.
                continue
            param = csvparams.Parameter[i]
            value = csvparams.Value[i]
            if param in intfields:
                metadata[param] = int(value)
            elif param in datefields:
                metadata[param] = pandas.to_datetime(value)
            elif param == "Family Name":
                metadata["ID"] = value
            else:
                metadata[param] = value
        metadata['Species'] = self.species
        metadata['genotype'] = self.genotype
        return metadata

    def pull_datatable(self):
        """Locate and parse the "Data Table" section of the CSV.

        Three layouts are recognised: the table sits to the right of the
        parameter table ("Data Table" is a column header of the first row), or
        it sits below it with its title in the first column of data row 12 or
        data row 13.

        Returns:
            DataFrame of ints with the NaN rows removed.

        Raises:
            ValueError: if none of the known layouts matches.
        """
        fullcsv = pandas.read_csv(self.fullpath, header=0, low_memory=False)
        if "Data Table" in fullcsv:
            # table is to the right of the parameter table
            read_kwargs = dict(usecols=[3, 4, 5, 8])
        else:
            # table is below the parameter table; its title row varies
            for titlerow in (12, 13):
                if titlerow < len(fullcsv) and fullcsv.iloc[titlerow, 0] == "Data Table":
                    read_kwargs = dict(usecols=[0, 1, 2, 5], skiprows=titlerow + 1)
                    break
            else:
                # previously this fell through to `return datatable` with the
                # name unbound (UnboundLocalError); fail with a clear message
                raise ValueError("Did not find datatable in " + str(self.fullpath))
        datatable = pandas.read_csv(self.fullpath, header=1, low_memory=False, **read_kwargs)
        return datatable.dropna().astype(int)

    def pull_data(self):
        """Extract the traces of every step listed in the data table.

        Steps are numbered 1..metadata['Steps']. A step is loaded when all its
        channel-1 rows in the data table agree on the start column; when a
        step was repeated (duplicated rows) the trial counts are summed. Steps
        with no channel-1 row, or with conflicting start columns, are skipped.

        Returns:
            dict mapping "StepNN" to the DataFrame built by ``espion_step``.

        Raises:
            ValueError: if a loadable step has no unique channel-2 start column.
        """
        fullcsv = pandas.read_csv(self.fullpath, header=0, low_memory=False)
        table = self.datatable
        data = dict()
        for step in range(1, self.metadata['Steps'] + 1):
            stepname = "Step" + str(step).zfill(2)
            in_step = table.Step == step
            ch1rows = table[in_step & (table.Chan == 1)]
            ch2rows = table[in_step & (table.Chan == 2)]
            ch1cols = ch1rows.Column.unique()
            if len(ch1cols) != 1:
                # nothing recorded, or repeats disagree on the start column
                continue
            ch2cols = ch2rows.Column.unique()
            if len(ch2cols) != 1:
                raise ValueError(stepname + ": expected one start column for channel 2, found " + str(len(ch2cols)))
            # explicit scalar extraction instead of int(Series) / int(ndarray)
            data[stepname] = self.espion_step(
                ch1start=int(ch1cols[0]),
                ch2start=int(ch2cols[0]) - 1,
                ntrials=int(ch1rows.Trials.sum()),
                csvtable=fullcsv,
            )
        return data

    @staticmethod
    def espion_step(ch1start, ch2start, ntrials, csvtable):
        """Extract one step from the full CSV table.

        Args:
            ch1start: 1-based column where the step starts.
            ch2start: accepted for interface compatibility; not used here.
            ntrials: number of trials per channel.
            csvtable: the CSV as read with ``header=0``.

        Returns:
            Numeric DataFrame with columns 't', 'L01'.., 'R01'.. where every
            value has been divided by 1000. The slice takes 1 + 2*ntrials
            columns, rows containing NaN are dropped, and the first remaining
            row is discarded before conversion to numbers.
        """
        colstart = ch1start - 1
        colend = colstart + 1 + (ntrials * 2)
        currcsv = csvtable.iloc[:, colstart:colend]
        currcsv = currcsv.dropna().reset_index(drop=True)
        currcsv = currcsv.drop(0).reset_index(drop=True)
        # Convert whole columns; assigning through .iloc[:, i] writes in place
        # on recent pandas and would leave the columns with object dtype.
        currcsv = currcsv.apply(pandas.to_numeric)
        colnames = ['t']
        colnames += ['L' + str(k).zfill(2) for k in range(1, ntrials + 1)]
        colnames += ['R' + str(k).zfill(2) for k in range(1, ntrials + 1)]
        # the slice can be narrower than requested when the CSV ends early
        currcsv.columns = colnames[:len(currcsv.columns)]
        return currcsv.divide(1000)

    def HDF5remap(self):
        """Write datatable, metadata and step traces to ``<savepath><filename>.h5``.

        Layout of the file (opened in mode 'w', so an existing file is replaced):
        - one root dataset per datatable column (spaces in names become '_');
        - one root attribute per metadata key: "Steps" and "Channels" stored
          as given, every other value stored as its ``str()`` form;
        - one group per step with datasets 't', 'L' and 'R', selected from the
          step's columns by regex match on those letters.
        """
        # variable-length byte-string type used for the text attributes
        dt = h5py.special_dtype(vlen=bytes)
        intfields = ["Steps", "Channels"]

        h5name = self.savepath + self.filename + ".h5"
        print('Saving h5 file...')
        with h5py.File(h5name, 'w') as hfile:
            for col in self.datatable.columns:
                hfile.create_dataset(col.replace(' ', '_'), data=self.datatable.get(col))
            for key in self.metadata:
                if key in intfields:
                    hfile.attrs.create(key.replace(' ', '_'), data=self.metadata[key])
                else:
                    hfile.attrs.create(key.replace(' ', '_'), data=str(self.metadata[key]), dtype=dt)
            for step in self.data:
                group = hfile.create_group(step)
                group.create_dataset('t', data=self.data[step].filter(regex='t'))
                group.create_dataset('L', data=self.data[step].filter(regex='L'))
                group.create_dataset('R', data=self.data[step].filter(regex='R'))
        print('Saved to: ' + h5name + '\n')
        
# if __name__ == "__main__":
#     a = espion_file("20160928/20160928_wl05_2_eml1het", "20160928_wl05_2_01_iSscotdark", "Mouse", "eml1+/-")


In [10]:
# Convert one exported ESPION csv to hdf5 (the .h5 file is written on construction)
a = espion_file(
    filepath="20170309/20170309_wl05_107_wt",
    filename="01_iSeriesScotopicStitch",
    species="Mouse",
    genotype="wt",
)

Saving h5 file...
Saved to: /Users/angueyraaristjm/Documents/LiData/invivoERG/20170309/20170309_wl05_107_wt/01_iSeriesScotopicStitch.h5



In [None]:
# genotypes
# wt
# eml1+/-
# eml1-/-


In [6]:
# Convert every exported ESPION csv of one folder to hdf5; csv files that
# already have a matching .h5 next to them are reported and left alone.
import os

path = {
    'datafolder': '20171025/20171025_Sq1040_MB001High',
    'species': 'Squirrel',
    'root': '/Users/angueyraaristjm/Documents/LiData/invivoERG/',
}
path['fullpath'] = path['root'] + path['datafolder'] + '/'
path['genotype'] = 'Wild'

for root, dirs, files in os.walk(path['fullpath'], topdown=True):
    # emptying dirs in place during a top-down walk keeps os.walk out of subfolders
    dirs.clear()
    for file in files:
        if not file.endswith(".csv"):
            continue
        fName = file[:-4]
        if (fName + ".h5") in files:
            print(fName + ' is already mapped')
            continue
        erg = espion_file(path['datafolder'], fName, path['species'], path['genotype'])

Saving h5 file...
Saved to: /Users/angueyraaristjm/Documents/LiData/invivoERG/20171025/20171025_Sq1040_MB001High/01_IseriesPre.h5

Saving h5 file...
Saved to: /Users/angueyraaristjm/Documents/LiData/invivoERG/20171025/20171025_Sq1040_MB001High/02_FlashPre.h5

Saving h5 file...
Saved to: /Users/angueyraaristjm/Documents/LiData/invivoERG/20171025/20171025_Sq1040_MB001High/02a_FlashesPreDimmer.h5

Saving h5 file...
Saved to: /Users/angueyraaristjm/Documents/LiData/invivoERG/20171025/20171025_Sq1040_MB001High/03_FlashPost0s.h5

Saving h5 file...
Saved to: /Users/angueyraaristjm/Documents/LiData/invivoERG/20171025/20171025_Sq1040_MB001High/04_FlashPost1min19s.h5

Saving h5 file...
Saved to: /Users/angueyraaristjm/Documents/LiData/invivoERG/20171025/20171025_Sq1040_MB001High/05_FlashPost2min53s.h5

Saving h5 file...
Saved to: /Users/angueyraaristjm/Documents/LiData/invivoERG/20171025/20171025_Sq1040_MB001High/06_FlashPost4min20s.h5

Saving h5 file...
Saved to: /Users/angueyraaristjm/Document

In [1]:
# Print the csv file names of one folder as `dirData = '...';` / `dirFile = '...';` lines
import os

path = {
    'datafolder': '20160819/20160819_Sq813',
    'species': 'Squirrel',
    'root': '/Users/angueyraaristjm/Documents/LiData/invivoERG/',
}
path['fullpath'] = path['root'] + path['datafolder'] + '/'

print('dirData = \'' + path['datafolder'] + '\';')
for root, dirs, files in os.walk(path['fullpath'], topdown=True):
    # emptying dirs in place during a top-down walk keeps os.walk out of subfolders
    dirs.clear()
    for file in files:
        if not file.endswith(".csv"):
            continue
        fName = file[:-4]
        print('dirFile = \'' + fName + '\';')

dirData = '20160819/20160819_Sq813';
dirFile = '20160819_Sq813_01_IsXeMax';
dirFile = '20160819_Sq813_02_Steps2sG_pre';
dirFile = '20160819_Sq813_03_sines';
dirFile = '20160819_Sq813_04_Flashes_pre';
dirFile = '20160819_Sq813_05_Flashes_posti_0min';
dirFile = '20160819_Sq813_06_Flashes_posti_3min';
dirFile = '20160819_Sq813_07_Flashes_posti_7min';
dirFile = '20160819_Sq813_08_IsXeMax_posti_10min';
dirFile = '20160819_Sq813_09_Steps2sG_posti';
dirFile = '20160819_Sq813_10_Flashes_preii';
dirFile = '20160819_Sq813_11_Flashes_postii_0min';
dirFile = '20160819_Sq813_12_Flashes_postii_2o5min';
dirFile = '20160819_Sq813_13_Flashes_postii_5min';
dirFile = '20160819_Sq813_14_IsXeMax_postii_8min';
dirFile = '20160819_Sq813_15_Steps2sG_postii';


In [28]:
# Show the built-in documentation of print()
help(print)

Help on built-in function print in module builtins:

print(...)
    print(value, ..., sep=' ', end='\n', file=sys.stdout, flush=False)
    
    Prints the values to a stream, or to sys.stdout by default.
    Optional keyword arguments:
    file:  a file-like object (stream); defaults to the current sys.stdout.
    sep:   string inserted between values, default a space.
    end:   string appended after the last value, default a newline.
    flush: whether to forcibly flush the stream.

