In [7]:
import os
from datetime import datetime
import pandas as pd
import numpy as np
from common import sessions, data_path, load_metadata
# Directory that the per-session metadata file paths are joined onto.
# Set the KFX_DATA_ROOT environment variable to point at a different mount;
# when it is unset the original hard-coded location is used.
root = os.environ.get('KFX_DATA_ROOT', '/Users/lukearend/phd/kfx/data/mnt/Ketamine')

In [8]:
from tqdm import tqdm

# Build metadata[region][signal]: a DataFrame with one row per session (indexed
# like `sessions`) holding the selected fields of that session's .meta file.
metadata = {}
# Stand-in .meta file that is loaded whenever a session's own file is not found.
missing = '/Users/lukearend/phd/kfx/ref/2022-08-08-04-05-00_M017_SAL_mPFC_HPC_0_0_0mpk_g0_t0.imec1.lf.meta'
# Fields kept from each .meta file; a KeyError is raised if any of them is absent.
meta_fields = [
    'fileCreateTime', 'fileSizeBytes', 'fileTimeSecs', 'firstSample',
    'imAiRangeMax', 'imAiRangeMin',
    'imMaxInt', 'imSampRate', 'imroTbl',
]
for region in ['hpc', 'pfc']:
    metadata[region] = {}
    for signal in ['lfp', 'ap']:
        rows = []
        # Size the progress bar from the session table rather than a fixed count.
        for _, sess in tqdm(sessions.iterrows(), total=len(sessions)):
            metafile = data_path(sess.path, signal, region, 'meta')
            path = os.path.join(root, metafile)
            try:
                md = pd.Series(load_metadata(path))
            except FileNotFoundError:
                # Keep the best-effort fallback, but make the substitution visible:
                # the resulting row describes the stand-in file, not this session.
                tqdm.write(f'{path} not found; substituting {missing}')
                md = pd.Series(load_metadata(missing))
            rows.append(md[meta_fields])
        metadata[region][signal] = pd.DataFrame(rows, index=sessions.index)

100%|██████████████████████████████████████████| 36/36 [00:00<00:00, 554.02it/s]
100%|██████████████████████████████████████████| 36/36 [00:00<00:00, 522.18it/s]
100%|██████████████████████████████████████████| 36/36 [00:00<00:00, 524.36it/s]
100%|██████████████████████████████████████████| 36/36 [00:00<00:00, 464.01it/s]


In [9]:
# Flatten the nested metadata[region][signal] tables into a single table with one
# typed row per (recording, signal, region), indexed by those three keys.
cols = ['recording', 'signal', 'region']
# Position of the gain value inside a space-separated imroTbl entry, per signal.
gain_field = {'ap': 3, 'lfp': 4}

records = []
# Iterate the recording ids actually present in the metadata tables instead of
# assuming there are exactly 36 of them labelled 0..35.
for recid in metadata['hpc']['lfp'].index:
    for region in ['hpc', 'pfc']:
        for signal in ['lfp', 'ap']:
            md = metadata[region][signal].loc[recid]
            # imroTbl is a run of "(...)" groups. Group 0 is skipped and the gain is
            # read from group 1 only.
            # NOTE(review): presumably group 0 is a table header and all channels
            # share the first channel's gain -- confirm against the imro format.
            first_entry = md.imroTbl.strip('()').split(')(')[1].split(' ')
            records.append({
                'recording': recid,
                'signal': signal,
                'region': region,
                'created': datetime.fromisoformat(md.fileCreateTime),
                'sizebytes': int(md.fileSizeBytes),
                'duration': float(md.fileTimeSecs),
                'firstsample': int(md.firstSample),
                'samplerate': int(md.imSampRate),
                'gain': int(first_entry[gain_field[signal]]),
                # Cast like the sibling fields so these columns are numeric in
                # memory rather than left as whatever load_metadata returned.
                'minvolts': float(md.imAiRangeMin),
                'maxvolts': float(md.imAiRangeMax),
                'bitdepth': int(md.imMaxInt),
                # Fixed channel-layout values recorded for every row.
                'chanstart': 0,
                'chanend': 383,
                'chansys': 384,
                'nchans': 385,
            })

# Sort by the key columns, then move them into a MultiIndex of the same names.
df = pd.DataFrame(records).sort_values(cols).set_index(cols)

In [10]:
# Write the indexed metadata table to the reference CSV (index levels included).
metadata_csv = '/Users/lukearend/phd/kfx/ref/metadata.csv'
df.to_csv(path_or_buf=metadata_csv)

In [11]:
# Read the saved CSV back as a check. With default options the index levels
# (recording, signal, region) come back as ordinary columns.
saved_csv = '/Users/lukearend/phd/kfx/ref/metadata.csv'
pd.read_csv(saved_csv)

Unnamed: 0,recording,signal,region,created,sizebytes,duration,firstsample,samplerate,gain,minvolts,maxvolts,bitdepth,chanstart,chanend,chansys,nchans
0,0,ap,hpc,2022-07-27 19:41:37,167613398260,7255.991267,8342038,30000,500,-0.6,0.6,512,0,383,384,385
1,0,ap,pfc,2022-07-27 19:41:37,167613730900,7256.005667,8342038,30000,500,-0.6,0.6,512,0,383,384,385
2,0,lfp,hpc,2022-07-27 19:41:37,13967783830,7255.991600,695169,2500,125,-0.6,0.6,512,0,383,384,385
3,0,lfp,pfc,2022-07-27 19:41:37,13967811550,7256.006000,695169,2500,125,-0.6,0.6,512,0,383,384,385
4,1,ap,hpc,2022-07-28 13:19:08,172464739370,7466.006033,2005727,30000,500,-0.6,0.6,512,0,383,384,385
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,34,lfp,pfc,2022-10-27 15:45:10,17451673470,9065.804400,1153931,2500,125,-0.6,0.6,512,0,383,384,385
140,35,ap,hpc,2022-10-28 15:40:06,209308274560,9060.964267,5999872,30000,500,-0.6,0.6,512,0,383,384,385
141,35,ap,pfc,2022-10-28 15:40:06,209308126720,9060.957867,5999872,30000,500,-0.6,0.6,512,0,383,384,385
142,35,lfp,hpc,2022-10-28 15:40:06,17442356470,9060.964400,499989,2500,125,-0.6,0.6,512,0,383,384,385
