In [1]:
import os
import io

In [16]:
def read_rec_header(path, max_lines=1000, encoding='ISO-8859-1', stop_marker=None):
    '''
    Read the leading text lines of a file into a list.

    Lines are collected (trailing newline included) until one of these
    happens: `max_lines` lines have been collected, a line containing
    `stop_marker` has been collected (that line is kept), or the file ends.
    The limits are checked after a line is appended, so a non-empty file
    always yields at least one line.

    path        : file to read
    max_lines   : upper bound on the number of lines returned
    encoding    : text encoding used to decode the file
    stop_marker : substring that ends the header, or None to disable

    Returns the list of lines, or None if an error occurred while reading
    (the error is printed, not raised).
    '''
    header = []
    cnt = 0  # defined before the try so the error message can always use it
    with io.open(path, 'r', encoding=encoding) as fh:
        try:
            for line in fh:
                header.append(line)
                cnt += 1
                if cnt >= max_lines or (stop_marker and stop_marker in line):
                    break
        except Exception as e:  # best effort: report and signal failure with None
            print('Error while reading line {}:'.format(cnt))
            print(e)
            # the `with` block closes fh; there is no other handle to close here
            return None
    return header

In [4]:
def _copy_header_lines(rec_path=None, copy_path=None, encoding='ISO-8859-1',
                       stop_marker=None, max_lines=1000):
    '''
    Copy the leading text lines of rec_path into copy_path.

    Copying stops after `max_lines` lines, after a line containing
    `stop_marker` has been written (that line is included), or at end of
    file. The limits are checked after a line is written, so at least one
    line is copied from a non-empty input.

    Returns the number of lines copied, or None if an error occurred while
    copying (the error is printed, not raised).
    Raises ValueError when either path is missing.
    '''
    if rec_path is None:
        raise ValueError('input path is required.')
    if copy_path is None:
        raise ValueError('output path is required.')
    cnt = 0
    # Open the input first: if it cannot be opened, no empty output file is
    # created. Both handles are closed by the `with` on every exit path.
    with io.open(rec_path, 'r', encoding=encoding) as fh, \
            io.open(copy_path, 'w', encoding=encoding) as fcopy:
        try:
            for line in fh:
                fcopy.write(line)
                cnt += 1
                if cnt >= max_lines or (stop_marker and stop_marker in line):
                    break
        except Exception as e:  # best effort: report and signal failure with None
            print('Error while reading line {}:'.format(cnt))
            print(e)
            return None
    return cnt


def copy_rec_header(rec_path, max_lines=1000, copy_dir='tmp_out'):
    '''
    Copy the text header of a .rec file into `copy_dir`.

    The output file is named after the input with 'Header' appended
    (foo.rec -> foo.recHeader). Lines are copied up to and including the
    first one containing '</Configuration>', or until `max_lines` lines
    have been copied. Progress is reported with print().

    Raises ValueError if the file name does not end in '.rec'.
    '''
    # check input path
    rec_filename = os.path.basename(rec_path)
    if not rec_filename.endswith('.rec'):
        raise ValueError('unknown file extension')
    print('Input file: {}'.format(rec_path))

    # set output path
    os.makedirs(copy_dir, exist_ok=True)
    copy_path = os.path.join(copy_dir, rec_filename + 'Header')  # .recHeader
    print('Output file: {}'.format(copy_path))

    # copy line by line
    cnt = _copy_header_lines(rec_path=rec_path,
                             copy_path=copy_path,
                             encoding='ISO-8859-1',
                             stop_marker='</Configuration>',
                             max_lines=max_lines)
    if cnt is None:
        # the helper already printed the underlying error
        print('Header copy failed; {} may be incomplete.'.format(copy_path))
    else:
        print('Copied {} lines.'.format(cnt))
    

In [5]:
# Example raw recording; the next cell lists it to show its size on disk.
file_name = '/data2/jason/kf2/raw/20170207/20170207_kf2_01_s1.rec'

In [6]:
# List the raw recording (human-readable size) via the shell.
! ls -lh $file_name

-rw-rw-r-- 1 jason jason 29G Feb  8  2017 /data2/jason/kf2/raw/20170207/20170207_kf2_01_s1.rec


In [7]:
date = '20170207'
animal = 'kf2'

# Candidate (phase, index) pairs in recording order: s1, r1, s2, r2, ...
# NOTE(review): the meaning of the 's' / 'r' phase letters is not stated here.
phase_slots = [(ph, i + 1) for i in range(5) for ph in ['s', 'r']]

# Only the first 7 slots are used; each tag is '<running number>_<phase><index>'.
sess_tags = [
    '{:02d}_{}{}'.format(number, ph, index)
    for number, (ph, index) in enumerate(phase_slots[:7], start=1)
]

print(sess_tags)

['01_s1', '02_r1', '03_s2', '04_r2', '05_s3', '06_r3', '07_s4']


In [17]:
# Directory holding this animal's raw recordings for the day (loop-invariant).
rec_dir = '/data2/jason/{}/raw/{}/'.format(animal, date)

# Copy the text header of every session's .rec file.
for sess in sess_tags:
    rec_filename = '{}_{}_{}.rec'.format(date, animal, sess)
    # Join directory and file name as separate components, so the path does
    # not depend on rec_dir ending with a slash.
    rec_path = os.path.join(rec_dir, rec_filename)
    copy_rec_header(rec_path)

Input file: /data2/jason/kf2/raw/20170207/20170207_kf2_01_s1.rec
Output file: tmp_out/20170207_kf2_01_s1.recHeader
Copied 496 lines.
Input file: /data2/jason/kf2/raw/20170207/20170207_kf2_02_r1.rec
Output file: tmp_out/20170207_kf2_02_r1.recHeader
Copied 496 lines.
Input file: /data2/jason/kf2/raw/20170207/20170207_kf2_03_s2.rec
Output file: tmp_out/20170207_kf2_03_s2.recHeader
Copied 496 lines.
Input file: /data2/jason/kf2/raw/20170207/20170207_kf2_04_r2.rec
Output file: tmp_out/20170207_kf2_04_r2.recHeader
Copied 496 lines.
Input file: /data2/jason/kf2/raw/20170207/20170207_kf2_05_s3.rec
Output file: tmp_out/20170207_kf2_05_s3.recHeader
Copied 496 lines.
Input file: /data2/jason/kf2/raw/20170207/20170207_kf2_06_r3.rec
Output file: tmp_out/20170207_kf2_06_r3.recHeader
Copied 496 lines.
Input file: /data2/jason/kf2/raw/20170207/20170207_kf2_07_s4.rec
Output file: tmp_out/20170207_kf2_07_s4.recHeader
Copied 496 lines.


In [18]:
# Re-read one of the copied header files and report how many lines it has.
H = read_rec_header('tmp_out/20170207_kf2_07_s4.recHeader')
print(len(H))

496


In [38]:
def write_xml_from_list(out_filename, header):
    '''
    Write the entries of `header` to `out_filename`, one after another.

    No separator is added between entries: each one is expected to carry
    its own trailing newline. An existing file is overwritten.
    '''
    text_chunks = ('%s' % entry for entry in header)
    with open(out_filename, 'w') as out_file:
        out_file.writelines(text_chunks)

In [39]:
# Dump the header lines in H to test.xml so they can be parsed as XML.
write_xml_from_list('test.xml', H)

## header xml to dict

In [55]:
import xmltodict
import json

def read_xml(xml_path, attr_prefix='', unorder_dict=True):
    '''
    Parse the XML file at xml_path with xmltodict.

    attr_prefix is forwarded to xmltodict.parse (xmltodict's own default
    is '@'; here the default is the empty string).
    When unorder_dict is true, the parsed result is round-tripped through
    JSON and returned as plain dicts/lists; otherwise the object produced
    by xmltodict.parse is returned unchanged.
    '''
    with open(xml_path) as xml_file:
        xml_text = xml_file.read()
    parsed = xmltodict.parse(xml_text, attr_prefix=attr_prefix)
    if not unorder_dict:
        return parsed
    # convert to plain dict
    return json.loads(json.dumps(parsed))

In [72]:
def show_keys(dict_obj, depth=None, prefix='- ', indent='  '):
    '''
    Print the keys of a nested dict as an indented outline.

    dict_obj : mapping whose keys are printed; only values that are dicts
               are descended into (lists and other values are not)
    depth    : number of nesting levels to print; None means no limit,
               and a value <= 0 prints nothing
    prefix   : text printed before each key at the current level
    indent   : text prepended to `prefix` for every level of nesting
    '''
    if depth is not None and depth <= 0:
        return
    # Compute the child limit once, so every sibling at this level gets the
    # same remaining depth.
    child_depth = None if depth is None else depth - 1
    for key, value in dict_obj.items():
        # format() rather than '+' so non-string keys can be printed too
        print('{}{}'.format(prefix, key))
        if isinstance(value, dict):
            show_keys(value, depth=child_depth,
                      prefix=(indent + prefix), indent=indent)
    return

In [56]:
# Parse the header XML; with the default unorder_dict=True the result is made of plain dicts.
data = read_xml('test.xml')

In [63]:
# Print every top-level section of the configuration; for sections that are
# dicts, also list their entries one level down.
configuration = data['Configuration']
for section_name, section in configuration.items():
    print(section_name)
    if isinstance(section, dict):
        for entry_name, entry_value in section.items():
            print(' - {}: {}'.format(entry_name, entry_value))

GlobalConfiguration
 - filePrefix: test
 - systemTimeAtCreation: 1486485696798
 - realtimeMode: 0
 - saveDisplayedChanOnly: 1
 - filePath: /Users/karlssonm/datadump
 - timestampAtCreation: 2815990
HardwareConfiguration
 - samplingRate: 30000
 - numChannels: 320
 - Device: [{'numBytes': '1', 'name': 'MCU_IO', 'packetOrderPreference': '10', 'available': '1', 'Channel': [{'dataType': 'digital', 'id': 'MCU_Din1', 'bit': '0', 'startByte': '0', 'input': '1'}, {'dataType': 'digital', 'id': 'MCU_Din2', 'bit': '1', 'startByte': '0', 'input': '1'}, {'dataType': 'digital', 'id': 'MCU_Din3', 'bit': '2', 'startByte': '0', 'input': '1'}, {'dataType': 'digital', 'id': 'MCU_Din4', 'bit': '3', 'startByte': '0', 'input': '1'}, {'dataType': 'digital', 'id': 'MCU_Din5', 'bit': '4', 'startByte': '0', 'input': '1'}, {'dataType': 'digital', 'id': 'MCU_Din6', 'bit': '5', 'startByte': '0', 'input': '1'}, {'dataType': 'digital', 'id': 'MCU_Din7', 'bit': '6', 'startByte': '0', 'input': '1'}, {'dataType': 'digita

In [57]:
# Save the parsed header as YAML.
# NOTE(review): write_yml is defined in a later cell (under "metadata.yml file");
# on a fresh kernel that cell has to run before this one.
write_yml('test-xml-to.yml', data)

In [70]:
# Preview the first 50 lines of the generated YAML file.
!head test-xml-to.yml --lines=50

Configuration:
  AuxDisplayConfiguration:
    DispChannel:
    - analyze: '0'
      color: '#aaaaaa'
      device: ECU
      id: Din1
      maxDisp: '2'
    - analyze: '0'
      color: '#aaaaaa'
      device: ECU
      id: Din2
      maxDisp: '2'
    - analyze: '0'
      color: '#aaaaaa'
      device: ECU
      id: Din3
      maxDisp: '2'
    - analyze: '0'
      color: '#aaaaaa'
      device: ECU
      id: Din4
      maxDisp: '2'
    - analyze: '0'
      color: '#aaaaaa'
      device: ECU
      id: Din5
      maxDisp: '2'
    - analyze: '0'
      color: '#aaaaaa'
      device: ECU
      id: Din6
      maxDisp: '2'
    - analyze: '0'
      color: '#aaaaaa'
      device: ECU
      id: Din7
      maxDisp: '2'
    - analyze: '0'
      color: '#aaaaaa'
      device: ECU
      id: Din8
      maxDisp: '2'
    - analyze: '0'
      color: '#aaaaaa'
      device: ECU
      id: Din9
      maxDisp: '2'
    - analyze: '0'
      color: '#aaaaaa'


## metadata.yml file

In [36]:
import yaml

def write_yml(yml_path, data):
    '''
    Serialize `data` to the file at yml_path as YAML.

    default_flow_style=False is passed to yaml.dump. An existing file is
    overwritten.
    '''
    with io.open(yml_path, 'w') as yml_file:
        yaml.dump(data, stream=yml_file, default_flow_style=False)
        
def read_yml(yml_path):
    '''
    Load the YAML file at yml_path and return the resulting Python object.

    Uses yaml.FullLoader.
    NOTE(review): if these files can ever come from an untrusted source,
    consider yaml.safe_load instead.
    '''
    with io.open(yml_path) as yml_file:
        return yaml.load(yml_file, Loader=yaml.FullLoader)

In [32]:
# Small nested sample (scalar, nested mapping, list) to exercise the YAML writer.
data = {
    'A': 'a',
    'B': {
        'C': ['c1', 'c2', 'c3'],
        'D': 'd',
        'E': 'e',
    },
}

write_yml('test.yml', data)

In [71]:
# Show the first lines of test.yml.
!head test.yml

A: a
B:
  C:
  - c1
  - c2
  - c3
  D: d
  E: e


In [37]:
# Load test.yml back to check that it round-trips to the dict that was written.
read_yml('test.yml')

{'A': 'a', 'B': {'C': ['c1', 'c2', 'c3'], 'D': 'd', 'E': 'e'}}

In [79]:
# Load the beans metadata file and print its keys as an outline
# (show_keys descends into nested dicts only).
yml_filename = '../yaml/beans20190718_metadata.yml'
print('[{}]'.format(yml_filename))
beans_meta = read_yml(yml_filename) 
show_keys(beans_meta)

[../yaml/beans20190718_metadata.yml]
- experimenter name
- lab
- institution
- experiment description
- session description
- session_id
- subject
  - description
  - genotype
  - sex
  - species
  - subject id
  - weight
- data acq device
- associated_files
- device
  - name
- units
  - analog
  - behavioral_events
- times_period_multiplier
- raw_data_to_volts
- default_header_file_path
- cameras
- tasks
- associated_video_files
- behavioral_events
- electrode groups
- ntrode electrode group channel map


In [80]:
# Load the kibbles metadata file and print its keys as an outline, for
# comparison with the beans metadata listing.
yml_filename = '../yaml/kibbles20170216_metadata.yml'
print('[{}]'.format(yml_filename))
kibbles_meta = read_yml(yml_filename) 
show_keys(kibbles_meta)

[../yaml/kibbles20170216_metadata.yml]
- experimenter name
- lab
- institution
- experiment description
- session description
- session_id
- subject
  - description
  - genotype
  - sex
  - species
  - subject id
  - weight
- data acq device
- associated_files
- units
  - analog
  - behavioral_events
- times_period_multiplier
- cameras
- tasks
- associated_video_files
- behavioral_events
- device
  - name
- electrode groups
- ntrode electrode group channel map
