In [77]:
import re
import csv
import pandas as pd

In [78]:
import os
def diriter(root_dir, extension):
    ''' lazily walk root_dir and its sub directories, producing a
        (file name, joined path) pair for every file whose name ends
        with extension
    '''
    walker = os.walk(root_dir)
    return ((name, os.path.join(folder, name))
            for folder, _subdirs, names in walker
            for name in names
            if name.endswith(extension))

In [105]:
def make_tokens(filepath, delim=','):
    ''' read a delimited text file and keep only its table rows

    A row is kept when its first field is exactly 'Header=' or begins
    with one or more digits followed by '=' (for example '12=').
    Every other row, including blank lines, is discarded.

    filepath -- path of the file to read
    delim    -- field delimiter handed to csv.reader (default ',')

    returns a list of rows, each row being a list of string tokens
    '''
    # raw string: '\d' in a normal literal is an invalid escape sequence
    ptrn = re.compile(r'\d+=')
    # the context manager closes the file; newline='' is what the csv
    # module asks for so quoted fields with line breaks parse correctly
    with open(filepath, newline='') as handle:
        # `row and ...` skips the empty lists csv.reader yields for
        # blank lines, which would otherwise fail on row[0]
        return [row for row in csv.reader(handle, delimiter=delim)
                if row and (row[0] == 'Header=' or ptrn.match(row[0]))]


def find_headers(lines):
    ''' find position of headers in the token rows

    returns the index of every row whose first token, stripped of
    surrounding whitespace, equals 'Header=', followed by len(lines)
    as a closing boundary
    '''
    positions = []
    for idx, row in enumerate(lines):
        if row[0].strip() == 'Header=':
            positions.append(idx)
    # closing boundary so the last table has an end offset
    positions.append(len(lines))
    return positions


def make_lists(hpos, lines):
    ''' split the list of token rows into one chunk per header

    hpos  -- ascending slice boundaries; each consecutive pair
             (hpos[i], hpos[i + 1]) delimits one chunk
    lines -- the token rows to cut up

    returns a list of len(hpos) - 1 slices of lines (empty when hpos
    has fewer than two entries)
    '''
    # pair every boundary with its successor instead of indexing by hand
    return [lines[start:stop] for start, stop in zip(hpos, hpos[1:])]


def make_dataframes(lines):
    ''' build one data frame per group of token rows

    within each group the first row supplies the column names and the
    remaining rows supply the data
    '''
    frames = []
    for group in lines:
        body = group[1:]
        header = group[0]
        frames.append(pd.DataFrame(body, columns=header))
    return frames


def cbind_dataframes(dfs):
    ''' bind the frames side by side and tidy up the columns

    'RT' is copied to lib_rt and 'R.T.' to tic_rt, then the columns
    'Header=', 'R.T.', 'RT', 'Area Pct' and 'Peak' are dropped.
    Remaining column names are lower-cased with '/' and ' ' replaced
    by '_'.
    '''
    combined = pd.concat(list(dfs), axis=1)
    combined = combined.assign(lib_rt=combined['RT'])
    combined = combined.assign(tic_rt=combined['R.T.'])

    unwanted = ['Header=', 'R.T.', 'RT', 'Area Pct', 'Peak']
    combined = combined.drop(unwanted, axis=1)

    tidy_names = []
    for name in combined.columns:
        tidy_names.append(name.replace('/', '_').replace(' ', '_').lower())
    combined.columns = tidy_names
    return combined
    

In [106]:
# Relative path of the results file to parse; resolved against the
# kernel's working directory.
file_path = 'data/FA03.D/RESULTS.CSV'

In [107]:
# Keep only the 'Header=' rows and the numbered '<n>=' rows of the file.
tokens = make_tokens(file_path)

In [109]:
# Offsets of each 'Header=' row in tokens, plus len(tokens) as end marker.
hposition = find_headers(tokens)

In [110]:
# Inspect the offsets (the recorded output below is from the saved run).
hposition

[2, 31, 60]

In [111]:
# Cut tokens into one list of rows per header, using consecutive offsets.
token_lists = make_lists(hposition, tokens)

In [112]:
# One data frame per table; the first row of each chunk names the columns.
dfs = make_dataframes(token_lists)

In [113]:
# Sanity check: (rows, columns) of the first table.
dfs[0].shape

(28, 11)

In [114]:
# Sanity check: (rows, columns) of the second table.
dfs[1].shape

(28, 8)

In [115]:
# Bind the tables column-wise and normalise the column names.
final = cbind_dataframes(dfs)

In [116]:
# Show up to the first 40 rows of the combined frame.
final.head(40)

Unnamed: 0,first,max,last,pk__ty,height,area,pct_max,pct_total,pk,library_id,ref,cas,qual,lib_rt,tic_rt
0,465,473,494,rBV3,257808,1489466,13.12,2.034,1,Methyl octanoate,17,000000-00-0,96,5.7877,5.788
1,733,745,812,rBV,964743,2491449,21.94,3.401,2,Methyl decanoate,1,000000-00-0,98,7.3441,7.344
2,859,866,904,rBV,608418,1277982,11.25,1.745,3,Methyl undecanoate,2,000000-00-0,98,8.0364,8.036
3,970,977,1017,rBV,1929049,2905961,25.59,3.967,4,Methyl dodecanoate,3,000000-00-0,98,8.6715,8.672
4,1077,1083,1116,rBV,882521,1436154,12.65,1.961,5,Methyl tridecanoate,4,000000-00-0,99,9.2781,9.278
5,1168,1173,1179,rBV,1075433,1379404,12.15,1.883,6,Methyl myristoleic acid,5,000000-00-0,99,9.793,9.793
6,1179,1183,1220,rVB,2474155,3443757,30.32,4.702,7,Methyl myristate,6,000000-00-0,99,9.8503,9.85
7,1267,1272,1278,rBV,1073796,1449016,12.76,1.978,8,cis-10-Pentadecenoic acid methyl ester,7,000000-00-0,99,10.3595,10.36
8,1278,1282,1315,rVB,1116633,1821171,16.04,2.486,9,Methyl myristate,6,000000-00-0,83,10.4168,10.417
9,1363,1372,1386,rBV,1047443,1571132,13.83,2.145,10,Methyl palmitoleate,9,000000-00-0,99,10.9317,10.932
