In [1]:
import numpy as np
import pandas as pd
import re

In [2]:
def col_counter(filename):
    """Return the column count declared in a .LBL file.

    The file is scanned line by line; the first line that contains the text
    "COLUMNS" together with an "=" is treated as "COLUMNS = <integer>" and the
    part after the first "=" is parsed as the count.

    Parameters
    ----------
    filename : str
        Path to the .LBL file.

    Returns
    -------
    int
        The declared column count, or 0 when no such line exists in the file.

    Raises
    ------
    ValueError
        If the text after "=" on the matching line is not an integer.
    """
    stop = "COLUMNS"

    with open(filename) as lbl:
        # Iterating the file ends at EOF; looping on readline() would spin
        # forever there because readline() keeps returning "".
        for raw_line in lbl:
            line = raw_line.strip()

            # A mention of the keyword without "=" carries no value to parse.
            if stop not in line or '=' not in line:
                continue

            _, _, val = line.partition('=')
            return int(val.strip())

    return 0

def row_counter(filename):
    """Return the row count declared in a .LBL file.

    The file is scanned line by line; the first line that contains the text
    "ROWS" together with an "=" is treated as "ROWS = <integer>" and the part
    after the first "=" is parsed as the count.

    Parameters
    ----------
    filename : str
        Path to the .LBL file.

    Returns
    -------
    int
        The declared row count, or 0 when no such line exists in the file.

    Raises
    ------
    ValueError
        If the text after "=" on the matching line is not an integer.
    """
    stop = "ROWS"

    with open(filename) as lbl:
        # Iterating the file ends at EOF; looping on readline() would spin
        # forever there because readline() keeps returning "".
        for raw_line in lbl:
            line = raw_line.strip()

            # A mention of the keyword without "=" carries no value to parse.
            if stop not in line or '=' not in line:
                continue

            _, _, val = line.partition('=')
            return int(val.strip())

    return 0

In [3]:

def col_parser(filename):
    """Extract the column names listed in a .LBL file.

    Every line of the form "NAME = <arbitrary string>" (leading/trailing
    whitespace and letter case of the keyword are ignored) contributes one
    name. Surrounding whitespace and double quotes are removed from the value.

    Parameters
    ----------
    filename : str
        Path to the .LBL file.

    Returns
    -------
    list of str
        The names in the order they appear in the file.
    """
    # Matches lines of the form "NAME = arbitrary string", ignoring
    # whitespace and capitalization.
    name_line = re.compile(r"^\s*NAME\s*=\s*.+?\s*$", re.IGNORECASE)

    col_names = []

    with open(filename) as lbl:
        for line in lbl:
            if not name_line.match(line):
                continue

            # Split on the first "=" only, so a value that itself contains
            # "=" is kept whole instead of breaking the unpacking.
            _, _, val = line.partition('=')

            col_names.append(val.strip().strip('"'))

    return col_names

In [None]:
def time_conversion(df):
    """Placeholder for a time conversion on ``df``; not implemented yet.

    The definition previously had no body, which is a SyntaxError when the
    cell is run. Until the conversion is written, calling this raises so the
    gap is explicit.

    Raises
    ------
    NotImplementedError
        Always.
    """
    # TODO: implement the conversion; the intended behavior is not defined yet.
    raise NotImplementedError("time_conversion is not implemented yet")

In [33]:
def _fill_df(filename, columns, delimiter):
    """Read a delimited .TAB file into a DataFrame of string fields.

    Label lines (only whitespace-separated integers), decorative lines (only
    "=" and spaces) and empty lines are skipped. Every other line is stripped
    and split on ``delimiter``; ``None`` splits on runs of whitespace.
    """
    # REGEX statements for detecting label and decorative lines
    label = re.compile(r'^\s*\d+(?:\s+\d+)*\s*$')
    decor = re.compile(r'^[= ]+$')

    data = []

    with open(filename) as tab:
        for line in tab:
            stripped = line.strip()

            # An empty line would otherwise be appended as [''] or [] and
            # show up as a bogus, None-padded row in the DataFrame.
            if not stripped:
                continue

            # skip decorative and label lines
            if decor.match(line) or label.match(line):
                continue

            data.append(stripped.split(delimiter))

    return pd.DataFrame(data, columns=columns)


def fill_df_csv(filename, columns):
    """Load a comma-delimited .TAB file into a pandas DataFrame.

    Parameters
    ----------
    filename : str
        Path to the .TAB file.
    columns : list
        Column names extracted from the .LBL file.

    Returns
    -------
    pandas.DataFrame
        One row per data line; all values are kept as strings.
    """
    return _fill_df(filename, columns, ',')


def fill_df_space(filename, columns):
    """Load a whitespace-delimited .TAB file into a pandas DataFrame.

    Same as ``fill_df_csv``, but fields are separated by runs of whitespace.

    Parameters
    ----------
    filename : str
        Path to the .TAB file.
    columns : list
        Column names extracted from the .LBL file.

    Returns
    -------
    pandas.DataFrame
        One row per data line; all values are kept as strings.
    """
    return _fill_df(filename, columns, None)

In [31]:
# Read the column names from the HG_48S label file, then load the matching
# .TAB file as comma-delimited. The DataFrame is the cell's last expression,
# so it is what the notebook displays.
columns = col_parser("data/VOYAGER/HG_48S.LBL")
fill_df_csv("data/VOYAGER/HG_48S.TAB", columns)

Unnamed: 0,Time,Spacecraft Clock,MAG ID,Br,Bt,Bn,Bmag,Average Bmag,Delta,Lambda,RMS Br,RMS Bt,RMS Bn,NPTS,DFLAG
0,1979-02-26T00:00:35.897,16164:19:001,1,-0.630,0.968,0.015,1.155,1.156,0.759,123.065,0.014,0.025,0.046,5,""" """
1,1979-02-26T00:01:23.897,16164:20:001,1,-0.568,0.998,-0.076,1.151,1.153,-3.777,119.676,0.029,0.018,0.066,5,""" """
2,1979-02-26T00:02:11.897,16164:21:001,1,-0.598,0.983,-0.074,1.153,1.155,-3.654,121.300,0.057,0.018,0.040,5,""" """
3,1979-02-26T00:02:59.897,16164:22:001,1,-0.656,0.951,-0.076,1.158,1.160,-3.775,124.572,0.057,0.047,0.015,5,""" """
4,1979-02-26T00:03:47.897,16164:23:001,1,-0.630,0.980,-0.055,1.167,1.167,-2.681,122.732,0.019,0.018,0.015,5,""" """
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46968,1979-03-24T22:46:20.304,16972:46:001,1,0.100,0.140,-0.206,0.268,0.268,-50.234,54.511,0.003,0.009,0.012,5,""" """
46969,1979-03-24T22:47:08.304,16972:47:001,1,0.081,0.146,-0.221,0.277,0.277,-52.896,60.973,0.005,0.007,0.009,5,""" """
46970,1979-03-24T22:47:56.304,16972:48:001,1,0.064,0.153,-0.233,0.286,0.287,-54.597,67.324,0.010,0.008,0.007,5,""" """
46971,1979-03-24T22:48:44.304,16972:49:001,1,0.061,0.162,-0.228,0.286,0.287,-52.875,69.288,0.009,0.006,0.010,5,""" """


In [37]:
# Read the column names from the apollo12_sws_28s_19691119 label file, then
# load the matching .tab file as whitespace-delimited. Note that this rebinds
# the global `columns` also assigned by the HG_48S cell.
columns = col_parser("data/APOLLO/apollo12_sws_28s_19691119.lbl")
#print(columns)
fill_df_space("data/APOLLO/apollo12_sws_28s_19691119.tab", columns)

Unnamed: 0,EPOCH,JULIAN_DATE,DENSITY,AP_RATIO,VEL,VTH,EAST,NORTH,FLAG,IA,...,IDISCP,GAIN,RMS,KUPA,CURA,CURB,CURC,CUR7,FACTD,XNOISE
0,1969-11-19T18:42:13,2440545.27932,1.8,-9.99,493,318,-99.9,-99.9,2622080,3,...,0,0,19.0,2,13.0,-999.9,2.0,3.0,2.60,6.7
1,1969-11-19T18:42:41,2440545.27964,0.8,-9.99,591,256,-99.9,-99.9,2622080,3,...,0,0,16.0,2,9.0,-999.9,-999.9,1.0,1.90,7.2
2,1969-11-19T18:43:09,2440545.27997,0.4,-9.99,734,145,-99.9,-99.9,2622080,3,...,0,0,17.0,2,10.0,-999.9,2.0,-999.9,1.30,7.1
3,1969-11-19T18:43:37,2440545.28029,0.6,-9.99,673,187,-99.9,-99.9,2622080,3,...,0,0,14.0,2,10.0,-999.9,-999.9,-999.9,1.46,6.7
4,1969-11-19T18:44:05,2440545.28061,1.0,-9.99,591,230,-99.9,-99.9,2622080,3,...,0,0,13.0,2,13.0,-999.9,2.0,1.0,1.76,6.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
522,1969-11-19T23:57:07,2440545.49800,-99.9,-9.99,-999,-99,-99.9,-99.9,524810,0,...,0,0,-99.9,2,1.0,-999.9,-999.9,-999.9,9.99,-999.9
523,1969-11-19T23:57:35,2440545.49832,-99.9,-9.99,-999,-99,-99.9,-99.9,524808,3,...,0,0,99.0,5,3.0,-999.9,-999.9,-999.9,9.99,3.7
524,1969-11-19T23:58:03,2440545.49865,-99.9,-9.99,-999,-99,-99.9,-99.9,524808,3,...,0,0,99.0,3,1.0,-999.9,-999.9,-999.9,9.99,3.5
525,1969-11-19T23:58:31,2440545.49897,-99.9,-9.99,-999,-99,-99.9,-99.9,524808,3,...,0,0,99.0,1,1.0,-999.9,-999.9,-999.9,9.99,3.6
