## Example Usage

* Below are syntax examples that use the tool to parse the provided example files

In [23]:
## Parse header and print the metadata from the ASCII header
import importlib
import time  # NOTE(review): not used anywhere in this cell
import parseTOBA as TO
# Re-execute the module so edits to parseTOBA are picked up without restarting the kernel.
importlib.reload(TO)
# Raw string keeps the backslash literal; the path is relative to the working directory.
file = r'example_data\Flux_Data362.dat'
# NOTE(review): mode is given to the constructor here, but to parse() in the
# later parseTOBA cells — confirm which call the API expects it on.
pTO = TO.parseTOBA(mode=1)
# Last expression in the cell, so whatever parse() returns is displayed as the output.
pTO.parse(file)
# pTO.Contents

['Not Read', 'example_data\\Flux_Data362.dat']

In [25]:
# Parse a file with parseMixedArray and show the Contents entry stored under key '101'.
import parseMixedArray as pMA
import importlib
# Re-execute the module so edits to parseMixedArray are picked up without restarting the kernel.
importlib.reload(pMA)
pM = pMA.parseMixedArray()
# NOTE(review): absolute, machine-specific path — this cell only runs where that file exists.
pM.parse(r"C:\Users\User\GSC_Work\EC_Processing_Toolkit\exampleInputs\GSC_EC\20240914\WX_data.dat")
# Last expression in the cell: displays the Contents entry for key '101'.
pM.Contents['101']

{'Frequency': '1800.0s',
 'arrayContents': {'ArrayID': {'unit_in': None,
   'operation': 'Smp',
   'dataType': 'int32',
   'ignore': False},
  'Year_RTM': {'unit_in': None,
   'operation': 'RTM',
   'dataType': 'int32',
   'ignore': False},
  'Day_RTM': {'unit_in': None,
   'operation': 'RTM',
   'dataType': 'int32',
   'ignore': False},
  'Hour_Minute_RTM': {'unit_in': None,
   'operation': 'RTM',
   'dataType': 'int32',
   'ignore': False},
  'BattV_AVG': {'unit_in': None,
   'operation': 'AVG',
   'dataType': 'float32',
   'ignore': True},
  'Current_AVG': {'unit_in': None,
   'operation': 'AVG',
   'dataType': 'float32',
   'ignore': True},
  'Voltage_AVG': {'unit_in': None,
   'operation': 'AVG',
   'dataType': 'float32',
   'ignore': True},
  'NR_Wm2_AVG': {'unit_in': None,
   'operation': 'AVG',
   'dataType': 'float32',
   'ignore': True},
  'SlrW_AVG': {'unit_in': None,
   'operation': 'AVG',
   'dataType': 'float32',
   'ignore': True},
  'AirTC_AVG': {'unit_in': None,
   'op

In [9]:
# Parse a CSV readout with parseHoboCSV and display the resulting Contents attribute.
import parseHobo as pHO
import importlib
# Re-execute the module so edits to parseHobo are picked up without restarting the kernel.
importlib.reload(pHO)
# NOTE(review): the meaning of mode = 2 is defined inside parseHobo — confirm there.
pH = pHO.parseHoboCSV(mode = 2)
# Raw string keeps the backslash literal; the path is relative to the working directory.
pH.parse(r"example_data\20750528-SHSC.SSM.SGT.240720_240724readout.csv")
# Last expression in the cell: displays the full Contents attribute.
pH.Contents

{'RecordNumber': {'unit_in': '',
  'logger': '',
  'sensor': '',
  'ignore': True,
  'dataType': 'int32'},
 'Date_Time': {'unit_in': ' GMT+00:00',
  'logger': '',
  'sensor': '',
  'ignore': True,
  'dataType': 'object'},
 'Temp_LBL_5': {'unit_in': ' 째C ',
  'logger': 'LGR S/N: 20750528',
  'sensor': ' SEN S/N: 20750528',
  'ignore': False,
  'dataType': 'float32'},
 'Temp_LBL_25': {'unit_in': ' 째C ',
  'logger': 'LGR S/N: 20750528',
  'sensor': ' SEN S/N: 20750528',
  'ignore': False,
  'dataType': 'float32'},
 'Temp_LBL_50': {'unit_in': ' 째C ',
  'logger': 'LGR S/N: 20750528',
  'sensor': ' SEN S/N: 20750528',
  'ignore': False,
  'dataType': 'float32'},
 'Temp_LBL_100': {'unit_in': ' 째C ',
  'logger': 'LGR S/N: 20750528',
  'sensor': ' SEN S/N: 20750528',
  'ignore': False,
  'dataType': 'float32'},
 'Host_Connected': {'unit_in': 'LGR S/N: 20750528',
  'logger': '',
  'sensor': '',
  'ignore': True,
  'dataType': 'object'},
 'End_Of_File': {'unit_in': 'LGR S/N: 20750528',
  'logger'

In [22]:
class p:
    """Bare placeholder class whose instances start with no attributes of their own."""

    def __init__(self):
        """Create an instance without setting any state."""
        return None

P = p()
# An attribute can be attached after construction, even one named like the class.
setattr(P, 'p', 1)
P.p

1

In [1]:
## Read the file and output data to a 2D numpy array

import time
import parseTOBA as TO
# Raw string keeps the backslash literal; the path is relative to the working directory.
file = r'example_data\Met_Data100.dat'
# Start of the timed section.
T1 = time.time()
# NOTE(review): rebinds TO from the parseTOBA module to a parser instance; the
# rest of this cell reads TO.Metadata from that instance.
TO = TO.parseTOBA()
TO.parse(file,mode=1)
# Elapsed wall-clock time in seconds, rounded to 4 decimal places.
print('Completed in: ',round((time.time()-T1),4),' s')


def setName(Metadata):
    """Build an underscore-separated label from a metadata mapping.

    Every value that is not None is joined with '_', skipping the entries
    keyed 'Timestamp' and 'Timezone'. Spaces, dots, forward slashes and
    backslashes in the joined text are then replaced with '_'.

    Parameters
    ----------
    Metadata : dict
        Mapping of field name to string value (or None).

    Returns
    -------
    str
        The sanitized label; empty when nothing qualifies.
    """
    skipped = ('Timestamp', 'Timezone')
    parts = []
    for field, entry in Metadata.items():
        if entry is None or field in skipped:
            continue
        parts.append(entry)
    label = '_'.join(parts)
    for char in (' ', '.', '/', '\\'):
        label = label.replace(char, '_')
    return label

# Print the label that setName builds from the parser's Metadata.
print(setName(TO.Metadata))
# print(setName(pM.Metadata))
# print(TO.Metadata)
# Last expression in the cell: displays the Metadata attribute itself.
TO.Metadata

Completed in:  0.0118  s
TOB3_CR1000X_57840_SCL_2024_07_25_Met_Data_30_min


{'Type': 'TOB3',
 'StationName': None,
 'LoggerModel': 'CR1000X',
 'SerialNo': '57840',
 'Program': 'SCL_2024_07_25',
 'Table': 'Met_Data',
 'Frequency': '30 min',
 'Timezone': None,
 'Timestamp': '2024-08-10 00:00'}

In [None]:
## Read the file and output data to a pandas dataframe

import time
import parseTOBA as TO
# Raw string keeps the backslash literal; the path is relative to the working directory.
file = r'example_data\Met_Data100.dat'
# Start of the timed section.
T1 = time.time()
# NOTE(review): rebinds TO from the parseTOBA module to a parser instance.
TO = TO.parseTOBA()
TO.parse(file,mode=2)
# Elapsed wall-clock time in seconds, rounded to 4 decimal places.
print('Completed in: ',round((time.time()-T1),4),' s')

# Last expression in the cell: displays the first rows of the parsed Data attribute.
TO.Data.head()

example_data\Met_Data100.dat
Frames  4
Completed in:  0.0064  s


Unnamed: 0_level_0,Current_Avg,BattV_Avg,Voltage_Avg,NetRad_Avg,NetRad_Corrected_Avg,SlrW_Avg,AirTC_Avg,RH,BV_BP_Avg,BV_Temp_Avg,BV_Qual_Avg,Water_Level_m,Water_Temp_C,PTemp_C_Avg,TWGS_1_Avg,TWGS_2_Avg,TWGS_3_Avg
TIMESTAMP,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2024-08-10 00:30:00,0.770357,13.897627,13.946882,95.178658,97.047462,107.747437,13.525503,88.136353,1004.939697,13.033334,6.833444,0.05,8.2,15.832467,14.72954,14.175689,14.115864
2024-08-10 01:00:00,0.13114,13.716902,13.779596,35.430405,35.795799,79.168304,13.079014,93.205093,1005.091919,12.713916,6.835833,0.05,8.1,15.507368,14.635816,14.094248,13.979114
2024-08-10 01:30:00,0.190026,13.623872,13.685821,35.368614,35.594788,90.60611,12.589458,94.487923,1004.994995,12.037277,6.836416,0.05,8.1,14.914474,14.54034,14.01538,13.89111
2024-08-10 02:00:00,-0.230603,13.476729,13.547348,29.150885,29.233795,70.471649,12.395253,95.78643,1004.98938,11.500916,6.838,0.05,8.1,14.445471,14.394469,13.881736,13.69877
2024-08-10 02:30:00,-0.722745,13.102289,13.19115,10.500364,10.551958,49.774025,12.54636,95.408714,1005.097717,11.319972,6.841944,0.052,8.1,14.255431,14.250505,13.760682,13.568392


In [None]:
## Read the file, output data to a pandas dataframe, and save as a TOA5 formatted file in specified directory

import time
import parseTOBA as TO
# Raw string keeps the backslash literal; the path is relative to the working directory.
file = r'example_data\Met_Data100.dat'
# Start of the timed section.
T1 = time.time()
# NOTE(review): rebinds TO from the parseTOBA module to a parser instance.
TO = TO.parseTOBA()
# NOTE(review): 'C:/temp' is an absolute, machine-specific output directory.
TO.parse(file,mode=2,saveTo='C:/temp')
# Elapsed wall-clock time in seconds, rounded to 4 decimal places.
print('Completed in: ',round((time.time()-T1),4),' s')


example_data\Met_Data100.dat
Frames  4
Converted  example_data\Met_Data100_2024_08_10_0000.dat  to  C:/temp\Met_Data100_2024_08_10_0000.dat
Completed in:  0.0162  s


Speed for a single file is comparable to the CardConvert program, but significant performance gains can be made by running the tool in parallel over a directory of files, as shown below:

In [None]:
import os
import time
import parseTOBA as TO
from functools import partial
from multiprocessing import Pool

# Directory whose .dat files are converted.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
dpath = 'D:/GSC_Work/SCL_2024/20240724'
fileList = [os.path.join(dpath,f) for f in os.listdir(dpath) if '.dat' in f]
print('Processing: ',len(fileList),' files\n')

# Pool creation is kept behind the main-module guard, as the multiprocessing
# documentation recommends for platforms that spawn worker processes.
if __name__ == '__main__':
    with Pool(processes=4) as pool:
        # Timing starts after the pool exists, so it excludes worker start-up.
        T1 = time.time()
        # NOTE(review): rebinds TO from the parseTOBA module to a parser instance.
        TO = TO.parseTOBA(log=True)
        # partial pins the shared keyword arguments so map() supplies only the file path;
        # map() returns one result per file, in the same order as fileList.
        log = pool.map(partial(TO.parse,mode=2,saveTo='C:/temp'),fileList)
        print('Completed in: ',round((time.time()-T1),4),' s')
    # Report inside the guard: `log` exists only when the pool actually ran,
    # so printing it unconditionally would raise NameError otherwise.
    for result in log:
        print(result)

Processing:  250  files

Completed in:  333.0052  s
['Read', 'D:/GSC_Work/SCL_2024/20240724\\Flux_Data242_2024_07_20_2053.dat']
['Read', 'D:/GSC_Work/SCL_2024/20240724\\Flux_Data243_2024_07_20_2100.dat']
['Read', 'D:/GSC_Work/SCL_2024/20240724\\Flux_Data244_2024_07_20_2101.dat']
['Read', 'D:/GSC_Work/SCL_2024/20240724\\Flux_Data245_2024_07_20_2102.dat']
['Read', 'D:/GSC_Work/SCL_2024/20240724\\Flux_Data246_2024_07_20_2106.dat']
['Read', 'D:/GSC_Work/SCL_2024/20240724\\Flux_Data247_2024_07_20_2109.dat']
['Read', 'D:/GSC_Work/SCL_2024/20240724\\Flux_Data248_2024_07_20_2114.dat']
['Read', 'D:/GSC_Work/SCL_2024/20240724\\Flux_Data249_2024_07_20_2130.dat']
['Read', 'D:/GSC_Work/SCL_2024/20240724\\Flux_Data250_2024_07_20_2145.dat']
['Read', 'D:/GSC_Work/SCL_2024/20240724\\Flux_Data251_2024_07_20_2146.dat']
['Read', 'D:/GSC_Work/SCL_2024/20240724\\Flux_Data252_2024_07_20_2200.dat']
['Read', 'D:/GSC_Work/SCL_2024/20240724\\Flux_Data253_2024_07_20_2208.dat']
['Read', 'D:/GSC_Work/SCL_2024/20240