In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
# Matplotlib 3.6 renamed the bundled 'seaborn' style sheet to 'seaborn-v0_8' and
# later releases dropped the old name.  Prefer the new name when this Matplotlib
# provides it, and fall back to the original name on older installations.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

In [2]:
from data_io import DataIO # Custom IO file

In [3]:
# Label of the run being processed; it becomes the prefix of the output
# HDF5 file name written at the end of the notebook.
run_label = '2021_05_06' ## change this as needed

In [4]:
# Directory holding the raw data of the run.  It is derived from run_label so that
# changing the label in one place keeps the input directory and the output file
# name pointing at the same run (previously the date was hard-coded here as well).
Path = "/gpfs/loomis/project/david_moore/ael59/run_data/{}/".format(run_label) # data file path

In [5]:
# Instantiate the custom DataIO helper, handing it the run's data directory.
IO = DataIO(Path)

In [6]:
# NOTE(review): presumably extracts the run's compressed data files so the
# loaders below can read them -- confirm against data_io.DataIO.Unzip.
IO.Unzip()

In [7]:
# Load the RGA table.  Later cells rely on it providing the columns
# 'Datetime', 'Mass' and 'Pressure'.
RGA_Data = IO.GetRGAData()

In [8]:
# Load the temperature table.  Later cells rely on it providing the columns
# 'Datetime', 'CH1' and 'CH2'.
Temp = IO.GetTemperatureData()

In [9]:
# Put both tables in chronological order, in place.  The temperature log is
# ordered by timestamp only; the RGA table is ordered by timestamp and then by
# mass within each timestamp.
Temp.sort_values('Datetime', inplace=True)
RGA_Data.sort_values(['Datetime', 'Mass'], inplace=True)

In [10]:
# Earliest timestamp present in each data set.
#
# Series.min() finds it in a single pass, instead of sorting the whole column and
# de-duplicating it only to read the first element.  For a datetime column it
# returns a pandas Timestamp, which supports the comparison and subtraction
# performed in the following cells.
if RGA_Data.empty or Temp.empty:
    # Fail loudly here: with an empty table min() would return NaT, which would
    # otherwise surface much later as empty or NaN-filled results.
    raise ValueError('RGA_Data and Temp must both contain at least one row')

rga_start = RGA_Data['Datetime'].min()
temps_start = Temp['Datetime'].min()

In [11]:
# The experiment clock starts at whichever data set has the earlier first
# timestamp; when neither is strictly earlier, the temperature start is used.
start_datetime = rga_start if rga_start < temps_start else temps_start

In [12]:
# start_datetime  # uncomment to display the reference start time chosen above

In [13]:
# Time of every temperature reading, expressed as seconds elapsed since
# start_datetime (dividing a time difference by one second yields a plain number).
temp_elapsed = Temp['Datetime'] - start_datetime
temp_times = temp_elapsed / np.timedelta64(1, 's')

In [14]:
# Gases to extract, each paired with the value of the 'Mass' column that selects
# it.  Both lists are derived from this one table so they stay index-aligned.
_species = (
    ('H2', 2.0),
    ('H2O', 18.0),
    ('N2', 28.0),
    ('O2', 32.0),
    ('CO2', 44.0),
)
gases = [name for name, _ in _species]
gas_masses = [mass for _, mass in _species]

In [15]:
# Build one table per gas containing, for every RGA sample of that gas:
#   exp_time  - seconds elapsed since start_datetime
#   pressure  - the sample's 'Pressure' value
#   ch1_lerp  - CH1 temperature interpolated to the sample time, in kelvin
#   ch2_lerp  - CH2 temperature interpolated to the sample time, in kelvin
#   mean_temp - average of the two interpolated channels, in kelvin

# Offset between the Celsius and Kelvin scales: 0 degC is exactly 273.15 K.
# (The offset was previously rounded to 273.2, biasing every temperature by 0.05 K.)
CELSIUS_TO_KELVIN = 273.15

# Temperature channels as numeric data.  They do not depend on the gas being
# processed, so they are converted once here instead of on every loop iteration.
temp_ch1 = pd.to_numeric(Temp['CH1'])
temp_ch2 = pd.to_numeric(Temp['CH2'])

final_tables = [] # list of pandas tables containing final data for each gas

for gas, gas_mass in zip(gases, gas_masses):

    # RGA samples recorded at this gas's mass value.
    gas_data = RGA_Data[RGA_Data['Mass'] == gas_mass]
    gas_times = (gas_data['Datetime'] - start_datetime)/np.timedelta64(1, 's')

    # Linearly interpolate each temperature channel onto the RGA sample times.
    # np.interp needs temp_times to be increasing, which holds because Temp was
    # sorted by 'Datetime' earlier; sample times outside the temperature record
    # receive the first/last temperature reading.
    gas_ch1_lerp = np.interp(gas_times, temp_times, temp_ch1)
    gas_ch2_lerp = np.interp(gas_times, temp_times, temp_ch2)

    # Both columns share gas_data's index, so they line up row by row.
    gas_final_data = pd.DataFrame({
        'exp_time': gas_times,
        'pressure': gas_data['Pressure'],
    })
    # Celsius to Kelvin
    gas_final_data['ch1_lerp'] = gas_ch1_lerp + CELSIUS_TO_KELVIN
    gas_final_data['ch2_lerp'] = gas_ch2_lerp + CELSIUS_TO_KELVIN
    gas_final_data['mean_temp'] = (gas_final_data['ch1_lerp'] + gas_final_data['ch2_lerp'])/2.0

    final_tables.append(gas_final_data)

In [16]:
# Output file for this run, named after run_label.
hdf_name = f'{run_label}_clean_data.h5'

# Each table is appended to the file under its own key, so any output left over
# from a previous execution is deleted first to avoid keeping stale frames.
if os.path.exists(hdf_name):
    os.remove(hdf_name)

# Write one key per gas, with each table ordered by experiment time.
for idx in range(len(gases)):
    gas = gases[idx]
    table = final_tables[idx]
    table.sort_values(by='exp_time', inplace=True) # one more sort just to be sure
    table.to_hdf(hdf_name, key=gas)