# 

In [None]:
def _hold_last_accepted(values, limit):
    """Replace every sample that is not below ``limit`` with the last one that was.

    Samples that have no earlier accepted value (for example a spike in the
    very first row) are returned unchanged, so no new NaN entries are created.
    """
    accepted = values.where(values < limit)
    return accepted.ffill().fillna(values)


def _first_row_after(times, threshold):
    """Return the first position >= 1 whose time exceeds ``threshold``.

    Position 0 is never returned, so the first row is always left unsmoothed.
    ``len(times)`` is returned when no row qualifies (nothing gets smoothed).
    """
    for position, value in enumerate(times):
        if position >= 1 and value > threshold:
            return position
    return len(times)


def analyse_regen(MS_csv, flow_csv, T_regen, Discount_He=True, Low_conc_CO2=False,
                  P_sat_H2O_bath=0, ms_start=0, flow_start=0, regen_start=0,
                  regen_end=999999, filter_window=15, filelabel='test',
                  smoothing_start=None, water_outliers=False,
                  water_outlier_value=2.5E-5, full_output=False,
                  backgrounds='Default', relative_sensitivities='Default'):
    """Combine mass-spec and flow-meter logs for one regeneration step and write a CSV.

    The mass-spec (MS) partial pressures are background-corrected and scaled by
    relative sensitivities, converted to outlet mole fractions, and combined
    with the outlet flow reading to give per-species outlet molar flows, mole
    fractions and concentrations. Inlet flows, mole fractions and
    concentrations are derived from the CO2 and N2 flow readings. Finally a
    moving-average ("Smoothed ...") copy of every CO2, N2, He and H2O result
    column is appended and the table is written to ``filelabel + '.csv'``.

    Parameters
    ----------
    MS_csv : path of the MS CSV. The column header is expected on line 26 and
        must provide 'Time', 'ms', 'Nitrogen', 'Water', 'Carbon dioxide',
        'Oxygen', 'Helium' and a trailing empty column ('Unnamed: 7').
    flow_csv : path of the flow log; semicolon separated, no header, eight
        columns read as time/value pairs for CO2, He, N2 and Outlet.
    T_regen : temperature used in P/(R*T) when converting mole fractions to
        concentrations (R is in J/mol/K, so presumably kelvin - confirm).
    Discount_He : when true, He is left out of the total outlet molar flow
        that the outlet mole fractions are normalised by.
    Low_conc_CO2 : when true, the CO2 meter full scale is 0.5 instead of 2 and
        the default CO2 relative sensitivity is 0.021 instead of 0.86.
    P_sat_H2O_bath : pressure [Pa] divided by atmospheric pressure to obtain
        the water fraction added to the N2 inlet flow.
    ms_start, flow_start : offsets [s] added to the MS and flow time stamps.
    regen_start, regen_end : only MS samples with
        regen_start < time < regen_end are analysed; the last MS sample before
        regen_start is inserted as the row at breakthrough time 0.
    filter_window : window size passed to ``uniform_filter1d``.
    filelabel : output path without the '.csv' extension.
    smoothing_start : optional mapping species -> breakthrough time [s] after
        which the smoothed column uses filtered values. Missing species (and
        ``None``) default to 0 for 'CO2', 'N2', 'He' and 'H2O'.
    water_outliers : when true, H2O outlet molar flows that are not below
        ``water_outlier_value`` are replaced by the last accepted value.
    water_outlier_value : threshold [mol/s] for the replacement above.
    full_output : when false only the result columns (and their smoothed
        copies) are written; when true every intermediate column is kept.
    backgrounds : 'Default' or a mapping species -> MS background [torr].
    relative_sensitivities : 'Default' or a mapping species -> sensitivity.

    Returns
    -------
    None. The result is written to ``filelabel + '.csv'``.

    Raises
    ------
    IndexError
        If there is no MS sample before ``regen_start``.
    """
    species = ['CO2', 'N2', 'He', 'H2O', 'O2']
    MS_header_row = 26
    P_atm = 1.01325E5  # Pa
    P_exp = P_atm  # Pa
    R = 8.314  # J/mol/K
    # 1e-6 / 60: turns mg/min into kg/s; the same factor scales the gas meter
    # full-scale values below (presumably mL/min at 273.15 K - confirm).
    per_minute_micro = 1.66667e-8

    if isinstance(backgrounds, str) and backgrounds == 'Default':
        # Background MS values to subtract from MS data [torr]
        background = {'N2': 3.78E-9, 'He': 1.44E-8, 'CO2': 4.16E-11,
                      'O2': 1.86E-10, 'H2O': 3.50E-11}
    else:
        background = backgrounds

    Mw = {'N2': 28E-3, 'He': 4E-3, 'CO2': 44E-3, 'O2': 32E-3, 'H2O': 18E-3}  # kg/mol
    max_outlet = 93.59 * per_minute_micro  # mg/min to kg/s
    max_N2 = per_minute_micro * 100 * P_atm / (R * 273.15)  # mol/s
    max_He = per_minute_micro * 141 * P_atm / (R * 273.15)  # mol/s
    # Per-species factor applied to the outlet mass flow reading.
    coriolis_cal = {'N2': 1.06, 'He': 1.09, 'CO2': 1.09, 'O2': 1, 'H2O': 1.02}

    # Max flow rate of the CO2 mass flow meter [mol/s]; it depends on which
    # fluid setting (low or normal concentration) the meter is using.
    co2_full_scale = 0.5 if Low_conc_CO2 else 2
    max_CO2 = per_minute_micro * co2_full_scale * P_atm / (R * 273.15)

    if isinstance(relative_sensitivities, str) and relative_sensitivities == 'Default':
        # Mass spec relative sensitivities [-]
        RS = {'N2': 1, 'He': 1.92, 'CO2': 0.021 if Low_conc_CO2 else 0.86,
              'O2': 0.98, 'H2O': 0.0157}
    else:
        RS = relative_sensitivities

    smoothing_from = {'CO2': 0, 'N2': 0, 'He': 0, 'H2O': 0}
    if smoothing_start is not None:
        for comp in smoothing_from:
            if comp in smoothing_start:
                smoothing_from[comp] = smoothing_start[comp]

    H2O_ratio = P_sat_H2O_bath / P_atm

    # ---- Load both logs and put them on a common time axis -----------------
    df_MS = pd.read_csv(MS_csv, header=(MS_header_row - 1)).drop(['Time', 'Unnamed: 7'], axis=1)
    df_MS['Time [s]'] = df_MS['ms'] / 1000 + ms_start

    df_FM = pd.read_csv(
        flow_csv, sep=';',
        names=['Time', 'CO2', 'Time 2', 'He', 'Time 3', 'N2', 'Time 4', 'Outlet'],
    ).drop(['Time 2', 'Time 3', 'Time 4'], axis=1)
    df_FM['Time [s]'] = df_FM['Time'] + flow_start

    df_all = pd.merge(df_MS, df_FM, on='Time [s]', how='outer', sort=True)
    df_all = df_all.drop(['Time', 'ms'], axis=1).rename(columns={
        "Nitrogen": "N2 pressure [torr]", "Water": "H2O pressure [torr]",
        "Carbon dioxide": "CO2 pressure [torr]", "Oxygen": "O2 pressure [torr]",
        "Helium": "He pressure [torr]", "CO2": "CO2 flow [%]", "He": "He flow [%]",
        "N2": "N2 flow [%]", "Outlet": "Outlet flow [%]"})

    # Linearly interpolate the flow readings in time so that every MS sample
    # gets a flow value. Interpolating against the time axis (rather than the
    # two adjacent rows) also covers several MS samples falling between two
    # flow readings. Rows before the first / after the last reading stay NaN.
    raw_flow_columns = []
    for comp in ['CO2', 'N2', 'He', 'Outlet']:
        label = comp + ' flow [%]'
        raw_flow_columns.append(label)
        by_time = pd.Series(df_all[label].astype(float).values,
                            index=df_all['Time [s]'].values)
        filled = by_time.interpolate(method='index', limit_area='inside')
        df_all['Interpolated ' + label] = filled.values

    # Time relative to the start of the regeneration step
    df_all['Breakthrough time [s]'] = df_all['Time [s]'] - regen_start

    # Keep only rows that carry an MS reading; the raw flow columns are no
    # longer needed once the interpolated ones exist.
    ms_rows = df_all.loc[df_all['CO2 pressure [torr]'].abs() > 0].drop(raw_flow_columns, axis=1)

    # Ignore drying, cooling, purging etc.: keep the regeneration window only.
    before = ms_rows.loc[ms_rows['Time [s]'] < regen_start]
    in_window = (ms_rows['Time [s]'] > regen_start) & (ms_rows['Time [s]'] < regen_end)

    # The last sample before the step becomes the row at breakthrough time 0.
    # Selecting it by column name keeps this independent of CSV column order.
    # (Raises IndexError when there is no sample before regen_start.)
    zero_row = before.iloc[[-1]].copy()
    zero_row['Breakthrough time [s]'] = 0
    # The merge sorted the rows by time and every window row has a positive
    # breakthrough time, so the t = 0 row is already first after concatenation.
    df_breakthrough = pd.concat([zero_row, ms_rows.loc[in_window]], ignore_index=True)

    # ---- MS pressures -> mole fractions ------------------------------------
    # CO2 must be corrected before N2 because the N2 signal is reduced by
    # 0.114 x the corrected CO2 signal (presumably the CO2 fragment that shows
    # up on the N2 mass - confirm). Negative results are clipped to 0.
    for comp in species:
        signal = df_breakthrough[comp + ' pressure [torr]'] - background[comp]
        if comp == 'N2':
            signal = signal - 0.114 * df_breakthrough['Corrected CO2 pressure [torr]']
        corrected = signal / RS[comp]
        df_breakthrough['Corrected ' + comp + ' pressure [torr]'] = corrected.where(corrected > 0, 0)

    total_pressure = sum(df_breakthrough['Corrected ' + comp + ' pressure [torr]']
                         for comp in species)
    for comp in species:
        df_breakthrough['True MS ' + comp + ' mole fraction [-]'] = (
            df_breakthrough['Corrected ' + comp + ' pressure [torr]'] / total_pressure)

    df_breakthrough['Fake Outlet average molecular weight [kg/mol]'] = sum(
        df_breakthrough['True MS ' + comp + ' mole fraction [-]'] * Mw[comp]
        for comp in ['CO2', 'N2', 'He', 'O2', 'H2O'])

    # ---- Outlet mass and molar flows ---------------------------------------
    outlet_mass_flow = max_outlet * df_breakthrough['Interpolated Outlet flow [%]'] / 100
    for comp in species:
        df_breakthrough['True ' + comp + ' mass flow [kg/s]'] = (
            outlet_mass_flow
            * df_breakthrough['True MS ' + comp + ' mole fraction [-]']
            * Mw[comp] * coriolis_cal[comp]
            / df_breakthrough['Fake Outlet average molecular weight [kg/mol]'])
    for comp in species:
        df_breakthrough['True ' + comp + ' molar flow [mol/s]'] = (
            df_breakthrough['True ' + comp + ' mass flow [kg/s]'] / Mw[comp])

    df_breakthrough['He bypass flow [mol/s]'] = (
        df_breakthrough['Interpolated He flow [%]'] * max_He / 100)

    for comp in ['He', 'CO2', 'N2', 'H2O', 'O2']:
        true_flow = df_breakthrough['True ' + comp + ' molar flow [mol/s]']
        if comp == 'He':
            # Remove the He bypass stream; never report a negative flow.
            net = true_flow - df_breakthrough['He bypass flow [mol/s]']
            flow = net.where(net > 0, 0)
        elif comp == 'H2O' and water_outliers:
            # Hold the last accepted value through spikes. Holding the last
            # *accepted* sample keeps consecutive spikes from leaking through
            # and avoids looking before the first row.
            flow = _hold_last_accepted(true_flow, water_outlier_value)
        else:
            flow = true_flow.copy()
        df_breakthrough[comp + ' molar flow [mol/s]'] = flow

    if Discount_He:
        counted = ['N2', 'CO2', 'H2O', 'O2']
    else:
        counted = ['He', 'N2', 'CO2', 'H2O', 'O2']
    df_breakthrough['Total molar flow [mol/s]'] = sum(
        df_breakthrough[comp + ' molar flow [mol/s]'] for comp in counted)

    for comp in species:
        df_breakthrough[comp + ' mole fraction [-]'] = (
            df_breakthrough[comp + ' molar flow [mol/s]']
            / df_breakthrough['Total molar flow [mol/s]'])
    for comp in species:
        df_breakthrough[comp + ' concentration [mol/m3]'] = (
            df_breakthrough[comp + ' mole fraction [-]'] * P_exp / (R * T_regen))

    # ---- Inlet flows --------------------------------------------------------
    # He and O2 are not fed; water is carried by the N2 stream in the
    # proportion H2O_ratio / (1 - H2O_ratio). N2 is filled in before H2O.
    for comp in species:
        new_label = comp + ' inlet flow [mol/s]'
        if comp == 'CO2':
            df_breakthrough[new_label] = max_CO2 * df_breakthrough['Interpolated CO2 flow [%]'] / 100
        elif comp == 'N2':
            df_breakthrough[new_label] = max_N2 * df_breakthrough['Interpolated N2 flow [%]'] / 100
        elif comp == 'H2O':
            df_breakthrough[new_label] = (
                df_breakthrough['N2 inlet flow [mol/s]'] * H2O_ratio / (1 - H2O_ratio))
        else:
            df_breakthrough[new_label] = 0

    total_inlet = sum(df_breakthrough[comp + ' inlet flow [mol/s]']
                      for comp in ['CO2', 'N2', 'H2O', 'O2', 'He'])
    for comp in species:
        df_breakthrough[comp + ' inlet mole fraction [-]'] = (
            df_breakthrough[comp + ' inlet flow [mol/s]'] / total_inlet)
    for comp in species:
        df_breakthrough[comp + ' inlet concentration [mol/m3]'] = (
            df_breakthrough[comp + ' inlet mole fraction [-]'] * P_exp / (R * T_regen))

    # ---- Select output columns and append smoothed copies -------------------
    order = ['Breakthrough time [s]']
    for suffix in [' inlet flow [mol/s]', ' inlet mole fraction [-]',
                   ' inlet concentration [mol/m3]', ' molar flow [mol/s]',
                   ' mole fraction [-]', ' concentration [mol/m3]']:
        for comp in species:
            order.append(comp + suffix)

    if full_output:
        df_out = df_breakthrough
    else:
        # Explicit copy: new columns are added to this frame below.
        df_out = df_breakthrough[order].copy()

    times = df_breakthrough['Breakthrough time [s]'].values
    first_smoothed = {comp: _first_row_after(times, threshold)
                      for comp, threshold in smoothing_from.items()}

    for label in order[1:]:
        comp = label.split(' ', 1)[0]
        if comp not in first_smoothed:
            continue  # O2 columns get no smoothed copy
        start = first_smoothed[comp]
        raw = df_out[label]
        smoothed = raw.copy()
        if start < len(raw):
            # Raw values before `start`, moving average from `start` on; both
            # parts keep their own row so the two series stay aligned.
            filtered = uniform_filter1d(raw.values, size=filter_window)
            smoothed.iloc[start:] = filtered[start:]
        df_out['Smoothed ' + label] = smoothed

    df_out.to_csv(filelabel + '.csv', index=False)