# Import packages

In [1]:
from io import BytesIO
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.sharepoint.client_context import ClientContext
import pandas as pd

from influxdb import InfluxDBClient
from functools import reduce

import dcor
import seaborn as sns
import matplotlib.pyplot as plt

from statsmodels.graphics.tsaplots import plot_acf

import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

from sklearn.decomposition import PCA
import numpy as np
import pickle

pd.set_option('display.max_columns', 200)

In [2]:
# # Constants for the SharePoint connection
# TENANT_ID = "c9dc265f-a55d-466a-920c-9eb2e81f750f"
# N2_URL = "https://n2applied.sharepoint.com/sites/"
# APPLICATION_ID = "f83dae31-1ffa-4457-8e00-bc48c0f02a29"

# def authenticate_sharepoint(site_name: str):
#     """Authenticate and return a client context for a SharePoint site."""
#     site_url = f'{N2_URL}{site_name}'
#     ctx_auth = AuthenticationContext(url=site_url)
#     ctx_auth.with_interactive(TENANT_ID, APPLICATION_ID)
#     ctx = ClientContext(site_url, ctx_auth)
#     return ctx

# def download_file(ctx: ClientContext, folder_path: str, file_name: str):
#     """Download a file from SharePoint."""
#     file_path = f'{folder_path}/{file_name}'
#     with open(file_name, "wb") as file:
#         ctx.web.get_file_by_server_relative_url(file_path).download(file).execute_query()

# def load_excelsheet(ctx: ClientContext, folder_path: str, file_name: str, sheet_name: str) -> pd.DataFrame:
#     """Load an Excel sheet from SharePoint into a pandas DataFrame."""
#     file_path = f'{folder_path}/{file_name}'
#     excel_file = ctx.web.get_file_by_server_relative_url(file_path)
#     file_content = excel_file.get_content().execute_query()
#     excel_sheet = pd.read_excel(io=BytesIO(file_content.value), sheet_name=sheet_name, skiprows=8)
#     return excel_sheet

# if __name__ == "__main__":
#     # Fill in these variables with your specific values
#     site_name = 'RD'  # Replace with your actual site name
#     folder_path = "Projects/N2-PNG Stability and qualification"  # Replace with your actual folder path
#     file_name = "2024-1068-Testplan and log.xlsx"  # Replace with your actual file name
#     sheet_name = '2024-1018-TestLog'  # Replace with the sheet name you want
#     # Authenticate and obtain client context
#     ctx = authenticate_sharepoint(site_name)
#     # Download the file
#     download_file(ctx, folder_path, file_name)
#     # Load the Excel sheet into a DataFrame
#     excel_data = load_excelsheet(ctx, folder_path, file_name, sheet_name)

In [3]:
# Read the test log sheet from the local copy of the test plan workbook.
# skiprows=8 skips the first 8 rows of the sheet, so the 9th row supplies the column names.
excel_data = pd.read_excel(
    io='2024-1068-Testplan and log.xlsx',
    sheet_name='2024-1018-TestLog',
    skiprows=8,
)
# excel_data

# Specify TB

In [4]:
# TB number to analyse: used to filter the Excel 'TB' column, to build the
# InfluxDB measurement name 'TS<tb_number>', and as prefix of the pickle file names.
tb_number = 14
# Width in seconds of the GROUP BY time() buckets in the sensor query
# (the negated value is also passed as the bucket offset).
lt = 30

# Influx data

## filter

In [5]:
## Select and clean the Excel metadata
# Columns carried over from the test log. The DSE loss-rate column is picked up
# under its renamed label 'material_loss_rate'.
metadata_columns = [
    'Test_ID', 'Run Order', 'TB', 'Start Date [dd.mm.yyyy]',
    'Exp start Time [hh:mm]', 'Stop date\n [dd.mm.yyy]',
    'Exp stop Time [hh:mm]', 'Duration [h]', 'Quench diameter\n[mm]',
    'Quench distance\n[mm]', 'Running mode', 'Target Power [kW]',
    'Target current [A]', 'Target voltage [V]', 'Target aiflow [m3/h]',
    'Swirlinator type', 'Swirlinator inlets', 'Inlet diameter [mm]',
    'Coolant temp [C]', 'DSE ', 'DSE type', 'USE #',
    'DSE weight before [g]', 'DSE weight after [g]', 'material_loss_rate',
    'DSE material loss [g]', 'USE depth before [mm]',
    'USE depth after [mm]', 'USE material loss [mm/h]',
]

# A row is only kept when every one of these fields is present
required_columns = [
    'Test_ID',
    'Exp start Time [hh:mm]',
    'Start Date [dd.mm.yyyy]',
    'Stop date\n [dd.mm.yyy]',
    'Exp stop Time [hh:mm]',
    'DSE material loss [g/h]',
]

metadata_df = (
    excel_data
    .rename(columns={'DSE material loss [g/h]': 'material_loss_rate'})
    .loc[:, metadata_columns]
    .copy()
)

# Numeric version of the loss rate; values that cannot be parsed become NaN
metadata_df['DSE material loss [g/h]'] = pd.to_numeric(metadata_df['material_loss_rate'], errors='coerce')

# Drop rows that are not valid (any required field missing)
metadata_df = metadata_df.dropna(subset=required_columns)

# Only include rows where Test OK = 1
# excel_data = excel_data.query('`Test OK 1=OK` == 1')
# excel_data = excel_data.query('`Test OK 1=OK` != 1')

# Keep only the rows whose 'DSE ' value is one of the three listed B52 entries
metadata_df = metadata_df.query('`DSE ` == ["B52#1","B52#2","B52#3"]')

## Add Timestamps


In [6]:
def _to_oslo_datetime(date_col, time_col):
    """Combine a date column and a time column into tz-aware timestamps.

    Both columns are converted to strings, joined with a single space and
    parsed with ``pd.to_datetime``; values that cannot be parsed become NaT.
    The naive result is then localized to the 'Europe/Oslo' time zone.

    Parameters
    ----------
    date_col, time_col : pd.Series
        Columns holding the date part and the time-of-day part.

    Returns
    -------
    pd.Series
        Timestamps localized to 'Europe/Oslo' (NaT where parsing failed).
    """
    combined = date_col.astype(str) + ' ' + time_col.astype(str)
    return pd.to_datetime(combined, errors='coerce').dt.tz_localize('Europe/Oslo')


# Build the start/stop timestamps on a fresh frame via assign() instead of
# setting columns item-by-item: metadata_df is the result of an earlier
# .query() filter, and item assignment on such a frame can raise
# SettingWithCopyWarning.
metadata_df = metadata_df.assign(
    exp_start=_to_oslo_datetime(metadata_df['Start Date [dd.mm.yyyy]'], metadata_df['Exp start Time [hh:mm]']),
    exp_stop=_to_oslo_datetime(metadata_df['Stop date\n [dd.mm.yyy]'], metadata_df['Exp stop Time [hh:mm]']),
)

# Drop rows where datetime conversion failed (NaT values)
metadata_df = metadata_df.dropna(subset=['exp_start', 'exp_stop'])

  metadata_df['exp_start'] = pd.to_datetime(metadata_df['Start Date [dd.mm.yyyy]'].astype(str) + ' ' + metadata_df['Exp start Time [hh:mm]'].astype(str), errors='coerce').dt.tz_localize('Europe/Oslo')
  metadata_df['exp_stop'] = pd.to_datetime(metadata_df['Stop date\n [dd.mm.yyy]'].astype(str) + ' ' + metadata_df['Exp stop Time [hh:mm]'].astype(str), errors='coerce').dt.tz_localize('Europe/Oslo')


In [7]:
## Restrict to the selected TB and remove specific datapoints
test_ids_to_exclude = []
is_selected_tb = metadata_df['TB'] == tb_number
is_excluded = metadata_df['Test_ID'].isin(test_ids_to_exclude)
metadata_df = metadata_df[is_selected_tb & ~is_excluded]

# Per-experiment metadata keyed by Test_ID, so later cells can look up an
# experiment directly without filtering the frame again
metadata_df_copy = metadata_df.set_index('Test_ID').copy()
metadata_dict = metadata_df_copy.to_dict(orient='index')
print(f"antall exp = {len(metadata_dict)}\nexcluded = {test_ids_to_exclude}")

antall exp = 9
excluded = []


## Sensor

In [8]:
## Sensor Data
# Constants
HOST = '192.168.1.3'
DATABASE = 'rnd'
SELECTED_MACHINE = f'TS{tb_number}'  # measurement name derived from the selected TB number

# Hardcoded equipment and data tags to fetch; every equipment/data combination
# is queried and combinations that return no points are skipped
f_equipment_list = ['G2101', 'FV2001', 'system', 'FT2301', 'TT2302', 'TT2308', 'FT2302']
f_data_list = ['arc_voltage', 'arc_power', 'arc_current', 'flow', 'pressure', 'running', 'temperature']

# Dictionary holding one merged sensor DataFrame per Test ID
sensor_dict = {}

# Every query runs inside the context manager: the client is closed when the
# `with` block exits, so it must not be used after that point.
with InfluxDBClient(host=HOST, database=DATABASE) as client:
    # Fetch measurements and make sure the selected machine exists
    measurements = client.query('SHOW MEASUREMENTS')
    selected_machine = next((row['name'] for row in measurements.get_points() if row['name'] == SELECTED_MACHINE), None)

    if not selected_machine:
        raise ValueError(f"Machine '{SELECTED_MACHINE}' not found in measurements.")

    # Fetch equipment and data tag values in a single query
    tag_query = f'SHOW TAG VALUES FROM "{SELECTED_MACHINE}" WITH KEY IN ("equipment", "data")'
    tag_results = client.query(tag_query)

    # Full lists of available tags (only needed for the inspection prints below)
    tag_rows = list(tag_results.get_points())
    equipment_list = [row['value'] for row in tag_rows if row['key'] == 'equipment']
    data_list = [row['value'] for row in tag_rows if row['key'] == 'data']

    # print("Selected Machine:", selected_machine)
    # print("Equipment List:", equipment_list)
    # print("Data List:", data_list)

    for Test_ID, excel in metadata_dict.items():
        exp_start = excel['exp_start']
        exp_stop = excel['exp_stop']

        # One single-signal DataFrame per equipment/data combination that has data
        dataframes = []

        for equipment in f_equipment_list:
            for data in f_data_list:
                # Mean value per `lt`-second bucket within the experiment window
                query = f"""
                SELECT mean(value) AS mean_value
                FROM {selected_machine}
                WHERE equipment = '{equipment}' AND data = '{data}'
                AND time > '{exp_start.isoformat()}' AND time < '{exp_stop.isoformat()}'
                GROUP BY time({lt}s, {lt*(-1)}s)
                """
                data_rows = list(client.query(query).get_points())

                if not data_rows:
                    continue

                dataframes.append(pd.DataFrame({
                    'timestamp': pd.to_datetime([dp['time'] for dp in data_rows]).tz_convert('Europe/Oslo'),
                    f'{equipment}({data})': [dp['mean_value'] for dp in data_rows]
                }))

        # Merge all signals for the current Test ID on the 'timestamp' column
        if dataframes:
            merged_sensor_df = reduce(lambda left, right: pd.merge(left, right, on='timestamp', how='outer'), dataframes)
        else:
            # No signal returned any data for this experiment
            merged_sensor_df = pd.DataFrame()

        # Fill gaps by interpolation, then drop rows that still contain NaN
        merged_sensor_df = merged_sensor_df.interpolate()
        merged_sensor_df = merged_sensor_df.dropna()

        # Store the merged DataFrame in the dictionary with Test ID as the key
        sensor_dict[Test_ID] = merged_sensor_df

## Handle missing values

In [9]:
# Count missing values per column over every experiment in sensor_dict, not
# only the frame left in the loop variable of the last experiment. A column
# that is absent for some experiment shows up here as missing values.
sensor_nan_counts = pd.concat(sensor_dict.values(), ignore_index=True).isna().sum()
print(sensor_nan_counts.to_string())

timestamp              0
G2101(arc_voltage)     0
G2101(arc_power)       0
G2101(arc_current)     0
G2101(running)         0
G2101(temperature)     0
FV2001(flow)           0
FV2001(pressure)       0
FV2001(temperature)    0
system(running)        0
FT2301(flow)           0
FT2301(temperature)    0
TT2302(temperature)    0
TT2308(temperature)    0
FT2302(flow)           0
FT2302(temperature)    0


## FTIR

In [10]:
# ## FTIR Data
# # Constants
# HOST = '192.168.1.3'
# DATABASE = 'instruments'
# SELECTED_MACHINE = 'protea_ftir_2'  # Avoid hardcoding by fetching dynamically or using config

# # Use a context manager to handle the InfluxDB client connection
# with InfluxDBClient(host=HOST, database=DATABASE) as client:
#     # Fetch measurements and filter for the selected machine
#     measurements = client.query('SHOW MEASUREMENTS')
#     selected_machine = next((row['name'] for row in measurements.get_points() if row['name'] == SELECTED_MACHINE), None)

#     if not selected_machine:
#         raise ValueError(f"Machine '{SELECTED_MACHINE}' not found in measurements.")

#     # Fetch equipment and data tag values in a single query
#     tag_query = f'SHOW TAG VALUES FROM "{SELECTED_MACHINE}" WITH KEY IN ("equipment", "data")'
#     tag_results = client.query(tag_query)

#     # Extract equipment and data lists
#     equipment_list = [row['value'] for row in tag_results.get_points() if row['key'] == 'equipment']
#     data_list = [row['value'] for row in tag_results.get_points() if row['key'] == 'data']

# # # Print results
# # print("Selected Machine:", selected_machine)
# # print("Equipment List:", equipment_list)
# # print("Data List:", data_list)

# # for equipment in equipment_list:
# #      for data in data_list:
# #          print(equipment,data)

# # Hardcoded equipment and data lists
# f_equipment_list = ['Channel_2']
# f_data_list = ['NO_Corrected', 'NO2_Corrected']

# # Initialize a dictionary to store DataFrames for each Test ID
# ftir_dict = {}

# for Test_ID, excel in metadata_dict.items():
#     exp_start = excel['exp_start']
#     exp_stop = excel['exp_stop']
    
#     # Initialize an empty list to store DataFrames for the current Test ID
#     dataframes = []
    
#     # Iterate over equipment and data lists
#     for equipment in f_equipment_list:
#         for data in f_data_list:
#             query = f"""
#             SELECT mean(value) AS mean_value
#             FROM {selected_machine}
#             WHERE equipment = '{equipment}' AND data = '{data}'
#             AND time > '{exp_start.isoformat()}' AND time < '{exp_stop.isoformat()}'
#             GROUP BY time(30s, -30s)
#             """
#             data_points = client.query(query)
#             data_rows = list(data_points.get_points())
            
#             if data_rows:
#                 # Create a DataFrame for the current equipment and data
#                 temp_df = pd.DataFrame({
#                     'timestamp': pd.to_datetime([dp['time'] for dp in data_rows]).tz_convert('Europe/Oslo'),
#                     f'{equipment}({data})': [dp['mean_value'] for dp in data_rows]
#                 })

#                 # Append the DataFrame to the list
#                 dataframes.append(temp_df)
    
#     # Merge all DataFrames for the current Test ID on the 'timestamp' column
#     if dataframes:
#         merged_ftir_df = reduce(lambda left, right: pd.merge(left, right, on='timestamp', how='outer'), dataframes)
#         # merged_ftir_df.set_index('timestamp', inplace=True)
#     else:
#         merged_ftir_df = pd.DataFrame()
    
#     # Store the merged DataFrame in the dictionary with Test ID as the key
#     ftir_dict[Test_ID] = merged_ftir_df

# Manipulations


## Merge Sensor and FTIR Data

In [11]:
## Manipulations
# Check column names in sensor_dict (columns of the last experiment stored)
last_test_id = list(sensor_dict)[-1]
print(sensor_dict[last_test_id].columns)
# Check column names in ftir_dict
# print(ftir_dict[Test_ID].columns)

merged_dict = {}

for Test_ID, sensor_df in sensor_dict.items():
    # Experiments for which no sensor data was returned are stored as empty
    # frames; they have no 'system(running)' column to filter on, so skip them
    # instead of failing inside query().
    if 'system(running)' not in sensor_df.columns:
        print(f"Skipping {Test_ID}: no 'system(running)' data available")
        continue

    # Merge the DataFrames from both dictionaries on the 'timestamp' column
    # influx_df = pd.merge(sensor_dict[Test_ID], ftir_dict[Test_ID], on='timestamp', how='outer')
    # influx_df = influx_df.query('`G2101(arc_power)` > 15 & `G2101(arc_voltage)` > 500 & `FV2001(flow)` > 25')

    # Keep only samples where the system reports running. The explicit copy
    # makes the result independent of the frame in sensor_dict, so adding a
    # column below does not raise SettingWithCopyWarning.
    influx_df = sensor_df.query('`system(running)` == 1').copy()

    # influx_df['NO/NO2'] = influx_df['Channel_2(NO_Corrected)'] / influx_df['Channel_2(NO2_Corrected)']
    # influx_df['NOx%'] = influx_df['Channel_2(NO_Corrected)'] + influx_df['Channel_2(NO2_Corrected)']
    # std_temp = 273
    # std_pressure = 101325
    # Mol_mas_N = 14
    # R = 8.3145
    # influx_df['EC_calc'] = (100000 * std_temp * R * influx_df['G2101(arc_power)']) / (std_pressure * Mol_mas_N * influx_df['FV2001(flow)'] * influx_df['NOx%'])

    # Arc power divided by flow, per sample
    influx_df['enthalpy_calc'] = influx_df['G2101(arc_power)'] / influx_df['FV2001(flow)']

    # Store the filtered DataFrame in the dictionary with Test ID as the key
    merged_dict[Test_ID] = influx_df

# All experiments stacked into one frame with a fresh integer index
merged_influx_df = pd.concat(merged_dict.values(), ignore_index=True)

Index(['timestamp', 'G2101(arc_voltage)', 'G2101(arc_power)',
       'G2101(arc_current)', 'G2101(running)', 'G2101(temperature)',
       'FV2001(flow)', 'FV2001(pressure)', 'FV2001(temperature)',
       'system(running)', 'FT2301(flow)', 'FT2301(temperature)',
       'TT2302(temperature)', 'TT2308(temperature)', 'FT2302(flow)',
       'FT2302(temperature)'],
      dtype='object')


In [12]:
# Count missing values per column in the frame that stacks all experiments
merged_influx_df.isna().sum()

timestamp              0
G2101(arc_voltage)     0
G2101(arc_power)       0
G2101(arc_current)     0
G2101(running)         0
G2101(temperature)     0
FV2001(flow)           0
FV2001(pressure)       0
FV2001(temperature)    0
system(running)        0
FT2301(flow)           0
FT2301(temperature)    0
TT2302(temperature)    0
TT2308(temperature)    0
FT2302(flow)           0
FT2302(temperature)    0
enthalpy_calc          0
dtype: int64

## Create Statistical Dataframe


In [13]:
# Per-experiment statistics in long format (rows: mean / std / cv), keyed by Test_ID
stat_dict = {}
# One flat record per experiment, used to build the wide summary table
stat_rows = []

# Iterate over the keys (Test_IDs)
for Test_ID in merged_dict.keys():
    # merged_df = pd.merge(merged_dict[Test_ID], ftir_dict[Test_ID], on='timestamp', how='outer')
    # merged_df['NO/NO2']=merged_df['Channel_2(NO_Corrected)']/merged_df['Channel_2(NO2_Corrected)']
    # merged_df['NOx%']=merged_df['Channel_2(NO_Corrected)']+merged_df['Channel_2(NO2_Corrected)']

    # std_temp = 273
    # std_pressure = 101325
    # Mol_mas_N = 14
    # R = 8.3145
    # merged_df['EC_calc'] = (100000*std_temp*R*merged_df['G2101(arc_power)']) / (std_pressure*Mol_mas_N*merged_df['FV2001(flow)']*merged_df['NOx%'])

    # assign() returns a new frame, so the frames stored in merged_dict are
    # not modified while the statistics are computed.
    merged_df = merged_dict[Test_ID].assign(
        enthalpy_calc=lambda frame: frame['G2101(arc_power)'] / frame['FV2001(flow)']
    )

    # Exclude the 'timestamp' column from calculations (difference() also sorts the names)
    data_columns = merged_df.columns.difference(['timestamp'])
    mean_values = merged_df[data_columns].mean()
    std_values = merged_df[data_columns].std()
    cv_values = std_values / mean_values  # coefficient of variation
    stats = {'mean': mean_values, 'std': std_values, 'cv': cv_values}

    # Long-format table for this experiment
    test_stats = pd.DataFrame(stats).transpose()
    test_stats['Test_ID'] = Test_ID
    stat_dict[Test_ID] = test_stats

    # Wide record: <parameter>_mean, <parameter>_std, <parameter>_cv for each parameter
    stat_row = {
        f'{parameter}_{stat_name}': stat_values[parameter]
        for parameter in data_columns
        for stat_name, stat_values in stats.items()
    }
    stat_row['Test_ID'] = Test_ID
    stat_rows.append(stat_row)

# Concatenate all long-format DataFrames into one
all_stats_df = pd.concat(stat_dict.values(), ignore_index=False)

# Build the wide table in a single step (one row per experiment) and keep
# Test_ID as the last column.
stat_df = pd.DataFrame(stat_rows)
stat_df = stat_df[[column for column in stat_df.columns if column != 'Test_ID'] + ['Test_ID']]

print("Reformatted Statistics DataFrame:")
stat_df.tail(10)
# print(stat_df.to_string(index=False))

Reformatted Statistics DataFrame:


Unnamed: 0,FT2301(flow)_mean,FT2301(flow)_std,FT2301(flow)_cv,FT2301(temperature)_mean,FT2301(temperature)_std,FT2301(temperature)_cv,FT2302(flow)_mean,FT2302(flow)_std,FT2302(flow)_cv,FT2302(temperature)_mean,FT2302(temperature)_std,FT2302(temperature)_cv,FV2001(flow)_mean,FV2001(flow)_std,FV2001(flow)_cv,FV2001(pressure)_mean,FV2001(pressure)_std,FV2001(pressure)_cv,FV2001(temperature)_mean,FV2001(temperature)_std,FV2001(temperature)_cv,G2101(arc_current)_mean,G2101(arc_current)_std,G2101(arc_current)_cv,G2101(arc_power)_mean,G2101(arc_power)_std,G2101(arc_power)_cv,G2101(arc_voltage)_mean,G2101(arc_voltage)_std,G2101(arc_voltage)_cv,G2101(running)_mean,G2101(running)_std,G2101(running)_cv,G2101(temperature)_mean,G2101(temperature)_std,G2101(temperature)_cv,TT2302(temperature)_mean,TT2302(temperature)_std,TT2302(temperature)_cv,TT2308(temperature)_mean,TT2308(temperature)_std,TT2308(temperature)_cv,enthalpy_calc_mean,enthalpy_calc_std,enthalpy_calc_cv,system(running)_mean,system(running)_std,system(running)_cv,Test_ID
0,4.676316,0.02664,0.005697,29.550939,0.635527,0.021506,27.737117,0.123568,0.004455,39.961587,0.898632,0.022487,29.982734,0.0479,0.001598,3.938804,0.006732,0.001709,21.145655,0.513905,0.024303,32.973965,0.000933,2.8e-05,27.328839,0.077398,0.002832,828.800451,2.347421,0.002832,1.0,0.0,0.0,24.23285,0.512438,0.021146,30.118918,0.652227,0.021655,42.873879,0.970679,0.02264,0.911487,0.002675,0.002935,1.0,0.0,0.0,2024-1068-120
1,4.889309,0.028961,0.005923,28.13844,1.277645,0.045406,27.738617,0.107266,0.003867,40.005234,0.383346,0.009582,29.981446,0.048804,0.001628,3.943875,0.017754,0.004502,21.552944,1.117885,0.051867,32.973979,0.000719,2.2e-05,27.475157,0.082987,0.00302,833.237549,2.5165,0.00302,1.0,0.0,0.0,22.627578,0.933686,0.041263,28.700513,1.400378,0.048793,42.976315,0.397829,0.009257,0.916407,0.002936,0.003204,1.0,0.0,0.0,2024-1068-121
2,4.88635,0.031468,0.00644,29.220048,1.431132,0.048978,27.716603,0.115451,0.004165,39.98976,0.822674,0.020572,29.985468,0.047154,0.001573,4.038719,0.028464,0.007048,21.819655,1.257468,0.05763,32.973966,0.000689,2.1e-05,27.770999,0.13363,0.004812,842.209779,4.052709,0.004812,1.0,0.0,0.0,24.994703,1.896199,0.075864,29.807576,1.52905,0.051297,43.067236,0.813831,0.018897,0.92615,0.004534,0.004895,1.0,0.0,0.0,2024-1068-122
3,4.851214,0.069643,0.014356,28.725536,1.448817,0.050437,28.001194,0.127921,0.004568,39.998634,0.492523,0.012313,29.992796,0.046749,0.001559,4.187191,0.119349,0.028503,20.923603,1.884588,0.09007,32.969704,0.009509,0.000288,27.884567,0.127676,0.004579,845.762515,3.994146,0.004723,0.999989,0.001653,0.001653,23.636181,1.459473,0.061747,29.325333,1.571478,0.053588,43.179502,0.504649,0.011687,0.92971,0.004272,0.004595,1.0,0.0,0.0,2024-1068-124
4,4.726264,0.210947,0.044633,28.366036,1.419902,0.050056,26.946948,0.153717,0.005704,39.979566,0.922348,0.02307,29.978027,0.058061,0.001937,3.843688,0.080186,0.020862,20.433826,1.682825,0.082355,32.973982,0.001097,3.3e-05,27.181825,0.103966,0.003825,824.341573,3.149363,0.00382,0.999824,0.005993,0.005994,22.807115,2.11756,0.092846,28.937858,1.471385,0.050846,42.928017,0.956672,0.022285,0.906726,0.003399,0.003748,1.0,0.0,0.0,2024-1068-127
5,4.498685,0.036538,0.008122,30.680112,1.795429,0.058521,26.849157,0.107258,0.003995,39.996619,0.915212,0.022882,29.970547,0.050729,0.001693,3.781711,0.013198,0.00349,22.397849,2.158553,0.096373,32.973967,0.00103,3.1e-05,27.104763,0.120345,0.00444,822.004838,3.649841,0.00444,0.999858,0.00558,0.005581,25.721261,1.866109,0.072551,31.367702,1.890704,0.060275,42.891363,0.976239,0.022761,0.904381,0.004029,0.004456,1.0,0.0,0.0,2024-1068-129
6,4.529757,0.030576,0.00675,29.655975,1.224463,0.041289,26.885565,0.106762,0.003971,39.997707,0.896778,0.022421,29.980311,0.048505,0.001618,3.795918,0.008297,0.002186,19.988359,1.236016,0.061837,32.973985,0.000692,2.1e-05,27.113491,0.056257,0.002075,822.269135,1.706313,0.002075,1.0,0.0,0.0,24.785776,1.498059,0.06044,30.289766,1.317759,0.043505,42.853961,0.910877,0.021255,0.904378,0.00207,0.002289,1.0,0.0,0.0,2024-1068-131
7,4.571088,0.038869,0.008503,30.245302,2.30502,0.076211,26.916651,0.108707,0.004039,39.98602,0.88855,0.022222,29.977813,0.049673,0.001657,3.823647,0.012753,0.003335,20.613326,2.837319,0.137645,32.973958,0.003444,0.000104,27.23052,0.090678,0.00333,825.81892,2.747752,0.003327,0.999841,0.005886,0.005887,25.596404,2.20189,0.086023,30.892565,2.408768,0.077972,42.858642,0.916163,0.021376,0.908357,0.003114,0.003428,1.0,0.0,0.0,2024-1068-133
8,4.562036,0.025699,0.005633,29.491263,0.950544,0.032231,26.972989,0.101991,0.003781,40.00231,0.701049,0.017525,29.983457,0.048613,0.001621,3.854558,0.008756,0.002271,18.471521,0.553176,0.029948,32.973971,0.000762,2.3e-05,27.331419,0.071611,0.00262,828.878606,2.171955,0.00262,1.0,0.0,0.0,25.210345,0.955986,0.03792,30.121956,1.114762,0.037008,42.903049,0.701831,0.016359,0.911552,0.002539,0.002785,1.0,0.0,0.0,2024-1068-136


## Merge Influx statistics with Excel metadata


In [14]:
# Inner-join the Excel metadata with the per-experiment statistics on Test_ID
# (Test_ID is the index of metadata_df_copy and a regular column of stat_df).
final_df = metadata_df_copy.merge(stat_df, how='inner', on='Test_ID')
# final_df = final_df.drop(columns=[col for col in final_df.columns if 'cv' in col])
# final_df.to_csv('final_df.csv')

# id = final_df[final_df['Test_ID']=='2025-exp30']
# print('exp 30 energy cost =', id['EC_calc_mean'].to_string(index=False))

# final_df.isna().sum()

final_df.tail(5)

Unnamed: 0,Test_ID,Run Order,TB,Start Date [dd.mm.yyyy],Exp start Time [hh:mm],Stop date\n [dd.mm.yyy],Exp stop Time [hh:mm],Duration [h],Quench diameter\n[mm],Quench distance\n[mm],Running mode,Target Power [kW],Target current [A],Target voltage [V],Target aiflow [m3/h],Swirlinator type,Swirlinator inlets,Inlet diameter [mm],Coolant temp [C],DSE,DSE type,USE #,DSE weight before [g],DSE weight after [g],material_loss_rate,DSE material loss [g],USE depth before [mm],USE depth after [mm],USE material loss [mm/h],DSE material loss [g/h],exp_start,exp_stop,FT2301(flow)_mean,FT2301(flow)_std,FT2301(flow)_cv,FT2301(temperature)_mean,FT2301(temperature)_std,FT2301(temperature)_cv,FT2302(flow)_mean,FT2302(flow)_std,FT2302(flow)_cv,FT2302(temperature)_mean,FT2302(temperature)_std,FT2302(temperature)_cv,FV2001(flow)_mean,FV2001(flow)_std,FV2001(flow)_cv,FV2001(pressure)_mean,FV2001(pressure)_std,FV2001(pressure)_cv,FV2001(temperature)_mean,FV2001(temperature)_std,FV2001(temperature)_cv,G2101(arc_current)_mean,G2101(arc_current)_std,G2101(arc_current)_cv,G2101(arc_power)_mean,G2101(arc_power)_std,G2101(arc_power)_cv,G2101(arc_voltage)_mean,G2101(arc_voltage)_std,G2101(arc_voltage)_cv,G2101(running)_mean,G2101(running)_std,G2101(running)_cv,G2101(temperature)_mean,G2101(temperature)_std,G2101(temperature)_cv,TT2302(temperature)_mean,TT2302(temperature)_std,TT2302(temperature)_cv,TT2308(temperature)_mean,TT2308(temperature)_std,TT2308(temperature)_cv,enthalpy_calc_mean,enthalpy_calc_std,enthalpy_calc_cv,system(running)_mean,system(running)_std,system(running)_cv
4,2024-1068-127,127.0,14,2025-02-14 00:00:00,14:00:00,2025-02-21 00:00:00,12:54:00,144.4,6.0,,SLARP+C,27.0,33.0,,,,,,40,B52#3,W-11-80-batch52,2.0,884.36,884.25,0.000762,0.11,5.1,5.0,-0.000693,0.000762,2025-02-14 14:00:00+01:00,2025-02-21 12:54:00+01:00,4.726264,0.210947,0.044633,28.366036,1.419902,0.050056,26.946948,0.153717,0.005704,39.979566,0.922348,0.02307,29.978027,0.058061,0.001937,3.843688,0.080186,0.020862,20.433826,1.682825,0.082355,32.973982,0.001097,3.3e-05,27.181825,0.103966,0.003825,824.341573,3.149363,0.00382,0.999824,0.005993,0.005994,22.807115,2.11756,0.092846,28.937858,1.471385,0.050846,42.928017,0.956672,0.022285,0.906726,0.003399,0.003748,1.0,0.0,0.0
5,2024-1068-129,129.0,14,2025-02-21 00:00:00,13:09:00,2025-02-27 00:00:00,08:22:00,76.3,6.0,,SLARP+C,27.0,33.0,,,,,,40,B52#3,W-11-80-batch52,2.0,884.25,884.11,0.001835,0.14,5.0,5.1,0.001311,0.001835,2025-02-21 13:09:00+01:00,2025-02-27 08:22:00+01:00,4.498685,0.036538,0.008122,30.680112,1.795429,0.058521,26.849157,0.107258,0.003995,39.996619,0.915212,0.022882,29.970547,0.050729,0.001693,3.781711,0.013198,0.00349,22.397849,2.158553,0.096373,32.973967,0.00103,3.1e-05,27.104763,0.120345,0.00444,822.004838,3.649841,0.00444,0.999858,0.00558,0.005581,25.721261,1.866109,0.072551,31.367702,1.890704,0.060275,42.891363,0.976239,0.022761,0.904381,0.004029,0.004456,1.0,0.0,0.0
6,2024-1068-131,131.0,14,2025-02-27 00:00:00,09:00:00,2025-03-03 00:00:00,07:42:00,73.3,6.0,,SLARP+C,27.0,33.0,,,,,,40,B52#3,W-11-80-batch52,2.0,884.11,883.92,0.002592,0.19,5.1,5.1,0.0,0.002592,2025-02-27 09:00:00+01:00,2025-03-03 07:42:00+01:00,4.529757,0.030576,0.00675,29.655975,1.224463,0.041289,26.885565,0.106762,0.003971,39.997707,0.896778,0.022421,29.980311,0.048505,0.001618,3.795918,0.008297,0.002186,19.988359,1.236016,0.061837,32.973985,0.000692,2.1e-05,27.113491,0.056257,0.002075,822.269135,1.706313,0.002075,1.0,0.0,0.0,24.785776,1.498059,0.06044,30.289766,1.317759,0.043505,42.853961,0.910877,0.021255,0.904378,0.00207,0.002289,1.0,0.0,0.0
7,2024-1068-133,133.0,14,2025-03-03 00:00:00,08:07:00,2025-03-10 00:00:00,13:44:00,146.7,6.0,,SLARP+C,27.0,33.0,,,,,,40,B52#3,W-11-80-batch52,2.0,883.92,883.4,0.003545,0.52,5.1,5.2,0.000682,0.003545,2025-03-03 08:07:00+01:00,2025-03-10 13:44:00+01:00,4.571088,0.038869,0.008503,30.245302,2.30502,0.076211,26.916651,0.108707,0.004039,39.98602,0.88855,0.022222,29.977813,0.049673,0.001657,3.823647,0.012753,0.003335,20.613326,2.837319,0.137645,32.973958,0.003444,0.000104,27.23052,0.090678,0.00333,825.81892,2.747752,0.003327,0.999841,0.005886,0.005887,25.596404,2.20189,0.086023,30.892565,2.408768,0.077972,42.858642,0.916163,0.021376,0.908357,0.003114,0.003428,1.0,0.0,0.0
8,2024-1068-136,136.0,14,2025-03-10 00:00:00,14:00:00,2025-03-12 00:00:00,11:30:00,45.6,6.0,,SLARP+C,27.0,33.0,,,,,,40,B52#3,W-11-80-batch52,2.0,883.4,883.17,0.005044,0.23,5.2,5.2,0.0,0.005044,2025-03-10 14:00:00+01:00,2025-03-12 11:30:00+01:00,4.562036,0.025699,0.005633,29.491263,0.950544,0.032231,26.972989,0.101991,0.003781,40.00231,0.701049,0.017525,29.983457,0.048613,0.001621,3.854558,0.008756,0.002271,18.471521,0.553176,0.029948,32.973971,0.000762,2.3e-05,27.331419,0.071611,0.00262,828.878606,2.171955,0.00262,1.0,0.0,0.0,25.210345,0.955986,0.03792,30.121956,1.114762,0.037008,42.903049,0.701831,0.016359,0.911552,0.002539,0.002785,1.0,0.0,0.0


# Save as pickles

In [15]:
# DataFrames are written with DataFrame.to_pickle, one file per frame,
# named 'TB<tb_number>_<variable name>.pkl'
frames_to_save = {
    'merged_influx_df': merged_influx_df,
    'stat_df': stat_df,
    'final_df': final_df,
}
for frame_name, frame in frames_to_save.items():
    frame.to_pickle(f'TB{tb_number}_{frame_name}.pkl')

# The per-experiment dictionaries are written with pickle.dump, same naming scheme
dicts_to_save = {
    'merged_dict': merged_dict,
    'sensor_dict': sensor_dict,
}
for dict_name, payload in dicts_to_save.items():
    with open(f'TB{tb_number}_{dict_name}.pkl', 'wb') as file:
        pickle.dump(payload, file)
# Save ftir_dict as a pickle file
# with open(f'TB{tb_number}_ftir_dict.pkl', 'wb') as file:
#     pickle.dump(ftir_dict, file)

In [16]:
# NOTE: merged_df is the loop variable left over from the statistics cell, so
# this only counts missing values in the frame of the last experiment processed.
merged_df.isna().sum()

timestamp              0
G2101(arc_voltage)     0
G2101(arc_power)       0
G2101(arc_current)     0
G2101(running)         0
G2101(temperature)     0
FV2001(flow)           0
FV2001(pressure)       0
FV2001(temperature)    0
system(running)        0
FT2301(flow)           0
FT2301(temperature)    0
TT2302(temperature)    0
TT2308(temperature)    0
FT2302(flow)           0
FT2302(temperature)    0
enthalpy_calc          0
dtype: int64