In [None]:
import pandas as pd
import numpy as np
import itertools
import drm_basic
import time
from drm_basic import drm_methods, drm_utils
from openpyxl import load_workbook
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt

In [None]:
# Factor levels that span the model-fitting grid
pressure_values = [1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 2.1, 2.3]
particle_sizes = [0.5, 2.5, 6.5, 11.5, 16.5, 22.5, 30.5]
function_names = ["BC5", "LL5", "Gomp4"]

# Full factorial grid (in the spirit of R's expand.grid): one row per
# (pressure, particle size, function) combination. Pressure varies slowest
# and the function name varies fastest.
Grid_all = pd.DataFrame(
    [
        (pres, part_size, funct)
        for pres in pressure_values
        for part_size in particle_sizes
        for funct in function_names
    ],
    columns=['pres', 'part_size', 'funct'],
)

In [None]:
# Gas names covered by the analysis. model_performance uses each name both as
# the directory and as the file-name suffix of the Excel workbooks it reads.
gases = ["CO2", "Helium", "Luft"]

In [None]:
def model_performance(Grid_all, gases):
    """Fit and score one model per grid row, separately for every gas.

    For each gas and each row of ``Grid_all`` the workbook
    ``{gas}/Auswertung_Time_Averaged_{pres}_{gas}.xlsx`` is read. The
    ``drm_methods`` function named by ``funct`` is fitted on the first sheet
    (training data) and its predictions are scored on the second sheet
    (test data).

    Parameters
    ----------
    Grid_all : pandas.DataFrame
        One row per fit, with columns ``pres``, ``part_size`` and ``funct``.
        ``funct`` must name a callable attribute of ``drm_methods``. Rows may
        be in any order and carry any index; grouping rows by pressure merely
        avoids re-reading workbooks.
    gases : iterable of str
        Gas names, used as directory name and file-name suffix.

    Returns
    -------
    dict
        Maps each gas name to a DataFrame with the columns "Model", "RMSE",
        "Mean Absolute Error", "Pressure", "Particle size" and
        "Coeff_b" ... "Coeff_f", holding one row per grid row in grid order.
        Coefficient columns a model does not provide are left empty.
    """
    column_names = ["Model", "RMSE", "Mean Absolute Error", "Pressure", "Particle size",
                    "Coeff_b", "Coeff_c", "Coeff_d", "Coeff_e", "Coeff_f"]
    # Number of Coeff_* columns; shorter coefficient vectors are padded to this
    n_coeffs = 5

    # One result DataFrame per gas
    results = {}

    for gas in gases:
        # Rows are collected here and turned into a DataFrame once per gas,
        # instead of growing a DataFrame row by row.
        records = []

        # Workbook currently held in memory, identified by its pressure
        loaded_pres = None
        train_sheet = None
        test_sheet = None

        for _, ele in Grid_all.iterrows():
            pres = ele['pres']
            part_size = ele['part_size']

            # Reload whenever the pressure changes rather than assuming a
            # fixed number of grid rows per pressure and a default index.
            if train_sheet is None or pres != loaded_pres:
                print(pres)
                # A single read returns both sheets: 0 = training, 1 = test
                sheets = pd.read_excel(
                    f"{gas}/Auswertung_Time_Averaged_{pres}_{gas}.xlsx",
                    sheet_name=[0, 1],
                )
                train_sheet, test_sheet = sheets[0], sheets[1]
                loaded_pres = pres

            # Time ("Zeit") and velocity ("Geschwindigkeit") columns for this
            # particle size
            x_col = f"Zeit {part_size}"
            y_col = f"Geschwindigkeit {part_size}"

            # Drop incomplete (x, y) pairs before fitting / scoring
            train_data = pd.DataFrame({"x": train_sheet[x_col], "y": train_sheet[y_col]}).dropna()
            test_data = pd.DataFrame({"x": test_sheet[x_col], "y": test_sheet[y_col]}).dropna()

            # Fit the requested model; note the (y, x) argument order
            funct = ele['funct']
            model = getattr(drm_methods, funct)(train_data['y'], train_data['x'])

            # Predict on the held-out test data and compute error metrics
            predicted_values = drm_utils.drm_predict(model, test_data['x'])
            rmse_val = np.sqrt(mean_squared_error(test_data['y'], predicted_values))
            mean_ae = mean_absolute_error(test_data['y'], predicted_values)
            print(f"Model {funct} RMSE and Mean Absolute Error: {rmse_val}, {mean_ae}")

            # The fitted coefficients are read from the first element's ``x``
            # attribute; pad with None so every row has n_coeffs entries
            coeffs = list(model[0].x[:n_coeffs])
            coeffs += [None] * (n_coeffs - len(coeffs))

            records.append([funct, rmse_val, mean_ae, pres, part_size] + coeffs)

        results[gas] = pd.DataFrame(records, columns=column_names)

    return results

In [None]:
# Run the full grid once per gas. Each call returns a dict keyed by the gas
# name, so e.g. result_CO2["CO2"] is the metrics/coefficients DataFrame.
start_time = time.time()

result_CO2 = model_performance(Grid_all, gases=["CO2"])
result_Helium = model_performance(Grid_all, gases=["Helium"])
result_Luft = model_performance(Grid_all, gases=["Luft"])

end_time = time.time()

# Wall-clock duration of all three runs together
elapsed_seconds = end_time - start_time
print(f"Time taken by function: {elapsed_seconds} seconds")

In [None]:
# Optional export: write each gas's result DataFrame to its own Excel file (uncomment to run)
# result_CO2["CO2"].to_excel("result_CO2.xlsx", index=False)
# result_Helium["Helium"].to_excel("result_Helium.xlsx", index=False)
# result_Luft["Luft"].to_excel("result_Luft.xlsx", index=False)