# Water density in GEKKO: polynomial fit of the volumetric heat capacity of water

In [None]:
%load_ext autoreload

import sys
sys.path.append('../data/')
sys.path.append('../view/')
sys.path.append('../analysis/')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
%matplotlib widget

from nfh_utils import *

## Compute poly fit

In [None]:
%%time
# Evaluation grid for the fit: 100 evenly spaced temperatures from 10 to 80 °C
temp_range = np.linspace(10, 80, 100)

# Reference volumetric heat capacity on that grid, evaluated at the pressure
# constant that comes from nfh_utils (labelled 'CoolProp Data' in the plot)
volumetric_heat_capacity = water_volumetric_heat_capacity__J_dm_3_K_1(
    temp_range,
    np.array(heat_dstr_nl_avg_abs__Pa),
)

# Unweighted least-squares polynomial; degrees 3-5 are worth trying
degree = 3
coeffs = np.polyfit(temp_range, volumetric_heat_capacity, degree)
poly_fit = np.polyval(coeffs, temp_range)

# Overlay the fitted curve on the reference points
fig, ax = plt.subplots()
ax.plot(temp_range, volumetric_heat_capacity, 'o', label='CoolProp Data')
ax.plot(temp_range, poly_fit, '-', label=f'Poly Fit (deg {degree})')
ax.set_xlabel('Temperature [°C]')
ax.set_ylabel('Volumetric Heat Capacity [J/(dm³·K)]')
ax.legend()
plt.show()

# Coefficients are ordered from the highest power down to the constant term
print("Polynomial Coefficients:", coeffs)

# Signed residuals of the fit and their magnitudes
errors = poly_fit - volumetric_heat_capacity
absolute_errors = np.abs(errors)

# Bias (mean error) and typical deviation (mean absolute error) over the grid
mean_error = np.mean(errors)
mean_absolute_error = np.mean(absolute_errors)

print(f"Mean Error (ME): {mean_error}  [J/(dm³·K)]")
print(f"Mean Absolute Error (MAE): {mean_absolute_error}  [J/(dm³·K)]")

In [None]:
# Display the current fit coefficients as a list (np.polyfit orders them from
# the highest power down to the constant term)
list(coeffs)

## Compute weighted poly fit

In [None]:
# Path of the Parquet file that is loaded into df_predicted in the next cell
rhc_data_including_predicted_properties_file_path = 'rhc_results_2025-01-26T182144+0100.parquet'

In [None]:
%%time

# Read the Parquet file into df_predicted.
# A failed read is reported and then re-raised: carrying on would either hit a
# NameError on df_predicted below or, worse, silently reuse a stale
# df_predicted left over from an earlier run of this cell.
try:
    df_predicted = pd.read_parquet(
        rhc_data_including_predicted_properties_file_path,
        engine='pyarrow',
        dtype_backend='numpy_nullable',
    )
except Exception as e:
    print(f"Error reading file: {e}")
    raise
else:
    print("File was successfully read without specifying compression codec.")

# This seems needed to restore the timezone. TODO: check root cause & fix root cause
df_predicted = df_predicted.tz_convert('Europe/Amsterdam', level='timestamp')

In [None]:
# Summary statistics of the temp_dstr__degC column (presumably the
# distribution temperature in °C, going by the column name)
df_predicted['temp_dstr__degC'].describe()

In [None]:
%matplotlib inline
%matplotlib widget
# Normalised 100-bin histogram of one df_predicted column, titled with the
# column name
prop = 'temp_dstr__degC'
fig = plt.figure()
df_predicted[prop].plot(kind='hist', bins=100, alpha=0.5, title=prop, density=True)


In [None]:
# Volumetric heat capacity at the lowest temp_dstr__degC value in the data
water_volumetric_heat_capacity__J_dm_3_K_1(df_predicted['temp_dstr__degC'].min(), heat_dstr_nl_avg_abs__Pa)

In [None]:
# Volumetric heat capacity at the highest temp_dstr__degC value in the data
water_volumetric_heat_capacity__J_dm_3_K_1(df_predicted['temp_dstr__degC'].max(), heat_dstr_nl_avg_abs__Pa)

In [None]:
%%time
# Rows without a temperature cannot be binned, so drop them first
df_predicted_clean = df_predicted.dropna(subset=['temp_dstr__degC'])
step_degC = 0.5  # Desired bin step size

# Round the observed range outwards to whole multiples of the step size
temp_dstr_min__degC = np.floor(df_predicted_clean['temp_dstr__degC'].min() / step_degC) * step_degC
temp_dstr_max__degC = np.ceil(df_predicted_clean['temp_dstr__degC'].max() / step_degC) * step_degC

# Bin edges with the exact step size (the stop value is exclusive, hence + step)
temp_dstr_range__degC = np.arange(temp_dstr_min__degC, temp_dstr_max__degC + step_degC, step_degC)

# Compute the histogram on those explicit edges. Passing only the number of
# edges would make NumPy create equal-width bins between the data minimum and
# maximum, which are neither step_degC wide nor aligned to the step grid.
# With density=True the returned values are probability densities, not counts.
temp_dstr_bin_counts__degC, bin_edges = np.histogram(
    df_predicted_clean['temp_dstr__degC'],
    bins=temp_dstr_range__degC,
    density=True,
)

# Use midpoints of bins as the temp range reference points
temp_dstr_bin_centers__degC = (bin_edges[:-1] + bin_edges[1:]) / 2

# Reference volumetric heat capacity at the bin centers, evaluated at the
# pressure constant that comes from nfh_utils
volumetric_heat_capacity = water_volumetric_heat_capacity__J_dm_3_K_1(temp_dstr_bin_centers__degC, np.array(heat_dstr_nl_avg_abs__Pa))

# Fit a polynomial, weighting each bin by how often its temperature occurs.
# np.polyfit applies w to the *unsquared* residuals, so the square root of the
# density is passed; the squared error of a bin then counts in proportion to
# its density (passing the density itself would weight by density squared).
degree = 3  # Try different degrees (3-5) for best fit
coeffs = np.polyfit(
    temp_dstr_bin_centers__degC,
    volumetric_heat_capacity,
    degree,
    w=np.sqrt(temp_dstr_bin_counts__degC),
)

In [None]:
# Display the current fit coefficients as a list (np.polyfit orders them from
# the highest power down to the constant term)
list(coeffs)

In [None]:
# Evaluate the fitted polynomial at the histogram bin centers
poly_fit = np.polyval(coeffs, temp_dstr_bin_centers__degC)

In [None]:
# Plot the reference values and the weighted fit at the bin centers
fig = plt.figure()
plt.plot(temp_dstr_bin_centers__degC, volumetric_heat_capacity, 'o', label='CoolProp Data')
plt.plot(temp_dstr_bin_centers__degC, poly_fit, '-', label=f'Weighted Poly Fit (deg {degree})')
plt.xlabel('Temperature [°C]')
plt.ylabel('Volumetric Heat Capacity [J/(dm³·K)]')
plt.legend()
plt.show()

# Print polynomial coefficients (highest power first)
print("Polynomial Coefficients:", coeffs)

# Signed residuals of the fit and their magnitudes (computed once; the
# original cell repeated this section verbatim)
errors = poly_fit - volumetric_heat_capacity
absolute_errors = np.abs(errors)

# Weighted Mean Error (WME): residuals averaged with the bin densities as
# weights, so frequently occurring temperatures dominate
weighted_mean_error = np.average(errors, weights=temp_dstr_bin_counts__degC)

# Weighted Mean Absolute Error (WMAE), using the same weights
weighted_mean_absolute_error = np.average(absolute_errors, weights=temp_dstr_bin_counts__degC)

print(f"Weighted Mean Error (WME): {weighted_mean_error}  [J/(dm³·K)]")
print(f"Weighted Mean Absolute Error (WMAE): {weighted_mean_absolute_error}  [J/(dm³·K)]")
