# Get actual higher heating value (HHV) of gas delivered to customers in NL and write to Parquet file

In [None]:
import pandas as pd
import sys
# sys.path.append('../data/')
# sys.path.append('../view/')
sys.path.append('../analysis/')
from nfh_utils import *

import matplotlib.pyplot as plt
%matplotlib inline
%matplotlib widget

# File name of the Parquet output that the timestamp-averaged calorific values are written to
rhc_gas_quality_file = 'rhc_gas_quality.parquet'

## Get CSV files

In [None]:
# As long as automatic download is not working, download manually from: https://www.mijnenergiedata.nl/calorische-gaswaarden/

# The file names differ only in their trailing six-digit code (looks like YYYYMM)
datafiles = [
    f"calorische_uurwaarden_{year_month}.csv"
    for year_month in ("202311", "202312", "202401", "202402", "202403", "202404")
]


## Merge and process CSV files

In [None]:
# Function to read and process each file
def read_and_process_csv(file_path):
    """Load one CSV file into a DataFrame.

    Despite the name, no processing happens here: the file is read with
    the pandas defaults and returned unchanged.

    Parameters
    ----------
    file_path : path or buffer accepted by ``pd.read_csv``
        Location of the CSV file to load.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the file.
    """
    return pd.read_csv(file_path)

In [None]:
%%time
# Load every CSV listed in `datafiles` into its own DataFrame
dataframes = list(map(read_and_process_csv, datafiles))

# Stack the frames, then derive the final columns in a single chained expression
df = (
    pd.concat(dataframes, ignore_index=True)
    .assign(
        # Parse `date_time` as UTC, then express it in the Europe/Amsterdam timezone
        date_time=lambda frame: pd.to_datetime(
            frame['date_time'], format='%Y-%m-%dT%H:%M:%S%z', utc=True
        ).dt.tz_convert('Europe/Amsterdam'),
        # Scale by 1e6 to convert MJ to J (matches the `__J_m_3` suffix used below)
        value=lambda frame: frame['value'] * 1e6,
    )
    # Give both columns their final names in one pass
    .rename(columns={
        'value': 'actual_gas_std_hhv__J_m_3',
        'date_time': 'timestamp',
    })
)


In [None]:
# Inspect the combined frame after the timezone conversion, unit scaling and column renames
df

In [None]:
# Standard deviation of the HHV within each grid_area group,
# i.e. how much the values of one grid area vary across its rows (timestamps)
grid_area_variability = df.groupby('grid_area')['actual_gas_std_hhv__J_m_3'].std()

# Standard deviation of the HHV within each timestamp group,
# i.e. how much the values differ between rows (grid areas) sharing the same timestamp
time_variability = df.groupby('timestamp')['actual_gas_std_hhv__J_m_3'].std()

In [None]:
# Average of the per-grid_area standard deviations
grid_area_variability.mean()

In [None]:
# Average of the per-timestamp standard deviations
time_variability.mean()

In [None]:
# Mean HHV over all rows sharing a timestamp, kept as a one-column frame indexed by `timestamp`
df_nl = (
    df.groupby('timestamp')['actual_gas_std_hhv__J_m_3']
    .mean()
    .to_frame()
)

In [None]:
# Inspect the per-timestamp mean frame
df_nl

In [None]:
# Summary statistics of the per-timestamp mean HHV
df_nl.describe()

## Write to Parquet file


In [None]:
%%time 
# Persist the per-timestamp means with the pyarrow engine; index=True stores the `timestamp` index in the file
df_nl.to_parquet(rhc_gas_quality_file, index=True, engine='pyarrow')

## Inspect contents


In [None]:
# Scatter plot of the per-timestamp mean HHV, drawn through the explicit Figure/Axes interface
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df_nl.index, df_nl['actual_gas_std_hhv__J_m_3'], label='Gas HHV', marker='.')
ax.set_xlabel('Timestamp')
ax.set_ylabel('Gas HHV (J/m^3)')
ax.set_title('Average calorific value of gas delivered')
ax.legend()
ax.grid(True)
plt.show()

In [None]:
# NOTE(review): this cell is an exact repeat of the preceding plotting cell
# (same data, labels and title) - consider removing it or changing what it shows.
plt.figure(figsize=(10, 6))
plt.scatter(df_nl.index, df_nl['actual_gas_std_hhv__J_m_3'], label='Gas HHV', marker='.')
plt.xlabel('Timestamp')
plt.ylabel('Gas HHV (J/m^3)')
plt.title('Average calorific value of gas delivered')
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Summary statistics of the HHV column (df_nl holds only this column, so this overlaps with df_nl.describe() above)
df_nl['actual_gas_std_hhv__J_m_3'].describe()

In [None]:
# Ratio of the highest per-timestamp mean HHV to the overall mean
df_nl['actual_gas_std_hhv__J_m_3'].pipe(lambda hhv: hhv.max() / hhv.mean())

In [None]:
# Ratio of the overall mean HHV to the lowest per-timestamp mean
df_nl['actual_gas_std_hhv__J_m_3'].pipe(lambda hhv: hhv.mean() / hhv.min())

In [None]:
# Ratio of the highest to the lowest per-timestamp mean HHV
df_nl['actual_gas_std_hhv__J_m_3'].pipe(lambda hhv: hhv.max() / hhv.min())

In [None]:
# Print gas_groningen_nl_avg_std_hhv__J_m_3 divided by 1e6 with two decimals, followed by a literal "e6".
# The constant is not defined in this notebook; presumably it comes from the nfh_utils star import - confirm.
print(f"{gas_groningen_nl_avg_std_hhv__J_m_3 / 1e6 :.2f}e6")


In [None]:
# Overall mean of the measured HHV relative to gas_groningen_nl_avg_std_hhv__J_m_3 (1.0 would mean they are equal)
df_nl['actual_gas_std_hhv__J_m_3'].mean()/gas_groningen_nl_avg_std_hhv__J_m_3

In [None]:
# Print temp_gas_ref__K / temp_gas_std__K with two decimals (the space in the format spec pads positive numbers with a leading blank).
# Both constants are not defined in this notebook; presumably they come from the nfh_utils star import - confirm.
print(f"{temp_gas_ref__K / temp_gas_std__K : .2f}")

In [None]:
# Print gas_g25_3_ref_hhv__J_m_3 multiplied by temp_gas_ref__K / temp_gas_std__K, in units of 1e6 with two decimals.
# NOTE(review): looks like a conversion of the G25.3 HHV from reference to standard temperature conditions - confirm against nfh_utils.
print(f"{gas_g25_3_ref_hhv__J_m_3 * temp_gas_ref__K / temp_gas_std__K / 1e6 :.2f}e6")

In [None]:
# NOTE(review): exact repeat of the preceding cell - consider removing it.
# Prints gas_g25_3_ref_hhv__J_m_3 multiplied by temp_gas_ref__K / temp_gas_std__K, in units of 1e6 with two decimals.
print(f"{gas_g25_3_ref_hhv__J_m_3 * temp_gas_ref__K / temp_gas_std__K / 1e6 :.2f}e6")

In [None]:
# Print the overall mean measured HHV multiplied by temp_gas_ref__K / temp_gas_std__K, in units of 1e6 with two decimals.
# NOTE(review): the last cell of this notebook applies the inverse factor (temp_gas_std__K / temp_gas_ref__K)
# to the same mean - confirm which direction of temperature conversion is intended here.
print(f"{(df_nl['actual_gas_std_hhv__J_m_3'].mean() / temp_gas_std__K * temp_gas_ref__K) / 1e6:.2f}e6")


In [None]:
# Ratio temp_gas_std__K / temp_gas_ref__K, i.e. the inverse of the ratio printed a few cells above
temp_gas_std__K / temp_gas_ref__K


In [None]:
# Print the overall mean measured HHV, multiplied by temp_gas_std__K / temp_gas_ref__K,
# as a fraction of gas_g25_3_ref_hhv__J_m_3 (four decimals).
print(f"{(df_nl['actual_gas_std_hhv__J_m_3'].mean()  / temp_gas_ref__K * temp_gas_std__K / gas_g25_3_ref_hhv__J_m_3) :.4f}")
