In [4]:
import pandas as pd
import datetime as dt
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pyplot as plt

df = pd.read_csv("src/data/analyzed/daily_demand_clean.csv")
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d', errors='coerce')
df['day_of_year'] = df['date'].dt.dayofyear
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month

# filter for after Jan 2019
df = df[df['date'] >= "2019-01-01"]

In [12]:
df

Unnamed: 0,country,type,date,demand,day_of_year,year,month
1,AT,power,2019-01-01,25762200.0,1,2019,1
2,AT,power,2019-01-02,81639200.0,2,2019,1
3,AT,power,2019-01-03,101912200.0,3,2019,1
4,AT,power,2019-01-04,106700200.0,4,2019,1
5,AT,power,2019-01-05,52076600.0,5,2019,1
...,...,...,...,...,...,...,...
144213,SK,power,2025-01-11,9749200.0,11,2025,1
144214,SK,power,2025-01-12,9858400.0,12,2025,1
144215,SK,power,2025-01-13,23487800.0,13,2025,1
144216,SK,power,2025-01-14,24167400.0,14,2025,1


In [17]:
df_group = df.groupby(['country', 'type', 'month', 'year'])['demand'].sum().reset_index()
df_group[(df_group['country'] == 'DE') & (df_group['type'] == 'total')].pivot_table(index='month', columns='year', values='demand')/1000000000

year,2019,2020,2021,2022,2023,2024,2025
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,135.069486,121.332724,139.73043,123.711547,103.185158,120.523277,55.155804
2,105.009498,102.657646,119.61523,101.534221,96.105994,86.351836,
3,96.273282,104.197713,114.322345,103.119495,92.018289,82.575591,
4,74.942767,68.520137,96.204969,79.400616,72.975602,67.308024,
5,69.877354,62.310788,71.387299,51.97706,49.7766,47.603891,
6,44.78443,50.372857,47.033393,41.856425,38.493341,42.826836,
7,48.483984,50.823904,46.332838,38.501214,37.38414,39.105937,
8,46.512004,47.588309,45.697197,36.482451,39.197107,37.155095,
9,56.430393,55.407536,50.356282,47.354944,39.640785,44.644423,
10,76.091264,79.862703,75.604026,55.382989,59.226164,63.65285,


year,2019,2020,2021,2022,2023,2024,2025
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1,4357080000.0,3913959000.0,4507433000.0,3990695000.0,3328553000.0,3887848000.0,3447238000.0
2,3750339000.0,3539919000.0,4271973000.0,3626222000.0,3432357000.0,2977650000.0,
3,3105590000.0,3361217000.0,3687818000.0,3326435000.0,2968332000.0,2663729000.0,
4,2498092000.0,2284005000.0,3206832000.0,2646687000.0,2432520000.0,2243601000.0,
5,2254108000.0,2010025000.0,2302816000.0,1676679000.0,1605697000.0,1535609000.0,
6,1492814000.0,1679095000.0,1567780000.0,1395214000.0,1283111000.0,1427561000.0,
7,1563999000.0,1639481000.0,1494608000.0,1241975000.0,1205940000.0,1261482000.0,
8,1500387000.0,1535107000.0,1474103000.0,1176853000.0,1264423000.0,1198551000.0,
9,1881013000.0,1846918000.0,1678543000.0,1578498000.0,1321360000.0,1488147000.0,
10,2454557000.0,2576216000.0,2438840000.0,1786548000.0,1910521000.0,2053318000.0,


In [16]:
# sort by date column
df = df.sort_values(by='date')


In [17]:
df[(df['country'] == 'ES') & (df['type'] == 'total')]

Unnamed: 0,country,type,date,demand,day_of_year,year
6027,ES,total,2019-07-18,1.213220e+09,199,2019
6028,ES,total,2019-07-19,1.178920e+09,200,2019
6029,ES,total,2019-07-20,1.034870e+09,201,2019
6030,ES,total,2019-07-21,9.602700e+08,202,2019
6031,ES,total,2019-07-22,1.185500e+09,203,2019
...,...,...,...,...,...,...
8031,ES,total,2025-01-12,8.804600e+08,12,2025
8032,ES,total,2025-01-13,1.221480e+09,13,2025
8033,ES,total,2025-01-14,1.460250e+09,14,2025
8034,ES,total,2025-01-15,1.379220e+09,15,2025


In [43]:
df_total = df[df['type'] == 'total']

# Create EU18 aggregate by summing all countries for each date
eu18_agg = df_total.groupby(['date'], as_index=False)['demand'].sum()
eu18_agg['country'] = 'EU18'  # Add country column with EU18 value

# Combine original data with EU18 aggregate
df_total = pd.concat([df_total, eu18_agg[['country', 'date', 'demand']]], ignore_index=True)

df_grouped = df_total.groupby(['country', 'date'], as_index=False)['demand'].sum()

#filter for reference period 2019-01-01 to 2021-12-31
df_ref = df_grouped[(df_grouped['date'] >= "2019-01-01") & (df_grouped['date'] <= "2021-12-31")].copy()
df_ref['day_of_year'] = df_ref['date'].dt.dayofyear
df_grouped['day_of_year'] = df_grouped['date'].dt.dayofyear

#compute average demand by country in the reference period
ref_avg_demand = (
    df_ref.groupby(['country', 'day_of_year'])['demand']
          .mean()
          .reset_index(name='ref_demand')
)

#merge back into the main dataframe
df_grouped = pd.merge(
    df_grouped, 
    ref_avg_demand, 
    on=['country', 'day_of_year'], 
    how='left'
)

#calculate indexed demand
df_grouped['demand_indexed'] = df_grouped['demand'] / df_grouped['ref_demand']

# Add 7-day rolling average
df_grouped['demand_indexed_7d'] = df_grouped.groupby('country')['demand_indexed'].transform(lambda x: x.rolling(7, center=True).mean())

#filter for dates after Jan 2021
df_grouped = df_grouped[df_grouped['date'] > "2020-12-31"]

# Filter for AT
df_at = df_grouped[df_grouped['country'] == 'AT']

# Create the chart using Plotly with both raw and smoothed data
fig = go.Figure()

# Add raw data as light line
fig.add_trace(go.Scatter(
    x=df_at['date'],
    y=df_at['demand_indexed'],
    name='Daily',
    line=dict(color='lightgray'),
    showlegend=True
))

# Add smoothed data as bold line
fig.add_trace(go.Scatter(
    x=df_at['date'],
    y=df_at['demand_indexed_7d'],
    name='7-day average',
    line=dict(width=3),
    showlegend=True
))

# Update the layout
fig.update_layout(
    title='Daily Natural Gas Demand',
    yaxis_title='kWh',
    xaxis_title='Date',
    xaxis=dict(type='date')
)

fig.show()

In [31]:
df_grouped.groupby(['date'])[['demand_indexed','demand_indexed_7d']].sum().reset_index()

Unnamed: 0,date,demand_indexed,demand_indexed_7d
0,2021-01-01,19.725916,19.280453
1,2021-01-02,17.899959,19.286890
2,2021-01-03,17.600979,19.156308
3,2021-01-04,19.626205,19.304602
4,2021-01-05,20.282973,19.359902
...,...,...,...
1821,2025-12-27,0.000000,0.000000
1822,2025-12-28,0.000000,0.000000
1823,2025-12-29,0.000000,0.000000
1824,2025-12-30,0.000000,0.000000


In [32]:
df_total = df[df['type'] == 'total']

In [33]:
df_total.groupby(['date'])[['demand_indexed','demand_indexed_7d']].sum().reset_index()

Unnamed: 0,country,type,date,demand,day_of_year,year
2202,AT,total,2019-01-01,3.951140e+08,1,2019
2203,AT,total,2019-01-02,4.543026e+08,2,2019
2204,AT,total,2019-01-03,4.564488e+08,3,2019
2205,AT,total,2019-01-04,3.837560e+08,4,2019
2206,AT,total,2019-01-05,3.747448e+08,5,2019
...,...,...,...,...,...,...
145791,UK,total,2024-05-24,1.358804e+15,145,2024
145792,UK,total,2024-05-25,8.715803e+14,146,2024
145793,UK,total,2024-05-26,8.378770e+14,147,2024
145794,UK,total,2024-05-27,9.228997e+14,148,2024
