In [183]:
import polars as pl 
import pandas as pd 
import datetime as dt
from entsoe import EntsoePandasClient
import requests

In [197]:
# Load the 1-minute-resolution data set (with inflow) from its parquet file.
df = pd.read_parquet(path="data/sym_data/sym_df_1m_res_with_inflow.parquet")

In [198]:
# Downsample to 5-minute bins (mean of each bin), then turn the "time" index
# back into an ordinary column so it can be used as a join key later on.
df = (
    df.set_index("time")
    .resample("5min")
    .mean()
    .reset_index()
)

In [171]:
# `df` is a pandas DataFrame at this point, so the polars-style `df[0, 0]`
# lookup would be treated as a column-label lookup and raise KeyError.
# Use positional access on the "time" column instead: the first and last rows
# of the resampled frame bound the period to request emission data for.
start_date = df["time"].iloc[0]
end_date = df["time"].iloc[-1]

In [173]:
def _to_api_timestamp(value):
    """Return `value` formatted as ``YYYY-MM-DDTHH:MM``; strings pass through unchanged."""
    if isinstance(value, str):
        return value
    # NOTE(review): this format drops any tzinfo carried by `value`; confirm
    # which timezone the API assumes for its `start`/`end` parameters.
    return value.strftime("%Y-%m-%dT%H:%M")


def get_co2_emission_data(
    start_date: "dt.datetime | str",
    end_date: "dt.datetime | str",
    price_area: str = "DK2",
    offset: int = 0,
    limit: int = 0,
    timeout: float = 30.0,
):
    """Fetch CO2 emission prognosis records from the Energi Data Service API.

    Args:
        start_date: Start of the requested period. Datetime-like objects
            (anything with ``strftime``) are formatted as ``YYYY-MM-DDTHH:MM``;
            strings are sent to the API as given.
        end_date: End of the requested period, handled like ``start_date``.
        price_area: Value used in the ``PriceArea`` filter.
        offset: Number of records to skip, forwarded as the ``offset`` parameter.
        limit: Maximum number of records to return, forwarded as ``limit``.
            The default of 0 asks the API for every matching record; without
            an explicit limit the API only returns its default page size
            (see the Energi Data Service API guide).
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    url = "https://api.energidataservice.dk/dataset/CO2EmisProg"
    params = {
        "offset": offset,
        "limit": limit,
        "start": _to_api_timestamp(start_date),
        "end": _to_api_timestamp(end_date),
        "filter": f'{{"PriceArea":["{price_area}"]}}',
        "sort": "PriceArea DESC"
    }

    response = requests.get(url, params=params, timeout=timeout)

    # Raise on error statuses first, then always return the parsed body, so
    # the function can never fall through and silently return None.
    response.raise_for_status()
    return response.json()

# Request the emission records covering the same period as `df`
# (price area and offset are left at the function's defaults).
data = get_co2_emission_data(start_date=start_date, end_date=end_date)


In [178]:
# Build a polars frame from the "records" entry of the API response.
emission_df = pl.DataFrame(data=data["records"])

In [179]:
# Parse the "Minutes5UTC" strings into nanosecond-precision, UTC-tagged
# datetimes stored as "time", keep only the columns needed downstream,
# and order the rows chronologically.
emission_df = (
    emission_df
    .with_columns(
        time=pl.col("Minutes5UTC")
        .str.strptime(pl.Datetime, format="%Y-%m-%dT%H:%M:%S")
        .dt.cast_time_unit("ns")
        .dt.replace_time_zone("UTC")
    )
    .select(["time", "PriceArea", "CO2Emission"])
    .sort("time")
)

In [205]:
# Convert the resampled pandas frame to polars and attach the emission data.
# The inner join keeps only timestamps that are present in both frames.
sym_df = (
    pl.DataFrame(df)
    .join(emission_df, on="time", how="inner")
)

In [217]:
# Persist the joined 5-minute data set (including CO2 emissions) as CSV.
sym_df.write_csv(file="data/sym_data/sym_df_5m_res_with_co2.csv")