In [25]:
import asyncio
import re  # regular-expression
import json
import datetime

from motor.motor_asyncio import AsyncIOMotorClient
from pymongo.server_api import ServerApi
import httpx
import bs4  # beautifulsoup
import pandas as pd
import tqdm


In [26]:
# Area to query; this value is interpolated into the "area.values" request
# parameter. Presumably the ENTSO-E area code for Switzerland — TODO confirm.
country = "10YCH-SWISSGRIDZ"

# Time window: the three days ending now, as timezone-aware datetimes in the
# machine's local timezone.
lookback = datetime.timedelta(days=3)
end_date = datetime.datetime.now().astimezone()
start_date = end_date - lookback

In [27]:
# One ("productionType.values", code) query pair per production-type code,
# B01 through B20 (zero-padded to two digits).
productiontypes = list(
    map(
        lambda number: ("productionType.values", f"B{number:02}"),
        range(1, 21),
    )
)

In [28]:
# Display the generated (key, code) pairs to verify that B01 … B20 are all present.
productiontypes

[('productionType.values', 'B01'),
 ('productionType.values', 'B02'),
 ('productionType.values', 'B03'),
 ('productionType.values', 'B04'),
 ('productionType.values', 'B05'),
 ('productionType.values', 'B06'),
 ('productionType.values', 'B07'),
 ('productionType.values', 'B08'),
 ('productionType.values', 'B09'),
 ('productionType.values', 'B10'),
 ('productionType.values', 'B11'),
 ('productionType.values', 'B12'),
 ('productionType.values', 'B13'),
 ('productionType.values', 'B14'),
 ('productionType.values', 'B15'),
 ('productionType.values', 'B16'),
 ('productionType.values', 'B17'),
 ('productionType.values', 'B18'),
 ('productionType.values', 'B19'),
 ('productionType.values', 'B20')]

In [29]:
"""Access the website with the needed parameters; 
extract the embedded `var chart = {...}` JavaScript object from the returned HTML.
The following cells parse that object as JSON and build a pandas DataFrame of the
actual generation per production type, indexed by country and timestamp."""

# There are 20 different productiontypes
productiontypes = [
    ("productionType.values", f"B{k:02}")
    for k in range(1, 20)
]

async with httpx.AsyncClient(
    base_url="https://transparency.entsoe.eu",
) as client:
    res = await client.get(
        url="/generation/r2/actualGenerationPerProductionType/show",
        params=list({
            "areaType": "CTY",
            "viewType": "GRAPH",
            "dateTime.dateTime": f"{date:%d.%m.%Y} 00:00|UTC|DAYTIMERANGE",
            "dateTime.endDateTime": f"{date:%d.%m.%Y} 00:00|UTC|DAYTIMERANGE",
            "dateTime.timezone": "UTC",
            "area.values": f"CTY|{country}!CTY|{country}",
        }.items()) + productiontypes,
        headers={"X-Requested-With": "XMLHttpRequest"},
    )

# make sure the content is UTF-8 and parse the content with bs4
assert res.headers["content-type"] == "text/html;charset=UTF-8", res.headers["content-type"]
soup = bs4.BeautifulSoup(res.content.decode("utf-8"))

# select only the part 'script' and the chart-list of the http-file
javascript_str = soup.find("script").text
match = re.search(r"var\s+chart\s*=\s*({.*})\s*;", javascript_str, re.S)
assert match is not None

In [49]:
# Inspect the raw text of the first <script> element, i.e. the string the
# `var chart = {...}` regex is applied to.
javascript_str

'\n\tvar chart = {"chartKeys":["val1","val2","val3","val4","val5","val6"],"graphDesign":{"val6":{"title":"Wind Onshore  Generation","serialChartType":"column","fillColor":"#9aa64c","fillAlpha":1.0,"lineColor":"#9aa64c","lineAlpha":0.0,"lineThickness":2,"bulletType":"none","bulletSize":0,"assignedAxis":"PRIMARY","stackType":"regular","unit":""},"val5":{"title":"Solar  Generation","serialChartType":"column","fillColor":"#32b3a8","fillAlpha":1.0,"lineColor":"#32b3a8","lineAlpha":0.0,"lineThickness":2,"bulletType":"none","bulletSize":0,"assignedAxis":"PRIMARY","stackType":"regular","unit":""},"val4":{"title":"Nuclear  Generation","serialChartType":"column","fillColor":"#c8c8c8","fillAlpha":1.0,"lineColor":"#c8c8c8","lineAlpha":0.0,"lineThickness":2,"bulletType":"none","bulletSize":0,"assignedAxis":"PRIMARY","stackType":"regular","unit":""},"val3":{"title":"Hydro Water Reservoir  Generation","serialChartType":"column","fillColor":"#f7a0cc","fillAlpha":1.0,"lineColor":"#f7a0cc","lineAlpha":0

In [35]:
# Decode the JSON object captured by the regex's first (and only) group.
# `match`, `date` and `country` are expected to come from earlier cells.
data = json.loads(match.group(1))

# Map each series key (e.g. "val6") to its title, collapsing runs of
# whitespace so "Wind Onshore  Generation" becomes "Wind Onshore Generation".
columns = {}
for series_key, design in data["graphDesign"].items():
    columns[series_key] = " ".join(design["title"].split())

# One row per chart category; the category column becomes the index, every
# value is cast to float, and the series keys are replaced by readable titles.
chart_frame = pd.DataFrame(data["chartData"])
chart_frame = chart_frame.set_index(data["categoryName"])
df = chart_frame.astype(float).rename(columns=columns)

# The index holds ISO time-of-day strings; combine each with `date` to get a
# full timestamp and mark it as UTC.
timestamps = df.index.to_series().apply(
    lambda clock: datetime.datetime.combine(date, datetime.time.fromisoformat(clock))
).dt.tz_localize("UTC")

# Replace the index with a (country, datetime) MultiIndex; the country code is
# repeated once per row.
country_level = [country] * df.shape[0]
df = df.set_index(pd.MultiIndex.from_arrays(
    [country_level, timestamps],
    names=["country", "datetime"],
))

In [59]:
# Spot-check one raw series title: it contains a double space, which is why
# titles are whitespace-normalised when the column names are built.
data["graphDesign"]["val6"]["title"]

'Wind Onshore  Generation'