### API Analysis

![alt text](../images/image.png "Title")
This configuration shows the hourly day-ahead price (the price of energy until the same time tomorrow) for the last two weeks.
When checking the network traffic for the above dates and for the hourly resolution, you will find three .json files being fetched from the API.

A request to the API has the following structure:
https://www.smard.de/app/chart_data/4169/DE/4169_DE_hour_[timestamp_in_milliseconds].json

The following requests fetch data for the corresponding time frames.

https://www.smard.de/app/chart_data/4169/DE/4169_DE_hour_1728252000000.json:
Sunday, 6 October 2024 22:00:00 -> Sunday, 13 October 2024 21:00:00

https://www.smard.de/app/chart_data/4169/DE/4169_DE_hour_1728856800000.json:
Sunday, 13 October 2024 22:00:00 -> Sunday, 20 October 2024 21:00:00

https://www.smard.de/app/chart_data/4169/DE/4169_DE_hour_1729461600000.json
Sunday, 20 October 2024 22:00:00 -> Sunday, 27 October 2024 22:00:00


You will find that, for example, the timestamp 1728252000000 maps to the initial date Sunday, 6 October 2024 22:00:00 and that every file contains the data for one week. Interestingly enough, the site only shows the data for two weeks even though it had to fetch the data for three entire weeks. If the above links are broken, it may be due to a shift in daylight saving time (DST), which we will have to take into account.

Additionally you will see that each .json file contains around 172 (more or less) time series entries for an entire week.



### Implementing the scraper
We now want to implement a scraper that fetches the hourly energy prices for the last n weeks. With the above information, we know that we have to find the corresponding timestamp for each week and fetch its data.

In [None]:
import requests
import numpy as np
import logging
from datetime import datetime, timedelta, timezone
import pytz
import time
from pprint import pprint

In [None]:
# Console output: INFO and above through the root logger's default handler.
logging.basicConfig(level=logging.INFO)

# Named logger for the scrapers. It has no handler of its own; its records
# propagate to the root logger.
logger = logging.getLogger("scraper_logger")

# Persist WARNING and above to app.log. The handler is attached to the ROOT
# logger because the scraping cells log through the module-level
# `logging.info` / `logging.warning` helpers, which never reach a handler that
# is attached only to "scraper_logger". The handler is tagged with a name so
# that re-running this cell reuses it instead of stacking duplicates (which
# would write every line several times).
_FILE_HANDLER_NAME = "scraper_file_handler"
_root_logger = logging.getLogger()
file_handler = next(
    (h for h in _root_logger.handlers if h.get_name() == _FILE_HANDLER_NAME),
    None,
)
if file_handler is None:
    file_handler = logging.FileHandler("app.log")
    file_handler.set_name(_FILE_HANDLER_NAME)
    file_handler.setLevel(logging.WARNING)
    _root_logger.addHandler(file_handler)

In [None]:
def scrape(url, delay, timeout=30):
    """Fetch ``url`` with a GET request, then pause for ``delay`` seconds.

    Args:
        url: Address to request.
        delay: Seconds to sleep after a successful response, to avoid
            hammering the server when called in a loop.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the scraping loop forever.

    Returns:
        The ``requests.Response`` of the successful request.

    Raises:
        requests.exceptions.HTTPError: If the server answers with an error
            status (raised by ``raise_for_status``).
        requests.exceptions.Timeout: If the server does not respond within
            ``timeout`` seconds.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # Only reached for successful responses; failures raise above without sleeping.
    time.sleep(delay)
    return response

In [None]:
from datetime import datetime, timedelta
import pytz

# Define Berlin timezone
tz_berlin = pytz.timezone("Europe/Berlin")

# Midnight of the most recent Monday in Berlin local time.
# The date is computed on the plain calendar date and only then localized:
# subtracting a timedelta from a pytz-aware datetime and calling .replace()
# keeps the UTC offset of "now", which is wrong when "now" and that Monday lie
# on different sides of a DST switch (e.g. on the Sunday of the switch).
now = datetime.now(tz_berlin)
days_since_monday = now.weekday()
monday_date = now.date() - timedelta(days=days_since_monday)
last_monday_berlin = tz_berlin.localize(
    datetime(monday_date.year, monday_date.month, monday_date.day)
)

# Convert Berlin time to UTC and get the timestamp in milliseconds
last_monday_utc = last_monday_berlin.astimezone(pytz.UTC)
last_monday_utc_ms = int(last_monday_utc.timestamp() * 1000)

print("Berlin time (local):", last_monday_berlin)
print("UTC time:", last_monday_utc)
print("UTC timestamp (ms):", last_monday_utc_ms)


In [None]:
import requests
import logging
from datetime import datetime, timedelta
import numpy as np
import pytz

# Define Berlin timezone
tz_berlin = pytz.timezone("Europe/Berlin")

# Midnight of the most recent Monday in Berlin local time. The calendar date
# is computed first and localized afterwards so that the UTC offset is always
# the one valid on that Monday (pytz-aware arithmetic would keep a stale one).
now = datetime.now(tz_berlin)
days_since_monday = now.weekday()
monday_date = now.date() - timedelta(days=days_since_monday)
last_monday_berlin = tz_berlin.localize(
    datetime(monday_date.year, monday_date.month, monday_date.day)
)

# Convert Berlin time to UTC and get the timestamp in milliseconds
last_monday_utc = last_monday_berlin.astimezone(pytz.UTC)
last_monday_utc_ms = int(last_monday_utc.timestamp() * 1000)

print("Berlin time (local):", last_monday_berlin)
print("UTC time:", last_monday_utc)
print("UTC timestamp (ms):", last_monday_utc_ms)

# Define constants
delay = 0.5  # seconds to pause after each successful request
n = 500  # number of weeks
base_url = "https://www.smard.de/app/chart_data/4169/DE/4169_DE_hour_{}.json"

# Use a dictionary to store unique timestamps and prices
energy_ts_data = {}

for k in range(n):
    # Each file is addressed by the UTC timestamp of Monday 00:00 Berlin time.
    # Deriving every week from the calendar date (instead of stepping back in
    # UTC) keeps the timestamp correct on both sides of a DST switch, and makes
    # sure a failed request moves on to the next week rather than retrying the
    # same URL for the rest of the loop.
    week_date = monday_date - timedelta(weeks=k)
    week_start_berlin = tz_berlin.localize(
        datetime(week_date.year, week_date.month, week_date.day)
    )
    week_start_utc = week_start_berlin.astimezone(pytz.UTC)
    week_start_ms = int(week_start_utc.timestamp() * 1000)

    try:
        # `scrape` raises on HTTP errors and sleeps `delay` seconds on success.
        response = scrape(base_url.format(week_start_ms), delay)
        logging.info(f"Successfully scraped data for ts: {week_start_berlin} (Europe/Berlin)")
        json_data = response.json()
    except requests.exceptions.HTTPError as http_err:
        logging.warning(f"Failed to scrape data for timestamp: {week_start_utc} (UTC)\n\tError: {http_err}")
        continue
    except requests.exceptions.JSONDecodeError as decoder_error:
        logging.warning(f"Failed to deserialize JSON: \n\tError: {decoder_error}")
        continue

    # "series" holds [timestamp_ms, price] pairs
    for ts, price in dict(json_data).get("series", []):
        try:
            price_float = float(price)
        except (TypeError, ValueError) as e:
            # Missing prices (None) and malformed values are skipped
            logging.warning(f"Failed to parse non-float value for timestamp {ts}\n\tError: {e}")
            continue

        # Naive Berlin wall-clock time, independent of the machine's timezone.
        # NOTE(review): the repeated hour at the end of DST maps to the same
        # naive string, so the later value overwrites the earlier one.
        ts_datetime = (
            datetime.fromtimestamp(ts / 1000, tz=tz_berlin).replace(tzinfo=None).isoformat()
        )
        # Add to the dictionary, overwriting any duplicates
        energy_ts_data[ts_datetime] = price_float

# Convert the dictionary to a sorted list of tuples
energy_ts_data_sorted = sorted(energy_ts_data.items())

# Convert to a NumPy array
data = np.array(energy_ts_data_sorted)

print("Final dataset shape:", data.shape)

# Save the data as a CSV file (naive timestamps only)
np.savetxt("../data/day_ahead_energy_prices.csv", data, delimiter=",", fmt="%s")


- Weather:
  - wind
  - sun
  - temp

- per day energy mix
- gas price per day

In [None]:
# Walk backwards one day at a time, from today down to `end_date`, and collect
# the daily energy-mix JSON of every day that could be downloaded.
start_date = datetime.now()
end_date = datetime(2018, 9, 30)
delta = timedelta(days=1)
delay = 0.2

base_url = "https://www.energy-charts.info/charts/energy_pie/data/de/day_pie_{}.json"

res = []
current_date = start_date
while not current_date < end_date:
    # The endpoint addresses days as YYYY_MM_DD
    cd_format = current_date.strftime("%Y_%m_%d")
    try:
        response = scrape(base_url.format(cd_format), delay)
        logging.info(f"Successfully scraped data for date: {cd_format}")
        day_payload = response.json()
    except requests.exceptions.HTTPError as http_err:
        logging.warning(f"Failed to scrape data for date: {cd_format} (UTC)\n\tError: {http_err}")
    except requests.exceptions.JSONDecodeError as decoder_error:
        logging.warning(f"Failed to deserialize JSON: \n\tError: {decoder_error}")
    else:
        # Keep the day only when both the download and the decoding succeeded
        res.append((cd_format, day_payload))
    current_date = current_date - delta


print(len(res))


### Energy Mix Scraper

In [None]:
exclude_cross_boarder_e_trading = True
cbet = "Cross border electricity trading"

dtype = [('date', 'U50'), ('e_component', 'U50'), ('value', 'float32')]

# One structured array per day; joined once at the end instead of growing a
# single array with np.append, which copies the whole array on every day.
daily_parts = []

for date, day_entries in res:
    sources = []
    for e_source in day_entries:
        name = str(e_source["name"]["en"])

        if exclude_cross_boarder_e_trading and name == cbet:
            continue

        # Skip entries whose value is missing or not numeric
        try:
            y_value = float(e_source["y"])
        except (ValueError, TypeError):
            continue

        sources.append((date, name, y_value))

    # Convert to a structured array with the correct dtype
    arr = np.array(sources, dtype=dtype)

    # Normalize the 'value' column to shares of the daily total. A day without
    # usable entries, or whose values sum to zero, is skipped: dividing by
    # zero would only produce NaN/inf rows.
    total = np.sum(arr['value'])
    if total == 0:
        logging.warning(f"Skipping {date}: no usable values to normalize")
        continue
    arr['value'] /= total

    daily_parts.append(arr)

# Join all days; np.concatenate rejects an empty list, hence the fallback
array = np.concatenate(daily_parts) if daily_parts else np.empty(0, dtype=dtype)

np.savetxt("../data/daily_market_mix.csv", array, delimiter=",", fmt="%s")
array

In [None]:
POST /api/raw-data HTTP/1.1
Content-Type: application/json
Accept: */*
Sec-Fetch-Site: cross-site
Accept-Language: en-GB,en;q=0.9
Accept-Encoding: gzip, deflate, br
Sec-Fetch-Mode: cors
Origin: https://www.agora-energiewende.de
User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.0.1 Safari/605.1.15
Content-Length: 538
Referer: https://www.agora-energiewende.de/
Connection: keep-alive
Sec-Fetch-Dest: empty
X-Requested-With: XMLHttpRequest
Api-key: agora_live_62ce76dd202927.67115829
Priority: u=3, i


{"filters":{"from":"2023-11-01","to":"2024-10-01","generation":["Total electricity demand","Biomass","Hydro","Wind offshore","Wind onshore","Solar","Total conventional power plant","Nuclear","Lignite","Hard Coal","Natural Gas","Pumped storage generation","Other","Grid emission factor","Total grid emissions","Total Renewables","Total Conventional","Renewable share","Conventional share"]},"x_coordinate":"date_id","y_coordinate":"value","view_name":"live_gen_plus_emi_de_hourly","kpi_name":"power_generation","z_coordinate":"generation"}

In [None]:
import requests

# Define the API endpoint and headers
# NOTE(review): the "Api-key" below is hard-coded; consider reading it from
# the environment instead of keeping it in the notebook.
url = "https://api.agora-energy.org/api/raw-data"
headers = {
    "Content-Type": "application/json",
    "Accept": "*/*",
    "Sec-Fetch-Site": "cross-site",
    "Accept-Language": "en-GB,en;q=0.9",
    "Accept-Encoding": "gzip, deflate, br",
    "Sec-Fetch-Mode": "cors",
    "Origin": "https://www.agora-energiewende.de",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.0.1 Safari/605.1.15",
    "Referer": "https://www.agora-energiewende.de/",
    "Connection": "keep-alive",
    "Sec-Fetch-Dest": "empty",
    "X-Requested-With": "XMLHttpRequest",
    "Api-key": "agora_live_62ce76dd202927.67115829",
}

# Series requested from the API; identical for every window, so built once
generation_series = [
    "Total electricity demand", "Biomass", "Hydro", "Wind offshore",
    "Wind onshore", "Solar", "Total conventional power plant", "Nuclear",
    "Lignite", "Hard Coal", "Natural Gas", "Pumped storage generation",
    "Other", "Grid emission factor", "Total grid emissions", "Total Renewables",
    "Total Conventional", "Renewable share", "Conventional share",
]

# Rows of all successful responses, in request order
out = []

# One request per window from 1 October of `year` to 30 September of `year + 1`
for year in range(2017, 2025):
    payload = {
        "filters": {
            "from": f"{year}-10-01",
            "to": f"{year + 1}-09-30",
            "generation": generation_series,
        },
        "x_coordinate": "date_id",
        "y_coordinate": "value",
        "view_name": "live_gen_plus_emi_de_hourly",
        "kpi_name": "power_generation",
        "z_coordinate": "generation"
    }

    # Make the POST request; the timeout keeps a stalled connection from
    # blocking the loop forever
    response = requests.post(url, headers=headers, json=payload, timeout=60)

    # Check the response
    if response.status_code == 200:
        print("Request was successful!", year, year+1)
        data = response.json()
        out.extend(data["data"]["data"])
    else:
        print(f"Request failed with status code {response.status_code}", year, year+1)
    # Short pause between requests
    time.sleep(0.3)


In [None]:
# Dump the raw rows collected in `out` to CSV; fmt="%s" writes every field via str()
np.savetxt("../data/hourly_market_mix.csv", np.array(out), delimiter=",", fmt="%s")

In [None]:
data = np.array(out)

# Generation sources that make up the energy mix (output column order)
mix_categories = [
    "Biomass",
    "Hard Coal",
    "Hydro",
    "Lignite",
    "Natural Gas",
    "Nuclear",
    "Other",
    "Pumped storage generation",
    "Solar",
    "Wind offshore",
    "Wind onshore",
]

# Additional series written to a separate file (output column order)
other_metrics = [
    "Grid emission factor",
    "Total conventional power plant",
    "Total electricity demand",
    "Total grid emissions",
]

# Define start and end dates as naive datetime objects
start_date = datetime.fromisoformat("2018-10-01T00:00:00")
end_date = datetime.fromisoformat("2024-10-30T00:00:00")

# Generate hourly timestamps
timestamps = [
    start_date + timedelta(hours=i)
    for i in range(int((end_date - start_date).total_seconds() // 3600) + 1)
]
timestamp_strings = [ts.isoformat() for ts in timestamps]
mix_rows = []
other_metrics_rows = []

# Bucket the raw rows by hour; rows outside the requested range are dropped
data_dict = {ts: [] for ts in timestamp_strings}
for d in data:
    d_timestamp = datetime.fromisoformat(d[0]).isoformat()  # Naive datetime conversion
    if d_timestamp in data_dict:
        data_dict[d_timestamp].append(d)

for ts in timestamp_strings:
    # Each raw row is used as (timestamp, value, series name). Looking values
    # up by series name keeps every value under its own column header
    # regardless of the order the API returned the rows in, and regardless of
    # which series is missing for this hour. If a series occurs twice for the
    # same hour, the last row wins.
    values_by_name = {str(d[2]): d[1] for d in data_dict[ts]}

    # Hours without any energy-mix series are skipped entirely
    if not any(name in values_by_name for name in mix_categories):
        continue

    # Missing series and None values count as 0.0
    mix_values = np.array(
        [
            0.0 if values_by_name.get(name) is None else float(values_by_name[name])
            for name in mix_categories
        ],
        dtype=float,
    )

    # Convert absolute values to shares of the hourly total
    shares = mix_values / mix_values.sum()
    mix_rows.append(np.array([ts, *shares.tolist()], dtype=object))

    if not any(name in values_by_name for name in other_metrics):
        continue
    # Metrics are written as received; a missing metric is written as None
    other_metrics_rows.append(
        np.array([ts, *(values_by_name.get(name) for name in other_metrics)], dtype=object)
    )

percentage_mix = np.vstack([["Timestamp"] + mix_categories] + mix_rows)
percentage_sources = np.vstack([["Timestamp"] + other_metrics] + other_metrics_rows)

np.savetxt("../data/hourly_market_mix_cleaned.csv", percentage_mix, delimiter=",", fmt="%s")
np.savetxt("../data/hourly_market_metrics_cleaned.csv", percentage_sources, delimiter=",", fmt="%s")

### Weather Data

In [None]:
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
import importlib.util

# Report whether a module can be located in the current environment
def check_package_installed(package_name):
    """Print whether ``package_name`` can be found by the import system.

    Args:
        package_name: Importable module name to look up.
    """
    if importlib.util.find_spec(package_name) is not None:
        print(f"{package_name} is installed!")
        return
    print(f"{package_name} is not installed!")

# Importable module names mapped to their pip package names
packages = {
    "openmeteo_requests": "openmeteo-requests",
    "requests_cache": "requests-cache",
    "retry_requests": "retry-requests",
}

# Report for every required module whether it can be found
for module_name in packages:
    check_package_installed(module_name)

# Open-Meteo client on top of a cached HTTP session with retries
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Endpoint of the historical weather archive
url = "https://archive-api.open-meteo.com/v1/archive"

# Cities used as sample points for Germany
coordinates = [
    {"latitude": 52.52, "longitude": 13.405},  # Berlin
    {"latitude": 53.5511, "longitude": 9.9937},  # Hamburg
    {"latitude": 48.1351, "longitude": 11.5820},  # Munich
    {"latitude": 50.9375, "longitude": 6.9603},  # Cologne
    {"latitude": 50.1109, "longitude": 8.6821},  # Frankfurt
    {"latitude": 51.0504, "longitude": 13.7373},  # Dresden
    {"latitude": 48.7758, "longitude": 9.1829},  # Stuttgart
]

# Date range and hourly variables requested for every location
params_template = {
    "start_date": "2018-01-01",
    "end_date": "2024-11-21",
    "hourly": [
        "temperature_2m", "relative_humidity_2m", "precipitation",
        "surface_pressure", "cloud_cover", "wind_speed_100m",
        "sunshine_duration", "shortwave_radiation", "direct_radiation"
    ]
}

# One DataFrame per successfully fetched location
all_data = []

for coord in coordinates:
    params = {
        **params_template,
        "latitude": coord["latitude"],
        "longitude": coord["longitude"],
    }

    try:
        # Only the first response object is used for each location
        responses = openmeteo.weather_api(url, params=params)
        response = responses[0]
        hourly = response.Hourly()

        # Time axis rebuilt from the start, end and step the API reports
        hourly_data = {
            "date": pd.date_range(
                start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
                end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
                freq=pd.Timedelta(seconds=hourly.Interval()),
                inclusive="left"
            )
        }
        # Variables are read by position, in the order they were requested
        for position, variable in enumerate(params_template["hourly"]):
            hourly_data[variable] = hourly.Variables(position).ValuesAsNumpy()

        hourly_dataframe = pd.DataFrame(hourly_data)
        all_data.append(hourly_dataframe)

    except Exception as e:
        print(f"Error fetching data for coordinates {coord}: {e}")

# Stack all locations and average every variable per timestamp
combined_df = pd.concat(all_data)
averaged_data = combined_df.groupby("date").mean()

# Give two columns more descriptive names
column_labels = {
    "shortwave_radiation": "Global Horizontal Irradiance",
    "precipitation": "Precipitation (rain/snow)"
}
averaged_data = averaged_data.rename(columns=column_labels)

# Write the averages, including the date index, to CSV
csv_file = "../data/germany_weather_average.csv"
averaged_data.to_csv(csv_file, index=True)

print(f"Averaged weather data for Germany saved to {csv_file}.")


In [None]:
import pandas as pd

# Load the averaged weather data
csv_file = "../data/germany_weather_average.csv"
data = pd.read_csv(csv_file)

# Names of all numeric columns
numeric_columns = data.select_dtypes(include=["number"]).columns

# One flag per numeric column: does it contain at least one value below zero?
has_negative = (data[numeric_columns] < 0).any()
negative_columns = [col for col, flag in has_negative.items() if flag]

# Print the headers of all columns with negative values
print("Columns with negative values:", negative_columns)


In [None]:
# Load the averaged weather data
data = pd.read_csv("../data/germany_weather_average.csv")

# Columns whose dtype is not numeric
non_numeric_columns = data.select_dtypes(exclude=["number"]).columns

print("Headers with non-numeric values:", non_numeric_columns)

In [1]:
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
import importlib.util



# Setup the Open-Meteo API client with caching and retries
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Define the base URL for the weather API
url = "https://archive-api.open-meteo.com/v1/archive"

# Offshore wind parks (coordinates and weights)
wind_parks = [
    {"latitude": 54.008333, "longitude": 6.598333, "weight": 60},      # Alpha Ventus
    {"latitude": 54.358333, "longitude": 5.975, "weight": 400},        # BARD Offshore I
    {"latitude": 53.690, "longitude": 6.480, "weight": 113.4},         # Riffgat
    {"latitude": 54.15, "longitude": 7.25, "weight": 295},             # Amrumbank West
    {"latitude": 54.53, "longitude": 6.25, "weight": 200},             # Butendiek
    {"latitude": 54.367, "longitude": 6.467, "weight": 295},           # DanTysk
    {"latitude": 54.480, "longitude": 7.370, "weight": 288},           # Meerwind Süd|Ost
    {"latitude": 54.4, "longitude": 6.6, "weight": 576},               # Gode Wind 1 & 2
    {"latitude": 54.30, "longitude": 6.65, "weight": 400},             # Global Tech I
    {"latitude": 53.88, "longitude": 6.59, "weight": 450},             # Borkum Riffgrund 1
    {"latitude": 53.88, "longitude": 6.59, "weight": 215},             # Borkum Riffgrund 2
    {"latitude": 54.00, "longitude": 6.58, "weight": 342},             # Trianel Windpark Borkum
    {"latitude": 54.22, "longitude": 6.63, "weight": 332},             # Nordsee Ost
    {"latitude": 54.25, "longitude": 7.25, "weight": 385},             # Hohe See
    {"latitude": 54.28, "longitude": 7.30, "weight": 252},             # Albatros
    {"latitude": 54.48, "longitude": 6.78, "weight": 350},             # Wikinger
    {"latitude": 54.55, "longitude": 6.37, "weight": 402},             # Arkona
    {"latitude": 54.45, "longitude": 6.58, "weight": 600},             # Veja Mate
    {"latitude": 54.33, "longitude": 7.18, "weight": 300},             # Deutsche Bucht
    {"latitude": 54.25, "longitude": 7.18, "weight": 402},             # Kaskasi
]


# Define the weather variable and date range
params_template = {
    "start_date": "2018-01-01",
    "end_date": "2024-11-21",
    "hourly": ["wind_speed_100m"]
}

# Weighted hourly series of every park that could be fetched, and the sum of
# their weights. Only fetched parks may enter the divisor: dividing by the
# weight of ALL parks would pull the average down whenever a download failed.
weighted_wind_speed = []
total_weight = 0

for park in wind_parks:
    params = params_template.copy()
    params.update({
        "latitude": park["latitude"],
        "longitude": park["longitude"],
    })

    try:
        # Fetch wind speed data for the current location
        responses = openmeteo.weather_api(url, params=params)
        response = responses[0]

        # Extract hourly wind speed data for this location
        hourly = response.Hourly()
        hourly_data = {
            "date": pd.date_range(
                start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
                end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
                freq=pd.Timedelta(seconds=hourly.Interval()),
                inclusive="left"
            ),
            "wind_speed_100m": hourly.Variables(0).ValuesAsNumpy()
        }

        # Convert to DataFrame
        hourly_dataframe = pd.DataFrame(data=hourly_data)

        # Series of wind speed indexed by timestamp. The timestamps come from
        # a date_range and are unique, so the mean only re-indexes by "date";
        # the result stays at the resolution of the fetched data.
        park_speed = hourly_dataframe.groupby("date")["wind_speed_100m"].mean()

        # Weight the series and record that this park contributed
        weighted_wind_speed.append(park_speed * park["weight"])
        total_weight += park["weight"]

    except Exception as e:
        print(f"Error fetching data for wind park {park}: {e}")

if not weighted_wind_speed:
    raise RuntimeError("No wind park data could be fetched; nothing to average.")

# Combine weighted wind speeds across all fetched parks
combined_wind_speed = sum(weighted_wind_speed) / total_weight

# Save the weighted average wind speed to a CSV file
csv_file = "../data/weighted_windspeed.csv"
combined_wind_speed.to_csv(csv_file, index=True, header=["windspeed 100m"])

print(f"Weighted average wind speed saved to {csv_file}.")


Weighted average wind speed saved to ../data/weighted_windspeed.csv.


In [6]:
df_orig = pd.read_csv('../data/germany_weather_average.csv')
df_replacemet = pd.read_csv('../data/weighted_windspeed.csv')

# Replace the averaged wind speed with the park-weighted one. Values are
# matched on the `date` column that both files carry, not on row position, so
# a differing row order or row count cannot silently shift values to the wrong
# hour; dates without a replacement value become NaN.
wind_by_date = df_replacemet.set_index('date')['windspeed 100m']
df_orig['wind_speed_100m'] = df_orig['date'].map(wind_by_date)

# Save the updated data back to the same CSV file
df_orig.to_csv('../data/germany_weather_average.csv', index=False)

0        38.490368
1        39.208380
2        39.968450
3        41.720287
4        43.592230
           ...    
60403    16.334919
60404    13.902251
60405    14.273662
60406    14.276556
60407    16.874899
Name: wind_speed_100m, Length: 60408, dtype: float64
0        52.056625
1        54.795845
2        61.359924
3        65.063110
4        58.501553
           ...    
60403    19.196121
60404    14.000850
60405    10.822694
60406     6.807420
60407     8.352756
Name: windspeed 100m, Length: 60408, dtype: float64


In [None]:
# Open-Meteo API Client und andere notwendige Bibliotheken importieren
import openmeteo_requests
import requests_cache
from retry_requests import retry
import pandas as pd

# Open-Meteo setup: cached HTTP session with retries
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Base URL of the API
url = "https://archive-api.open-meteo.com/v1/archive"

# Parameters of the API query
params_template = {
    "start_date": "2018-01-01",
    "end_date": "2024-11-21",
    "hourly": ["sunshine_duration", "direct_radiation", "shortwave_radiation"]
}

# One DataFrame per successfully fetched park
all_data = []

# NOTE(review): `solar_parks` is not defined in this cell. It has to exist
# already as an iterable of dicts with "latitude", "longitude" and "weight".
for park in solar_parks:
    params = params_template.copy()
    params.update({
        "latitude": park["latitude"],
        "longitude": park["longitude"]
    })

    try:
        # Fetch the data
        responses = openmeteo.weather_api(url, params=params)
        response = responses[0]

        # Hourly data
        hourly = response.Hourly()
        hourly_data = {
            "date": pd.date_range(
                start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
                end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
                freq=pd.Timedelta(seconds=hourly.Interval()),
                inclusive="left"
            )
        }
        # Variables are read by position, in the order they were requested
        for position, variable in enumerate(params_template["hourly"]):
            hourly_data[variable] = hourly.Variables(position).ValuesAsNumpy()

        # Convert to a DataFrame and attach the park's weight to every row
        hourly_dataframe = pd.DataFrame(data=hourly_data)
        hourly_dataframe["weight"] = park["weight"]
        all_data.append(hourly_dataframe)

    except Exception as e:
        print(f"Fehler bei {park}: {e}")

# Combine all parks; a fresh index keeps row labels unique across parks
combined_df = pd.concat(all_data, ignore_index=True)

# Weighted average per timestamp: sum(value * weight) / sum(weight).
# Computed with column-wise operations instead of a Python function per
# group, which is far slower on tens of thousands of timestamps. As before,
# missing values are left out of the numerator only.
value_columns = params_template["hourly"]
weights = combined_df["weight"]
weighted_sums = (
    combined_df[value_columns].mul(weights, axis=0).groupby(combined_df["date"]).sum()
)
weight_totals = weights.groupby(combined_df["date"]).sum()
weighted_avg = weighted_sums.div(weight_totals, axis=0)

# Save the data to CSV
output_file = "../data/solar_park_weather_average.csv"
weighted_avg.to_csv(output_file, index=True)

print(f"Gewichtete Wetterdaten gespeichert unter {output_file}.")
