### API Analysis

![alt text](../images/image.png "Title")
This configuration shows the hourly day-ahead (price of energy until the same time tomorrow) for the last two weeks.
When checking the network traffic for the above dates and for the hourly resolution, you will find three .json files being fetched from the API.

A request to the api has the following structure:
https://www.smard.de/app/chart_data/4169/DE/4169_DE_hour_[timestamp_in_milliseconds].json

The following request fetch data for the corresponding time frames.

https://www.smard.de/app/chart_data/4169/DE/4169_DE_hour_1729461600000.json:
Sunday, 6 October 2024 22:00:00 -> Sunday, 13 October 2024 21:00:00

https://www.smard.de/app/chart_data/4169/DE/4169_DE_hour_1728856800000.json:
Sunday, 13 October 2024 22:00:00 -> Sunday, 20 October 2024 21:00:00

https://www.smard.de/app/chart_data/4169/DE/4169_DE_hour_1729461600000.json
Sunday, 20 October 2024 22:00:00 -> Sunday, 27 October 2024 22:00:00


You will find that for example the timestamp 1729461600000 maps to the initial date Sunday, 6 October 2024 22:00:00 and every file contains the date for one week. Interestingly enough the site only shows the data for two weeks even though it had to fetch the data for three entire weeks. If the above links are broken, it may be due to a shift in daylight savings time (DST) which we will have to take into account.

Additionally you will see that each .json file contains around 172 (more or less) time series entries for an entire week.



### Implementing the scraper
We now want to implement a scraper that fetches the hourly energy prices for n amount of days. With the above information we now know that we'll have to find the corresponding timestamps for each week and to fetch the data.

In [41]:
import requests
import numpy as np
import logging
from datetime import datetime, timedelta, timezone
import pytz
import time
from pprint import pprint

In [37]:
logging.basicConfig(level=logging.INFO) 

logger = logging.getLogger("scraper_logger")

# console_handler = logging.StreamHandler()
file_handler = logging.FileHandler("app.log")

# console_handler.setLevel(logging.WARNING)
file_handler.setLevel(logging.WARNING) 

# logger.addHandler(console_handler)
logger.addHandler(file_handler)

In [38]:
def scrape(url, delay):
    response =  requests.get(url)
    response.raise_for_status()

    time.sleep(delay)
    return response

In [39]:
from datetime import datetime, timedelta
import pytz

# Define Berlin timezone
tz_berlin = pytz.timezone("Europe/Berlin")

# Calculate last Monday in Berlin time, taking into account local DST
now = datetime.now(tz_berlin)
days_since_monday = now.weekday()
last_monday_berlin = now - timedelta(days=days_since_monday)
last_monday_berlin = last_monday_berlin.replace(hour=0, minute=0, second=0, microsecond=0)

# Convert Berlin time to UTC and get the timestamp in milliseconds
last_monday_utc = last_monday_berlin.astimezone(pytz.UTC)
last_monday_utc_ms = int(last_monday_utc.timestamp() * 1000)

print("Berlin time (local):", last_monday_berlin)
print("UTC time:", last_monday_utc)
print("UTC timestamp (ms):", last_monday_utc_ms)


Berlin time (local): 2025-01-27 00:00:00+01:00
UTC time: 2025-01-26 23:00:00+00:00
UTC timestamp (ms): 1737932400000


In [40]:
import requests
import logging
from datetime import datetime, timedelta
import numpy as np
import pytz

# Define Berlin timezone
tz_berlin = pytz.timezone("Europe/Berlin")

# Calculate last Monday in Berlin time, taking into account local DST
now = datetime.now(tz_berlin)
days_since_monday = now.weekday()
last_monday_berlin = now - timedelta(days=days_since_monday)
last_monday_berlin = last_monday_berlin.replace(hour=0, minute=0, second=0, microsecond=0)

# Convert Berlin time to UTC and get the timestamp in milliseconds
last_monday_utc = last_monday_berlin.astimezone(pytz.UTC)
last_monday_utc_ms = int(last_monday_utc.timestamp() * 1000)

print("Berlin time (local):", last_monday_berlin)
print("UTC time:", last_monday_utc)
print("UTC timestamp (ms):", last_monday_utc_ms)

# Define constants
week_in_ms = 24 * 60 * 60 * 1000 * 7
delay = 0.5  # seconds
n = 500  # number of weeks
base_url = "https://www.smard.de/app/chart_data/4169/DE/4169_DE_hour_{}.json"

# Use a dictionary to store unique timestamps and prices
energy_ts_data = {}

for k in range(n):
    last_monday_berlin = last_monday_utc.astimezone(tz_berlin)
    last_monday_utc = last_monday_berlin.astimezone(pytz.UTC)
    last_monday_utc_ms = int(last_monday_utc.timestamp() * 1000)

    # Adjust timestamp for daylight savings time (berlin tz) if necessary
    if last_monday_berlin.dst() != timedelta(0):  # DST is in effect
        last_monday_utc_ms -= 60 * 60 * 1000

    try:
        response = requests.get(base_url.format(last_monday_utc_ms))
        response.raise_for_status()
        logging.info(f"Successfully scraped data for ts: {last_monday_berlin} (Europe/Berlin)")
        json_data = response.json()
    except requests.exceptions.HTTPError as http_err:
        logging.warning(f"Failed to scrape data for timestamp: {last_monday_utc} (UTC)\n\tError: {http_err}")
        continue
    except requests.exceptions.JSONDecodeError as decoder_error:
        logging.warning(f"Failed to deserialize JSON: \n\tError: {decoder_error}")
        continue

    # Parse the JSON response
    parsed_json = dict(json_data)

    for ts, price in parsed_json.get("series", []):
        try:
            price_float = float(price)
            # Convert to naive timestamp
            ts_datetime = datetime.fromtimestamp(ts / 1000).replace(tzinfo=None).isoformat()
            print(ts_datetime)
            # Add to the dictionary, overwriting any duplicates
            energy_ts_data[ts_datetime] = price_float
        except TypeError as e:
            logging.warning(f"Failed to parse non-float value for timestamp {ts}\n\tError: {e}")
            continue

    # Move to the previous week
    last_monday_utc = last_monday_utc - timedelta(weeks=1)

# Convert the dictionary to a sorted list of tuples
energy_ts_data_sorted = sorted(energy_ts_data.items())

# Convert to a NumPy array
data = np.array(energy_ts_data_sorted)

print("Final dataset shape:", data.shape)

# Save the data as a CSV file (naive timestamps only)
np.savetxt("../data/day_ahead_energy_prices.csv", data, delimiter=",", fmt="%s")


INFO:root:Successfully scraped data for ts: 2025-01-27 00:00:00+01:00 (Europe/Berlin)
	Error: float() argument must be a string or a number, not 'NoneType'
	Error: float() argument must be a string or a number, not 'NoneType'
	Error: float() argument must be a string or a number, not 'NoneType'
	Error: float() argument must be a string or a number, not 'NoneType'
	Error: float() argument must be a string or a number, not 'NoneType'
	Error: float() argument must be a string or a number, not 'NoneType'
	Error: float() argument must be a string or a number, not 'NoneType'
	Error: float() argument must be a string or a number, not 'NoneType'
	Error: float() argument must be a string or a number, not 'NoneType'
	Error: float() argument must be a string or a number, not 'NoneType'
	Error: float() argument must be a string or a number, not 'NoneType'
	Error: float() argument must be a string or a number, not 'NoneType'
	Error: float() argument must be a string or a number, not 'NoneType'
	Err

Berlin time (local): 2025-01-27 00:00:00+01:00
UTC time: 2025-01-26 23:00:00+00:00
UTC timestamp (ms): 1737932400000
2025-01-27T00:00:00
2025-01-27T01:00:00
2025-01-27T02:00:00
2025-01-27T03:00:00
2025-01-27T04:00:00
2025-01-27T05:00:00
2025-01-27T06:00:00
2025-01-27T07:00:00
2025-01-27T08:00:00
2025-01-27T09:00:00
2025-01-27T10:00:00
2025-01-27T11:00:00
2025-01-27T12:00:00
2025-01-27T13:00:00
2025-01-27T14:00:00
2025-01-27T15:00:00
2025-01-27T16:00:00
2025-01-27T17:00:00
2025-01-27T18:00:00
2025-01-27T19:00:00
2025-01-27T20:00:00
2025-01-27T21:00:00
2025-01-27T22:00:00
2025-01-27T23:00:00
2025-01-28T00:00:00
2025-01-28T01:00:00
2025-01-28T02:00:00
2025-01-28T03:00:00
2025-01-28T04:00:00
2025-01-28T05:00:00
2025-01-28T06:00:00
2025-01-28T07:00:00
2025-01-28T08:00:00
2025-01-28T09:00:00
2025-01-28T10:00:00
2025-01-28T11:00:00
2025-01-28T12:00:00
2025-01-28T13:00:00
2025-01-28T14:00:00
2025-01-28T15:00:00
2025-01-28T16:00:00
2025-01-28T17:00:00
2025-01-28T18:00:00
2025-01-28T19:00:00
202

INFO:root:Successfully scraped data for ts: 2025-01-20 00:00:00+01:00 (Europe/Berlin)
INFO:root:Successfully scraped data for ts: 2025-01-13 00:00:00+01:00 (Europe/Berlin)
INFO:root:Successfully scraped data for ts: 2025-01-06 00:00:00+01:00 (Europe/Berlin)


2025-01-20T00:00:00
2025-01-20T01:00:00
2025-01-20T02:00:00
2025-01-20T03:00:00
2025-01-20T04:00:00
2025-01-20T05:00:00
2025-01-20T06:00:00
2025-01-20T07:00:00
2025-01-20T08:00:00
2025-01-20T09:00:00
2025-01-20T10:00:00
2025-01-20T11:00:00
2025-01-20T12:00:00
2025-01-20T13:00:00
2025-01-20T14:00:00
2025-01-20T15:00:00
2025-01-20T16:00:00
2025-01-20T17:00:00
2025-01-20T18:00:00
2025-01-20T19:00:00
2025-01-20T20:00:00
2025-01-20T21:00:00
2025-01-20T22:00:00
2025-01-20T23:00:00
2025-01-21T00:00:00
2025-01-21T01:00:00
2025-01-21T02:00:00
2025-01-21T03:00:00
2025-01-21T04:00:00
2025-01-21T05:00:00
2025-01-21T06:00:00
2025-01-21T07:00:00
2025-01-21T08:00:00
2025-01-21T09:00:00
2025-01-21T10:00:00
2025-01-21T11:00:00
2025-01-21T12:00:00
2025-01-21T13:00:00
2025-01-21T14:00:00
2025-01-21T15:00:00
2025-01-21T16:00:00
2025-01-21T17:00:00
2025-01-21T18:00:00
2025-01-21T19:00:00
2025-01-21T20:00:00
2025-01-21T21:00:00
2025-01-21T22:00:00
2025-01-21T23:00:00
2025-01-22T00:00:00
2025-01-22T01:00:00


INFO:root:Successfully scraped data for ts: 2024-12-30 00:00:00+01:00 (Europe/Berlin)
INFO:root:Successfully scraped data for ts: 2024-12-23 00:00:00+01:00 (Europe/Berlin)
INFO:root:Successfully scraped data for ts: 2024-12-16 00:00:00+01:00 (Europe/Berlin)


2024-12-30T00:00:00
2024-12-30T01:00:00
2024-12-30T02:00:00
2024-12-30T03:00:00
2024-12-30T04:00:00
2024-12-30T05:00:00
2024-12-30T06:00:00
2024-12-30T07:00:00
2024-12-30T08:00:00
2024-12-30T09:00:00
2024-12-30T10:00:00
2024-12-30T11:00:00
2024-12-30T12:00:00
2024-12-30T13:00:00
2024-12-30T14:00:00
2024-12-30T15:00:00
2024-12-30T16:00:00
2024-12-30T17:00:00
2024-12-30T18:00:00
2024-12-30T19:00:00
2024-12-30T20:00:00
2024-12-30T21:00:00
2024-12-30T22:00:00
2024-12-30T23:00:00
2024-12-31T00:00:00
2024-12-31T01:00:00
2024-12-31T02:00:00
2024-12-31T03:00:00
2024-12-31T04:00:00
2024-12-31T05:00:00
2024-12-31T06:00:00
2024-12-31T07:00:00
2024-12-31T08:00:00
2024-12-31T09:00:00
2024-12-31T10:00:00
2024-12-31T11:00:00
2024-12-31T12:00:00
2024-12-31T13:00:00
2024-12-31T14:00:00
2024-12-31T15:00:00
2024-12-31T16:00:00
2024-12-31T17:00:00
2024-12-31T18:00:00
2024-12-31T19:00:00
2024-12-31T20:00:00
2024-12-31T21:00:00
2024-12-31T22:00:00
2024-12-31T23:00:00
2025-01-01T00:00:00
2025-01-01T01:00:00


INFO:root:Successfully scraped data for ts: 2024-12-09 00:00:00+01:00 (Europe/Berlin)
INFO:root:Successfully scraped data for ts: 2024-12-02 00:00:00+01:00 (Europe/Berlin)
INFO:root:Successfully scraped data for ts: 2024-11-25 00:00:00+01:00 (Europe/Berlin)


2024-12-09T00:00:00
2024-12-09T01:00:00
2024-12-09T02:00:00
2024-12-09T03:00:00
2024-12-09T04:00:00
2024-12-09T05:00:00
2024-12-09T06:00:00
2024-12-09T07:00:00
2024-12-09T08:00:00
2024-12-09T09:00:00
2024-12-09T10:00:00
2024-12-09T11:00:00
2024-12-09T12:00:00
2024-12-09T13:00:00
2024-12-09T14:00:00
2024-12-09T15:00:00
2024-12-09T16:00:00
2024-12-09T17:00:00
2024-12-09T18:00:00
2024-12-09T19:00:00
2024-12-09T20:00:00
2024-12-09T21:00:00
2024-12-09T22:00:00
2024-12-09T23:00:00
2024-12-10T00:00:00
2024-12-10T01:00:00
2024-12-10T02:00:00
2024-12-10T03:00:00
2024-12-10T04:00:00
2024-12-10T05:00:00
2024-12-10T06:00:00
2024-12-10T07:00:00
2024-12-10T08:00:00
2024-12-10T09:00:00
2024-12-10T10:00:00
2024-12-10T11:00:00
2024-12-10T12:00:00
2024-12-10T13:00:00
2024-12-10T14:00:00
2024-12-10T15:00:00
2024-12-10T16:00:00
2024-12-10T17:00:00
2024-12-10T18:00:00
2024-12-10T19:00:00
2024-12-10T20:00:00
2024-12-10T21:00:00
2024-12-10T22:00:00
2024-12-10T23:00:00
2024-12-11T00:00:00
2024-12-11T01:00:00


INFO:root:Successfully scraped data for ts: 2024-11-18 00:00:00+01:00 (Europe/Berlin)
INFO:root:Successfully scraped data for ts: 2024-11-11 00:00:00+01:00 (Europe/Berlin)
INFO:root:Successfully scraped data for ts: 2024-11-04 00:00:00+01:00 (Europe/Berlin)


2024-11-18T00:00:00
2024-11-18T01:00:00
2024-11-18T02:00:00
2024-11-18T03:00:00
2024-11-18T04:00:00
2024-11-18T05:00:00
2024-11-18T06:00:00
2024-11-18T07:00:00
2024-11-18T08:00:00
2024-11-18T09:00:00
2024-11-18T10:00:00
2024-11-18T11:00:00
2024-11-18T12:00:00
2024-11-18T13:00:00
2024-11-18T14:00:00
2024-11-18T15:00:00
2024-11-18T16:00:00
2024-11-18T17:00:00
2024-11-18T18:00:00
2024-11-18T19:00:00
2024-11-18T20:00:00
2024-11-18T21:00:00
2024-11-18T22:00:00
2024-11-18T23:00:00
2024-11-19T00:00:00
2024-11-19T01:00:00
2024-11-19T02:00:00
2024-11-19T03:00:00
2024-11-19T04:00:00
2024-11-19T05:00:00
2024-11-19T06:00:00
2024-11-19T07:00:00
2024-11-19T08:00:00
2024-11-19T09:00:00
2024-11-19T10:00:00
2024-11-19T11:00:00
2024-11-19T12:00:00
2024-11-19T13:00:00
2024-11-19T14:00:00
2024-11-19T15:00:00
2024-11-19T16:00:00
2024-11-19T17:00:00
2024-11-19T18:00:00
2024-11-19T19:00:00
2024-11-19T20:00:00
2024-11-19T21:00:00
2024-11-19T22:00:00
2024-11-19T23:00:00
2024-11-20T00:00:00
2024-11-20T01:00:00


INFO:root:Successfully scraped data for ts: 2024-10-28 00:00:00+01:00 (Europe/Berlin)
INFO:root:Successfully scraped data for ts: 2024-10-21 01:00:00+02:00 (Europe/Berlin)
INFO:root:Successfully scraped data for ts: 2024-10-14 01:00:00+02:00 (Europe/Berlin)


2024-10-28T00:00:00
2024-10-28T01:00:00
2024-10-28T02:00:00
2024-10-28T03:00:00
2024-10-28T04:00:00
2024-10-28T05:00:00
2024-10-28T06:00:00
2024-10-28T07:00:00
2024-10-28T08:00:00
2024-10-28T09:00:00
2024-10-28T10:00:00
2024-10-28T11:00:00
2024-10-28T12:00:00
2024-10-28T13:00:00
2024-10-28T14:00:00
2024-10-28T15:00:00
2024-10-28T16:00:00
2024-10-28T17:00:00
2024-10-28T18:00:00
2024-10-28T19:00:00
2024-10-28T20:00:00
2024-10-28T21:00:00
2024-10-28T22:00:00
2024-10-28T23:00:00
2024-10-29T00:00:00
2024-10-29T01:00:00
2024-10-29T02:00:00
2024-10-29T03:00:00
2024-10-29T04:00:00
2024-10-29T05:00:00
2024-10-29T06:00:00
2024-10-29T07:00:00
2024-10-29T08:00:00
2024-10-29T09:00:00
2024-10-29T10:00:00
2024-10-29T11:00:00
2024-10-29T12:00:00
2024-10-29T13:00:00
2024-10-29T14:00:00
2024-10-29T15:00:00
2024-10-29T16:00:00
2024-10-29T17:00:00
2024-10-29T18:00:00
2024-10-29T19:00:00
2024-10-29T20:00:00
2024-10-29T21:00:00
2024-10-29T22:00:00
2024-10-29T23:00:00
2024-10-30T00:00:00
2024-10-30T01:00:00


INFO:root:Successfully scraped data for ts: 2024-10-07 01:00:00+02:00 (Europe/Berlin)
INFO:root:Successfully scraped data for ts: 2024-09-30 01:00:00+02:00 (Europe/Berlin)
INFO:root:Successfully scraped data for ts: 2024-09-23 01:00:00+02:00 (Europe/Berlin)


2024-10-07T00:00:00
2024-10-07T01:00:00
2024-10-07T02:00:00
2024-10-07T03:00:00
2024-10-07T04:00:00
2024-10-07T05:00:00
2024-10-07T06:00:00
2024-10-07T07:00:00
2024-10-07T08:00:00
2024-10-07T09:00:00
2024-10-07T10:00:00
2024-10-07T11:00:00
2024-10-07T12:00:00
2024-10-07T13:00:00
2024-10-07T14:00:00
2024-10-07T15:00:00
2024-10-07T16:00:00
2024-10-07T17:00:00
2024-10-07T18:00:00
2024-10-07T19:00:00
2024-10-07T20:00:00
2024-10-07T21:00:00
2024-10-07T22:00:00
2024-10-07T23:00:00
2024-10-08T00:00:00
2024-10-08T01:00:00
2024-10-08T02:00:00
2024-10-08T03:00:00
2024-10-08T04:00:00
2024-10-08T05:00:00
2024-10-08T06:00:00
2024-10-08T07:00:00
2024-10-08T08:00:00
2024-10-08T09:00:00
2024-10-08T10:00:00
2024-10-08T11:00:00
2024-10-08T12:00:00
2024-10-08T13:00:00
2024-10-08T14:00:00
2024-10-08T15:00:00
2024-10-08T16:00:00
2024-10-08T17:00:00
2024-10-08T18:00:00
2024-10-08T19:00:00
2024-10-08T20:00:00
2024-10-08T21:00:00
2024-10-08T22:00:00
2024-10-08T23:00:00
2024-10-09T00:00:00
2024-10-09T01:00:00


INFO:root:Successfully scraped data for ts: 2024-09-16 01:00:00+02:00 (Europe/Berlin)
INFO:root:Successfully scraped data for ts: 2024-09-09 01:00:00+02:00 (Europe/Berlin)
INFO:root:Successfully scraped data for ts: 2024-09-02 01:00:00+02:00 (Europe/Berlin)


2024-09-16T00:00:00
2024-09-16T01:00:00
2024-09-16T02:00:00
2024-09-16T03:00:00
2024-09-16T04:00:00
2024-09-16T05:00:00
2024-09-16T06:00:00
2024-09-16T07:00:00
2024-09-16T08:00:00
2024-09-16T09:00:00
2024-09-16T10:00:00
2024-09-16T11:00:00
2024-09-16T12:00:00
2024-09-16T13:00:00
2024-09-16T14:00:00
2024-09-16T15:00:00
2024-09-16T16:00:00
2024-09-16T17:00:00
2024-09-16T18:00:00
2024-09-16T19:00:00
2024-09-16T20:00:00
2024-09-16T21:00:00
2024-09-16T22:00:00
2024-09-16T23:00:00
2024-09-17T00:00:00
2024-09-17T01:00:00
2024-09-17T02:00:00
2024-09-17T03:00:00
2024-09-17T04:00:00
2024-09-17T05:00:00
2024-09-17T06:00:00
2024-09-17T07:00:00
2024-09-17T08:00:00
2024-09-17T09:00:00
2024-09-17T10:00:00
2024-09-17T11:00:00
2024-09-17T12:00:00
2024-09-17T13:00:00
2024-09-17T14:00:00
2024-09-17T15:00:00
2024-09-17T16:00:00
2024-09-17T17:00:00
2024-09-17T18:00:00
2024-09-17T19:00:00
2024-09-17T20:00:00
2024-09-17T21:00:00
2024-09-17T22:00:00
2024-09-17T23:00:00
2024-09-18T00:00:00
2024-09-18T01:00:00


INFO:root:Successfully scraped data for ts: 2024-08-26 01:00:00+02:00 (Europe/Berlin)
INFO:root:Successfully scraped data for ts: 2024-08-19 01:00:00+02:00 (Europe/Berlin)
INFO:root:Successfully scraped data for ts: 2024-08-12 01:00:00+02:00 (Europe/Berlin)


2024-08-26T00:00:00
2024-08-26T01:00:00
2024-08-26T02:00:00
2024-08-26T03:00:00
2024-08-26T04:00:00
2024-08-26T05:00:00
2024-08-26T06:00:00
2024-08-26T07:00:00
2024-08-26T08:00:00
2024-08-26T09:00:00
2024-08-26T10:00:00
2024-08-26T11:00:00
2024-08-26T12:00:00
2024-08-26T13:00:00
2024-08-26T14:00:00
2024-08-26T15:00:00
2024-08-26T16:00:00
2024-08-26T17:00:00
2024-08-26T18:00:00
2024-08-26T19:00:00
2024-08-26T20:00:00
2024-08-26T21:00:00
2024-08-26T22:00:00
2024-08-26T23:00:00
2024-08-27T00:00:00
2024-08-27T01:00:00
2024-08-27T02:00:00
2024-08-27T03:00:00
2024-08-27T04:00:00
2024-08-27T05:00:00
2024-08-27T06:00:00
2024-08-27T07:00:00
2024-08-27T08:00:00
2024-08-27T09:00:00
2024-08-27T10:00:00
2024-08-27T11:00:00
2024-08-27T12:00:00
2024-08-27T13:00:00
2024-08-27T14:00:00
2024-08-27T15:00:00
2024-08-27T16:00:00
2024-08-27T17:00:00
2024-08-27T18:00:00
2024-08-27T19:00:00
2024-08-27T20:00:00
2024-08-27T21:00:00
2024-08-27T22:00:00
2024-08-27T23:00:00
2024-08-28T00:00:00
2024-08-28T01:00:00


INFO:root:Successfully scraped data for ts: 2024-08-05 01:00:00+02:00 (Europe/Berlin)
INFO:root:Successfully scraped data for ts: 2024-07-29 01:00:00+02:00 (Europe/Berlin)
INFO:root:Successfully scraped data for ts: 2024-07-22 01:00:00+02:00 (Europe/Berlin)


2024-08-05T00:00:00
2024-08-05T01:00:00
2024-08-05T02:00:00
2024-08-05T03:00:00
2024-08-05T04:00:00
2024-08-05T05:00:00
2024-08-05T06:00:00
2024-08-05T07:00:00
2024-08-05T08:00:00
2024-08-05T09:00:00
2024-08-05T10:00:00
2024-08-05T11:00:00
2024-08-05T12:00:00
2024-08-05T13:00:00
2024-08-05T14:00:00
2024-08-05T15:00:00
2024-08-05T16:00:00
2024-08-05T17:00:00
2024-08-05T18:00:00
2024-08-05T19:00:00
2024-08-05T20:00:00
2024-08-05T21:00:00
2024-08-05T22:00:00
2024-08-05T23:00:00
2024-08-06T00:00:00
2024-08-06T01:00:00
2024-08-06T02:00:00
2024-08-06T03:00:00
2024-08-06T04:00:00
2024-08-06T05:00:00
2024-08-06T06:00:00
2024-08-06T07:00:00
2024-08-06T08:00:00
2024-08-06T09:00:00
2024-08-06T10:00:00
2024-08-06T11:00:00
2024-08-06T12:00:00
2024-08-06T13:00:00
2024-08-06T14:00:00
2024-08-06T15:00:00
2024-08-06T16:00:00
2024-08-06T17:00:00
2024-08-06T18:00:00
2024-08-06T19:00:00
2024-08-06T20:00:00
2024-08-06T21:00:00
2024-08-06T22:00:00
2024-08-06T23:00:00
2024-08-07T00:00:00
2024-08-07T01:00:00


INFO:root:Successfully scraped data for ts: 2024-07-15 01:00:00+02:00 (Europe/Berlin)


2024-07-15T00:00:00
2024-07-15T01:00:00
2024-07-15T02:00:00
2024-07-15T03:00:00
2024-07-15T04:00:00
2024-07-15T05:00:00
2024-07-15T06:00:00
2024-07-15T07:00:00
2024-07-15T08:00:00
2024-07-15T09:00:00
2024-07-15T10:00:00
2024-07-15T11:00:00
2024-07-15T12:00:00
2024-07-15T13:00:00
2024-07-15T14:00:00
2024-07-15T15:00:00
2024-07-15T16:00:00
2024-07-15T17:00:00
2024-07-15T18:00:00
2024-07-15T19:00:00
2024-07-15T20:00:00
2024-07-15T21:00:00
2024-07-15T22:00:00
2024-07-15T23:00:00
2024-07-16T00:00:00
2024-07-16T01:00:00
2024-07-16T02:00:00
2024-07-16T03:00:00
2024-07-16T04:00:00
2024-07-16T05:00:00
2024-07-16T06:00:00
2024-07-16T07:00:00
2024-07-16T08:00:00
2024-07-16T09:00:00
2024-07-16T10:00:00
2024-07-16T11:00:00
2024-07-16T12:00:00
2024-07-16T13:00:00
2024-07-16T14:00:00
2024-07-16T15:00:00
2024-07-16T16:00:00
2024-07-16T17:00:00
2024-07-16T18:00:00
2024-07-16T19:00:00
2024-07-16T20:00:00
2024-07-16T21:00:00
2024-07-16T22:00:00
2024-07-16T23:00:00
2024-07-17T00:00:00
2024-07-17T01:00:00


KeyboardInterrupt: 

In [42]:
import os
import pandas as pd

def download_smard_energy_mix_prediction(target_date: str, output_dir: str):
    
    # Define timezone
    local_timezone = pytz.timezone("Europe/Berlin")
    # Parse the target date
    date_string = f"{target_date} 00:00:00"
    local_date_object = datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S")
    localized_date_object = local_timezone.localize(local_date_object)
    epoch_timestamp = int(local_date_object.timestamp())
    # Calculate the offset based on the weekday (0 = Monday, 6 = Sunday)
    weekday = localized_date_object.weekday() # Monday = 0, Sunday = 6
    hour_offset = weekday * 24
    timestamp_in_milliseconds = epoch_timestamp * 1000 - (hour_offset * 3600 * 1000)
   
    # Fetch data from the SMARD.de API (returns 168 hours of data starting from the time
    pred_wind_offs_url = f"https://www.smard.de/app/chart_data/3791/DE/3791_DE_hour_{timestamp_in_milliseconds}.json" # w
    pred_wind_ons_url = f"https://www.smard.de/app/chart_data/123/DE/123_DE_hour_{timestamp_in_milliseconds}.json"
    pred_solar_url = f"https://www.smard.de/app/chart_data/125/DE/125_DE_hour_{timestamp_in_milliseconds}.json"
    
    pred_wind_offs_response = requests.get(pred_wind_offs_url)
    pred_wind_ons_response = requests.get(pred_wind_ons_url)
    pred_solar_response = requests.get(pred_solar_url)

    responses = [pred_wind_offs_response, pred_wind_ons_response,pred_solar_response]

    dfs = []
    
    for r, name in zip(responses, ("Wind onshore", "Wind offshore","Solar")):
        if r.status_code != 200:
            print(f"Failed to fetch {name} data: {r.status_code}")
    
        data = r.json()
    
        if "series" not in data or not data["series"]:
            print(f"No {name} data available for the specified date.")
    
        series = data["series"]
    # Extract the 48-hour data for the given date
        start_index = hour_offset
        end_index = hour_offset + 48
        day_series = series[start_index:end_index]
        dts = [datetime.utcfromtimestamp(dt[0] / 1000).strftime('%Y-%m-%d %H:%M:%S') for dt in day_series]
        observed_output = [item[1] for item in day_series]

        df = pd.DataFrame({
            "Datetime": dts,
            name: observed_output
        })

        df["Datetime"] = pd.to_datetime(df["Datetime"], format="%Y-%m-%d %H:%M:%S")
        df.set_index("Datetime", inplace=True)

        dfs.append(df)
    df_merged = pd.concat(dfs, axis=1)
    return df_merged
    

print(download_smard_energy_mix_prediction("2024-10-30", "../data/"))

2024-10-30 00:00:00+01:00
1730242800
2
48
1730070000000
                     Wind onshore  Wind offshore     Solar
Datetime                                                  
2024-10-29 23:00:00       1943.75        5546.50      0.00
2024-10-30 00:00:00       2045.75        5331.25      0.00
2024-10-30 01:00:00       2155.75        5015.50      0.00
2024-10-30 02:00:00       2307.50        4714.50      0.00
2024-10-30 03:00:00       2462.75        4647.50      0.00
2024-10-30 04:00:00       2530.50        4742.25      0.00
2024-10-30 05:00:00       2484.00        4659.25      0.25
2024-10-30 06:00:00       2468.50        4429.25    476.50
2024-10-30 07:00:00       2475.00        4205.00   2653.25
2024-10-30 08:00:00       2475.50        4080.75   5648.75
2024-10-30 09:00:00       2413.00        4362.00   8597.00
2024-10-30 10:00:00       2334.25        4923.25  10651.25
2024-10-30 11:00:00       2268.25        5467.00  11319.00
2024-10-30 12:00:00       2213.50        5968.00  10525.50


In [44]:
print(1730242800 == 1730246400)

1730073600000 == 1730070000000

False


False

In [None]:
import requests
import json

def fetch_smard_data(ts):


    url = f"https://www.smard.de/app/chart_data/3791/DE/3791_DE_hour_{ts}.json"
    
    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "en-GB,en;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Referer": "https://www.smard.de/home/marktdaten",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.0.1 Safari/605.1.15"
    }
    
    response = requests.get(url, headers=headers)
    
    if response.status_code == 200:
        data = response.json()
        with open("smard_data.json", "w", encoding="utf-8") as file:
            json.dump(data, file, indent=4, ensure_ascii=False)
        print(f"Data fetched and saved successfully! {ts}")
        return data
    else:
        print(f"Failed to fetch data. Status code: {response}")
        return None
    
milliseconds_in_24_hours = 24 * 60 * 60 * 1000 * 7
ts = 1737932400000
for _ in range(100):
    fetch_smard_data(ts)
    ts = ts - milliseconds_in_24_hours
    


Data fetched and saved successfully! 1737932400000
Data fetched and saved successfully! 1737327600000
Data fetched and saved successfully! 1736722800000
Data fetched and saved successfully! 1736118000000
Data fetched and saved successfully! 1735513200000
Data fetched and saved successfully! 1734908400000
Data fetched and saved successfully! 1734303600000
Data fetched and saved successfully! 1733698800000
Data fetched and saved successfully! 1733094000000
Data fetched and saved successfully! 1732489200000
Data fetched and saved successfully! 1731884400000
Data fetched and saved successfully! 1731279600000
Data fetched and saved successfully! 1730674800000
Data fetched and saved successfully! 1730070000000
Failed to fetch data. Status code: <Response [404]>
Failed to fetch data. Status code: <Response [404]>
Failed to fetch data. Status code: <Response [404]>
Failed to fetch data. Status code: <Response [404]>
Failed to fetch data. Status code: <Response [404]>
Failed to fetch data. Stat

KeyboardInterrupt: 

In [None]:
timestamp = 1737932400000
# 24 hours in milliseconds
milliseconds_in_24_hours = 24 * 60 * 60 * 1000
new_timestamp = timestamp + milliseconds_in_24_hours
print(new_timestamp)

- Weather:
-- wind
-- sun 
-- temp

- per day energy mix
- gas price per day
- 

In [None]:
start_date = datetime.now()
end_date = datetime(2018, 9, 30)
delta = timedelta(days=1)
delay = 0.2

# end_date = start_date - (10 * delta)

base_url = "https://www.energy-charts.info/charts/energy_pie/data/de/day_pie_{}.json"

current_date = start_date
res = []
while current_date >= end_date:
    try:
        cd_format = current_date.strftime("%Y_%m_%d")
        response = scrape(base_url.format(cd_format), delay)

        logging.info(f"Successfully scraped data for date: {cd_format}")
        res.append((cd_format, response.json()))
    except requests.exceptions.HTTPError as http_err:
        logging.warning(f"Failed to scrape data for date: {cd_format} (UTC)\n\tError: {http_err}")
    except requests.exceptions.JSONDecodeError as decoder_error:
        logging.warning(f"Failed to deserialize JSON: \n\tError: {decoder_error}")
    current_date -= delta


print(len(res))


### Energy Mix Scraper

In [None]:
exclude_cross_boarder_e_trading = True
cbet = "Cross border electricity trading"

dtype = [('date', 'U50'), ('e_component', 'U50'), ('value', 'float32')]

# Initialize an empty structured array
array = np.empty(0, dtype=dtype)

for date, data in res:
    sources = []
    for e_source in data:
        name = str(e_source["name"]["en"])

        if exclude_cross_boarder_e_trading and name == cbet:
            continue

        # Ensure numeric conversion or default to 0
        try:
            y_value = float(e_source["y"])
        except (ValueError, TypeError):
            continue
        
        sources.append((date, name, y_value))
    
    # Convert to a structured array with the correct dtype
    arr = np.array(sources, dtype=dtype)
    
    # Normalize the 'value' column
    arr['value'] /= np.sum(arr['value'], axis=0)

    # Append to the main array
    array = np.append(array, arr)

np.savetxt("../data/daily_market_mix.csv", array, delimiter=",", fmt="%s")
array

In [None]:
POST /api/raw-data HTTP/1.1
Content-Type: application/json
Accept: */*
Sec-Fetch-Site: cross-site
Accept-Language: en-GB,en;q=0.9
Accept-Encoding: gzip, deflate, br
Sec-Fetch-Mode: cors
Origin: https://www.agora-energiewende.de
User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.0.1 Safari/605.1.15
Content-Length: 538
Referer: https://www.agora-energiewende.de/
Connection: keep-alive
Sec-Fetch-Dest: empty
X-Requested-With: XMLHttpRequest
Api-key: agora_live_62ce76dd202927.67115829
Priority: u=3, i


{"filters":{"from":"2023-11-01","to":"2024-10-01","generation":["Total electricity demand","Biomass","Hydro","Wind offshore","Wind onshore","Solar","Total conventional power plant","Nuclear","Lignite","Hard Coal","Natural Gas","Pumped storage generation","Other","Grid emission factor","Total grid emissions","Total Renewables","Total Conventional","Renewable share","Conventional share"]},"x_coordinate":"date_id","y_coordinate":"value","view_name":"live_gen_plus_emi_de_hourly","kpi_name":"power_generation","z_coordinate":"generation"}

In [None]:
import requests

# Define the API endpoint and headers
url = "https://api.agora-energy.org/api/raw-data"
headers = {
    "Content-Type": "application/json",
    "Accept": "*/*",
    "Sec-Fetch-Site": "cross-site",
    "Accept-Language": "en-GB,en;q=0.9",
    "Accept-Encoding": "gzip, deflate, br",
    "Sec-Fetch-Mode": "cors",
    "Origin": "https://www.agora-energiewende.de",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.0.1 Safari/605.1.15",
    "Referer": "https://www.agora-energiewende.de/",
    "Connection": "keep-alive",
    "Sec-Fetch-Dest": "empty",
    "X-Requested-With": "XMLHttpRequest",
    "Api-key": "agora_live_62ce76dd202927.67115829",
}

out = []

# Define the payload
for year in [2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024]:
    payload = {
        "filters": {
            "from": f"{year}-10-01",
            "to": f"{year + 1}-09-30",
            "generation": [
                "Total electricity demand", "Biomass", "Hydro", "Wind offshore",
                "Wind onshore", "Solar", "Total conventional power plant", "Nuclear",
                "Lignite", "Hard Coal", "Natural Gas", "Pumped storage generation",
                "Other", "Grid emission factor", "Total grid emissions", "Total Renewables",
                "Total Conventional", "Renewable share", "Conventional share"
            ]
        },
        "x_coordinate": "date_id",
        "y_coordinate": "value",
        "view_name": "live_gen_plus_emi_de_hourly",
        "kpi_name": "power_generation",
        "z_coordinate": "generation"
    }

    # Make the POST request
    response = requests.post(url, headers=headers, json=payload)

    # Check the response
    if response.status_code == 200:
        print("Request was successful!", year, year+1)
        data = {}
        data = response.json()
        out.extend(data["data"]["data"])
    else:
        print(f"Request failed with status code {response.status_code}", year, year+1)
    time.sleep(0.3)


In [None]:
np.savetxt("../data/hourly_market_mix.csv", np.array(out), delimiter=",", fmt="%s")
data = np.array(out)

In [None]:
#"../data/hourly_market_mix.csv" :
# 2017-10-01T00:00:00,5.276,Biomass
# 2017-10-01T00:00:00,445.727,Grid emission factor
# 2017-10-01T00:00:00,3.738,Hard Coal
# 2017-10-01T00:00:00,2.19,Hydro
# 2017-10-01T00:00:00,15.846,Lignite
# 2017-10-01T00:00:00,6.634,Natural Gas
# 2017-10-01T00:00:00,10.19,Nuclear
# 2017-10-01T00:00:00,1.763,Other
# 2017-10-01T00:00:00,0.235,Pumped storage generation
# 2017-10-01T00:00:00,0.0,Solar
# 2017-10-01T00:00:00,38.406,Total conventional power plant
# 2017-10-01T00:00:00,49.965,Total electricity demand
# 2017-10-01T00:00:00,24676.021,Total grid emissions
# 2017-10-01T00:00:00,1.048,Wind offshore
# 2017-10-01T00:00:00,8.442,Wind onshore
# 2017-10-01T01:00:00,5.269,Biomass
# 2017-10-01T01:00:00,450.157,Grid emission factor
# 2017-10-01T01:00:00,3.5,Hard Coal
# 2017-10-01T01:00:00,2.202,Hydro
# 2017-10-01T01:00:00,15.843,Lignite
# 2017-10-01T01:00:00,6.752,Natural Gas
# 2017-10-01T01:00:00,10.307,Nuclear
# 2017-10-01T01:00:00,1.788,Other
# 2017-10-01T01:00:00,0.187,Pumped storage generation
# 2017-10-01T01:00:00,0.0,Solar
# 2017-10-01T01:00:00,38.377,Total conventional power plant
# 2017-10-01T01:00:00,49.062,Total electricity demand
# 2017-10-01T01:00:00,24548.501,Total grid emissions
# 2017-10-01T01:00:00,0.907,Wind offshore
# 2017-10-01T01:00:00,7.778,Wind onshore



mix_rows = []
delta_abs_mix_rows = []
delta_mix_rows = []
other_metrics_rows = []

energy_by_hour = np.loadtxt("../data/hourly_market_mix.csv", delimiter=",", dtype=str)


# Defining Categories
mix_categories = [
    "Biomass",
    "Hard Coal",
    "Hydro",
    "Lignite",
    "Natural Gas",
    "Nuclear",
    "Other",
    "Pumped storage generation",
    "Solar",
    "Wind offshore",
    "Wind onshore",
]

other_metrics = [
    "Grid emission factor",
    "Total conventional power plant",
    "Total electricity demand",
    "Total grid emissions",
]

# Define start and end dates as naive datetime objects
start_date = datetime.fromisoformat("2018-10-01T00:00:00")
end_date = datetime.fromisoformat("2024-10-30T00:00:00")

# Generate list of hourly timestamps betweens start and end date
timestamps = [
    start_date + timedelta(hours=i)
    for i in range(int((end_date - start_date).total_seconds() // 3600) + 1)
]

# Converts the timestamps to ISO format
timestamp_strings = [ts.isoformat() for ts in timestamps]

# create  dict of timestamps as keys with empty lists
data_dict = {ts: [] for ts in timestamp_strings}

# add each energy type to dict after iso conversion
for d in energy_by_hour:
    d_timestamp = datetime.fromisoformat(d[0]).isoformat()  # Naive datetime conversion
    if d_timestamp in data_dict:
        data_dict[d_timestamp].append(d)


for ts in timestamp_strings:
    
    hour_data = np.array(data_dict.get(ts, []))  # Fetch data for this timestamp
    
    if hour_data.size == 0:
        continue

    mix_per_hour = hour_data[np.isin(hour_data[:, 2], mix_categories)]
    
    if mix_per_hour.size == 0:
        continue

    if "Nuclear" not in mix_per_hour[:, 2]:
        mix_per_hour = np.insert(mix_per_hour, 6, [ts, "0.0", "Nuclear"], axis=0)


    # fill None values with 0
    mix_per_hour = np.where(mix_per_hour == "None", 0.0, mix_per_hour) 

    total_demand = hour_data[np.isin(hour_data[:, 2], ["Total electricity demand"])][0][1] # shorten
    
    total_demand = total_demand.astype(float)
    if total_demand != "None":

        # add delta to new dataset
        mix_per_hour_delta = np.copy(mix_per_hour)

        # get delta between total energy demand and produced energy in mix
        delta = total_demand - mix_per_hour[:, 1].astype(float).sum() 

        mix_per_hour_delta = np.insert(mix_per_hour_delta, mix_per_hour_delta.shape[0], [ts, delta, "delta"], axis=0)

        # retain absolute delta dataset
        mix_per_hour_delta_abs = np.copy(mix_per_hour_delta)

        # create percentages relative to energy demanded from delta dataset
        mix_per_hour_delta[:, 1] = (
            mix_per_hour_delta[:, 1].astype(float) / total_demand
        )

        mix_per_hour_delta_abs_row = np.concatenate(([ts], mix_per_hour_delta_abs[:, 1]))
        delta_abs_mix_rows.append(mix_per_hour_delta_abs_row)
        
        mix_per_hour_delta_row = np.concatenate(([ts], mix_per_hour_delta[:, 1]))
        delta_mix_rows.append(mix_per_hour_delta_row)



    # create percentages relative to produced energy from original dataset
    mix_per_hour[:, 1] = (
        mix_per_hour[:, 1].astype(float) / mix_per_hour[:, 1].astype(float).sum()
    )
    
    mix_per_hour_row = np.concatenate(([ts], mix_per_hour[:, 1]))
    mix_rows.append(mix_per_hour_row)

    other_metrics_per_hour = hour_data[np.isin(hour_data[:, 2], other_metrics)]
    if other_metrics_per_hour.size == 0:
        continue
    row = np.concatenate(([ts], other_metrics_per_hour[:, 1]))
    
    other_metrics_rows.append(row)

    
percentage_mix = np.vstack([["Timestamp"] + mix_categories] + mix_rows)
mix_categories.append("Delta")
delta_abs_mix = np.vstack([["Timestamp"] + mix_categories] + delta_abs_mix_rows)
delta_percentage_mix = np.vstack([["Timestamp"] + mix_categories] + delta_mix_rows)
percentage_sources = np.vstack([["Timestamp"] + other_metrics] + other_metrics_rows)

np.savetxt("../data/hourly_market_mix_cleaned.csv", percentage_mix, delimiter=",", fmt="%s")
np.savetxt("../data/hourly_market_mix_delta.csv", delta_percentage_mix, delimiter=",", fmt="%s")
np.savetxt("../data/hourly_market_mix_delta_abs.csv", delta_abs_mix, delimiter=",", fmt="%s")
np.savetxt("../data/hourly_market_metrics_cleaned.csv", percentage_sources, delimiter=",", fmt="%s")

['Delta' '-0.06395637084779374' '-0.07241054432886612' ...
 '0.10873861726297138' '0.09192833825586821' '0.09788072545944428']


### Weather Data

In [None]:
    coordinates = [
    {"latitude": 52.5200, "longitude": 13.4050},  # Berlin
    {"latitude": 48.1351, "longitude": 11.5820},  # Munich
    {"latitude": 50.1109, "longitude": 8.6821},   # Frankfurt
    {"latitude": 51.1657, "longitude": 10.4515},  # Central Germany (approximate)
    {"latitude": 53.5511, "longitude": 9.9937},   # Hamburg
    {"latitude": 51.2277, "longitude": 6.7735},   # Düsseldorf
    {"latitude": 51.0504, "longitude": 13.7373},  # Dresden
    {"latitude": 50.9375, "longitude": 6.9603},   # Cologne
    {"latitude": 49.4875, "longitude": 8.4660},   # Mannheim
    {"latitude": 48.7758, "longitude": 9.1829},   # Stuttgart
    {"latitude": 51.3397, "longitude": 12.3731},  # Leipzig
    {"latitude": 50.0782, "longitude": 8.2398},   # Wiesbaden
    {"latitude": 49.0069, "longitude": 8.4037},   # Karlsruhe
    {"latitude": 51.5128, "longitude": 7.4633},   # Dortmund
    {"latitude": 50.1211, "longitude": 8.4965},   # Offenbach
    {"latitude": 50.3569, "longitude": 7.5886},   # Koblenz
    {"latitude": 50.7753, "longitude": 6.0839},   # Aachen
    {"latitude": 49.4521, "longitude": 11.0767},  # Nuremberg
    {"latitude": 52.3759, "longitude": 9.7320},   # Hanover
    {"latitude": 51.4818, "longitude": 7.2162},   # Bochum
    {"latitude": 51.4556, "longitude": 7.0116},   # Essen
    {"latitude": 51.4344, "longitude": 6.7623},   # Duisburg
    {"latitude": 51.9607, "longitude": 7.6261},   # Münster
]
    wind_parks = [
    {"latitude": 54.008333, "longitude": 6.598333, "weight": 60},      # Alpha Ventus
    {"latitude": 54.358333, "longitude": 5.975, "weight": 400},        # BARD Offshore I
    {"latitude": 53.690, "longitude": 6.480, "weight": 113.4},         # Riffgat
    {"latitude": 54.15, "longitude": 7.25, "weight": 295},             # Amrumbank West
    {"latitude": 54.53, "longitude": 6.25, "weight": 200},             # Butendiek
    {"latitude": 54.367, "longitude": 6.467, "weight": 295},           # DanTysk
    {"latitude": 54.480, "longitude": 7.370, "weight": 288},           # Meerwind Süd|Ost
    {"latitude": 54.4, "longitude": 6.6, "weight": 576},               # Gode Wind 1 & 2
    {"latitude": 54.30, "longitude": 6.65, "weight": 400},             # Global Tech I
    {"latitude": 53.88, "longitude": 6.59, "weight": 450},             # Borkum Riffgrund 1
    {"latitude": 53.88, "longitude": 6.59, "weight": 215},             # Borkum Riffgrund 2
    {"latitude": 54.00, "longitude": 6.58, "weight": 342},             # Trianel Windpark Borkum
    {"latitude": 54.22, "longitude": 6.63, "weight": 332},             # Nordsee Ost
    {"latitude": 54.25, "longitude": 7.25, "weight": 385},             # Hohe See
    {"latitude": 54.28, "longitude": 7.30, "weight": 252},             # Albatros
    {"latitude": 54.48, "longitude": 6.78, "weight": 350},             # Wikinger
    {"latitude": 54.55, "longitude": 6.37, "weight": 402},             # Arkona
    {"latitude": 54.45, "longitude": 6.58, "weight": 600},             # Veja Mate
    {"latitude": 54.33, "longitude": 7.18, "weight": 300},             # Deutsche Bucht
    {"latitude": 54.25, "longitude": 7.18, "weight": 402},             # Kaskasi
    {"latitude": 53.610278, "longitude": 7.429167, "weight": 318.2},  # Windpark Holtriem-Dornum
    {"latitude": 53.973889, "longitude": 8.933333, "weight": 302.45},  # Windpark Friedrichskoog
    {"latitude": 54.611111, "longitude": 8.903611, "weight": 293.4},  # Bürgerwindpark Reußenköge
    {"latitude": 53.338333, "longitude": 13.764444, "weight": 242.5},  # Windfeld Uckermark
    {"latitude": 53.715278, "longitude": 13.319722, "weight": 202.85},  # RH2-Werder/Kessin/Altentreptow
    {"latitude": 51.131667, "longitude": 11.964167, "weight": 188.1},  # Windpark Stößen-Teuchern
    {"latitude": 52.539722, "longitude": 12.871667, "weight": 175.2},  # Windpark Ketzin
    {"latitude": 52.515833, "longitude": 11.780833, "weight": 151.3},  # Windpark Hüselitz
    {"latitude": 51.031667, "longitude": 10.629722, "weight": 152.25},  # Windfeld Wangenheim-Hochheim-Ballstädt-Westhausen
    {"latitude": 52.354722, "longitude": 14.373056, "weight": 133.9},  # Windpark Odervorland
    {"latitude": 51.640278, "longitude": 8.912222, "weight": 129.445},  # Windpark Asseln
    {"latitude": 52.001389, "longitude": 12.830833, "weight": 128.2},  # Windpark Feldheim
    {"latitude": 51.395556, "longitude": 11.709167, "weight": 122.1},  # Windpark Esperstedt-Obhausen
    {"latitude": 51.960833, "longitude": 11.606389, "weight": 114.45},  # Windpark Biere-Borne
    {"latitude": 53.3375, "longitude": 7.095833, "weight": 106.25},  # Windpark Wybelsumer Polder
    {"latitude": 53.388056, "longitude": 7.377778, "weight": 102.34},  # Windpark Ihlow
    {"latitude": 52.015556, "longitude": 13.193333, "weight": 98.8},  # Windpark Heidehof
    {"latitude": 51.546389, "longitude": 13.868611, "weight": 93.1},  # Windpark Klettwitz
    {"latitude": 52.662778, "longitude": 11.709167, "weight": 93.5},  # Windpark Schinne-Grassau
    {"latitude": 51.989722, "longitude": 10.833333, "weight": 92.4},  # Windpark Druiberg
    {"latitude": 51.579722, "longitude": 11.708611, "weight": 89.3},  # Windpark Beesenstedt-Rottelsdorf
    {"latitude": 52.123333, "longitude": 11.160000, "weight": 87.65},  # Windpark Ausleben-Badeleben-Wormsdorf
    {"latitude": 53.070833, "longitude": 7.739167, "weight": 86.5},  # Windpark Saterland
    {"latitude": 51.721111, "longitude": 11.644167, "weight": 83.35},  # Windpark Alsleben
    {"latitude": 51.798611, "longitude": 11.491944, "weight": 83.05},  # Windpark Blaue Warthe
    {"latitude": 51.474167, "longitude": 13.249722, "weight": 82.8},  # Windfeld Randowhöhe
    {"latitude": 51.173056, "longitude": 11.350556, "weight": 82.65},  # Windpark Rastenberg-Olbersleben
    {"latitude": 51.975833, "longitude": 11.451944, "weight": 79.1},  # Windpark Egeln-Nord
    {"latitude": 53.363056, "longitude": 7.705000, "weight": 77.4},  # Windpark Wiesmoor
    {"latitude": 51.774444, "longitude": 12.700833, "weight": 77.1},  # Windpark Dorna-Kemberg-Schnellin
    {"latitude": 52.027778, "longitude": 11.367778, "weight": 76.9},  # Windfeld Sonnenberg
    {"latitude": 53.320833, "longitude": 12.026944, "weight": 75.2},  # Windpark Jännersdorf
    {"latitude": 51.617222, "longitude": 8.803333, "weight": 75.05},  # Windpark Altenautal
    {"latitude": 52.192500, "longitude": 11.368056, "weight": 71.3},  # Windpar Bornstedt-Nordgermersleben-Rottmersleben-Schackensleben
    {"latitude": 51.642500, "longitude": 11.658333, "weight": 72},  # Windpark Gerbstedt-Ihlewitz
    {"latitude": 49.964722, "longitude": 7.652500, "weight": 70.5},  # Hunsrück-Windpark Ellern
    {"latitude": 52.867500, "longitude": 7.138889, "weight": 70.1},  # Windpark Haren
    {"latitude": 51.041111, "longitude": 6.530000, "weight": 67.2},  # Windpark Königshovener Höhe
    {"latitude": 51.445278, "longitude": 8.696944, "weight": 65.95},  # Windpark Madfeld-Bleiwäsche
    {"latitude": 53.817778, "longitude": 8.078889, "weight": 65.6},  # Windpark Altenbruch
    {"latitude": 52.176389, "longitude": 11.300000, "weight": 64.1},  # Windpark Hakenstedt
    {"latitude": 51.946111, "longitude": 14.462778, "weight": 64},  # Windpark Cottbuser Halde
    {"latitude": 51.707778, "longitude": 12.239167, "weight": 62.7},  # Windpark Thurland
    {"latitude": 49.689167, "longitude": 8.106944, "weight": 61},  # Windfeld Rheinhessen-Pfalz
    {"latitude": 50.003333, "longitude": 7.386667, "weight": 59.8},  # Windpark Kirchberg im Faas
    {"latitude": 51.040556, "longitude": 11.620278, "weight": 59.1},  # Windpark Eckolstädt
    {"latitude": 51.247500, "longitude": 10.283889, "weight": 58.1},  # Windpark Büttstedt
    {"latitude": 51.072778, "longitude": 11.789444, "weight": 58},  # Windpark Molau-Leislau
    {"latitude": 54.483333, "longitude": 11.110000, "weight": 57.5},  # Windpark Fehmarn-Mitte
    {"latitude": 49.830000, "longitude": 8.138889, "weight": 55},  # Windpark Wörrstadt
    {"latitude": 49.296111, "longitude": 9.415556, "weight": 54.9},  # Windpark Harthäuser Wald
    {"latitude": 53.373333, "longitude": 9.496944, "weight": 52.9},  # Windpark Ahrenswohlde-Wohnste
    {"latitude": 48.980833, "longitude": 11.102500, "weight": 52.8},  # Windpark Raitenbucher Forst
    {"latitude": 48.740000, "longitude": 9.889722, "weight": 52.25},  # Windpark Lauterstein
    {"latitude": 49.721111, "longitude": 7.721944, "weight": 52.2},  # Windpark Lettweiler Höhe
    {"latitude": 50.603056, "longitude": 9.243889, "weight": 49.65},  # Windpark Goldner Steinrück
    {"latitude": 50.516944, "longitude": 6.373611, "weight": 49.45},  # Windpark Schleiden-Schöneseiffen
    {"latitude": 53.538889, "longitude": 8.952778, "weight": 48.8},  # Windpark Köhlen
    {"latitude": 49.764167, "longitude": 8.059722, "weight": 47.1},  # Windpark Heimersheim
    {"latitude": 53.396667, "longitude": 14.169167, "weight": 46.3},  # Windfeld Wolfsmoor
    {"latitude": 53.684167, "longitude": 8.646111, "weight": 46},  # Windpark Holßel
    {"latitude": 51.838333, "longitude": 12.875278, "weight": 44.9},  # Windpark Elster
    {"latitude": 52.002222, "longitude": 12.123056, "weight": 44.4},  # Windpark Zerbst
    {"latitude": 52.178333, "longitude": 11.886111, "weight": 43.6},  # Windpark Stegelitz-Ziepel-Tryppehna
    {"latitude": 53.606944, "longitude": 8.793056, "weight": 43.2},  # Windpark Kührstedt-Alfstedt
    {"latitude": 52.060111, "longitude": 14.381000, "weight": 43.2},  # Windpark Ullersdorf
    {"latitude": 49.813333, "longitude": 8.017778, "weight": 42.4},  # Windpark Gau-Bickelheim
    {"latitude": 51.422778, "longitude": 11.834444, "weight": 42},  # Windpark Holleben-Bad Lauchstädt
    {"latitude": 54.648611, "longitude": 9.176389, "weight": 41.8},  # Bürgerwindpark Löwenstedt
    {"latitude": 50.623861, "longitude": 9.153528, "weight": 41.6},  # Windpark Feldatal
    {"latitude": 51.413056, "longitude": 11.587222, "weight": 41},  # Windpark Farnstädt
    {"latitude": 52.976667, "longitude": 7.415833, "weight": 40.9},  # Windpark Dörpen-Ost
    {"latitude": 52.878056, "longitude": 10.042778, "weight": 40.5},  # Windpark Hermannsburg
    {"latitude": 52.900000, "longitude": 12.384167, "weight": 40.4},  # Windpark Kyritz-Plänitz-Zernitz
    {"latitude": 52.597222, "longitude": 12.266667, "weight": 40},  # Windpark Stüdenitz
]
    sun_parks = [
    {"latitude": 51.3167, "longitude": 12.3667, "weight": 605},   # Witznitz
    {"latitude": 51.3236, "longitude": 12.6511, "weight": 52},    # Waldpolenz Solar Park
    {"latitude": 51.7625, "longitude": 13.6000, "weight": 52.3},  # Walddrehna Solar Park
    {"latitude": 53.9239, "longitude": 13.2256, "weight": 52},    # Tutow Solar Park
    {"latitude": 53.0290, "longitude": 13.5336, "weight": 128.5}, # Templin Solar Park
    {"latitude": 48.8031, "longitude": 12.7669, "weight": 54},    # Strasskirchen Solar Park
    {"latitude": 53.6391, "longitude": 12.3643, "weight": 76},    # Solarpark Zietlitz
    {"latitude": 52.6475, "longitude": 13.6916, "weight": 187},   # Solarpark Weesow-Willmersdorf
    {"latitude": 53.5267, "longitude": 11.6609, "weight": 172},   # Solarpark Tramm-Göhten
    {"latitude": 48.6490, "longitude": 11.2782, "weight": 120},   # Solarpark Schornhof
    {"latitude": 51.5450, "longitude": 13.9800, "weight": 166},   # Solarpark Meuro
    {"latitude": 50.5960, "longitude": 9.3690, "weight": 54.7},   # Solarpark Lauterbach
    {"latitude": 52.6413, "longitude": 14.1923, "weight": 150},   # Solarpark Gottesgabe
    {"latitude": 53.3818, "longitude": 12.2688, "weight": 65},    # Solarpark Ganzlin
    {"latitude": 53.4148, "longitude": 12.2470, "weight": 90},    # Solarpark Gaarz
    {"latitude": 52.8253, "longitude": 13.6983, "weight": 84.7},  # Solarpark Finow Tower
    {"latitude": 52.6975, "longitude": 14.2300, "weight": 150},   # Solarpark Alttrebbin
    {"latitude": 53.2000, "longitude": 12.5167, "weight": 67.8},  # Solarpark Alt Daber
    {"latitude": 52.6139, "longitude": 14.2425, "weight": 145},   # Neuhardenberg Solar Park
    {"latitude": 51.9319, "longitude": 14.4072, "weight": 71.8},  # Lieberose Photovoltaic Park
    {"latitude": 51.5686, "longitude": 13.7375, "weight": 80.7},  # Finsterwalde Solar Park
    {"latitude": 54.6294, "longitude": 9.3433, "weight": 83.6},   # Eggebek Solar Park
    {"latitude": 52.4367, "longitude": 12.4514, "weight": 91}     # Brandenburg-Briest Solarpark
]


In [7]:
import requests
import requests_cache
import pandas as pd
from datetime import datetime
import time

def fetch_weather_data(csv_file_path):
    # Read the CSV file to get the last date
    df = pd.read_csv(csv_file_path)
    start_date = pd.to_datetime(df['date']).max().strftime('%Y-%m-%d')
    end_date = datetime.now().strftime('%Y-%m-%d')

    # Setup the Open-Meteo API client with caching and retries
    cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
    retry_session = requests.Session()
    retry_session.mount('https://', requests.adapters.HTTPAdapter(max_retries=5))

    # Define the base URL for the weather API
    historical_url = "https://archive-api.open-meteo.com/v1/archive"
    coordinates = [
        {"latitude": 52.5200, "longitude": 13.4050},  # Berlin
        {"latitude": 48.1351, "longitude": 11.5820},  # Munich
    ]
    wind_parks = [
        {"latitude": 54.008333, "longitude": 6.598333, "weight": 60},  # Alpha Ventus
        {"latitude": 54.358333, "longitude": 5.975, "weight": 400},    # BARD Offshore I
    ]
    sun_parks = [
        {"latitude": 51.3167, "longitude": 12.3667, "weight": 605},    # Witznitz
        {"latitude": 51.3236, "longitude": 12.6511, "weight": 52},     # Waldpolenz Solar Park
    ]

    def fetch_data(params_template, locations, variable):
        all_data = []
        for coord in locations:
            params = params_template.copy()
            params.update({"latitude": coord["latitude"], "longitude": coord["longitude"]})
            while True:
                try:
                    response = retry_session.get(historical_url, params=params)
                    response.raise_for_status()
                    data = response.json()
                    df = pd.DataFrame(data["hourly"][variable])
                    df["date"] = pd.to_datetime(df["time"])
                    df.set_index("date", inplace=True)
                    all_data.append(df)
                    break
                except Exception as e:
                    print(f"Error fetching data for coordinates {coord}: {e}")
                    time.sleep(1)
        return pd.concat(all_data)

    # Fetch and process data
    params_template = {"start_date": start_date, "end_date": end_date, "hourly": ["temperature_2m", "precipitation", "wind_speed_100m", "direct_radiation"]}
    weather_data = fetch_data(params_template, coordinates, "temperature_2m")
    weather_data["precipitation"] = fetch_data(params_template, coordinates, "precipitation")["precipitation"]
    weather_data["wind_speed_100m"] = fetch_data(params_template, coordinates, "wind_speed_100m")["wind_speed_100m"]
    weather_data["direct_radiation"] = fetch_data(params_template, coordinates, "direct_radiation")["direct_radiation"]

    # Save the averaged data to a CSV file
    weather_data.groupby("date").mean().rename(columns={"precipitation": "Precipitation (rain/snow)"}).to_csv(csv_file_path, index=True)

    # Fetch and process wind data
    params_template["hourly"] = ["wind_speed_100m"]
    wind_data = fetch_data(params_template, wind_parks, "wind_speed_100m")
    total_weight = sum(park["weight"] for park in wind_parks)
    wind_data = wind_data.groupby("date", as_index=False).apply(lambda x: sum(x["wind_speed_100m"] * park["weight"] for park in wind_parks) / total_weight)
    wind_data.to_csv("../data/weighted_windspeed.csv", index=True, header=["date", "windspeed 100m"])

    # Fetch and process sun data
    params_template["hourly"] = ["direct_radiation"]
    sun_data = fetch_data(params_template, sun_parks, "direct_radiation")
    total_weight = sum(park["weight"] for park in sun_parks)
    sun_data = sun_data.groupby("date", as_index=False).apply(lambda x: sum(x["direct_radiation"] * park["weight"] for park in sun_parks) / total_weight)
    sun_data.to_csv("../data/weighted_sun_radiation.csv", index=True, header=["date", "direct_radiation"])

    # Update the original weather data with the new weighted values
    df_replacement_sun = pd.read_csv('../data/weighted_sun_radiation.csv')
    df_replacement_wind = pd.read_csv('../data/weighted_windspeed.csv')
    df['direct_radiation'] = df_replacement_sun['direct_radiation']
    df['wind_speed_100m'] = df_replacement_wind['windspeed 100m']
    df.to_csv(csv_file_path, index=False)

fetch_weather_data("../data/germany_weather_average.csv")

Error fetching data for coordinates {'latitude': 52.52, 'longitude': 13.405}: 'time'
Error fetching data for coordinates {'latitude': 52.52, 'longitude': 13.405}: 'time'
Error fetching data for coordinates {'latitude': 52.52, 'longitude': 13.405}: 'time'
Error fetching data for coordinates {'latitude': 52.52, 'longitude': 13.405}: 'time'
Error fetching data for coordinates {'latitude': 52.52, 'longitude': 13.405}: 'time'
Error fetching data for coordinates {'latitude': 52.52, 'longitude': 13.405}: 'time'
Error fetching data for coordinates {'latitude': 52.52, 'longitude': 13.405}: 'time'
Error fetching data for coordinates {'latitude': 52.52, 'longitude': 13.405}: 'time'
Error fetching data for coordinates {'latitude': 52.52, 'longitude': 13.405}: 'time'
Error fetching data for coordinates {'latitude': 52.52, 'longitude': 13.405}: 'time'
Error fetching data for coordinates {'latitude': 52.52, 'longitude': 13.405}: 'time'
Error fetching data for coordinates {'latitude': 52.52, 'longitud

KeyboardInterrupt: 

In [27]:
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
from datetime import datetime, timedelta
#either give an endtime or how many hours you need for the forecast
#API only contains till the 01.11.2024 historical forecast data
def fetch_forecast(start_date, end_date=None, hours=None):
    # Setup the Open-Meteo API client with caching and retries
    cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
    retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
    openmeteo = openmeteo_requests.Client(session=retry_session)

    # Define the base URL for the weather API
    forecast_url = "https://api.open-meteo.com/v1/forecast"
    coordinates = [
    {"latitude": 52.5200, "longitude": 13.4050},  # Berlin
    {"latitude": 48.1351, "longitude": 11.5820},  # Munich
    {"latitude": 50.1109, "longitude": 8.6821},   # Frankfurt
    {"latitude": 51.1657, "longitude": 10.4515},  # Central Germany (approximate)
    {"latitude": 53.5511, "longitude": 9.9937},   # Hamburg
    {"latitude": 51.2277, "longitude": 6.7735},   # Düsseldorf
    {"latitude": 51.0504, "longitude": 13.7373},  # Dresden
    {"latitude": 50.9375, "longitude": 6.9603},   # Cologne
    {"latitude": 49.4875, "longitude": 8.4660},   # Mannheim
    {"latitude": 48.7758, "longitude": 9.1829},   # Stuttgart
    {"latitude": 51.3397, "longitude": 12.3731},  # Leipzig
    {"latitude": 50.0782, "longitude": 8.2398},   # Wiesbaden
    {"latitude": 49.0069, "longitude": 8.4037},   # Karlsruhe
    {"latitude": 51.5128, "longitude": 7.4633},   # Dortmund
    {"latitude": 50.1211, "longitude": 8.4965},   # Offenbach
    {"latitude": 50.3569, "longitude": 7.5886},   # Koblenz
    {"latitude": 50.7753, "longitude": 6.0839},   # Aachen
    {"latitude": 49.4521, "longitude": 11.0767},  # Nuremberg
    {"latitude": 52.3759, "longitude": 9.7320},   # Hanover
    {"latitude": 51.4818, "longitude": 7.2162},   # Bochum
    {"latitude": 51.4556, "longitude": 7.0116},   # Essen
    {"latitude": 51.4344, "longitude": 6.7623},   # Duisburg
    {"latitude": 51.9607, "longitude": 7.6261},   # Münster
]
    wind_parks = [
    {"latitude": 54.008333, "longitude": 6.598333, "weight": 60},      # Alpha Ventus
    {"latitude": 54.358333, "longitude": 5.975, "weight": 400},        # BARD Offshore I
    {"latitude": 53.690, "longitude": 6.480, "weight": 113.4},         # Riffgat
    {"latitude": 54.15, "longitude": 7.25, "weight": 295},             # Amrumbank West
    {"latitude": 54.53, "longitude": 6.25, "weight": 200},             # Butendiek
    {"latitude": 54.367, "longitude": 6.467, "weight": 295},           # DanTysk
    {"latitude": 54.480, "longitude": 7.370, "weight": 288},           # Meerwind Süd|Ost
    {"latitude": 54.4, "longitude": 6.6, "weight": 576},               # Gode Wind 1 & 2
    {"latitude": 54.30, "longitude": 6.65, "weight": 400},             # Global Tech I
    {"latitude": 53.88, "longitude": 6.59, "weight": 450},             # Borkum Riffgrund 1
    {"latitude": 53.88, "longitude": 6.59, "weight": 215},             # Borkum Riffgrund 2
    {"latitude": 54.00, "longitude": 6.58, "weight": 342},             # Trianel Windpark Borkum
    {"latitude": 54.22, "longitude": 6.63, "weight": 332},             # Nordsee Ost
    {"latitude": 54.25, "longitude": 7.25, "weight": 385},             # Hohe See
    {"latitude": 54.28, "longitude": 7.30, "weight": 252},             # Albatros
    {"latitude": 54.48, "longitude": 6.78, "weight": 350},             # Wikinger
    {"latitude": 54.55, "longitude": 6.37, "weight": 402},             # Arkona
    {"latitude": 54.45, "longitude": 6.58, "weight": 600},             # Veja Mate
    {"latitude": 54.33, "longitude": 7.18, "weight": 300},             # Deutsche Bucht
    {"latitude": 54.25, "longitude": 7.18, "weight": 402},             # Kaskasi
    {"latitude": 53.610278, "longitude": 7.429167, "weight": 318.2},  # Windpark Holtriem-Dornum
    {"latitude": 53.973889, "longitude": 8.933333, "weight": 302.45},  # Windpark Friedrichskoog
    {"latitude": 54.611111, "longitude": 8.903611, "weight": 293.4},  # Bürgerwindpark Reußenköge
    {"latitude": 53.338333, "longitude": 13.764444, "weight": 242.5},  # Windfeld Uckermark
    {"latitude": 53.715278, "longitude": 13.319722, "weight": 202.85},  # RH2-Werder/Kessin/Altentreptow
    {"latitude": 51.131667, "longitude": 11.964167, "weight": 188.1},  # Windpark Stößen-Teuchern
    {"latitude": 52.539722, "longitude": 12.871667, "weight": 175.2},  # Windpark Ketzin
    {"latitude": 52.515833, "longitude": 11.780833, "weight": 151.3},  # Windpark Hüselitz
    {"latitude": 51.031667, "longitude": 10.629722, "weight": 152.25},  # Windfeld Wangenheim-Hochheim-Ballstädt-Westhausen
    {"latitude": 52.354722, "longitude": 14.373056, "weight": 133.9},  # Windpark Odervorland
    {"latitude": 51.640278, "longitude": 8.912222, "weight": 129.445},  # Windpark Asseln
    {"latitude": 52.001389, "longitude": 12.830833, "weight": 128.2},  # Windpark Feldheim
    {"latitude": 51.395556, "longitude": 11.709167, "weight": 122.1},  # Windpark Esperstedt-Obhausen
    {"latitude": 51.960833, "longitude": 11.606389, "weight": 114.45},  # Windpark Biere-Borne
    {"latitude": 53.3375, "longitude": 7.095833, "weight": 106.25},  # Windpark Wybelsumer Polder
    {"latitude": 53.388056, "longitude": 7.377778, "weight": 102.34},  # Windpark Ihlow
    {"latitude": 52.015556, "longitude": 13.193333, "weight": 98.8},  # Windpark Heidehof
    {"latitude": 51.546389, "longitude": 13.868611, "weight": 93.1},  # Windpark Klettwitz
    {"latitude": 52.662778, "longitude": 11.709167, "weight": 93.5},  # Windpark Schinne-Grassau
    {"latitude": 51.989722, "longitude": 10.833333, "weight": 92.4},  # Windpark Druiberg
    {"latitude": 51.579722, "longitude": 11.708611, "weight": 89.3},  # Windpark Beesenstedt-Rottelsdorf
    {"latitude": 52.123333, "longitude": 11.160000, "weight": 87.65},  # Windpark Ausleben-Badeleben-Wormsdorf
    {"latitude": 53.070833, "longitude": 7.739167, "weight": 86.5},  # Windpark Saterland
    {"latitude": 51.721111, "longitude": 11.644167, "weight": 83.35},  # Windpark Alsleben
    {"latitude": 51.798611, "longitude": 11.491944, "weight": 83.05},  # Windpark Blaue Warthe
    {"latitude": 51.474167, "longitude": 13.249722, "weight": 82.8},  # Windfeld Randowhöhe
    {"latitude": 51.173056, "longitude": 11.350556, "weight": 82.65},  # Windpark Rastenberg-Olbersleben
    {"latitude": 51.975833, "longitude": 11.451944, "weight": 79.1},  # Windpark Egeln-Nord
    {"latitude": 53.363056, "longitude": 7.705000, "weight": 77.4},  # Windpark Wiesmoor
    {"latitude": 51.774444, "longitude": 12.700833, "weight": 77.1},  # Windpark Dorna-Kemberg-Schnellin
    {"latitude": 52.027778, "longitude": 11.367778, "weight": 76.9},  # Windfeld Sonnenberg
    {"latitude": 53.320833, "longitude": 12.026944, "weight": 75.2},  # Windpark Jännersdorf
    {"latitude": 51.617222, "longitude": 8.803333, "weight": 75.05},  # Windpark Altenautal
    {"latitude": 52.192500, "longitude": 11.368056, "weight": 71.3},  # Windpar Bornstedt-Nordgermersleben-Rottmersleben-Schackensleben
    {"latitude": 51.642500, "longitude": 11.658333, "weight": 72},  # Windpark Gerbstedt-Ihlewitz
    {"latitude": 49.964722, "longitude": 7.652500, "weight": 70.5},  # Hunsrück-Windpark Ellern
    {"latitude": 52.867500, "longitude": 7.138889, "weight": 70.1},  # Windpark Haren
    {"latitude": 51.041111, "longitude": 6.530000, "weight": 67.2},  # Windpark Königshovener Höhe
    {"latitude": 51.445278, "longitude": 8.696944, "weight": 65.95},  # Windpark Madfeld-Bleiwäsche
    {"latitude": 53.817778, "longitude": 8.078889, "weight": 65.6},  # Windpark Altenbruch
    {"latitude": 52.176389, "longitude": 11.300000, "weight": 64.1},  # Windpark Hakenstedt
    {"latitude": 51.946111, "longitude": 14.462778, "weight": 64},  # Windpark Cottbuser Halde
    {"latitude": 51.707778, "longitude": 12.239167, "weight": 62.7},  # Windpark Thurland
    {"latitude": 49.689167, "longitude": 8.106944, "weight": 61},  # Windfeld Rheinhessen-Pfalz
    {"latitude": 50.003333, "longitude": 7.386667, "weight": 59.8},  # Windpark Kirchberg im Faas
    {"latitude": 51.040556, "longitude": 11.620278, "weight": 59.1},  # Windpark Eckolstädt
    {"latitude": 51.247500, "longitude": 10.283889, "weight": 58.1},  # Windpark Büttstedt
    {"latitude": 51.072778, "longitude": 11.789444, "weight": 58},  # Windpark Molau-Leislau
    {"latitude": 54.483333, "longitude": 11.110000, "weight": 57.5},  # Windpark Fehmarn-Mitte
    {"latitude": 49.830000, "longitude": 8.138889, "weight": 55},  # Windpark Wörrstadt
    {"latitude": 49.296111, "longitude": 9.415556, "weight": 54.9},  # Windpark Harthäuser Wald
    {"latitude": 53.373333, "longitude": 9.496944, "weight": 52.9},  # Windpark Ahrenswohlde-Wohnste
    {"latitude": 48.980833, "longitude": 11.102500, "weight": 52.8},  # Windpark Raitenbucher Forst
    {"latitude": 48.740000, "longitude": 9.889722, "weight": 52.25},  # Windpark Lauterstein
    {"latitude": 49.721111, "longitude": 7.721944, "weight": 52.2},  # Windpark Lettweiler Höhe
    {"latitude": 50.603056, "longitude": 9.243889, "weight": 49.65},  # Windpark Goldner Steinrück
    {"latitude": 50.516944, "longitude": 6.373611, "weight": 49.45},  # Windpark Schleiden-Schöneseiffen
    {"latitude": 53.538889, "longitude": 8.952778, "weight": 48.8},  # Windpark Köhlen
    {"latitude": 49.764167, "longitude": 8.059722, "weight": 47.1},  # Windpark Heimersheim
    {"latitude": 53.396667, "longitude": 14.169167, "weight": 46.3},  # Windfeld Wolfsmoor
    {"latitude": 53.684167, "longitude": 8.646111, "weight": 46},  # Windpark Holßel
    {"latitude": 51.838333, "longitude": 12.875278, "weight": 44.9},  # Windpark Elster
    {"latitude": 52.002222, "longitude": 12.123056, "weight": 44.4},  # Windpark Zerbst
    {"latitude": 52.178333, "longitude": 11.886111, "weight": 43.6},  # Windpark Stegelitz-Ziepel-Tryppehna
    {"latitude": 53.606944, "longitude": 8.793056, "weight": 43.2},  # Windpark Kührstedt-Alfstedt
    {"latitude": 52.060111, "longitude": 14.381000, "weight": 43.2},  # Windpark Ullersdorf
    {"latitude": 49.813333, "longitude": 8.017778, "weight": 42.4},  # Windpark Gau-Bickelheim
    {"latitude": 51.422778, "longitude": 11.834444, "weight": 42},  # Windpark Holleben-Bad Lauchstädt
    {"latitude": 54.648611, "longitude": 9.176389, "weight": 41.8},  # Bürgerwindpark Löwenstedt
    {"latitude": 50.623861, "longitude": 9.153528, "weight": 41.6},  # Windpark Feldatal
    {"latitude": 51.413056, "longitude": 11.587222, "weight": 41},  # Windpark Farnstädt
    {"latitude": 52.976667, "longitude": 7.415833, "weight": 40.9},  # Windpark Dörpen-Ost
    {"latitude": 52.878056, "longitude": 10.042778, "weight": 40.5},  # Windpark Hermannsburg
    {"latitude": 52.900000, "longitude": 12.384167, "weight": 40.4},  # Windpark Kyritz-Plänitz-Zernitz
    {"latitude": 52.597222, "longitude": 12.266667, "weight": 40},  # Windpark Stüdenitz
]
    sun_parks = [
    {"latitude": 51.3167, "longitude": 12.3667, "weight": 605},   # Witznitz
    {"latitude": 51.3236, "longitude": 12.6511, "weight": 52},    # Waldpolenz Solar Park
    {"latitude": 51.7625, "longitude": 13.6000, "weight": 52.3},  # Walddrehna Solar Park
    {"latitude": 53.9239, "longitude": 13.2256, "weight": 52},    # Tutow Solar Park
    {"latitude": 53.0290, "longitude": 13.5336, "weight": 128.5}, # Templin Solar Park
    {"latitude": 48.8031, "longitude": 12.7669, "weight": 54},    # Strasskirchen Solar Park
    {"latitude": 53.6391, "longitude": 12.3643, "weight": 76},    # Solarpark Zietlitz
    {"latitude": 52.6475, "longitude": 13.6916, "weight": 187},   # Solarpark Weesow-Willmersdorf
    {"latitude": 53.5267, "longitude": 11.6609, "weight": 172},   # Solarpark Tramm-Göhten
    {"latitude": 48.6490, "longitude": 11.2782, "weight": 120},   # Solarpark Schornhof
    {"latitude": 51.5450, "longitude": 13.9800, "weight": 166},   # Solarpark Meuro
    {"latitude": 50.5960, "longitude": 9.3690, "weight": 54.7},   # Solarpark Lauterbach
    {"latitude": 52.6413, "longitude": 14.1923, "weight": 150},   # Solarpark Gottesgabe
    {"latitude": 53.3818, "longitude": 12.2688, "weight": 65},    # Solarpark Ganzlin
    {"latitude": 53.4148, "longitude": 12.2470, "weight": 90},    # Solarpark Gaarz
    {"latitude": 52.8253, "longitude": 13.6983, "weight": 84.7},  # Solarpark Finow Tower
    {"latitude": 52.6975, "longitude": 14.2300, "weight": 150},   # Solarpark Alttrebbin
    {"latitude": 53.2000, "longitude": 12.5167, "weight": 67.8},  # Solarpark Alt Daber
    {"latitude": 52.6139, "longitude": 14.2425, "weight": 145},   # Neuhardenberg Solar Park
    {"latitude": 51.9319, "longitude": 14.4072, "weight": 71.8},  # Lieberose Photovoltaic Park
    {"latitude": 51.5686, "longitude": 13.7375, "weight": 80.7},  # Finsterwalde Solar Park
    {"latitude": 54.6294, "longitude": 9.3433, "weight": 83.6},   # Eggebek Solar Park
    {"latitude": 52.4367, "longitude": 12.4514, "weight": 91}     # Brandenburg-Briest Solarpark
]

    # Convert start_date from string to datetime object
    forecast_start_date_dt = datetime.strptime(start_date, "%Y-%m-%d")

    # Determine end_date based on either end_date or hours
    if end_date:
        forecast_end_date_dt = datetime.strptime(end_date, "%Y-%m-%d")
    elif hours:
        forecast_end_date_dt = forecast_start_date_dt + timedelta(hours=hours)
    else:
        raise ValueError("Either end_date or hours must be provided")

    # Convert dates back to string format
    forecast_start_date = forecast_start_date_dt.strftime("%Y-%m-%d")
    forecast_end_date = forecast_end_date_dt.strftime("%Y-%m-%d")

    # Function to fetch data for a given set of coordinates and parameters
    def fetch_data(coords, params_template):
        all_data = []
        for coord in coords:
            params = params_template.copy()
            params.update({
                "latitude": coord["latitude"],
                "longitude": coord["longitude"],
            })

            while True:
                try:
                    # Fetch forecast weather data for the current location
                    responses = openmeteo.weather_api(forecast_url, params=params)
                    response = responses[0]

                    # Extract hourly data for this location
                    hourly = response.Hourly()
                    hourly_data = {
                        "date": pd.date_range(
                            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
                            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
                            freq=pd.Timedelta(seconds=hourly.Interval()),
                            inclusive="left"
                        )
                    }
                    for var in params_template["hourly"]:
                        hourly_data[var] = hourly.Variables(params_template["hourly"].index(var)).ValuesAsNumpy()

                    # Convert to DataFrame and append to the list
                    hourly_dataframe = pd.DataFrame(data=hourly_data)
                    hourly_dataframe["weight"] = coord.get("weight", 1)  # Add weight column
                    all_data.append(hourly_dataframe)
                    break  # Exit the loop if data is fetched successfully

                except Exception as e:
                    print(f"Error fetching forecast data for coordinates {coord}: {e}")
                    if "Minutely API request limit exceeded" in str(e):
                        print("Waiting for one minute before retrying...")
                        time.sleep(60)  # Wait for one minute before retrying
                    else:
                        break  # Exit the loop if the error is not related to the request limit
        return pd.concat(all_data)

    # Fetch data for coordinates
    params_template = {
        "start_date": forecast_start_date,
        "end_date": forecast_end_date,
        "hourly": ["temperature_2m", "precipitation", "wind_speed_100m"]
    }
    coordinates_data = fetch_data(coordinates, params_template)

    # Fetch data for wind parks
    params_template = {
        "start_date": forecast_start_date,
        "end_date": forecast_end_date,
        "hourly": ["wind_speed_100m"]
    }
    wind_parks_data = fetch_data(wind_parks, params_template)

    # Fetch data for sun parks
    params_template = {
        "start_date": forecast_start_date,
        "end_date": forecast_end_date,
        "hourly": ["direct_radiation"]
    }
    sun_parks_data = fetch_data(sun_parks, params_template)

    # Calculate weighted averages for wind parks
    total_weight = sum(park["weight"] for park in wind_parks)
    wind_parks_data["weighted_wind_speed"] = wind_parks_data["wind_speed_100m"] * wind_parks_data["weight"] / total_weight
    weighted_wind_speed = wind_parks_data.groupby("date")["weighted_wind_speed"].sum()

    # Calculate weighted averages for sun parks
    total_weight = sum(park["weight"] for park in sun_parks)
    sun_parks_data["weighted_radiation"] = sun_parks_data["direct_radiation"] * sun_parks_data["weight"] / total_weight
    weighted_radiation = sun_parks_data.groupby("date")["weighted_radiation"].sum()

    # Calculate averages for coordinates
    coordinates_avg = coordinates_data.groupby("date").mean()

    # Combine all data into one DataFrame
    combined_df = pd.concat([coordinates_avg, weighted_wind_speed, weighted_radiation], axis=1)

    # Save the forecast data to a CSV file
    forecast_csv_file = "../data/forecast_weather.csv"
    combined_df.to_csv(forecast_csv_file, index=True)
    print(f"Forecast data saved to {forecast_csv_file}.")

# Example call to the function
#fetch_forecast("2024-11-01", hours=48)
fetch_forecast("2024-11-01", "2024-11-04")


Forecast data saved to ../data/forecast_weather.csv.


In [20]:
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
import importlib.util
import time
from datetime import datetime, timedelta

# Setup the Open-Meteo API client with caching and retries
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Define the base URL for the weather API
historical_url = "https://archive-api.open-meteo.com/v1/archive"
forecast_url = "https://api.open-meteo.com/v1/forecast"

def update_dates_from_csv(csv_file, start_date, end_date):
    try:
        # Read the CSV file
        df = pd.read_csv(csv_file, parse_dates=["date"], index_col="date")

        # Get the last date in the CSV file
        last_date = df.index.max()

        # Convert end_date to datetime
        end_date_dt = datetime.strptime(end_date, "%Y-%m-%d")

        # Check if the last date is less than end_date
        if last_date < end_date_dt:
            start_date = last_date.strftime("%Y-%m-%d")

        return start_date, end_date
    except FileNotFoundError:
        # If the file does not exist, return the original dates
        return start_date, end_date

def fetch_weather_data(start_date, end_date, coordinates, wind_parks, sun_parks):
    # Update dates based on the existing CSV file
    csv_file = "../data/germany_weather_average.csv"
    start_date, end_date = update_dates_from_csv(csv_file, start_date, end_date)

    # Convert end_date to a datetime object
    end_date_dt = datetime.strptime(end_date, "%Y-%m-%d")

    # Calculate forecast start and end dates
    forecast_start_date_dt = end_date_dt + timedelta(days=1)
    forecast_end_date_dt = end_date_dt + timedelta(days=2)

    # Convert back to string format
    forecast_start_date = forecast_start_date_dt.strftime("%Y-%m-%d")
    forecast_end_date = forecast_end_date_dt.strftime("%Y-%m-%d")

    # Define the weather variables and date range for historical data
    params_template = {
        "start_date": start_date,
        "end_date": end_date,
        "hourly": [
            "temperature_2m",
            "precipitation",
            "wind_speed_100m",
            "direct_radiation"
        ]
    }

    # Store data for all locations
    all_data = []

    for coord in coordinates:
        params = params_template.copy()
        params.update({
            "latitude": coord["latitude"],
            "longitude": coord["longitude"],
        })

        while True:
            try:
                # Fetch historical weather data for the current location
                responses = openmeteo.weather_api(historical_url, params=params)
                response = responses[0]

                # Extract hourly data for this location
                hourly = response.Hourly()
                hourly_data = {
                    "date": pd.date_range(
                        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
                        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
                        freq=pd.Timedelta(seconds=hourly.Interval()),
                        inclusive="left"
                    )
                }
                hourly_data["temperature_2m"] = hourly.Variables(0).ValuesAsNumpy()
                hourly_data["precipitation"] = hourly.Variables(1).ValuesAsNumpy()
                hourly_data["wind_speed_100m"] = hourly.Variables(2).ValuesAsNumpy()
                hourly_data["direct_radiation"] = hourly.Variables(3).ValuesAsNumpy()

                # Convert to DataFrame and append to the list
                hourly_dataframe = pd.DataFrame(data=hourly_data)
                all_data.append(hourly_dataframe)
                break  # Exit the loop if data is fetched successfully

            except Exception as e:
                print(f"Error fetching historical data for coordinates {coord}: {e}")
                if "Minutely API request limit exceeded" in str(e):
                    print("Waiting for one minute before retrying...")
                    time.sleep(60)  # Wait for one minute before retrying
                else:
                    break  # Exit the loop if the error is not related to the request limit

    # Fetch forecast data for the next 2 days after the end_date
    for coord in coordinates:
        params = {
            "latitude": coord["latitude"],
            "longitude": coord["longitude"],
            "hourly": ["temperature_2m", "precipitation", "wind_speed_100m", "direct_radiation"],
            "start_date": forecast_start_date,
            "end_date": forecast_end_date
        }

        while True:
            try:
                # Fetch forecast weather data for the current location
                responses = openmeteo.weather_api(forecast_url, params=params)
                response = responses[0]

                # Extract hourly data for this location
                hourly = response.Hourly()
                hourly_data = {
                    "date": pd.date_range(
                        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
                        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
                        freq=pd.Timedelta(seconds=hourly.Interval()),
                        inclusive="left"
                    )
                }
                hourly_data["temperature_2m"] = hourly.Variables(0).ValuesAsNumpy()
                hourly_data["precipitation"] = hourly.Variables(1).ValuesAsNumpy()
                hourly_data["wind_speed_100m"] = hourly.Variables(2).ValuesAsNumpy()
                hourly_data["direct_radiation"] = hourly.Variables(3).ValuesAsNumpy()

                # Convert to DataFrame and append to the list
                hourly_dataframe = pd.DataFrame(data=hourly_data)
                all_data.append(hourly_dataframe)
                break  # Exit the loop if data is fetched successfully

            except Exception as e:
                print(f"Error fetching forecast data for coordinates {coord}: {e}")
                if "Minutely API request limit exceeded" in str(e):
                    print("Waiting for one minute before retrying...")
                    time.sleep(60)  # Wait for one minute before retrying
                else:
                    break  # Exit the loop if the error is not related to the request limit

    # Combine all data into one DataFrame
    combined_df = pd.concat(all_data)

    # Group by date and calculate the mean for all variables
    averaged_data = combined_df.groupby("date").mean()

    # Rename columns for better understanding
    averaged_data = averaged_data.rename(columns={
        "precipitation": "Precipitation (rain/snow)"
    })

    # Save the averaged data to a CSV file
    averaged_data.to_csv(csv_file, index=True)

    # Define the weather variables and date range for historical data
    params_template = {
        "start_date": start_date,
        "end_date": end_date,
        "hourly": ["wind_speed_100m"]
    }

    # Store data for all locations
    all_data = []

    for coord in wind_parks:
        params = params_template.copy()
        params.update({
            "latitude": coord["latitude"],
            "longitude": coord["longitude"],
        })

        while True:
            try:
                # Fetch historical weather data for the current location
                responses = openmeteo.weather_api(historical_url, params=params)
                response = responses[0]

                # Extract hourly data for this location
                hourly = response.Hourly()
                hourly_data = {
                    "date": pd.date_range(
                        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
                        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
                        freq=pd.Timedelta(seconds=hourly.Interval()),
                        inclusive="left"
                    )
                }
                hourly_data["wind_speed_100m"] = hourly.Variables(0).ValuesAsNumpy()

                # Convert to DataFrame and append to the list
                hourly_dataframe = pd.DataFrame(data=hourly_data)
                all_data.append(hourly_dataframe)
                break  # Exit the loop if data is fetched successfully

            except Exception as e:
                print(f"Error fetching historical data for coordinates {coord}: {e}")
                if "Minutely API request limit exceeded" in str(e):
                    print("Waiting for one minute before retrying...")
                    time.sleep(60)  # Wait for one minute before retrying
                else:
                    break  # Exit the loop if the error is not related to the request limit

    # Fetch forecast data for the next 2 days after the end_date
    for coord in wind_parks:
        params = {
            "latitude": coord["latitude"],
            "longitude": coord["longitude"],
            "hourly": ["wind_speed_100m"],
            "start_date": forecast_start_date,
            "end_date": forecast_end_date
        }

        while True:
            try:
                # Fetch forecast weather data for the current location
                responses = openmeteo.weather_api(forecast_url, params=params)
                response = responses[0]

                # Extract hourly data for this location
                hourly = response.Hourly()
                hourly_data = {
                    "date": pd.date_range(
                        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
                        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
                        freq=pd.Timedelta(seconds=hourly.Interval()),
                        inclusive="left"
                    )
                }
                hourly_data["wind_speed_100m"] = hourly.Variables(0).ValuesAsNumpy()

                # Convert to DataFrame and append to the list
                hourly_dataframe = pd.DataFrame(data=hourly_data)
                all_data.append(hourly_dataframe)
                break  # Exit the loop if data is fetched successfully

            except Exception as e:
                print(f"Error fetching forecast data for coordinates {coord}: {e}")
                if "Minutely API request limit exceeded" in str(e):
                    print("Waiting for one minute before retrying...")
                    time.sleep(60)  # Wait for one minute before retrying
                else:
                    break  # Exit the loop if the error is not related to the request limit

    # Combine all data into one DataFrame
    combined_df = pd.concat(all_data)

    # Combine weighted wind speeds across all parks
    total_weight = sum(park["weight"] for park in wind_parks)
    combined_wind_speed = combined_df.groupby("date").apply(
        lambda x: sum(x["wind_speed_100m"] * [park["weight"] for park in wind_parks]) / total_weight
    )

    # Save the weighted average wind speed to a CSV file
    csv_file = "../data/weighted_windspeed.csv"
    combined_wind_speed.to_csv(csv_file, index=True, header=["windspeed 100m"])

    # Define the weather variables and date range for historical data
    params_template = {
        "start_date": start_date,
        "end_date": end_date,
        "hourly": ["direct_radiation"]
    }

    # Store data for all locations
    all_data = []

    for coord in sun_parks:
        params = params_template.copy()
        params.update({
            "latitude": coord["latitude"],
            "longitude": coord["longitude"],
        })

        while True:
            try:
                # Fetch historical weather data for the current location
                responses = openmeteo.weather_api(historical_url, params=params)
                response = responses[0]

                # Extract hourly data for this location
                hourly = response.Hourly()
                hourly_data = {
                    "date": pd.date_range(
                        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
                        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
                        freq=pd.Timedelta(seconds=hourly.Interval()),
                        inclusive="left"
                    )
                }
                hourly_data["direct_radiation"] = hourly.Variables(0).ValuesAsNumpy()

                # Convert to DataFrame and append to the list
                hourly_dataframe = pd.DataFrame(data=hourly_data)
                all_data.append(hourly_dataframe)
                break  # Exit the loop if data is fetched successfully

            except Exception as e:
                print(f"Error fetching historical data for coordinates {coord}: {e}")
                if "Minutely API request limit exceeded" in str(e):
                    print("Waiting for one minute before retrying...")
                    time.sleep(60)  # Wait for one minute before retrying
                else:
                    break  # Exit the loop if the error is not related to the request limit

    # Fetch forecast data for the next 2 days after the end_date
    for coord in sun_parks:
        params = {
            "latitude": coord["latitude"],
            "longitude": coord["longitude"],
            "hourly": ["direct_radiation"],
            "start_date": forecast_start_date,
            "end_date": forecast_end_date
        }

        while True:
            try:
                # Fetch forecast weather data for the current location
                responses = openmeteo.weather_api(forecast_url, params=params)
                response = responses[0]

                # Extract hourly data for this location
                hourly = response.Hourly()
                hourly_data = {
                    "date": pd.date_range(
                        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
                        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
                        freq=pd.Timedelta(seconds=hourly.Interval()),
                        inclusive="left"
                    )
                }
                hourly_data["direct_radiation"] = hourly.Variables(0).ValuesAsNumpy()

                # Convert to DataFrame and append to the list
                hourly_dataframe = pd.DataFrame(data=hourly_data)
                all_data.append(hourly_dataframe)
                break  # Exit the loop if data is fetched successfully

            except Exception as e:
                print(f"Error fetching forecast data for coordinates {coord}: {e}")
                if "Minutely API request limit exceeded" in str(e):
                    print("Waiting for one minute before retrying...")
                    time.sleep(60)  # Wait for one minute before retrying
                else:
                    break  # Exit the loop if the error is not related to the request limit

    # Combine all data into one DataFrame
    combined_df = pd.concat(all_data)

    # Combine weighted sun radiation across all parks
    total_weight = sum(park["weight"] for park in sun_parks)
    combined_sun_radiation = combined_df.groupby("date").apply(
        lambda x: sum(x["direct_radiation"] * [park["weight"] for park in sun_parks]) / total_weight
    )

    # Save the weighted average sun radiation to a CSV file
    csv_file = '../data/weighted_sun_radiation.csv'
    combined_sun_radiation.to_csv(csv_file, index=True, header=["direct_radiation"])
    print(f"Weighted average sun radiation saved to {csv_file}.")

    # Update the original CSV file with new data
    df_orig = pd.read_csv('../data/germany_weather_average.csv')
    df_replacement_sun = pd.read_csv('../data/weighted_sun_radiation.csv')
    df_replacement_wind = pd.read_csv('../data/weighted_windspeed.csv')

    # Replace columns in the original DataFrame with new data
    df_orig['direct_radiation'] = df_replacement_sun['direct_radiation']
    df_orig['wind_speed_100m'] = df_replacement_wind['windspeed 100m']

    # Save the updated DataFrame to the original CSV file
    df_orig.to_csv('../data/germany_weather_average.csv', index=False)

# Example usage
start_date = "2018-01-01"
end_date = "2025-01-25"
coordinates = [
    # Add your coordinates here
]
wind_parks = [
    # Add your wind parks here
]
sun_parks = [
    # Add your sun parks here
]

fetch_weather_data(start_date, end_date, coordinates, wind_parks, sun_parks)

TypeError: can't compare offset-naive and offset-aware datetimes

In [19]:
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
import importlib.util
import time
from datetime import datetime, timedelta

start_date = "2018-01-01"
end_date = "2025-01-18"

fetch_weather_data(start_date, end_date)

  combined_wind_speed = combined_df.groupby("date").apply(


Weighted average wind speed saved to ../data/weighted_windspeed.csv.


  combined_sun_radiation = combined_df.groupby("date").apply(


Weighted average sun radiation saved to ../data/weighted_sun_radiation.csv.
