In [None]:
import datetime
import polars as pl
import json
import numpy as np
import os

In [None]:
# Bounds of the time window; the number of days to request is derived from
# the difference between these two timestamps.
min_date = datetime.datetime(year=2022, month=4, day=29, hour=8, minute=45)
max_date = datetime.datetime(year=2024, month=7, day=1)

# Decimal places used when rounding station coordinates; one grid cell is
# therefore 10 ** -weather_api_digit_resolution degrees wide.
weather_api_digit_resolution = 2

In [None]:
# Read the API key from the environment (None when the variable is unset)
# so that it never has to be written into the notebook itself.
weather_api_key = os.getenv("WEATHER_API_KEY")

In [None]:
# Load the station list and add coordinates rounded to the weather API
# resolution.  JSON is UTF-8 by specification, so the encoding is given
# explicitly instead of relying on the platform's locale default.
with open("../prediction_service/stations_data.json", "r", encoding="utf-8") as f:
    stations_data = json.load(f)

stations_data_df = pl.DataFrame(stations_data).with_columns(
    [
        pl.col(axis).round(weather_api_digit_resolution).alias(f"{axis}_rounded")
        for axis in ("lat", "lon")
    ]
)
stations_data_df

In [None]:
from itertools import pairwise, product


def _grid_edges(values, scale):
    """Return grid-cell edges (in degrees) that enclose every entry of `values`.

    Edges are integer multiples of ``1 / scale``.  They are built from integer
    indices and divided once, so they carry no accumulated floating-point
    error and the number of edges is deterministic (unlike ``np.arange`` with
    a fractional step).  One spare cell is added on each side so that values
    lying exactly on, or within rounding error of, the outermost edge still
    fall inside a half-open ``(lo, hi]`` cell.
    """
    lowest = int(np.floor(values.min() * scale)) - 1
    highest = int(np.ceil(values.max() * scale)) + 1
    return np.arange(lowest, highest + 1) / scale


# Number of grid cells per degree (100 for a 2-digit resolution).
scale = 10 ** weather_api_digit_resolution

# The edges are derived from the raw coordinates, not the rounded ones:
# rounding can move the extreme values towards the interior, which would
# leave the outermost stations outside every cell.
lat_range = _grid_edges(stations_data_df["lat"], scale)
lon_range = _grid_edges(stations_data_df["lon"], scale)

# Overall bounds of the grid.
min_lat, max_lat = float(lat_range[0]), float(lat_range[-1])
min_lon, max_lon = float(lon_range[0]), float(lon_range[-1])

# Every (lat interval, lon interval) combination is one candidate patch.
grid = product(
    pairwise(lat_range.tolist()),
    pairwise(lon_range.tolist()),
)

patches_with_stations = []

# The loop uses its own names so the overall bounds above are not clobbered.
for (lat_lo, lat_hi), (lon_lo, lon_hi) in grid:
    # A station belongs to the patch whose half-open box (lo, hi] contains it.
    stations_in_patch = stations_data_df.filter(
        (pl.col("lat") > lat_lo) & (pl.col("lat") <= lat_hi)
        & (pl.col("lon") > lon_lo) & (pl.col("lon") <= lon_hi)
    )
    if stations_in_patch.is_empty():
        continue
    patches_with_stations.append(
        {
            "min_lat": round(lat_lo, weather_api_digit_resolution),
            "max_lat": round(lat_hi, weather_api_digit_resolution),
            "min_lon": round(lon_lo, weather_api_digit_resolution),
            "max_lon": round(lon_hi, weather_api_digit_resolution),
            "count": stations_in_patch.height,
        }
    )
len(patches_with_stations)

    

In [None]:
# Number of calendar days that need one history request per patch.
#
# Taking `.days` of the raw timestamp difference truncates the partial first
# day (min_date starts at 08:45), which makes the count one too small and
# means the last calendar day of the window is never requested by a loop
# over `range(days)` starting at min_date.  Counting calendar dates avoids that.
time_diff = max_date - min_date
first_day = min_date.date()
# NOTE(review): max_date is treated as an exclusive upper bound, so a midnight
# max_date does not add a request for its own calendar day — confirm.
last_day = (max_date - datetime.timedelta(microseconds=1)).date()
days = (last_day - first_day).days + 1
total_requests = days * len(patches_with_stations)

total_requests

In [None]:
import requests

# HTTPS, because the API key travels in the query string.
url = "https://api.weatherapi.com/v1/history.json"

# Dry-run limits: by default only the first day of the first patch is fetched
# (a single request).  Set both to None to download everything.
max_patches = 1
max_days_per_patch = 1

# Seconds to wait for the API before giving up on a request.
request_timeout = 30

if not weather_api_key:
    raise RuntimeError("WEATHER_API_KEY is not set; refusing to call the weather API")

# The output directory may not exist on a fresh checkout.
os.makedirs("weather_data", exist_ok=True)

# One session for all requests so the connection is reused.
with requests.Session() as session:
    for patch in patches_with_stations[:max_patches]:
        lat, lon = patch["min_lat"], patch["min_lon"]
        # One file per patch, one JSON document per line (one line per day).
        # The `with` block closes the file even if a request fails.
        with open(f"weather_data/{lat},{lon}.json", "w", encoding="utf-8") as file:
            for day in range(days)[:max_days_per_patch]:
                date = min_date + datetime.timedelta(days=day)
                querystring = {
                    "key": weather_api_key,
                    "q": f"{lat},{lon}",
                    "dt": date.strftime("%Y-%m-%d"),
                }

                response = session.get(url, params=querystring, timeout=request_timeout)
                if not response.ok:
                    # Not raise_for_status(): its message contains the full
                    # request URL and would leak the API key into the output.
                    raise RuntimeError(
                        f"Weather API returned HTTP {response.status_code} "
                        f"for {lat},{lon} on {querystring['dt']}: {response.text[:200]}"
                    )
                json.dump(response.json(), file)
                file.write("\n")
        

In [None]:
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle

# Stations as points, with every populated patch drawn over them as a
# rectangle whose opacity scales with the number of stations it contains.
plt.scatter(stations_data_df["lon"], stations_data_df["lat"])
axes = plt.gca()

max_patch_count = max(entry["count"] for entry in patches_with_stations)

for patch in patches_with_stations:
    lower_left = (patch["min_lon"], patch["min_lat"])
    width = patch["max_lon"] - patch["min_lon"]
    height = patch["max_lat"] - patch["min_lat"]
    # The fullest patch is fully opaque; the others fade proportionally.
    rect = Rectangle(lower_left, width, height, alpha=patch["count"] / max_patch_count)
    axes.add_patch(rect)