In [31]:
import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import folium
import math
import datetime
import geopandas as gpd
import urllib.request
import requests
import json
import time
import openmeteo_requests
import requests_cache
from folium.plugins import MarkerCluster
from shapely.geometry import Polygon, Point
from geopy.geocoders import Nominatim
from tqdm import tqdm
from geopy.distance import geodesic
from retry_requests import retry
from shapely.wkt import loads

In [32]:
# Number of chunks the negative-sample frame is divided into before weather
# data is fetched chunk by chunk (see the np.array_split call further down).
NUM_SPLITS = 10

In [33]:
# Load both inputs with identical CSV settings:
#   grids            <- the enriched grid table
#   negative_samples <- the negative sample table
grids, negative_samples = (
    pd.read_csv(csv_path, sep=",", encoding="utf-8")
    for csv_path in (
        "data_bomen/grid_enriched_200.csv",
        "data_bomen/negative_samples_200.csv",
    )
)

In [34]:
# Parse the WKT text in 'geometry' into shapely objects. Values that are not
# strings (i.e. already parsed) are left untouched so the cell can be re-run
# without `loads` failing on a geometry object.
grids['geometry'] = grids['geometry'].apply(
    lambda geom: loads(geom) if isinstance(geom, str) else geom
)
grids_gdf = gpd.GeoDataFrame(grids, geometry='geometry')

# Centroid of every grid cell. The .x / .y accessors are vectorised and avoid
# calling GeoSeries.apply with a Python lambda per point.
centroids = grids_gdf['geometry'].centroid
grids_gdf['centroid'] = centroids
grids_gdf['middle_lat'] = centroids.y
grids_gdf['middle_lon'] = centroids.x

# Attach the centroid coordinates to each sample. 'grid_id' is matched against
# the row index of grids_gdf (right_index=True); with the default inner join,
# samples whose grid_id has no matching row are dropped.
# NOTE(review): this assumes the row index of the grid CSV is the grid id — confirm.
negative_samples = (
    negative_samples
    # Remove columns left over from a previous run of this cell (and the stray
    # CSV index column) so re-running does not create duplicate LAT/LON columns.
    .drop(columns=['LAT', 'LON', 'Unnamed: 0'], errors='ignore')
    .merge(grids_gdf[['middle_lat', 'middle_lon']], left_on='grid_id', right_index=True)
    .rename(columns={'middle_lat': 'LAT', 'middle_lon': 'LON'})
    .sort_index()
)

# Put the identifying columns first and keep every other column in its existing order.
leading_columns = ['Date', 'grid_id', 'LAT', 'LON']
columns_order = leading_columns + [col for col in negative_samples.columns if col not in leading_columns]
negative_samples = negative_samples[columns_order]

  result = super().apply(func, convert_dtype=convert_dtype, args=args, **kwargs)
  result = super().apply(func, convert_dtype=convert_dtype, args=args, **kwargs)


In [35]:
# Set up the Open-Meteo API client with an on-disk response cache and automatic
# retry on error. The three objects are layered: cache -> retry wrapper -> client.
# expire_after = -1: per the requests-cache docs this means cached responses never expire (verify).
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
# Up to 5 retries per request, with a back-off factor of 0.2 between attempts.
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
# Module-level client used by get_weather_data below.
openmeteo = openmeteo_requests.Client(session = retry_session)

In [36]:
# Hourly variables requested from the archive API. The response exposes them
# positionally in the order requested, so this single tuple drives both the
# request and the lookup of each variable in the response.
HOURLY_WEATHER_VARIABLES = (
    'temperature_2m',
    'relative_humidity_2m',
    'dew_point_2m',
    'apparent_temperature',
    'precipitation',
    'rain',
    'snowfall',
    'snow_depth',
    'weather_code',
    'pressure_msl',
    'surface_pressure',
    'cloud_cover',
    'cloud_cover_low',
    'cloud_cover_mid',
    'cloud_cover_high',
    'et0_fao_evapotranspiration',
    'vapour_pressure_deficit',
    'wind_speed_10m',
    'wind_speed_100m',
    'wind_direction_10m',
    'wind_direction_100m',
    'wind_gusts_10m',
    'soil_temperature_0_to_7cm',
    'soil_temperature_7_to_28cm',
    'soil_temperature_28_to_100cm',
    'soil_temperature_100_to_255cm',
    'soil_moisture_0_to_7cm',
    'soil_moisture_7_to_28cm',
    'soil_moisture_28_to_100cm',
    'soil_moisture_100_to_255cm',
)


def get_weather_data(df_split, client=None):
    """Return a copy of ``df_split`` with one column per hourly weather variable.

    For every row, one request is sent to the Open-Meteo archive endpoint for
    the row's coordinates and date, and the value at the row's hour is taken
    from each requested hourly series.

    Parameters
    ----------
    df_split : pandas.DataFrame
        Must contain the columns 'LAT', 'LON', 'Date' and 'Hour'. 'Date' is sent
        as both start and end date; 'Hour' is used as an integer index into the
        hourly values returned for that date.
    client : optional
        Object exposing ``weather_api(url, params=...)``. Defaults to the
        module-level ``openmeteo`` client.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index and rows as ``df_split`` plus the
        columns named in ``HOURLY_WEATHER_VARIABLES``. The input frame is not
        modified.

    Notes
    -----
    TODO: check what the API returns when no data is available (NaN?).
    NOTE(review): no timezone parameter is sent, so the hourly index follows
    the API's default timezone — confirm this matches the 'Hour' column.
    """
    if client is None:
        # Resolved at call time so the function can be defined before the client cell runs.
        client = openmeteo

    url = "https://archive-api.open-meteo.com/v1/archive"
    collected = {name: [] for name in HOURLY_WEATHER_VARIABLES}

    rows = zip(df_split['LAT'], df_split['LON'], df_split['Date'], df_split['Hour'])
    for latitude, longitude, date_str, hour in rows:
        params = {
            # Coordinates are sent with five decimals.
            "latitude": '{:.5f}'.format(latitude),
            "longitude": '{:.5f}'.format(longitude),
            # A single day is requested: start and end date are identical.
            "start_date": date_str,
            "end_date": date_str,
            "hourly": list(HOURLY_WEATHER_VARIABLES),
        }
        responses = client.weather_api(url, params=params)

        # Only one location is requested, so only the first response is used.
        hourly = responses[0].Hourly()

        hour_index = int(hour)
        for position, name in enumerate(HOURLY_WEATHER_VARIABLES):
            # Variables(position) follows the order of the requested "hourly" list.
            collected[name].append(hourly.Variables(position).ValuesAsNumpy()[hour_index])

    # assign() builds a new frame, so the caller's split is left untouched.
    return df_split.assign(**collected)

In [39]:
# Pause between chunks, in seconds.
# NOTE(review): presumably there to stay under an API rate limit — confirm the required interval.
PAUSE_BETWEEN_SPLITS_SECONDS = 60

# Split row positions rather than the DataFrame itself: np.array_split on an
# integer array yields the same chunk sizes without passing a DataFrame through
# NumPy, and .iloc with a position array gives each chunk as an independent frame.
split_positions = np.array_split(np.arange(len(negative_samples)), NUM_SPLITS)
df_splits = [negative_samples.iloc[positions] for positions in split_positions]

splits = []
for i, df_split in enumerate(df_splits):
    print(f"Getting data for subsplit {i}")
    splits.append(get_weather_data(df_split))

    # No pause is needed after the final chunk.
    if i < len(df_splits) - 1:
        print("Waiting for one minute...")
        time.sleep(PAUSE_BETWEEN_SPLITS_SECONDS)

# Stack the enriched chunks back into one frame, in the original row order.
negative_samples_with_weather = pd.concat(splits, axis=0)


  return bound(*args, **kwds)


Getting data for subsplit 0
Waiting for one minute...
Getting data for subsplit 1
Waiting for one minute...
Getting data for subsplit 2
Waiting for one minute...
Getting data for subsplit 3
Waiting for one minute...
Getting data for subsplit 4
Waiting for one minute...
Getting data for subsplit 5
Waiting for one minute...
Getting data for subsplit 6
Waiting for one minute...
Getting data for subsplit 7
Waiting for one minute...
Getting data for subsplit 8
Waiting for one minute...
Getting data for subsplit 9


In [40]:
# Display the negative samples together with the weather columns added above.
negative_samples_with_weather

Unnamed: 0,Date,grid_id,LAT,LON,Hour,has_tree,avg_height,avg_diameter,avg_year,Fraxinus,...,wind_direction_100m,wind_gusts_10m,soil_temperature_0_to_7cm,soil_temperature_7_to_28cm,soil_temperature_28_to_100cm,soil_temperature_100_to_255cm,soil_moisture_0_to_7cm,soil_moisture_7_to_28cm,soil_moisture_28_to_100cm,soil_moisture_100_to_255cm
0,2022-02-21,10312,52.375189,4.801572,12,True,11.559,0.271,1969.300,0.0,...,291.526550,81.720001,6.017500,6.667500,7.567500,9.767500,0.757,0.745,0.718,0.682
1,2017-09-13,10479,52.376991,4.789767,15,True,12.225,0.518,1986.417,6.0,...,256.519104,73.439995,16.043499,14.743500,15.393499,13.893499,0.692,0.678,0.500,0.602
2,2023-11-02,10141,52.373387,4.801572,17,True,13.371,0.420,1911.220,3.0,...,195.539978,78.479996,9.524000,11.074000,12.674000,13.924000,0.752,0.754,0.733,0.650
3,2022-11-17,8090,52.351766,4.804523,2,True,8.897,0.350,1979.137,0.0,...,130.717026,75.239998,9.237000,9.937000,11.937000,13.587001,0.746,0.649,0.557,0.605
4,2023-07-05,9992,52.371585,4.866500,11,True,13.560,0.543,1888.980,0.0,...,261.665710,70.919998,13.200000,16.350000,16.000000,10.850000,0.708,0.493,0.538,0.661
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1959,2023-07-06,2964,52.297712,4.816328,17,True,21.500,0.000,0.000,0.0,...,266.099579,27.359999,19.924002,17.224001,16.224001,12.724000,0.320,0.296,0.267,0.368
1960,2019-08-27,3025,52.297712,4.996356,21,True,9.047,0.000,1741.632,22.0,...,134.256027,11.520000,24.000000,21.500000,16.700001,12.900000,0.401,0.445,0.460,0.592
1961,2023-07-05,6044,52.330144,4.822231,9,True,20.500,0.950,0.000,0.0,...,268.903595,97.919998,13.100000,16.549999,16.000000,10.850000,0.733,0.474,0.538,0.661
1962,2023-07-05,13456,52.407621,4.996356,9,True,0.000,0.000,0.000,0.0,...,260.949341,104.399994,13.200000,16.750000,16.200001,11.000000,0.726,0.428,0.533,0.651
