In [6]:
import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from geopy.geocoders import Nominatim
from tqdm import tqdm
from geopy.distance import geodesic
import folium
from folium.plugins import MarkerCluster
import math
import datetime
import geopandas as gpd
import urllib.request
import requests
import json
import openmeteo_requests
import requests_cache
from shapely.geometry import Polygon, Point
from retry_requests import retry
from shapely.wkt import loads

In [20]:
NUM_SPLITS = 10

In [7]:
# grid df
# sample df

grids = pd.read_csv("data_bomen/grid_enriched_200.csv", sep=",", encoding="utf-8")
samples = pd.read_csv("data_bomen/negative_samples_200.csv", sep=",", encoding="utf-8")

In [10]:
grids['geometry'] = grids['geometry'].apply(lambda x: loads(x))
grids_gdf = gpd.GeoDataFrame(grids, geometry='geometry')

grids_gdf['centroid'] = grids_gdf['geometry'].centroid
grids_gdf['middle_lat'] = grids_gdf['centroid'].apply(lambda point: point.y)
grids_gdf['middle_lon'] = grids_gdf['centroid'].apply(lambda point: point.x)

negative_samples = samples.merge(grids_gdf[['middle_lat', 'middle_lon']], left_on='grid_id', right_index=True)

negative_samples.rename(columns={'middle_lat': 'LAT', 'middle_lon': 'LON'}, inplace=True)
columns_order = ['Date', 'grid_id', 'LAT', 'LON'] + [col for col in negative_samples.columns if col not in ['Date', 'grid_id', 'LAT', 'LON']]
negative_samples = negative_samples[columns_order]

negative_samples = negative_samples.drop('Unnamed: 0', axis=1, errors='ignore')

negative_samples = negative_samples.sort_index()

  result = super().apply(func, convert_dtype=convert_dtype, args=args, **kwargs)
  result = super().apply(func, convert_dtype=convert_dtype, args=args, **kwargs)


In [11]:
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

In [17]:
def get_weather_data(df_split):
    # NOTE: order of weather vars matters for retrieving correct data from API
    # TODO: check: what does api do when nothing available? NAN? and necessary to set up connection for every df split?
    weather_variables = [
        temperature_2m_list = [],
        relative_humidity_2m_list = [],
        dew_point_2m_list = [],
        apparent_temperature_list = [],
        precipitation_list = [],
        rain_list = [],
        snowfall_list = [],
        snow_depth_list = [],
        weather_code_list = [],
        pressure_msl_list = [],
        surface_pressure_list = [],
        cloud_cover_list = [],
        cloud_cover_low_list = [],
        cloud_cover_mid_list = [],
        cloud_cover_high_list = [],
        et0_fao_evapotranspiration_list = [],
        vapour_pressure_deficit_list = [],
        wind_speed_10m_list = [],
        wind_speed_100m_list = [],
        wind_direction_10m_list = [],
        wind_direction_100m_list = [],
        wind_gusts_10m_list = [],
        soil_temperature_0_to_7cm_list = [],
        soil_temperature_7_to_28cm_list = [],
        soil_temperature_28_to_100cm_list = [],
        soil_temperature_100_to_255cm_list = [],
        soil_moisture_0_to_7cm_list = [],
        soil_moisture_7_to_28cm_list = [],
        soil_moisture_28_to_100cm_list = [],
        soil_moisture_100_to_255cm_list = []
    ]

    cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
    retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
    openmeteo = openmeteo_requests.Client(session = retry_session)
    for i, row in df_split.iterrows():
        latitude = row['LAT']
        longitude = row['LON']
        dateStr = row['Date']
        timeStr = row['Hour']

        latitude='{:.5f}'.format(latitude)
        longitude='{:.5f}'.format(longitude)

        url = "https://archive-api.open-meteo.com/v1/archive"
        params = {
            "latitude": latitude,
            "longitude": longitude,
            "start_date": dateStr,
            "end_date": dateStr,
            "hourly": ["temperature_2m", "relative_humidity_2m", "dew_point_2m", "apparent_temperature", "precipitation", "rain", "snowfall", "snow_depth", "weather_code", "pressure_msl", "surface_pressure", "cloud_cover", "cloud_cover_low", "cloud_cover_mid", "cloud_cover_high", "et0_fao_evapotranspiration", "vapour_pressure_deficit", "wind_speed_10m", "wind_speed_100m", "wind_direction_10m", "wind_direction_100m", "wind_gusts_10m", "soil_temperature_0_to_7cm", "soil_temperature_7_to_28cm", "soil_temperature_28_to_100cm", "soil_temperature_100_to_255cm", "soil_moisture_0_to_7cm", "soil_moisture_7_to_28cm", "soil_moisture_28_to_100cm", "soil_moisture_100_to_255cm"]
        }
        responses = openmeteo.weather_api(url, params=params)

        # Process first location. Add a for-loop for multiple locations or weather models
        response = responses[0]

        # Process hourly data. The order of variables needs to be the same as requested.
        hourly = response.Hourly()

        # Get data for each var
        for i, var_list in enumerate(weather_variables):
            var_list.append(hourly.Variables(idx).ValuesAsNumpy()[int(timeStr)])

    df_split['temperature_2m'] = temperature_2m_list
    df_split['relative_humidity_2m'] = relative_humidity_2m_list
    df_split['dew_point_2m'] = dew_point_2m_list
    df_split['apparent_temperature'] = apparent_temperature_list
    df_split['precipitation'] = precipitation_list
    df_split['rain'] = rain_list
    df_split['snowfall'] = snowfall_list
    df_split['snow_depth'] = snow_depth_list
    df_split['weather_code'] = weather_code_list
    df_split['pressure_msl'] = pressure_msl_list
    df_split['surface_pressure'] = surface_pressure_list
    df_split['cloud_cover'] = cloud_cover_list
    df_split['cloud_cover_low'] = cloud_cover_low_list
    df_split['cloud_cover_mid'] = cloud_cover_mid_list
    df_split['cloud_cover_high'] = cloud_cover_high_list
    df_split['et0_fao_evapotranspiration'] = et0_fao_evapotranspiration_list
    df_split['vapour_pressure_deficit'] = vapour_pressure_deficit_list
    df_split['wind_speed_10m'] = wind_speed_10m_list
    df_split['wind_speed_100m'] = wind_speed_100m_list
    df_split['wind_direction_10m'] = wind_direction_10m_list
    df_split['wind_direction_100m'] = wind_direction_100m_list
    df_split['wind_gusts_10m'] = wind_gusts_10m_list
    df_split['soil_temperature_0_to_7cm'] = soil_temperature_0_to_7cm_list
    df_split['soil_temperature_7_to_28cm'] = soil_temperature_7_to_28cm_list
    df_split['soil_temperature_28_to_100cm'] = soil_temperature_28_to_100cm_list
    df_split['soil_temperature_100_to_255cm'] = soil_temperature_100_to_255cm_list
    df_split['soil_moisture_0_to_7cm'] = soil_moisture_0_to_7cm_list
    df_split['soil_moisture_7_to_28cm'] = soil_moisture_7_to_28cm_list
    df_split['soil_moisture_28_to_100cm'] = soil_moisture_28_to_100cm_list
    df_split['soil_moisture_100_to_255cm'] = soil_moisture_100_to_255cm_list
    
    return df_split

In [21]:
negative_samples.columns

Index(['Date', 'grid_id', 'LAT', 'LON', 'Hour', 'has_tree', 'avg_height',
       'avg_diameter', 'avg_year', 'Fraxinus', 'Salix', 'Alnus', 'Quercus',
       'Tilia', 'Acer', 'Populus', 'Betula', 'Prunus', 'Platanus', 'Malus',
       'Robinia', 'Crataegus', 'Ulmus', 'Carpinus', 'Overig', 'Onbekend'],
      dtype='object')

In [19]:
df_splits = np.array_split(negative_samples, NUM_SPLITS)

splits = []
for i, df_split in enumerate(df_splits):
    print(f"Getting data for subplit {i}")
    splits.append(get_weather_data(df_split))


negative_samples_with_weather = pd.concat(splits, axis=1)


Getting data for subplit 0


  return bound(*args, **kwds)


NameError: name 'row' is not defined