In [34]:
import pickle
import numpy
import requests_cache
import openmeteo_requests
import pandas as pd
from pathlib import Path
from retry_requests import retry

In [35]:
# Directory holding the pickled tree models.
MODEL_DIR = Path("models/trees/")
# CSV with one row per grid cell (tree features plus a grid_id column).
GRID_ENRICHED_PATH = Path("data_bomen/grid_by_hand_enriched.csv")

# (latitude, longitude) as strings. The weather request sends LOCATION[0] as
# "latitude" and LOCATION[1] as "longitude", so latitude MUST come first.
# The previous ordering (longitude first) requested weather for ~4.89N, 52.37E.
LOCATION = ("52.369496", "4.890439")

# Columns handed to the classifier, in this order: tree features first,
# then the hourly weather variables.
FEATURE_COLS = [
    'avg_height', 'avg_year', 'has_tree',
    'Fraxinus', 'Salix', 'Alnus', 'Quercus', 'Tilia', 'Acer', 'Populus',
    'Betula', 'Prunus', 'Platanus', 'Malus', 'Robinia', 'Crataegus',
    'Ulmus', 'Carpinus', 'Overig', 'Onbekend',
    'temperature_2m', 'relative_humidity_2m', 'dew_point_2m',
    'apparent_temperature', 'precipitation', 'rain', 'snowfall',
    'snow_depth', 'weather_code', 'pressure_msl', 'surface_pressure',
    'wind_speed_10m', 'wind_direction_10m', 'wind_gusts_10m',
]

# Number of hourly forecast steps a prediction is made for.
HOURS_TO_PREDICT = 8

In [36]:
# Load the enriched grid (one row per grid cell) used as the base feature table.
grid_df = pd.read_csv(
    GRID_ENRICHED_PATH,
    sep=",",
    encoding="utf-8",
)

In [37]:
def get_api_connection(self):
    """Attach a cached, retrying Open-Meteo client to ``self``.

    Sets three attributes on ``self``: ``cache_session``, ``retry_session``
    and ``openmeteo``. Nothing is returned.
    """
    # Setup the Open-Meteo API client with cache and retry on error.
    # Forecast data changes over time, so cached responses expire after one
    # hour; expire_after=-1 would keep serving the first forecast ever fetched.
    # NOTE(review): entries already stored in '.cache' without an expiry may
    # need to be cleared once - confirm against the requests-cache version in use.
    self.cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
    self.retry_session = retry(self.cache_session, retries = 5, backoff_factor = 0.2)
    self.openmeteo = openmeteo_requests.Client(session = self.retry_session)

In [38]:
def request_weather(vars):
    """Request a one-day hourly forecast for LOCATION from the Open-Meteo API.

    Args:
        vars: Mapping whose keys are the hourly variable names to request.
            Only the keys are read, in iteration order; values are ignored.

    Returns:
        The first element of the list returned by ``openmeteo.weather_api``.
    """
    # Forecast data changes over time, so cached responses expire after one
    # hour; expire_after=-1 would keep serving the first forecast ever fetched.
    # NOTE(review): entries already stored in '.cache' without an expiry may
    # need to be cleared once - confirm against the requests-cache version in use.
    cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
    retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
    openmeteo = openmeteo_requests.Client(session = retry_session)

    url = "https://api.open-meteo.com/v1/forecast"
    params = {
        # LOCATION must therefore be ordered (latitude, longitude).
        "latitude": LOCATION[0],
        "longitude": LOCATION[1],
        "hourly": list(vars),
        "forecast_days" : 1,
    }
    responses = openmeteo.weather_api(url, params=params)

    # A single location was requested, so only the first response is used.
    return responses[0]

In [39]:
# Hourly Open-Meteo variables to request. Every value starts as None; the key
# order matters because values are later looked up by position.
# NOTE: this name shadows the built-in `vars`.
vars = dict.fromkeys([
    'temperature_2m',
    'relative_humidity_2m',
    'dew_point_2m',
    'apparent_temperature',
    'precipitation',
    'rain',
    'snowfall',
    'snow_depth',
    'weather_code',
    'pressure_msl',
    'surface_pressure',
    # Currently disabled:
    # 'cloud_cover',
    # 'cloud_cover_low',
    # 'cloud_cover_mid',
    # 'cloud_cover_high',
    # 'et0_fao_evapotranspiration',
    # 'vapour_pressure_deficit',
    'wind_speed_10m',
    'wind_speed_100m',
    'wind_direction_10m',
    'wind_direction_100m',
    'wind_gusts_10m',
    # Currently disabled:
    # 'soil_temperature_0_to_7cm',
    # 'soil_temperature_7_to_28cm',
    # 'soil_temperature_28_to_100cm',
    # 'soil_temperature_100_to_255cm',
    # 'soil_moisture_0_to_7cm',
    # 'soil_moisture_7_to_28cm',
    # 'soil_moisture_28_to_100cm',
    # 'soil_moisture_100_to_255cm',
])
# Fetch the hourly forecast for the variables named by the keys of `vars`.
response = request_weather(vars=vars)

In [40]:
def extract_weather_vars(
    response,
    vars
):
    """Collect the hourly values of every requested variable from a response.

    Variables are read from the response by position, so the iteration order
    of ``vars`` must match the order in which the variables were requested.

    Args:
        response: Object exposing ``Hourly()``, whose result exposes
            ``Variables(index).ValuesAsNumpy()``.
        vars: Mapping (or iterable) of the requested variable names. It is
            only read; the caller's object is no longer modified in place.

    Returns:
        A new dict mapping each variable name to the values returned by
        ``ValuesAsNumpy()`` for that position.
    """
    hourly = response.Hourly()

    # Fetch every requested variable (not just part of them), by position.
    return {
        name: hourly.Variables(index).ValuesAsNumpy()
        for index, name in enumerate(vars)
    }

In [41]:
# Map each requested variable name to its hourly values from the response.
weather_vars = extract_weather_vars(response=response, vars=vars)

In [42]:
def make_prediction(
    grid_df,
    clf
):
    """Predict one value per grid cell for each of the first HOURS_TO_PREDICT hours.

    Reads three notebook-level globals: ``weather_vars`` (variable name ->
    sequence of hourly values), ``FEATURE_COLS`` and ``HOURS_TO_PREDICT``.

    Args:
        grid_df: DataFrame with a ``grid_id`` column plus the non-weather
            columns listed in FEATURE_COLS. It is copied, never modified.
        clf: Estimator exposing ``predict``.

    Returns:
        dict mapping each grid_id to a list holding one prediction per hour,
        in hour order. (Previously the function built this dict but returned
        nothing.)
    """
    pred_dict = {grid_id: [] for grid_id in grid_df.grid_id}

    for hour in range(HOURS_TO_PREDICT):
        grid = grid_df.copy()
        # Broadcast this hour's weather value of each variable to every cell.
        for var, values in weather_vars.items():
            grid[var] = values[hour]
        predictions = clf.predict(grid[FEATURE_COLS])
        # Pair ids with predictions positionally; avoids iterrows and no longer
        # rebinds the hour loop variable.
        for grid_id, prediction in zip(grid['grid_id'], predictions):
            pred_dict[grid_id].append(prediction)

    return pred_dict

In [43]:
class makeTreePrediction():
    """Per-grid-cell class probabilities from tree features and an hourly forecast.

    On construction the pickled classifier and the grid CSV are loaded.
    ``get_predictions`` then fetches the forecast, adds each hour's weather
    values to a copy of the grid and calls ``predict_proba`` once per hour.
    """

    # Hourly Open-Meteo variables requested from the forecast API. The order
    # matters: values are read back from the response by position.
    WEATHER_VARIABLES = (
        'temperature_2m',
        'relative_humidity_2m',
        'dew_point_2m',
        'apparent_temperature',
        'precipitation',
        'rain',
        'snowfall',
        'snow_depth',
        'weather_code',
        'pressure_msl',
        'surface_pressure',
        'wind_speed_10m',
        'wind_speed_100m',
        'wind_direction_10m',
        'wind_direction_100m',
        'wind_gusts_10m',
    )

    # Forecast data changes over time, so cached API responses expire after an
    # hour; expire_after=-1 would keep serving the first forecast ever fetched.
    CACHE_EXPIRE_SECONDS = 3600

    def __init__(
        self,
        model_name,
        grid_path,
        hours_to_predict = 8,
        model_dir = None
    ):
        """Load the model and the grid.

        Args:
            model_name: File name of the pickled model inside ``model_dir``.
            grid_path: Path of the grid CSV (comma separated, UTF-8).
            hours_to_predict: Number of hourly steps to predict.
            model_dir: Directory holding the model; defaults to the
                module-level ``MODEL_DIR``, resolved when the instance is
                created.
        """
        if model_dir is None:
            model_dir = MODEL_DIR
        model_path = Path(model_dir) / model_name

        self.clf = self.load_model(model_path)

        self.grid_df = pd.read_csv(grid_path, sep=",", encoding="utf-8")

        self.hours_to_predict = hours_to_predict

    def get_predictions(
        self,
    ):
        """Fetch the forecast and return the per-grid predictions.

        Returns:
            dict mapping each grid_id to a list with one ``predict_proba``
            row per predicted hour.
        """
        requested = dict.fromkeys(self.WEATHER_VARIABLES)
        response = self.request_weather(vars=requested)
        weather_vars = self.extract_weather_vars(response=response, vars=requested)

        return self.make_prediction(grid_df=self.grid_df, clf=self.clf, weather_vars=weather_vars)

    def load_model(self, model_path):
        """Unpickle and return the classifier stored at ``model_path``."""
        # SECURITY: pickle.load can execute arbitrary code; only load model
        # files that come from a trusted source.
        with open(model_path, "rb") as f:
            return pickle.load(f)

    def request_weather(self, vars):
        """Request a one-day hourly forecast for LOCATION from Open-Meteo.

        Args:
            vars: Mapping whose keys are the hourly variable names to request;
                only the keys are read, in iteration order.

        Returns:
            The first element of the list returned by ``weather_api``.

        Raises:
            Whatever the session/client construction or the API call raises.
            Errors are no longer swallowed: the former bare ``except`` printed
            a message and then failed with a NameError on ``openmeteo``.
        """
        # NOTE(review): entries already stored in '.cache' without an expiry
        # may need to be cleared once - confirm against the requests-cache
        # version in use.
        cache_session = requests_cache.CachedSession('.cache', expire_after = self.CACHE_EXPIRE_SECONDS)
        retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
        openmeteo = openmeteo_requests.Client(session = retry_session)

        url = "https://api.open-meteo.com/v1/forecast"
        params = {
            # LOCATION must therefore be ordered (latitude, longitude).
            "latitude": LOCATION[0],
            "longitude": LOCATION[1],
            "hourly": list(vars),
            "forecast_days" : 1,
        }
        responses = openmeteo.weather_api(url, params=params)

        # A single location was requested, so only the first response is used.
        return responses[0]

    def extract_weather_vars(
        self,
        response,
        vars
    ):
        """Collect the hourly values of every requested variable.

        Variables are read from the response by position, so the iteration
        order of ``vars`` must match the order used in the request.

        Args:
            response: Object exposing ``Hourly()``, whose result exposes
                ``Variables(index).ValuesAsNumpy()``.
            vars: Mapping (or iterable) of the requested variable names; it is
                only read, not modified.

        Returns:
            A new dict mapping each variable name to its hourly values.
        """
        hourly = response.Hourly()

        # Fetch every requested variable (not just part of them), by position.
        return {
            name: hourly.Variables(index).ValuesAsNumpy()
            for index, name in enumerate(vars)
        }

    def make_prediction(
        self,
        grid_df,
        clf,
        weather_vars
    ):
        """Run ``clf.predict_proba`` on the grid once per predicted hour.

        Reads the module-level ``FEATURE_COLS`` and ``self.hours_to_predict``.

        Args:
            grid_df: DataFrame with a ``grid_id`` column plus the non-weather
                columns of FEATURE_COLS. It is copied, never modified.
            clf: Estimator exposing ``classes_`` and ``predict_proba``.
            weather_vars: Mapping of variable name to a sequence of hourly
                values with at least ``self.hours_to_predict`` entries.

        Returns:
            dict mapping each grid_id to a list with one ``predict_proba``
            row per hour, in hour order.
        """
        pred_dict = {grid_id: [] for grid_id in grid_df.grid_id}
        print(f"classes are {clf.classes_}")

        # One working copy is enough: the weather columns are overwritten for
        # every hour and the tree columns never change.
        grid = grid_df.copy(deep=True)
        # NOTE(review): hours are taken from the start of the hourly arrays;
        # presumably these begin at 00:00 of the forecast day rather than at
        # the current hour - confirm this matches the intended horizon.
        for hour in range(self.hours_to_predict):
            # Broadcast this hour's weather value of each variable to every cell.
            for var, values in weather_vars.items():
                grid[str(var)] = values[hour]
            preds = clf.predict_proba(grid[FEATURE_COLS])
            for id_, pred in zip(grid['grid_id'], preds):
                pred_dict[id_].append(pred)
        return pred_dict

In [44]:
# NOTE: this import rebinds the name `makeTreePrediction`, so from here on the
# class from the TreeInference module is used instead of the class defined
# earlier in this notebook.
from TreeInference import makeTreePrediction

predictor = makeTreePrediction(
    model_name="xgboost_md15_sub90.pkl",
    grid_path=GRID_ENRICHED_PATH,
    hours_to_predict=8,
)
pred_dict = predictor.get_predictions()


In [45]:
# Inspect the list of per-hour predictions stored for grid cell 18.
pred_dict[18]

[1, 1, 1, 1, 1, 1, 1, 1]

In [46]:
# Reload the enriched grid with pandas' default CSV settings.
grid_df = pd.read_csv(filepath_or_buffer=GRID_ENRICHED_PATH)

In [47]:
# Show only the grid cells whose has_tree flag is set.
grid_df.loc[grid_df["has_tree"]]

Unnamed: 0.1,Unnamed: 0,grid_id,service_area,geometry,has_tree,avg_height,avg_diameter,avg_year,Populus,Acer,...,Onbekend,Quercus,Robinia,Betula,Carpinus,Overig,Crataegus,Prunus,Platanus,Malus
2464,5167,5167,Amstelveen,POLYGON ((4.8222461292952925 52.28399355001409...,True,10.250,0.647,1978.333,14.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2473,5176,5176,Amstelveen,POLYGON ((4.8222461292952925 52.30020976623032...,True,11.304,0.309,1988.560,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2478,5181,5181,Amstelveen,POLYGON ((4.8222461292952925 52.30921877523934...,True,12.786,0.000,1993.023,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2582,5304,5304,Amstelveen,POLYGON ((4.825193858379762 52.305615171635736...,True,5.609,0.158,2014.000,11.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2584,5306,5306,Amstelveen,POLYGON ((4.825193858379762 52.309218775239344...,True,3.000,0.247,1966.035,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8988,16546,16546,Weesp,POLYGON ((5.090489475981976 52.291200757221304...,True,14.108,0.000,1935.243,11.0,0.0,...,0.0,3.0,0.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0
8989,16547,16547,Weesp,"POLYGON ((5.090489475981976 52.29300255902311,...",True,13.649,0.000,1988.865,11.0,0.0,...,0.0,0.0,0.0,3.0,0.0,3.0,0.0,0.0,0.0,0.0
8990,16548,16548,Weesp,"POLYGON ((5.090489475981976 52.29480436082491,...",True,13.081,0.000,1960.623,22.0,4.0,...,0.0,3.0,0.0,4.0,0.0,4.0,0.0,1.0,0.0,0.0
8991,16549,16549,Weesp,POLYGON ((5.090489475981976 52.296606162626716...,True,8.183,0.000,2003.260,10.0,6.0,...,0.0,0.0,0.0,28.0,0.0,2.0,0.0,1.0,0.0,0.0


In [48]:
# List the grid ids for which predictions were produced.
pred_dict.keys()

dict_keys([18, 19, 20, 140, 141, 142, 143, 144, 145, 146, 147, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 880, 882, 883, 884, 885, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1155, 1156, 1233, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, 1273, 1274, 1275, 1276, 1277,

In [49]:
# Inspect the per-hour predictions for grid cell 5306 (a cell listed with has_tree == True above).
pred_dict[5306]

[0, 0, 0, 0, 0, 0, 0, 0]

In [50]:
# Load the "mixed" model from the configured model directory (same file as
# before, but built from MODEL_DIR instead of a second hard-coded path).
# SECURITY: pickle.load can execute arbitrary code; only load trusted files.
with open(MODEL_DIR / "xgboost_md15_sub90_mixed.pkl", "rb") as f:
    clf = pickle.load(f)

# Class labels the loaded classifier reports.
clf.classes_

array([0, 1])