In [1]:
import os
import json
import numpy as np
import polars as pl

from typing import List, Dict
from datetime import datetime, timedelta
from pytz import timezone
from src.openmeteo import get_hist_temp
from src.models import MeteoPredictor, MeteoSource

In [2]:
def get_temp_locations(files: str = "data/eVED/*.csv") -> np.ndarray:
    lf = (pl.scan_csv(files)
          .select([pl.col("Matchted Latitude[deg]").alias("lat"),
                   pl.col("Matched Longitude[deg]").alias("lon")]))
    loc_max = lf.max().collect().to_numpy()[0]
    loc_min = lf.min().collect().to_numpy()[0]
    loc_mid = (loc_min + loc_max) / 2
    locations = np.array([
        (loc_min[0], loc_min[1]),
        (loc_min[0], loc_mid[1]),
        (loc_min[0], loc_max[1]),
        (loc_mid[0], loc_min[1]),
        (loc_mid[0], loc_mid[1]),
        (loc_mid[0], loc_max[1]),
        (loc_max[0], loc_min[1]),
        (loc_max[0], loc_mid[1]),
        (loc_max[0], loc_max[1])
    ])
    return locations

In [3]:
def get_temp_sources(temp_locations: np.ndarray) -> List[Dict]:
    temp_sources = []
    date_min = "2017-11-01"
    date_max = "2018-12-01"
    for i, location in enumerate(get_temp_locations()):
        filename = f"./data/openmeteo/location_{i}.json"
        if not os.path.exists(filename):
            temperatures = get_hist_temp(*location, start_date=date_min, end_date=date_max)
            with open(filename, "w") as f:
                f.write(json.dumps(temperatures))
        else:
            with open(filename, "r") as f:
                temperatures = json.loads(f.read())
        temp_sources.append(temperatures)
    return temp_sources

In [4]:
temp_df = pl.read_csv("data/temperatures.csv")

In [5]:
temp_sources = get_temp_sources(get_temp_locations())
sources = [MeteoSource.from_temp_source(temp_src) for temp_src in temp_sources]
predictor = MeteoPredictor(sources)

In [6]:
base_dt = datetime(year=2017, month=11, day=1, tzinfo=timezone("America/Detroit"))
temp_arr = temp_df.to_numpy()
pred_arr = np.zeros(temp_arr.shape[0])
for i, (day_num, timestamp, lat, lon, oat, temp) in enumerate(temp_arr):
    # print(i, day_num, timestamp, lat, lon, oat, temp)
    dt = base_dt + timedelta(days=day_num - 1) + timedelta(milliseconds=timestamp)
    pred_arr[i] = predictor.predict(lat, lon, dt, power=10)

In [7]:
temp_df = temp_df.with_columns(pl.Series(name="Pred[DegC]", values=pred_arr))

In [8]:
temp_df

DayNum,Timestamp(ms),Latitude[deg],Longitude[deg],OAT[DegC],Temp[DegC],Pred[DegC]
f64,f64,f64,f64,f64,f64,f64
190.802671,195600.0,42.25117,-83.74965,16.0,22.336311,22.149925
194.521585,280700.0,42.297882,-83.753558,9.0,15.830457,15.614344
195.594727,782000.0,42.308659,-83.688095,13.0,20.698133,20.797599
194.89829,602500.0,42.277627,-83.69994,7.0,14.527369,14.377369
192.529954,146400.0,42.27274,-83.698013,16.0,7.807648,7.657652
…,…,…,…,…,…,…
331.957463,381000.0,42.229452,-83.739311,23.0,10.31699,10.315212
336.859603,2500.0,42.232183,-83.738776,19.0,14.505721,14.518553
330.73581,187900.0,42.244416,-83.683725,17.0,16.959534,16.990064
332.714107,5900.0,42.254719,-83.726068,19.0,17.707451,17.866367


In [9]:
diff_arr = temp_df.select(pl.col("Temp[DegC]", "Pred[DegC]")).to_numpy()

In [10]:
rmse = float(np.sqrt(np.mean((diff_arr[:, 0] - diff_arr[:, 1]) ** 2)))

In [11]:
rmse

0.15234904013517364