In [1]:
import os
import json
import folium
import numpy as np
import polars as pl

from timezonefinder import TimezoneFinder
from typing import List, Dict
from datetime import datetime, timedelta
from pytz import timezone
from src.openmeteo import get_hist_temp
from src.models import MeteoSource, MeteoPredictor

In [2]:
# Count the DayNum entries across every eVED CSV file without loading them eagerly.
num_rows = (
    pl.scan_csv("data/eVED/*.csv")
    .select("DayNum")
    .count()
    .collect()
)

In [3]:
# Show the single count held in the 1x1 result frame as a plain Python int.
int(num_rows.to_numpy()[0, 0])

22436808

In [4]:
# Draw 50 random rows with a usable outside-air temperature from each eVED CSV file.
# The sample is seeded so that re-running the notebook reproduces the same rows.
temp_samples = [
    pl.read_csv(f"data/eVED/{file}",
                columns=["DayNum", "Timestamp(ms)", "Latitude[deg]", "Longitude[deg]", "OAT[DegC]"],
                schema_overrides={"DayNum": pl.Float64,
                                  "Timestamp(ms)": pl.Float64,
                                  "Latitude[deg]": pl.Float64,
                                  "Longitude[deg]": pl.Float64,
                                  "OAT[DegC]": pl.Float64})
    .filter(pl.col("OAT[DegC]").is_not_nan())
    .sample(n=50, seed=42)
    # Sorted for a stable file order; restricted to *.csv so stray files in the
    # directory (e.g. hidden OS files) are not fed to the CSV reader.
    for file in sorted(os.listdir("data/eVED/"))
    if file.endswith(".csv")
]

In [5]:
# Stack the per-file samples into one frame.
sample_df = pl.concat(temp_samples, rechunk=True)

In [6]:
# Display the combined sample frame.
sample_df

DayNum,Timestamp(ms),Latitude[deg],Longitude[deg],OAT[DegC]
f64,f64,f64,f64,f64
193.742697,170900.0,42.264098,-83.750708,20.0
191.556449,909000.0,42.282247,-83.746422,16.0
195.63393,106300.0,42.255647,-83.797987,7.0
191.485366,3100.0,42.229349,-83.738711,9.0
190.585671,189900.0,42.245103,-83.690102,15.0
…,…,…,…,…
334.593673,686800.0,42.237072,-83.681292,23.0
332.865063,534900.0,42.280174,-83.699263,19.0
332.865063,135700.0,42.288047,-83.692936,19.0
331.870983,2.8671e6,42.233888,-83.712032,26.0


In [7]:
# Same sample as a NumPy array (columns keep the order requested in read_csv).
sample_df.to_numpy()

array([[ 1.93742697e+02,  1.70900000e+05,  4.22640981e+01,
        -8.37507083e+01,  2.00000000e+01],
       [ 1.91556449e+02,  9.09000000e+05,  4.22822469e+01,
        -8.37464225e+01,  1.60000000e+01],
       [ 1.95633930e+02,  1.06300000e+05,  4.22556467e+01,
        -8.37979872e+01,  7.00000000e+00],
       ...,
       [ 3.32865063e+02,  1.35700000e+05,  4.22880467e+01,
        -8.36929358e+01,  1.90000000e+01],
       [ 3.31870983e+02,  2.86710000e+06,  4.22338878e+01,
        -8.37120319e+01,  2.60000000e+01],
       [ 3.34820863e+02,  9.30000000e+03,  4.22443258e+01,
        -8.37281917e+01,  2.10000000e+01]])

In [None]:
def create_folium_map() -> folium.Map:
    """Return a new, empty folium map using the "cartodbpositron" tiles with canvas rendering preferred."""
    return folium.Map(tiles="cartodbpositron", prefer_canvas=True)

In [None]:
def fit_bounds(folium_map: folium.Map, locations: np.ndarray) -> folium.Map:
    """Zoom ``folium_map`` to the bounding box of ``locations``.

    Args:
        folium_map: Map whose viewport is adjusted in place.
        locations: 2-D array whose first column is latitude and second is longitude.

    Returns:
        The same ``folium_map`` object, to allow chaining.
    """
    south_west = locations.min(axis=0)  # column-wise minima: (lat, lon)
    north_east = locations.max(axis=0)  # column-wise maxima: (lat, lon)
    folium_map.fit_bounds([[south_west[0], south_west[1]],
                           [north_east[0], north_east[1]]])
    return folium_map

In [None]:
def get_temp_locations(files: str = "data/eVED/*.csv") -> np.ndarray:
    """Build a 3x3 grid of (lat, lon) points spanning the matched GPS positions.

    The grid uses the minimum, midpoint and maximum of the matched latitude and
    longitude columns, so it contains the four corners, the four edge midpoints
    and the centre of the data's bounding box.

    Args:
        files: Path or glob pattern of the CSV files to scan.

    Returns:
        Array of shape (9, 2) holding (lat, lon) rows, ordered by latitude first
        (min, mid, max) and by longitude within each latitude.
    """
    # All four bounds are aggregated in a single lazy query so the CSV files are
    # scanned once instead of once for the minima and once for the maxima.
    # NOTE(review): "Matchted" is spelled as in the original query; presumably it
    # mirrors the CSV header — confirm before "fixing" it.
    bounds = (pl.scan_csv(files)
              .select([pl.col("Matchted Latitude[deg]").min().alias("lat_min"),
                       pl.col("Matchted Latitude[deg]").max().alias("lat_max"),
                       pl.col("Matched Longitude[deg]").min().alias("lon_min"),
                       pl.col("Matched Longitude[deg]").max().alias("lon_max")])
              .collect())
    lat_min, lat_max, lon_min, lon_max = bounds.to_numpy()[0]

    lats = (lat_min, (lat_min + lat_max) / 2, lat_max)
    lons = (lon_min, (lon_min + lon_max) / 2, lon_max)
    return np.array([(lat, lon) for lat in lats for lon in lons])

In [None]:
# Inspect the grid of locations computed from the default file pattern.
get_temp_locations()

In [None]:
# Date range (ISO strings) passed to get_hist_temp as start_date / end_date.
date_min, date_max = "2017-11-01", "2018-12-01"

In [None]:
def get_temp_sources(temp_locations: np.ndarray,
                     cache_dir: str = "./data/openmeteo") -> List[Dict]:
    """Load historical temperatures for each location, using an on-disk JSON cache.

    For location ``i`` the result is read from ``<cache_dir>/location_<i>.json``
    when that file exists; otherwise it is fetched with ``get_hist_temp`` for the
    module-level ``date_min``..``date_max`` range and written to that file.

    Args:
        temp_locations: Iterable of locations; each one is unpacked as the
            positional arguments of ``get_hist_temp``.
        cache_dir: Directory holding the cached JSON files (created if missing).

    Returns:
        One temperature payload per location, in the order of ``temp_locations``.

    Note:
        Cache files are keyed by position only, so a cache built for a different
        set of locations or date range is reused as-is; clear ``cache_dir`` when
        either changes.
    """
    os.makedirs(cache_dir, exist_ok=True)
    temp_sources = []
    # Iterate over the locations that were passed in; the argument used to be
    # ignored in favour of recomputing the grid from the full dataset.
    for i, location in enumerate(temp_locations):
        filename = os.path.join(cache_dir, f"location_{i}.json")
        if os.path.exists(filename):
            with open(filename, "r") as f:
                temperatures = json.load(f)
        else:
            temperatures = get_hist_temp(*location, start_date=date_min, end_date=date_max)
            with open(filename, "w") as f:
                json.dump(temperatures, f)
        temp_sources.append(temperatures)
    return temp_sources

In [None]:
# Fetch (or load from the JSON cache) the temperature history for every grid location.
temp_sources = get_temp_sources(get_temp_locations())

In [None]:
# Wrap each raw temperature payload in a MeteoSource.
sources = list(map(MeteoSource.from_temp_source, temp_sources))

In [None]:
# Build the predictor from all loaded sources.
predictor = MeteoPredictor(sources)

In [None]:
# Example prediction for a single point in time.
# A pytz zone must be attached with localize(): passing it as tzinfo= to the
# datetime constructor silently uses the zone's historical LMT offset.
predictor.predict(latitude=42.220268,
                  longitude=-83.739138,
                  timestamp=timezone('America/Detroit').localize(
                      datetime(year=2018, month=8, day=1, hour=10, minute=0)))

In [None]:
# Reference instant for DayNum == 1 (midnight, 1 Nov 2017, Detroit local time).
# localize() is required with pytz; tzinfo= in the constructor would pick the
# zone's LMT offset and shift the instant.
base_dt = timezone("America/Detroit").localize(datetime(year=2017, month=11, day=1))

In [None]:
# Inspect the reference datetime (including its UTC offset).
base_dt

In [None]:
# DayNum is 1-based, so subtract one day before offsetting from the reference instant.
day_offset = timedelta(days=43.921533287 - 1)
dt = base_dt + day_offset + timedelta(milliseconds=0)

In [None]:
# Inspect the derived datetime.
dt

In [None]:
# Predict at the derived datetime with power=1.
# NOTE(review): positional order presumably matches (latitude, longitude, timestamp)
# as used by the keyword call earlier in the notebook — confirm against MeteoPredictor.
predictor.predict(42.2867688889, -83.7323916667, dt, power=1)

In [None]:
# Look up the timezone for a sample coordinate from the dataset's area.
tf = TimezoneFinder()
tf.timezone_at(lat=42.2659958333, lng=-83.7391669444)

In [None]:
def make_datetime(day_num: float, ts: int) -> float:
    """Convert an eVED (DayNum, Timestamp(ms)) pair to a POSIX timestamp.

    Args:
        day_num: 1-based, possibly fractional day number; 1.0 is midnight on
            1 Nov 2017 in Detroit local time.
        ts: Milliseconds to add on top of ``day_num``.

    Returns:
        Seconds since the Unix epoch as a float.
    """
    # pytz zones must be attached with localize(). Passing the zone as tzinfo=
    # to the datetime constructor uses the zone's first (LMT) offset, which
    # shifts every resulting timestamp.
    base = timezone("America/Detroit").localize(datetime(year=2017, month=11, day=1))
    dt = base + timedelta(days=day_num - 1) + timedelta(milliseconds=ts)
    return dt.timestamp()

In [None]:
# Lazy frame with renamed columns plus a POSIX "timestamp" column (seconds).
# make_datetime(d, t) equals make_datetime(1, 0) + (d - 1) days + t milliseconds,
# so the epoch offset is computed once in Python and the per-row part is done
# with native polars arithmetic instead of a Python callback for every row.
dataset_start_ts = make_datetime(1, 0)
date_lf = (pl.scan_csv("data/eVED/*.csv")
           .select([pl.col("DayNum").alias("day_num"),
                    pl.col("Timestamp(ms)").alias("ts"),
                    pl.col("Matchted Latitude[deg]").alias("lat"),
                    pl.col("Matched Longitude[deg]").alias("lon"),
                    pl.col("OAT[DegC]").alias("temperature")])
           .with_columns((pl.lit(dataset_start_ts)
                          + (pl.col("day_num") - 1) * 86_400.0   # whole/fractional days -> seconds
                          + pl.col("ts") / 1_000.0)              # milliseconds -> seconds
                         .cast(pl.Float64)
                         .alias("timestamp"))
           )

In [None]:
# Materialise only the first 20 rows to sanity-check the derived timestamp column.
date_lf.head(20).collect()

In [None]:
# POSIX timestamp of the reference instant, for comparison with the derived column.
base_dt.timestamp()

In [None]:
# Scratch check: divide by 1000 twice (microseconds -> seconds, if the literal is in
# microseconds — TODO confirm where this value came from).
1509565006663200 / 1000 / 1000

In [None]:
# Minimum of the raw temperature column.
# NOTE(review): the following cells compare this column to the string "nan" and cast it
# to Float64, so it may be read as text here — confirm the dtype before trusting this value.
date_lf.select(pl.col("temperature")).min().collect()

In [None]:
# Maximum temperature after dropping rows whose raw value is the text "nan".
(
    date_lf
    .filter(pl.col("temperature").ne("nan"))
    .select(pl.col("temperature").cast(pl.Float64))
    .max()
    .collect()
)

In [None]:
# Minimum temperature after dropping rows whose raw value is the text "nan".
(
    date_lf
    .filter(pl.col("temperature").ne("nan"))
    .select(pl.col("temperature").cast(pl.Float64))
    .min()
    .collect()
)