In [66]:
import numpy as np
import pygrib

In [113]:
path = "./data/all-canada-2023.grib"

all_canada_2023_grbs = pygrib.open(path)

In [114]:
all_canada_2024_01_01_grbs = pygrib.open("./data/all-canada-2024-01-01.grib")

In [118]:
all_canada_2023_grbs.seek(0)
grb = all_canada_2023_grbs.read(1)[0]

grb.values.shape
lats, lons = grb.latlons()

(np.min(lats), np.max(lats), np.min(lons), np.max(lons))

(41.0, 58.0, -137.0, 308.0)

In [80]:
ca_postal_codes = json.loads(open("./ca-postal-codes.json", "r").read())

def get_lat_lon(postal_code):
    info = ca_postal_codes[postal_code[:3]]
    return info["lat"], info["lng"]

get_lat_lon("K2A")

(45.3805, -75.7636)

In [34]:
import math


def relative_humidity(drybulb_temp_kelvin, dewpoint_temp_kelvin):
  # August-Roche-Magnus formula
  #
  # https://bmcnoldy.earth.miami.edu/Humidity.html
  # https://en.wikipedia.org/wiki/Clausius%E2%80%93Clapeyron_relation#Meteorology_and_climatology
  # 100*(EXP((17.625*TD)/(243.04+TD))/EXP((17.625*T)/(243.04+T)))
  # (Temperatures from this formula are in celcius)
  t = drybulb_temp_kelvin - 273.15
  td = dewpoint_temp_kelvin - 273.15
  return 100 * math.exp((17.625 * td) / (243.04 + td)) / math.exp((17.625 * t) / (243.04 + t))

print(relative_humidity(273.15 + (90 - 32) / 1.8, 273.15 + (63 - 32) / 1.8))


40.783503702475876


In [40]:
import requests

def get_elevation(lat, lon):
    url = f"https://api.open-elevation.com/api/v1/lookup?locations={lat},{lon}"
    response = requests.get(url)
    data = response.json()
    elevation = data["results"][0]["elevation"]
    return elevation

elevation = get_elevation(lat, lon)  # m

In [82]:
def get_timezone(lat, lon):
    response = requests.get(f"http://timezonefinder.michelfe.it/api/0_{lon}_{lat}")
    location = response.json()
    timezone_str = location["tz_name"]
    return timezone_str

get_timezone(*get_lat_lon("K2A"))

'America/Toronto'

In [58]:
%load_ext pyinstrument


The pyinstrument extension is already loaded. To reload it, use:
  %reload_ext pyinstrument


In [104]:
all_postal_codes = list(json.loads(open("ca-postal-codes.json").read()).keys())
len(all_postal_codes)

1651

In [121]:
%%pyinstrument

def get_idx_for_lat_lon(lat, lon):
    # Find the grid point with closest lat/lon
    abslat = np.abs(lats-lat)
    abslon = np.abs(lons-lon)
    c = np.sqrt(np.add(np.square(abslat), np.square(abslon)))
    idx = np.argmin(c)
    return idx


# Took 2m 9.7s
postal_codes = ["K2A", "V5K", "H3H", "R3T", "T6G"]

# Took 7m 58.8s
# postal_codes = all_postal_codes[:20]

# Roughly 24s/postal

lat_lons = [get_lat_lon(code) for code in postal_codes]
idx = [get_idx_for_lat_lon(lat, lon) for (lat, lon) in lat_lons]

era5_data = {}

# We index all of the data for every postal code up-front because we want to
# avoid doing multiple scans of the 2GB file
for grbs in [all_canada_2023_grbs, all_canada_2024_01_01_grbs]:
    grbs.seek(0)
    for grb in grbs.read():
        year = str(grb.dataDate)[:4]
        month = str(grb.dataDate)[4:6]
        day = str(grb.dataDate)[6:]

        time = '{:04d}'.format(grb.dataTime)
        hour = time[:2]
        minute = time[2:]

        dt = '{}-{}-{}T{}:{}:00+00:00'.format(year, month, day, hour, minute)
        for i, postal_code in enumerate(postal_codes):
            era5_data.setdefault(postal_code, {}).setdefault(dt, {})[grb.name] = grb.values.flat[idx[i]]

In [127]:
list(era5_data['K2A'].keys())[-1]

'2024-01-01T23:00:00+00:00'

In [128]:
%%pyinstrument
wind_u_key = '10 metre U wind component'
wind_v_key = '10 metre V wind component'
dewpoint_temp_key = '2 metre dewpoint temperature'
temp_key = '2 metre temperature'
cloud_cover_key = 'Total cloud cover'

timezones = [get_timezone(lat, lon) for (lat, lon) in lat_lons]
elevations = [get_elevation(lat, lon) for (lat, lon) in lat_lons]

# With 5 postal codes, this took 1m 11.0s
#
# Roughly 14s/postal
#
# Total with above: 38s/postal
#
# End result: 1651 postal codes * 38s/postal = ~17 hours
#
# Worth doing this as the last step after iterating with a small subset of postal codes

results_by_postal_code = {}
for i, postal_code in enumerate(postal_codes):
    results_by_postal_code[postal_code] = {
        "postalCode": postal_code,
        "timezoneName": timezones[i],
        "elevationMeters": elevations[i],
    }
    results = results_by_postal_code[postal_code]["weather"] = []

    for dt, vs in era5_data[postal_code].items():
        dt_obj = datetime.fromisoformat(dt)

        solar_altitude = pysolar.solar.get_altitude(lat, lon, dt_obj, elevations[i])
        solar_radiation = pysolar.radiation.get_radiation_direct(dt_obj, solar_altitude) if solar_altitude > 0 else 0

        temp = vs[temp_key]
        dewpoint_temp = vs[dewpoint_temp_key]
        rh = relative_humidity(temp, dewpoint_temp)

        wind_u = vs[wind_u_key]
        wind_v = vs[wind_v_key]
        wind_speed_10m = math.sqrt(wind_u * wind_u + wind_v * wind_v)
        wind_speed_2m = wind_speed_10m * wind_speed_10m_to_2m_ratio

        cloud_cover = vs[cloud_cover_key]

        row = {
            "datetime": dt_obj.isoformat(),
            "outsideAirTempF": float("{:.1f}".format((temp - 273.15) * 1.8 + 32)),
            "relativeHumidityPercent": float("{:.1f}".format(rh)),
            "windSpeedMph": float("{:.1f}".format(wind_speed_2m * 2.237)),
            "cloudCoverPercent": float("{:.1f}".format(cloud_cover * 100.0)),
            "solarIrradiance": {
                "altitudeDegrees": float("{:.1f}".format(solar_altitude)),
                "wattsPerSquareMeter": float("{:.1f}".format(solar_radiation))
            }
        }
        results.append(row)

results_by_postal_code

{'K2A': {'postalCode': 'K2A',
  'timezoneName': 'America/Toronto',
  'elevationMeters': 73.0,
  'weather': [{'datetime': '2023-01-01T00:00:00+00:00',
    'outsideAirTempF': 37.1,
    'relativeHumidityPercent': 99.3,
    'windSpeedMph': 1.9,
    'cloudCoverPercent': 100.0,
    'solarIrradiance': {'altitudeDegrees': 2.5, 'wattsPerSquareMeter': 49.8}},
   {'datetime': '2023-01-01T01:00:00+00:00',
    'outsideAirTempF': 37.3,
    'relativeHumidityPercent': 99.4,
    'windSpeedMph': 2.3,
    'cloudCoverPercent': 100.0,
    'solarIrradiance': {'altitudeDegrees': -5.8, 'wattsPerSquareMeter': 0.0}},
   {'datetime': '2023-01-01T02:00:00+00:00',
    'outsideAirTempF': 37.4,
    'relativeHumidityPercent': 99.2,
    'windSpeedMph': 3.0,
    'cloudCoverPercent': 100.0,
    'solarIrradiance': {'altitudeDegrees': -14.8, 'wattsPerSquareMeter': 0.0}},
   {'datetime': '2023-01-01T03:00:00+00:00',
    'outsideAirTempF': 37.3,
    'relativeHumidityPercent': 99.2,
    'windSpeedMph': 4.2,
    'cloudCoverPe

In [129]:
results_by_postal_code.keys()

dict_keys(['K2A', 'V5K', 'H3H', 'R3T', 'T6G'])

In [130]:
import json

for postal_code, results in results_by_postal_code.items():
    with open(f'../../static/data/weather/2023-era5-{postal_code}.json', 'w') as file:
        json.dump(results, file)