In [None]:
from io import BytesIO
from ladybug.epw import EPW
from ladybug.location import Location
from ladybug.sunpath import Sunpath
from ladybug_comfort.collection.utci import UTCI
from zipfile import ZipFile
import geopandas as gpd
import ladybug_pandas as lbp
import numpy as np
import pandas as pd
import re
import requests

In [None]:
# Station list for WMO Region 4, downloaded from
# https://climate.onebuilding.org/WMO_Region_4_North_and_Central_America/default.html
df = gpd.read_file(
    "Region4_USA_TMYx_EPW_Processing_locations.kml", driver="KML", header=0)
# Placemarks with an empty description carry no download link.
df = df[df["Description"] != ""]

# One row per station: display name plus point coordinates.
data = pd.DataFrame(
    {
        "name": df["Name"],
        "latitude": df["geometry"].apply(lambda point: point.y),
        "longitude": df["geometry"].apply(lambda point: point.x),
    }
)

# Pull the first "http...zip" link out of each description. Raw-string
# patterns avoid the invalid "\." escape, and str.extract yields NaN (rather
# than raising) for a description that contains no zip link.
data["url"] = df["Description"].str.extract(r"(http.*?\.zip)", expand=False)

# Keep only the plain "..._TMYx.zip" archives; rows without a link are dropped
# too. The copy makes `data` an independent frame so later column assignments
# do not operate on a slice.
is_tmyx = data["url"].str.contains("TMYx.zip", regex=False, na=False)
data = data[is_tmyx].copy()

# Index by the token between the last "." and "_TMYx" in the archive name;
# later cells look stations up by this key under the name "wmo".
wmo = data["url"].str.extract(r".*\.(.+?)_TMYx", expand=False)
data.index = pd.Index(wmo, name="wmo")
data.head()

In [None]:
import time

# Per-station seasonal means of the UTCI "is_comfortable" flag, filled in below.
data["summer"] = np.nan
data["winter"] = np.nan

for i, row in data.iterrows():
    # A timeout keeps one stalled download from hanging the whole run, and
    # raise_for_status reports HTTP failures directly instead of letting an
    # error page surface later as a confusing BadZipFile.
    response = requests.get(row["url"], timeout=60)
    response.raise_for_status()

    with ZipFile(BytesIO(response.content)) as zf:
        epw_members = [f for f in zf.namelist() if f.lower().endswith(".epw")]
        if not epw_members:
            raise FileNotFoundError(f"no .epw file in archive {row['url']}")
        # extract() returns the path it actually wrote, so the file opened
        # below is guaranteed to be the one just extracted (the archive member
        # name is not required to match the zip's own file name).
        epw_file = zf.extract(epw_members[0], "epws")

    epw_data = EPW(epw_file)
    utci = UTCI.from_epw(epw_data, include_wind=True, include_sun=True)

    # Hourly comfort flags indexed by timestamp; hours before 08:00 are ignored.
    # NOTE(review): presumably the values are 0/1 so the mean is a fraction
    # of comfortable hours - confirm against ladybug_comfort.
    comfortable = pd.Series(utci.is_comfortable.values,
                            index=utci.is_comfortable.datetimes)
    comfortable = comfortable[comfortable.index.hour >= 8]

    # The date slices assume the series is stamped with the year 2017
    # - TODO confirm this holds for every EPW processed here.
    data.loc[i, "summer"] = comfortable.loc["2017-06-21":"2017-09-20"].mean()
    # Winter wraps around the year end: 1 Jan - 20 Mar plus 21 - 31 Dec.
    data.loc[i, "winter"] = pd.concat(
        (comfortable.loc["2017-01-01":"2017-03-20"],
         comfortable.loc["2017-12-21":"2017-12-31"])).mean()

    # Short pause between requests to go easy on the server.
    time.sleep(0.1)

# The download link is not needed in the results table. Note that dropping it
# means this cell cannot be re-run without first rebuilding `data`.
data.drop("url", axis=1, inplace=True)

In [None]:
# Reference row: the station indexed as "725300" (named Chicago here).
chicago = data.loc["725300", ["winter", "summer"]]

# Score each season as log2(station / Chicago): 0 means equal to the
# reference, +1 means double its value, -1 means half.
for season in ("winter", "summer"):
    ratio = data[season].div(chicago[season])
    data[f"{season}_score"] = ratio.apply(np.log2)

# Highest winter score first.
data = data.sort_values(by="winter_score", ascending=False)

In [None]:
# Show the complete table in the cell output, then write it to weather.csv
# in the current working directory.
full_table = data.to_string()
print(full_table)
data.to_csv("weather.csv")