In [6]:
import fsspec
import xarray as xr

import pandas as pd

from tqdm import tqdm

In [7]:
filepath = 'https://power-analysis-ready-datastore.s3.amazonaws.com/power_901_monthly_meteorology_utc.zarr'
filepath_mapped = fsspec.get_mapper(filepath)

ds = xr.open_zarr(store=filepath_mapped, consolidated=True)
ds

In [8]:
# SOURCE = /power_901_monthly_meteorology_utc.zarr/
# TSURF -------
# long_name     : Surface Temperature of Land and Snow
# standard_name : Surface_Temperature_of_Land_and_Snow
# units         : K
# valid_max     : 350.0
# valid_min     : 150.0
# valid_range   : 150.0, 350.0]

# GWETTOP -----
# long_name     : Surface Soil Wetness
# standard_name : Surface_Soil_Wetness
# units         : 1
# valid_max     : 1.0
# valid_min     : 0.0
# valid_range   : [0.0, 1.0]

# PRECSNO --------
# long_name     : Snow Precipitation
# standard_name : Snow_Precipitation
# units         : kg m-2 s-1
# valid_max     : 0.0005
# valid_min     : 0.0
# valid_range   : [0.0, 0.0005]

# RH2M -------
# long_name     : Relative Humidity at 2 Meters
# standard_name : Relative_Humidity_at_2_Meters
# units         : %
# valid_max     : 100.0
# valid_min     : 0.0
# valid_range   : [0.0, 100.0]

# WS2M -------
# long_name     : Wind Speed at 2 Meters
# standard_name : Wind_Speed_at_2_Meters
# units         : m/s
# valid_max     : 50.0
# valid_min     : 0.0
# valid_range   : [0.0, 50.0]

# Source = /power_901_daily_precipitation_utc.zarr/
# PRECIPITATIONCAL ------
# long_name : The accumulated precipitation from all available infrared (IR) and microwave (MW) sources.
# units     : mm/day

## Select Columns of Interest

In [9]:
selected = ds[["TSURF", "GWETTOP", "PRECSNO", "RH2M", "WS2M"]]
selected

## Filter Data by County

In [10]:
counties = [
    { "coords": dict(lat=41.8244, lon=-88.0901), "county": "Dupage", "state": "IL" },
    { "coords": dict(lat=41.7377, lon=-87.6976), "county": "Cook", "state": "IL" },
    { "coords": dict(lat=35.4168, lon=-80.5883), "county": "Cabarrus", "state": "NC" },
    { "coords": dict(lat=35.2633, lon=-80.8544), "county": "Mecklenburg", "state": "NC" },
    { "coords": dict(lat=28.4845, lon=-81.2519), "county": "Orange", "state": "FL" },
    { "coords": dict(lat=28.7132, lon=-81.2078), "county": "Seminole", "state": "FL" },
    { "coords": dict(lat=35.8496, lon=-106.3228), "county": "Los Alamos", "state": "NM" },
    { "coords": dict(lat=34.7492, lon=-82.9932), "county": "Oconee", "state": "SC" },
    { "coords": dict(lat=35.8032, lon=-78.5661), "county": "Wake", "state": "NC" },
    { "coords": dict(lat=39.2873, lon=-76.9643), "county": "Howard", "state": "MD" },
    { "coords": dict(lat=33.1795, lon=-96.4930), "county": "Collin", "state": "TX" },
    { "coords": dict(lat=33.8999, lon=-84.5641), "county": "Cobb", "state": "GA" },
    { "coords": dict(lat=37.7749, lon=-122.4194), "county": "San Francisco", "state": "CA" },
    { "coords": dict(lat=34.3705, lon=-119.1391), "county": "Ventura", "state": "CA" },
    { "coords": dict(lat=47.5480, lon=-121.9836), "county": "King", "state": "WA" },
]

In [11]:
data = list(map(
    lambda c: { **c, "data": selected.sel(c["coords"], method="nearest", tolerance=0.5) },
    counties
))

### Sanity Check

In [12]:
for county in data:
    dims = county["data"].dims
    msg = f"Data for {county['county']} County is".ljust(32)

    if len(dims) != 1 or dims['time'] != 492:
        print(f"{msg} : WRONG!  <-----")
    else:
        print(f"{msg} : correct!")

Data for Dupage County is        : correct!
Data for Cook County is          : correct!
Data for Cabarrus County is      : correct!
Data for Mecklenburg County is   : correct!
Data for Orange County is        : correct!
Data for Seminole County is      : correct!
Data for Los Alamos County is    : correct!
Data for Oconee County is        : correct!
Data for Wake County is          : correct!
Data for Howard County is        : correct!
Data for Collin County is        : correct!
Data for Cobb County is          : correct!
Data for San Francisco County is : correct!
Data for Ventura County is       : correct!
Data for King County is          : correct!


## Combine the Data

In [13]:
data_as_pd = pd.DataFrame({})

for county in tqdm(data):
    df = county["data"].to_dataframe()
    # Convert Kelvin to Fahrenheit
    df["TSURF_FAHREN"] = (9/5) * (df["TSURF"] - 273) + 32
    # Reattach the coordinates
    df["lat"] = county["coords"]["lat"]
    df["lon"] = county["coords"]["lon"]
    # Attach the state and county names
    df["state"] = county["state"]
    df["county"] = county["county"]

    # Append the new data to the dataframe
    data_as_pd = pd.concat([data_as_pd, df])
    

100%|██████████| 15/15 [00:31<00:00,  2.10s/it]


In [14]:
data_as_pd

Unnamed: 0_level_0,TSURF,GWETTOP,PRECSNO,RH2M,WS2M,lat,lon,TSURF_FAHREN,state,county
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1981-01-31,265.109375,0.718750,0.000000e+00,86.4375,3.687500,41.8244,-88.0901,17.796875,IL,Dupage
1981-02-28,269.453125,0.734375,1.525879e-05,85.1875,4.945312,41.8244,-88.0901,25.615625,IL,Dupage
1981-03-31,275.570312,0.710938,0.000000e+00,74.1250,3.851562,41.8244,-88.0901,36.626562,IL,Dupage
1981-04-30,283.476562,0.710938,0.000000e+00,74.2500,4.554688,41.8244,-88.0901,50.857813,IL,Dupage
1981-05-31,286.171875,0.718750,0.000000e+00,71.4375,3.515625,41.8244,-88.0901,55.709375,IL,Dupage
...,...,...,...,...,...,...,...,...,...,...
2021-08-31,289.765625,0.429688,0.000000e+00,74.8125,0.289062,47.5480,-121.9836,62.178125,WA,King
2021-09-30,286.656250,0.414062,0.000000e+00,75.3125,0.296875,47.5480,-121.9836,56.581250,WA,King
2021-10-31,280.710938,0.546875,3.492460e-10,86.1250,0.320312,47.5480,-121.9836,45.879688,WA,King
2021-11-30,278.234375,0.718750,5.003123e-06,92.4375,0.281250,47.5480,-121.9836,41.421875,WA,King


### Save

In [15]:
data_as_pd.to_csv("./all_counties.csv")