In [1]:
import fsspec
import xarray as xr

import plotly.graph_objects as go
import pandas as pd
from tqdm import tqdm

In [2]:
filepath = 'https://power-analysis-ready-datastore.s3.amazonaws.com/power_901_monthly_meteorology_utc.zarr'
filepath_mapped = fsspec.get_mapper(filepath)

ds = xr.open_zarr(store=filepath_mapped, consolidated=True)
ds

In [3]:
# SOURCE = /power_901_monthly_meteorology_utc.zarr/
# TSURF -------
# long_name     : Surface Temperature of Land and Snow
# standard_name : Surface_Temperature_of_Land_and_Snow
# units         : K
# valid_max     : 350.0
# valid_min     : 150.0
# valid_range   : 150.0, 350.0]

# GWETTOP -----
# long_name     : Surface Soil Wetness
# standard_name : Surface_Soil_Wetness
# units         : 1
# valid_max     : 1.0
# valid_min     : 0.0
# valid_range   : [0.0, 1.0]

# PRECSNO --------
# long_name     : Snow Precipitation
# standard_name : Snow_Precipitation
# units         : kg m-2 s-1
# valid_max     : 0.0005
# valid_min     : 0.0
# valid_range   : [0.0, 0.0005]

# RH2M -------
# long_name     : Relative Humidity at 2 Meters
# standard_name : Relative_Humidity_at_2_Meters
# units         : %
# valid_max     : 100.0
# valid_min     : 0.0
# valid_range   : [0.0, 100.0]

# WS2M -------
# long_name     : Wind Speed at 2 Meters
# standard_name : Wind_Speed_at_2_Meters
# units         : m/s
# valid_max     : 50.0
# valid_min     : 0.0
# valid_range   : [0.0, 50.0]

# Source = /power_901_daily_precipitation_utc.zarr/
# PRECIPITATIONCAL ------
# long_name : The accumulated precipitation from all available infrared (IR) and microwave (MW) sources.
# units     : mm/day

## Select Columns of Interest

In [4]:
selected = ds[["TSURF", "GWETTOP", "PRECSNO", "RH2M", "WS2M"]]
selected

## Select Data from 2015-2021
We also drop NA values which are readings of the sea.

In [5]:
all_data = selected.to_dataframe().reset_index()
all_data

Unnamed: 0,time,lat,lon,TSURF,GWETTOP,PRECSNO,RH2M,WS2M
0,1981-01-31,-90.0,-180.000,,1.0,0.000000,91.3750,2.257812
1,1981-01-31,-90.0,-179.375,,1.0,0.000000,91.3750,2.257812
2,1981-01-31,-90.0,-178.750,,1.0,0.000000,91.3750,2.265625
3,1981-01-31,-90.0,-178.125,,1.0,0.000000,91.3750,2.273438
4,1981-01-31,-90.0,-177.500,,1.0,0.000000,91.3750,2.281250
...,...,...,...,...,...,...,...,...
102304507,2021-12-31,90.0,176.875,,1.0,0.000005,89.9375,4.117188
102304508,2021-12-31,90.0,177.500,,1.0,0.000005,89.9375,4.125000
102304509,2021-12-31,90.0,178.125,,1.0,0.000005,89.9375,4.132812
102304510,2021-12-31,90.0,178.750,,1.0,0.000005,89.9375,4.140625


In [6]:
all_data.describe()

Unnamed: 0,lat,lon,TSURF,GWETTOP,PRECSNO,RH2M,WS2M
count,102304500.0,102304500.0,28966990.0,102304500.0,102304500.0,102304500.0,102304500.0
mean,0.0,-0.3125,282.6836,0.889688,4.425585e-06,81.74603,5.157919
std,52.10566,103.9229,18.04416,0.225686,9.143772e-06,14.47399,2.32225
min,-90.0,-180.0,220.0938,0.0078125,0.0,5.875,0.0
25%,-45.0,-90.15625,271.0703,0.9140625,0.0,78.25,3.585938
50%,-1.79751e-13,-0.3125,286.1875,1.0,0.0,83.3125,5.195312
75%,45.0,89.53125,297.8828,1.0,7.629395e-06,90.8125,6.679688
max,90.0,179.375,317.5,1.0,0.0006561279,100.0,18.64844


In [7]:
all_data.dropna(inplace=True)
all_data.reset_index(inplace=True)
all_data

Unnamed: 0,index,time,lat,lon,TSURF,GWETTOP,PRECSNO,RH2M,WS2M
0,34806,1981-01-31,-60.0,-26.250,275.656250,1.000000,0.000015,92.8125,6.421875
1,35378,1981-01-31,-59.5,-28.750,275.781250,1.000000,0.000023,92.8125,6.304688
2,35379,1981-01-31,-59.5,-28.125,275.781250,0.953125,0.000023,92.9375,6.281250
3,35380,1981-01-31,-59.5,-27.500,275.750000,0.914062,0.000023,93.0000,6.304688
4,35381,1981-01-31,-59.5,-26.875,275.664062,0.890625,0.000015,93.0625,6.382812
...,...,...,...,...,...,...,...,...,...
28966987,102297252,2021-12-31,84.0,-37.500,244.812500,0.984375,0.000005,97.1250,5.078125
28966988,102297253,2021-12-31,84.0,-36.875,244.812500,0.984375,0.000005,97.3750,5.117188
28966989,102297254,2021-12-31,84.0,-36.250,244.812500,0.984375,0.000006,97.6250,5.156250
28966990,102297255,2021-12-31,84.0,-35.625,244.812500,0.992188,0.000006,97.7500,5.195312


In [8]:
all_data.drop(columns=["index"], inplace=True)

## Convert Kelvin to Fahrenheit

In [9]:
all_data["F_TSURF"] = (9/5) * (all_data["TSURF"] - 273) + 32

all_data

Unnamed: 0,time,lat,lon,TSURF,GWETTOP,PRECSNO,RH2M,WS2M,F_TSURF
0,1981-01-31,-60.0,-26.250,275.656250,1.000000,0.000015,92.8125,6.421875,36.781250
1,1981-01-31,-59.5,-28.750,275.781250,1.000000,0.000023,92.8125,6.304688,37.006250
2,1981-01-31,-59.5,-28.125,275.781250,0.953125,0.000023,92.9375,6.281250,37.006250
3,1981-01-31,-59.5,-27.500,275.750000,0.914062,0.000023,93.0000,6.304688,36.950000
4,1981-01-31,-59.5,-26.875,275.664062,0.890625,0.000015,93.0625,6.382812,36.795313
...,...,...,...,...,...,...,...,...,...
28966987,2021-12-31,84.0,-37.500,244.812500,0.984375,0.000005,97.1250,5.078125,-18.737500
28966988,2021-12-31,84.0,-36.875,244.812500,0.984375,0.000005,97.3750,5.117188,-18.737500
28966989,2021-12-31,84.0,-36.250,244.812500,0.984375,0.000006,97.6250,5.156250,-18.737500
28966990,2021-12-31,84.0,-35.625,244.812500,0.992188,0.000006,97.7500,5.195312,-18.737500


## Get US Counties Coordinates

In [10]:
us_counties = pd.read_csv("./us_counties.csv")
us_counties

Unnamed: 0,fips_code,name,lng,lat
0,1059,Franklin,-87.843283,34.442381
1,13111,Fannin,-84.319296,34.864126
2,19109,Kossuth,-94.206898,43.204140
3,40115,Ottawa,-94.810589,36.835878
4,42115,Susquehanna,-75.800905,41.821277
...,...,...,...,...
3228,12029,Dixie,-83.158705,29.608068
3229,18017,Cass,-86.346207,40.761660
3230,26091,Lenawee,-84.066412,41.894694
3231,72003,Aguada,-67.175247,18.360392


## Get Counties of Interest

In [17]:
latitudes = [35.5,36.0,26.5,28.0,29.5,29.0,46.5,48.0,43.5,42.5,40.5,43.5]
longitudes = [80.625,81.25,80.625,81.25,98.75,95.625,120.625,121.875,72.5,71.25,-93.75,-102.5]

In [18]:
all_in_lat = all_data[all_data['lat'].isin(latitudes)]
all_of_interest = all_in_lat[all_in_lat['lon'].isin(longitudes)]

In [19]:
all_of_interest

Unnamed: 0,time,lat,lon,TSURF,GWETTOP,PRECSNO,RH2M,WS2M,F_TSURF
22283,1981-01-31,26.5,-102.500,282.843750,0.460938,0.000008,65.5000,2.617188,49.718750
22437,1981-01-31,26.5,71.250,289.304688,0.187500,0.000000,34.1250,2.281250,61.348438
22439,1981-01-31,26.5,72.500,289.265625,0.171875,0.000000,35.5000,2.156250,61.278125
22452,1981-01-31,26.5,80.625,286.992188,0.500000,0.000000,59.5625,1.429688,57.185938
22453,1981-01-31,26.5,81.250,287.304688,0.468750,0.000000,58.9375,1.484375,57.748438
...,...,...,...,...,...,...,...,...,...
28943124,2021-12-31,48.0,81.250,262.484375,0.445312,0.000008,81.1250,3.031250,13.071875
28943147,2021-12-31,48.0,95.625,255.289062,0.429688,0.000001,74.7500,2.296875,0.120313
28943152,2021-12-31,48.0,98.750,252.585938,0.601562,0.000001,76.1250,2.804688,-4.745313
28943187,2021-12-31,48.0,120.625,252.984375,0.757812,0.000004,94.1875,4.507812,-4.028125


In [20]:
all_of_interest.to_csv("12_counties_data.csv")