In [3]:
import pandas as pd
import numpy as np
import xarray as xr
import time
import aggfly as af
from dask.distributed import Client, progress
import geopandas as gpd

# Project root for this data request. It is passed to af.weights_from_objects
# as `project_dir` further down in the notebook.
project_dir = '/home3/dth2133/data/clim_data_requests/maya'

In [4]:
# Years to process. np.arange excludes the stop value, so (2018, 2019)
# yields the single year 2018.
years = np.arange(2018, 2019)

# Region polygons: read the county shapefile and use its 'GEOID' field as the
# region identifier.
county_shapefile = "/home3/dth2133/data/shapefiles/county/cb_2018_us_county_500k.shp"
georegions = af.georegions_from_path(county_shapefile, regionid='GEOID')

In [5]:
# Open one example year of data; this `dataset` object is what the weights
# are constructed from in the following cell.
# The path is a plain string literal: it has no placeholders, so the `f`
# prefix the original carried was unnecessary.
dataset = af.dataset_from_path(
    "/home3/dth2133/data/annual/tempPrecLand2017.zarr",
    var='t2m',
    name='era5',
    georegions=georegions,
    # Subtract 273.15 from every value (Kelvin -> Celsius, assuming t2m is
    # stored in Kelvin -- TODO confirm against the source data).
    preprocess=lambda x: (x - 273.15),
)
# Display the underlying array as the cell output.
dataset.da

Unnamed: 0,Array,Chunk
Bytes,42.35 GiB,118.81 MiB
Shape,"(860, 1509, 8760)","(860, 1509, 24)"
Dask graph,365 chunks in 7 graph layers,365 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 42.35 GiB 118.81 MiB Shape (860, 1509, 8760) (860, 1509, 24) Dask graph 365 chunks in 7 graph layers Data type float32 numpy.ndarray",8760  1509  860,

Unnamed: 0,Array,Chunk
Bytes,42.35 GiB,118.81 MiB
Shape,"(860, 1509, 8760)","(860, 1509, 24)"
Dask graph,365 chunks in 7 graph layers,365 chunks in 7 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [5]:
# Population raster weights. Note: this object is loaded but NOT applied to the
# weights built below, because the `secondary_weights` keyword is commented out.
landscan_path = "/home3/dth2133/data/population/landscan-global-2016.tif"
secondary_weights = af.pop_weights_from_path(landscan_path)

# Build the weights object from the example dataset and the regions, then
# compute the weights.
weights = af.weights_from_objects(
    dataset,
    georegions,
    # secondary_weights=secondary_weights,
    project_dir=project_dir,
)
weights.calculate_weights()

/home3/dth2133/data/clim_data_requests/maya/tmp/GridWeights/mod-c442a87c91f294c
Loading rescaled weights from cache
Cache dictionary:
{'func': 'weights', 'raster_weights': None}


In [9]:
# Aggregate every requested year and stack the yearly results into `df`.
#
# Per-year frames are collected in a list and concatenated once after the loop.
# Growing `df` with pd.concat on every iteration re-copies all previously
# accumulated rows each time and starts the concat from an empty DataFrame.
# If the loop is interrupted, the years finished so far remain in `yearly_frames`.
yearly_frames = []
for year in years:
    print(f"Aggregating {year}")

    start = time.time()

    # Open this year's data with the same variable, regions and preprocessing
    # as the example dataset used to build `weights`.
    dataset = af.dataset_from_path(
        f"/home3/dth2133/data/annual/tempPrecLand{year}.zarr",
        var='t2m',
        name='era5',
        georegions=georegions,
        # Subtract 273.15 from every value (Kelvin -> Celsius, assuming t2m is
        # stored in Kelvin -- TODO confirm).
        preprocess=lambda x: (x - 273.15)
    )

    # Each keyword below (tavg, bins, growing_dday, heating_dday) is a list of
    # (step, options) tuples handed to aggfly; presumably the steps run in the
    # order listed -- see the aggfly documentation for the exact semantics of
    # 'calc', 'groupby' and 'ddargs'.
    output_df = af.aggregate_dataset(
        dataset=dataset,
        weights=weights,
        tavg=[
            ('aggregate', {'calc': 'mean', 'groupby': 'date'}),
            # np.arange(1, 2) is array([1]): only the first power is requested.
            ('transform', {'transform': 'power', 'exp': np.arange(1, 2)}),
            ('aggregate', {'calc': 'sum', 'groupby': 'year'})
        ],
        bins=[
            ('aggregate', {'calc': 'mean', 'groupby': 'date'}),
            ('aggregate', {'calc': 'bins', 'groupby': 'year', 'ddargs': [[25, 99, 0], [30, 99, 0]]})
        ],
        growing_dday=[
            ('aggregate', {'calc': 'dd', 'groupby': 'date', 'ddargs': [10, 30, 0]}),
            ('aggregate', {'calc': 'sum', 'groupby': 'year'}),
        ],
        heating_dday=[
            ('aggregate', {'calc': 'dd', 'groupby': 'date', 'ddargs': [-99, 20, 1]}),
            ('aggregate', {'calc': 'sum', 'groupby': 'year'}),
        ]
    )

    yearly_frames.append(output_df)
    end = time.time()
    print(f"Year {year} took {end-start} seconds.")

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when `years` is empty (matching what the original loop left in `df`).
df = pd.concat(yearly_frames, axis=0) if yearly_frames else pd.DataFrame()



Aggregating 2018
Computing...
[########################################] | 100% Completed | 117.91 s
Combining datasets...
Stacking...
Merging...
Grouping...
Merging again...
Creating Dask DataFrame...
Aggregating...
[########################################] | 100% Completed | 448.17 ms
