In [1]:
import pandas as pd
import numpy as np
import xarray as xr
import time
import aggfly as af
import dask
from dask.distributed import Client, progress

# Root directory for this data request; handed to aggfly as `project_dir`
# when the weights object is built further down.
project_dir = "/home3/dth2133/data/clim_data_requests/maya"

In [2]:
# Start the Dask client via aggfly's helper: 25 workers, 2 threads per worker.
client = af.start_dask_client(
    n_workers=25,
    threads_per_worker=2,
)

In [3]:
# Years to aggregate, stated as inclusive bounds.
# np.arange excludes its stop value, hence the + 1.
first_year, last_year = 1951, 1952
years = np.arange(first_year, last_year + 1)

# Load the county shapefile; the GEOID attribute is used as the region id.
county_shapefile = "/home3/dth2133/data/shapefiles/county/cb_2018_us_county_500k.shp"
georegions = af.georegions_from_path(county_shapefile, regionid="GEOID")

In [4]:
# Open one example file (2017) so the weights can be constructed from it.
# The preprocess hook subtracts 273.15 from the loaded values
# (presumably Kelvin -> Celsius for t2m; confirm the source units).
example_zarr = "/home3/dth2133/data/annual/tempPrecLand2017.zarr"
dataset = af.dataset_from_path(
    example_zarr,
    var="t2m",
    name="era5",
    georegions=georegions,
    preprocess=lambda x: x - 273.15,
)

# Population raster (LandScan global 2016 GeoTIFF) used as secondary weights.
landscan_tif = "/home3/dth2133/data/population/landscan-global-2016.tif"
secondary_weights = af.pop_weights_from_path(landscan_tif)

# Build the weights object from the example dataset and the regions, with the
# population raster as secondary weights, then run the weight calculation.
# `project_dir` is passed through to aggfly (the captured output below shows a
# cache path under it).
weights = af.weights_from_objects(
    dataset, georegions, secondary_weights=secondary_weights, project_dir=project_dir
)
weights.calculate_weights()

/home3/dth2133/data/clim_data_requests/maya/tmp/GridWeights/mod-c442a87c91f294c
Loading rescaled weights from cache
Cache dictionary:
{'func': 'weights', 'raster_weights': None}


In [6]:
# Aggregate every year separately, then stack the per-year frames into `df`.
#
# Each variable's pipeline is written as an ORDERED list of (step, params)
# pairs. The earlier dict literals repeated the 'aggregate' key; Python keeps
# only the last value for a repeated key in a dict literal, so the first
# aggregation step of every variable was silently dropped (and for `tavg` the
# remaining steps ended up ordered as annual sum -> power).
# NOTE(review): the list-of-tuples form follows the aggfly README; confirm the
# installed aggfly version accepts it.
# NOTE(review): the previously-dropped steps were spelled 'groupby': 'day' and
# 'calc': 'avg'. Those spellings appear nowhere else in this notebook and were
# never exercised, so they are aligned here with the 'date' / 'mean' used by
# the other steps -- confirm against aggfly's accepted values.
yearly_frames = []
for year in years:
    print(f"Aggregating {year}")

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments during a multi-minute aggregation.
    start = time.perf_counter()

    # Same variable and preprocessing as the example dataset the weights were
    # built from; only the year in the file name changes.
    dataset = af.dataset_from_path(
        f"/home3/dth2133/data/annual/tempPrecLand{year}.zarr",
        var='t2m',
        name='era5',
        georegions=georegions,
        preprocess=lambda x: (x - 273.15),
    )

    output_df = af.aggregate_dataset(
        dataset=dataset,
        weights=weights,
        # Mean per date -> powers 1 and 2 -> sum over the year.
        tavg=[
            ('aggregate', {'calc': 'mean', 'groupby': 'date'}),
            ('transform', {'transform': 'power', 'exp': np.arange(1, 3)}),
            ('aggregate', {'calc': 'sum', 'groupby': 'year'}),
        ],
        # Mean per date -> 'bins' calculation per year with the two ddargs triples.
        bins=[
            ('aggregate', {'calc': 'mean', 'groupby': 'date'}),
            ('aggregate', {'calc': 'bins', 'groupby': 'year', 'ddargs': [[25, 99, 0], [30, 99, 0]]}),
        ],
        # 'dd' calculation per date with ddargs [-99, 20, 1] -> sum over the year.
        heating_dday=[
            ('aggregate', {'calc': 'dd', 'groupby': 'date', 'ddargs': [-99, 20, 1]}),
            ('aggregate', {'calc': 'sum', 'groupby': 'year'}),
        ],
    )

    # Collect now, concatenate once after the loop: concatenating inside the
    # loop re-copies every previously accumulated row on each iteration.
    yearly_frames.append(output_df)
    end = time.perf_counter()
    print(f"Year {year} took {end-start} seconds.")

# pd.concat raises on an empty list, so fall back to an empty frame when
# `years` is empty (which is what the in-loop version produced in that case).
df = pd.concat(yearly_frames, axis=0) if yearly_frames else pd.DataFrame()



Aggregating 1951
Year 1951 took 165.1887435913086 seconds.
