# Annual Avg
Cascade Tuholske Aug. 2024  

Notebook to stack one year of daily climate rasters and compute the per-pixel annual average.

In [1]:
# Dependencies
import os
import glob
import dask
import xarray as xr
import numpy as np
import rioxarray 
import rasterio
import matplotlib.pyplot as plt
import pandas as pd
from dask.distributed import Client, LocalCluster

In [2]:
# Files: collect one year of daily rasters, sorted by file name
year = '2016'
path = '/home/cascade/CHIRTS/UHE-daily/wbgtmax/'
# Build the glob pattern from components rather than relying on the trailing
# slash in `path` and plain string concatenation.
fns = sorted(glob.glob(os.path.join(path, str(year), '*.tif')))
# Fail early with a readable message instead of an IndexError on fns[0] below.
if not fns:
    raise FileNotFoundError('No .tif files found in ' + os.path.join(path, str(year)))
print(fns[0])
print(len(fns))

/home/cascade/CHIRTS/UHE-daily/wbgtmax/2016/wbgtmax.2016.01.01.tif
366


In [None]:
# Start a local Dask cluster: 20 workers with one thread each
cluster = LocalCluster(
    n_workers=20,
    threads_per_worker=1,
)
# Attach a client to the cluster created above
client = Client(cluster)

In [None]:
# Open each raster as a chunked (Dask-backed) xarray object and concatenate them

def open_raster(filepath):
    """Open a single raster file with rioxarray using automatic chunking.

    ``chunks='auto'`` requests a chunked result, so the returned object is
    expected to be Dask-backed rather than loaded eagerly into memory.
    """
    raster = rioxarray.open_rasterio(filepath, chunks='auto')
    return raster

# Open every file in the sorted list, one object per daily raster
datasets = list(map(open_raster, fns))

# Stack the per-file arrays along the 'band' dimension
combined = xr.concat(datasets, dim='band')

In [None]:
# Check the data: show the repr of the concatenated array
combined

In [None]:
# Define the mean across the stacked 'band' dimension, then evaluate it
band_mean = combined.mean(dim='band')
mean = band_mean.compute()

In [None]:
# Check the data: draw the mean as an image with an accompanying colorbar
fig, ax = plt.subplots()
image = ax.imshow(mean.data)
fig.colorbar(image, ax=ax)

In [None]:
# Inspect the underlying array of the computed mean
mean.data

In [None]:
# Prepare the array to write: replace NaNs with the -9999 fill value
nodata_fill = -9999
arr_out = np.nan_to_num(mean, nan=nodata_fill)
arr_out

In [None]:
# meta data: reuse the raster metadata of the first input file
# Use a context manager so the file handle is closed as soon as the metadata
# has been read (a bare rasterio.open(...) call would leave it open).
with rasterio.open(fns[0]) as src:
    meta = src.meta.copy()
# Declare -9999 as the nodata value for the output raster
meta['nodata'] = -9999
meta

In [None]:
# Build the output file name for this year
fn_stem = '../data/Nina-PNAS-Data/GHE-wbgtmax-avg-'
fn_out = os.path.join(fn_stem + str(year) + '.tif')

# Create the output raster from the prepared metadata and write the array to band 1
with rasterio.open(fn_out, mode='w', **meta) as dst:
    dst.write_band(1, arr_out)

In [None]:
# shut down the cluster
# Closing the client only disconnects it; the LocalCluster that was created
# explicitly and passed to Client keeps running until it is closed as well.
client.close()
cluster.close()