# Merge ANUCLIM Rainfall with Satellite Rainfall

Australia has high-resolution gridded climate datasets built by spatially interpolating observations from its extensive network of weather stations. However, in locations where the station density is low, the interpolations are suspect. In these locations we will attempt to replace the spatially interpolated gridded data with satellite retrievals of rainfall.

In [None]:
import shapely
import xarray as xr
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt

from odc.geo.xr import assign_crs
from odc.geo.geobox import GeoBox
from odc.geo.xr import xr_reproject

import sys
sys.path.append('/g/data/os22/chad_tmp/dea-notebooks/Tools')
from dea_tools.spatial import xr_rasterize

## Read weather stations list, filter, then convert to geopandas, clip to Aus mainland

In [None]:
# Load the weather-station table; later cells read its 'End', 'Lon' and 'Lat'
# columns.
stations_csv = '/g/data/os22/chad_tmp/NEE_modelling/data/weather_stations.csv'
df = pd.read_csv(stations_csv)

# Show the first few rows as a sanity check.
df.head()

In [None]:
# Keep only stations that existed for at least half of the study period.
# An 'End' value of '..' is replaced with 2022 before the column is cast to
# int (presumably '..' marks a station that is still operating -- confirm
# against the data source).
open_ended = df['End'] == '..'
df['End'] = np.where(open_ended, 2022, df['End'])
df['End'] = df['End'].astype(int)

# Drop every station whose record ended before 2010.
recent_enough = df['End'] >= 2010
df = df[recent_enough]

In [None]:
# Quick-look plot of the `aus` outline.
# NOTE(review): `aus` is only assigned in the following cell (read from
# aus_bbox.geojson), so on a fresh top-to-bottom run this cell will raise a
# NameError -- run the next cell first, or move this cell below it.
aus.plot()

In [None]:
# Turn the station table into a GeoDataFrame of points built from the Lon/Lat
# columns, tagged as EPSG:4326.
station_points = gpd.points_from_xy(df.Lon, df.Lat)
gdf = gpd.GeoDataFrame(df, geometry=station_points, crs='EPSG:4326')

# Keep only the stations that fall inside the Aus mainland outline.
aus = gpd.read_file('/g/data/os22/chad_tmp/NEE_modelling/data/aus_bbox.geojson')
gdf = gdf.clip(aus)

# Display the clipped stations.
gdf

### Buffer points by 100 km

In [None]:
# Buffer distance, in the units of the projected CRS used below
# (metres for EPSG:3577, Australian Albers).
buffer_radius = 100000

# Buffer in a projected CRS so the distance is a ground distance rather than
# degrees, then return to geographic coordinates.
projected = gdf.to_crs('epsg:3577')
projected['geometry'] = projected.geometry.buffer(buffer_radius)
gdf = projected.to_crs('epsg:4326')

### Rasterize buffered points on 1km grid

In [None]:
# The 1 km EVI dataset is opened only to serve as the template grid; its
# geobox is reused further down when reprojecting the rainfall grids.
template_path = '/g/data/os22/chad_tmp/NEE_modelling/data/1km/EVI_1km_monthly_2002_2021.nc'
ds = xr.open_dataset(template_path)

# Burn the buffered station polygons onto the template grid.
# NOTE(review): a later cell tests `da_stations == 0`, so pixels outside every
# buffer are presumably 0 -- confirm against xr_rasterize's defaults.
da_stations = xr_rasterize(gdf, ds)

In [None]:
# Visual check of the rasterized station-buffer mask.
da_stations.plot.imshow(size=10)

## Replace rainfall at low density stations regions

Open the CHIRPS satellite rainfall and the ANUClim rainfall, then reproject both onto the 1 km grid used above.

In [None]:
# Both rainfall products are resampled onto the grid of the 1 km template.
target_geobox = ds.odc.geobox

# --- CHIRPS rainfall: take the last time step of the monthly stack ---
# (presumably December 2021, going by the file name -- confirm)
chirps_path = '/g/data/os22/chad_tmp/NEE_modelling/data/5km/chirps_5km_monthly_1991_2021.nc'
chirps = xr.open_dataarray(chirps_path).isel(time=-1)
chirps = xr_reproject(chirps, target_geobox, resampling='bilinear')
# Cast the coordinates to float32.
# NOTE(review): presumably so the coordinates of all arrays match exactly
# when they are combined later -- confirm.
for axis in ('latitude', 'longitude'):
    chirps[axis] = chirps[axis].astype('float32')

# --- ANUClimate rainfall for 2021-12, read remotely over OPeNDAP ---
anu_url = 'https://dapds00.nci.org.au/thredds/dodsC/gh70/ANUClimate/v2-0/stable/month/rain/2021/ANUClimate_v2-0_rain_monthly_202112.nc'
anu = xr.open_dataset(anu_url)
anu = assign_crs(anu, crs='EPSG:4283')
# Keep only the 'rain' variable and drop any length-1 dimensions.
anu = anu['rain'].squeeze()
# Record NaN as the nodata value before reprojecting.
anu.attrs['nodata'] = np.nan

anu = xr_reproject(anu, target_geobox, resampling='bilinear')
for axis in ('latitude', 'longitude'):
    anu[axis] = anu[axis].astype('float32')

### Merge: use satellite rainfall where no station buffer covers the pixel, ANUClim elsewhere

In [None]:
# Where the station mask equals 0, take the CHIRPS value; everywhere else
# keep the ANUClimate value.
outside_station_buffers = da_stations == 0
merged_rainfall = xr.where(outside_station_buffers, chirps, anu)

In [None]:
# Map of the merged rainfall with the colour scale capped at 500.
# NOTE(review): xarray's `robust` option only fills in colour limits that are
# not given explicitly, so with vmax set it presumably only affects vmin.
merged_rainfall.plot.imshow(vmax=500, size=10, robust=True)

In [None]:
# Map of merged minus ANUClimate rainfall, i.e. the change introduced by the
# merge step.
(merged_rainfall - anu).plot.imshow(robust=True, size=10)

In [None]:
# Display the bounding-box values as a tuple.
# NOTE(review): these four names are only assigned from `aus.total_bounds` in
# the grid-construction cell further down, so this cell raises a NameError
# unless that cell has already been run.
xmin, ymin, xmax, ymax

----

## Create a regular grid of tiles over Aus (cell size set by `cell_size`)

In [None]:
# Edge length of each square grid cell, in the units of the grid CRS
# (degrees, assuming `gdf` is in EPSG:4326 at this point).
cell_size = 22

# Extent to tile: the bounding box of the Aus outline.
xmin, ymin, xmax, ymax = aus.total_bounds

# NOTE(review): this is cell_size multiplied by the x-extent, so it is not a
# count of cells; it is only printed and never used below.
n_cells = cell_size * (xmax - xmin)
print(n_cells)

# The grid shares the CRS of the station GeoDataFrame.
crs = gdf.crs

# Cell origins along each axis; the stop value is padded by one cell_size.
x_origins = np.arange(xmin, xmax + cell_size, cell_size)
y_origins = np.arange(ymin, ymax + cell_size, cell_size)

# One box per (x0, y0) origin, with x as the outer (slowest-varying) loop.
# NOTE(review): each box runs from x0 - cell_size to x0 in x but from y0 to
# y0 + cell_size in y, so the grid is shifted one cell towards smaller x
# relative to the origins. Left unchanged because the later `cell[2:]` slices
# depend on the resulting row order.
grid_cells = [
    shapely.geometry.box(x0, y0, x0 - cell_size, y0 + cell_size)
    for x0 in x_origins
    for y0 in y_origins
]

# Assemble the boxes into a GeoDataFrame and trim them to the Aus outline.
cell = gpd.GeoDataFrame(grid_cells, columns=['geometry'], crs=crs)
cell = cell.clip(aus)

# Draw the station geometries with the grid outline on top; autoscaling is
# switched off first so the grid does not change the axes limits.
ax = gdf.plot(markersize=.1, figsize=(12, 8))
plt.autoscale(False)
cell.plot(ax=ax, facecolor="none", edgecolor='grey')
ax.axis("off")

# --- Disabled: count of stations per grid cell (kept for reference) ---
# merged = gpd.sjoin(gdf, cell, how='left', predicate='within')

# # make a simple count variable that we can sum
# merged['n_stations']=1

# # Compute stats per grid cell -- aggregate stations to grid cells with dissolve
# dissolve = merged.dissolve(by="index_right", aggfunc="count")

# # put this into cell
# cell.loc[dissolve.index, 'n_stations'] = dissolve.n_stations.values

# #convert cell with NaN to zero
# cell['n_stations'] = cell['n_stations'].fillna(0)

# ax = cell.plot(column='n_stations', figsize=(15, 10), cmap='viridis',edgecolor="grey", vmax=20, vmin=0, legend=True)
# plt.autoscale(False)
# world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
# world.to_crs(cell.crs).plot(ax=ax, color='none', edgecolor='black');
# # ax.axis('off')

In [None]:
# Write the grid cells to tiles_aus.geojson, skipping the first two rows of
# `cell`.
# NOTE(review): the reason for dropping rows 0-1 is not stated; presumably
# they are unwanted cells left over after clipping -- confirm before changing
# how the grid is built, since this slice is positional.
cell[2:].to_file('/g/data/os22/chad_tmp/NEE_modelling/notebooks/tiles_aus.geojson')

In [None]:
# Interactive map of the same rows that were exported (first two rows of
# `cell` skipped).
cell[2:].explore()