## Save images of the depth slices for each reservoir
It's important to check each reservoir individually to make sure the images being used as depth-to-surface data points are of good quality - e.g. that they show just the reservoir, do not include random flooding outside the reservoir, and don't have tire tracks running through them. This notebook generates images for each reservoir using the same code as query_with_bounding_boxes.ipynb and saves each depth slice of each reservoir as a png file.

In [1]:
import os
import xarray as xr
import numpy as np
import pandas as pd
import csv
import rasterio.crs
from tqdm.auto import tqdm #this one is a loading bar, it's cool to add loading bars to loops
from pandas import DataFrame
import geopandas as gpd
import matplotlib.gridspec as gs
import matplotlib.pyplot as plt
from matplotlib import pyplot
import datacube

import sys
sys.path.append('../../Scripts')
from dea_spatialtools import xr_rasterize
from dea_datahandling import wofs_fuser #this joins wofs data across tiles correctly
from datacube.utils import geometry 
from datacube.utils.geometry import CRS
from datacube.utils import masking
from datacube.helpers import ga_pq_fuser, write_geotiff
#from digitalearthau.utils import wofs_fuser
#import DEAPlotting, DEADataHandling
import warnings
warnings.filterwarnings('ignore', module='datacube')
%load_ext autoreload
%autoreload 2

  shapely_geos_version, geos_capi_version_string


In [2]:
# Reservoir boundary polygons; the loop below reads each row's geometry and gauge_ID
gdf = gpd.read_file('00_Lib_bound/00_Lib_bound.shp')

# Time window shared by every load; the polygon is filled in per reservoir
query = {'time': ('01-01-1988', '09-12-2020')}
dc = datacube.Datacube(app='dc-WOfS')

# gauge ID (as a string) -> polygon-masked WOfS dataset
results = {}

# tqdm wraps the row iterator to show a progress bar
for index, row in tqdm(gdf.iterrows(), total=len(gdf)):
    geom = geometry.Geometry(geom=row.geometry, crs=gdf.crs)
    query['geopolygon'] = geom

    # dask_chunks={} keeps the load lazy; wofs_fuser merges observations
    # that fall on tile edges
    wofs_albers = dc.load(
        product='wofs_albers',
        dask_chunks={},
        group_by='solar_day',
        fuse_func=wofs_fuser,
        **query,
    )

    # Pixels outside the polygon are set to the nodata value; without
    # `other` they would not keep a valid WOfS flag value
    poly_mask = xr_rasterize(gdf.iloc[[index]], wofs_albers)
    wofs_albers = wofs_albers.where(poly_mask, other=wofs_albers.water.nodata)

    results[str(row['gauge_ID'])] = wofs_albers
gdf

  0%|          | 0/153 [00:00<?, ?it/s]

Unnamed: 0,gauge_ID,NAME,staion_nam,ORIG_FID,geometry
0,TAYLORS,LAKE TAYLOR,Taylors Lake,0,"POLYGON ((142.36410 -36.82037, 142.37857 -36.7..."
1,RE604,UPPER STONY CREEK RESERVOIR,Upper Stony,1,"POLYGON ((144.19442 -37.81257, 144.21163 -37.8..."
2,sp-o10334,LAKE EILDON,EILDON,2,"POLYGON ((145.86701 -36.93337, 146.21666 -37.1..."
3,425022,LAKE MENINDEE,LAKE MENINDEE,3,"POLYGON ((142.29594 -32.24831, 142.42359 -32.3..."
4,sp-o11534,WARANGA BASIN,WARANGA BASIN,4,"POLYGON ((145.02963 -36.55203, 145.11966 -36.4..."
...,...,...,...,...,...
148,136023A,NED CHURCHWARD WEIR,Ned Churchward HW,148,"POLYGON ((151.94635 -25.14140, 152.05044 -25.0..."
149,136020A,BEN ANDERSON BARRAGE,Ben Anderson Barrage,149,"POLYGON ((152.14677 -24.97170, 152.26926 -24.8..."
150,136003C,CLAUDE WHARTON WEIR,Claude Wharton HW,150,"POLYGON ((151.52403 -25.61861, 151.59165 -25.6..."
151,125008A,MARIAN WEIR,Mirani Weir HW,151,"POLYGON ((148.82252 -21.15658, 148.92972 -21.1..."


In [None]:
# Paths of every gauge CSV in the library folder
directory = '00_Library'
file_list = [
    os.path.join(directory, filename)
    for filename in os.listdir(directory)
    if filename.endswith(".csv")
]

# Each file is read twice: once for the gauge ID in its first row, then again
# for the readings. The ID list is kept separately for use in later cells.
data_dict = {}
ID_list = []
for csv_path in tqdm(file_list, total=len(file_list)):
    # The ID is the first-row value under the second column
    header = pd.read_csv(csv_path, nrows=1, escapechar='#')
    id_column = header.columns[1]
    gauge_id = str(header.at[0, id_column])
    ID_list.append(gauge_id)

    # The readings start after 9 skipped rows; anything after the last '+'
    # in a timestamp is dropped before parsing
    readings = pd.read_csv(
        csv_path,
        error_bad_lines=False,
        skiprows=9,
        escapechar='#',
        parse_dates=['Timestamp'],
        index_col='Timestamp',
        date_parser=lambda stamp: pd.to_datetime(stamp.rsplit('+', 1)[0]),
    )
    readings = readings.drop(columns=['Quality Code', 'Interpolation Type'])

    # Gauge data is keyed by gauge ID, the same handle used for the WOfS results
    data_dict[gauge_id] = readings

In [None]:
# Keep only the last nine gauge IDs; the bare name on the next line
# displays the subset as the cell output
ID_list_cut = ID_list[-9:]
ID_list_cut 

In [None]:
def image_prod(ID_caller, gauge_data, wofs_albers, make_plots = False) -> list:
    """
    Save a water-extent image for each integer depth slice of one reservoir.

    The gauge readings are interpolated onto the satellite pass times and the
    passes are grouped by gauge level. Within each group, passes that are 20%
    or more cloud are discarded and the rest are collapsed into a wet/not-wet
    map, which is written to the 'images/' folder as a png.

    Parameters
    ----------
    ID_caller
        Gauge ID; used only to build the image file names.
    gauge_data : pandas.DataFrame
        Gauge readings with a 'Value' column, indexed by 'Timestamp'.
    wofs_albers : xarray.Dataset
        Satellite observations with a 'water' variable and time/x/y dimensions.
    make_plots : bool
        If True, each slice is additionally drawn on a figure that is left
        open so the notebook can display it. The figure used for saving is
        always closed.

    Returns
    -------
    list of str
        Paths of the png files written, in the order the slices were processed.
        Empty when the gauge data contains no readings.
    """
    # NaN readings cannot be cast to integers, so drop them first
    gauge_data = gauge_data.dropna()
    depth_integers = gauge_data.astype(np.int64)
    integer_list = depth_integers.Value.unique().tolist()

    # Interpolate the gauge record onto the satellite pass times
    gauge_data_xr = gauge_data.to_xarray()
    merged_data = gauge_data_xr.interp(Timestamp=wofs_albers.time)

    # With more than 25 integer levels each slice window is 2 wide instead of 1.
    # NOTE(review): every integer level is still visited, so the 2-wide windows
    # overlap - confirm whether stepping by 2 was intended.
    slice_width = 2 if len(integer_list) > 25 else 1

    # savefig does not create missing folders
    os.makedirs('images', exist_ok=True)

    images = []

    for i in tqdm(integer_list, leave = False):
        specified_level = merged_data.where((merged_data.Value > i) &
                            (merged_data.Value < i+slice_width), drop=True)

        date_list = specified_level.time.values
        # .compute() loads the actual pixels, only for the passes in this slice
        specified_passes = wofs_albers.sel(time=date_list).compute()

        # Fraction of cloud-flagged pixels in each pass
        cc = masking.make_mask(specified_passes.water, cloud=True)
        ncloud_pixels = cc.sum(dim=['x', 'y'])
        npixels_per_slice = (specified_passes.water.shape[1] *
                             specified_passes.water.shape[2])
        cloud_pixels_fraction = (ncloud_pixels / npixels_per_slice)
        clear_specified_passes = specified_passes.water.isel(
            time=cloud_pixels_fraction < 0.2) #has to be under 20% cloudy to pass

        # Per-pixel share of clear observations that were wet
        wet = masking.make_mask(clear_specified_passes, wet=True).sum(dim='time')
        dry = masking.make_mask(clear_specified_passes, dry=True).sum(dim='time')
        clear = wet + dry
        frequency = wet / clear
        frequency = frequency.fillna(0) #pixels with no clear observation count as dry

        # A pixel is water if it was wet in more than 20% of its clear
        # observations; write the 0/1 result back into the xarray for plotting
        frequency.values = np.where((frequency.values > 0.2),1,0)

        # Save the slice, then close the figure so open figures do not pile up
        # in memory across slices and reservoirs
        frequency.plot(figsize = (7,5))
        name = ID_caller, i
        image_path = 'images/'+str(name)+'.png'
        plt.savefig(image_path)
        plt.close()
        images.append(image_path)

        #Plotting the image
        if make_plots:
            frequency.plot(figsize = (7,5))

    # Locals are released when the function returns, so no explicit `del` is
    # needed (and deleting loop variables would fail if the loop never ran)
    return images

In [None]:
# Collects whatever image_prod returns for each processed gauge
array_list = []


def listsplit(N, K=1):
    """
    Split the sequence N into K consecutive chunks of near-equal length.

    Chunk boundaries are computed with integer division, so when len(N) is
    not a multiple of K the chunk lengths differ by at most one and every
    element still appears in exactly one chunk.

    Returns a list of K slices of N, in order.
    """
    length = len(N)
    # Floor division is required: slice indices must be integers
    return [N[i * length // K:(i + 1) * length // K] for i in range(K)]


# Produce the depth-slice images for each selected gauge that has both gauge
# readings and satellite data. The progress-bar total matches the list being
# iterated so the bar reaches 100%.
for ID in tqdm(ID_list_cut, total=len(ID_list_cut)):
    print("Working on gauge ", ID)
    if ID in data_dict and ID in results:
        array_list.append(
            image_prod(ID, data_dict[ID], results[ID], make_plots = False))
    else:
        print('we didnt find', ID)