## EarthEngine GeoTiff naming convention
If you are exporting to GeoTIFF(s), the image is split into tiles. The filename of each tile will be in the form baseFilename-yMin-xMin where xMin and yMin are the coordinates of each tile within the overall bounding box of the exported image.

In [1]:
import dask.array as da
import rasterio
import numpy as np
import h5py
#need to import the warp module separately
import rasterio.warp
import rasterio.merge
import matplotlib.pyplot as plt

In [None]:
#import the surface water dataset
# Open tiles 1..12 and accumulate the bounding box that encloses all of them.
datasets = []
# Seed the running extremes with +/-inf so the first tile always replaces them.
# Fixed seeds (e.g. 0 or -100) only give the true extent when every tile lies
# on the expected side of those values.
left = float('inf')
bottom = float('inf')
right = float('-inf')
top = float('-inf')
for x in range(1,13):
    dataset = rasterio.open('./GlobalAvulsionData/AltiPlanoProccessOrder/AltiPlanoYearly'+str(x)+'.tif')
    print(dataset.bounds)
    # bounds are ordered (left, bottom, right, top)
    left1, bottom1, right1, top1 = dataset.bounds
    #Get the bounds of the whole study area before cropping
    left = min(left, left1)
    bottom = min(bottom, bottom1)
    right = max(right, right1)
    top = max(top, top1)

    #append to dataset list (handles stay open; later cells read bands from them)
    datasets.append(dataset)

In [None]:
# Show the combined extent in the order: left, bottom, right, top.
print(*(left, bottom, right, top))

In [None]:
# Affine transform that maps a grid 26580 pixels wide and 37354 pixels high
# onto the combined extent.
Tran = rasterio.transform.from_bounds(
    west=left,
    south=bottom,
    east=right,
    north=top,
    width=26580,
    height=37354,
)
# Spot check: map coordinates of the centre of the pixel at row 1, column 1.
rasterio.transform.xy(Tran, 1, 1, offset='center')

In [None]:
# Build one mosaic per band from the tiles in `datasets` and stack the mosaics
# along axis 0 of the 'Altiplano' dataset in AltiplanoV2.h5.
N_BANDS = 35       # bands 1..35 are read from every tile
TILES_PER_ROW = 3  # datasets[0:3] form the first mosaic row, datasets[3:6] the next, ...

# The context manager closes the file (which writes it to disk) even if a read
# or a write raises part-way through, so the handle can never be left dangling.
with h5py.File('AltiplanoV2.h5', 'w') as hf:
    for x in range(1, N_BANDS + 1):
        mosaic_rows = []
        for first in range(0, len(datasets), TILES_PER_ROW):
            row_tiles = []
            for y in range(first, min(first + TILES_PER_ROW, len(datasets))):
                print(y)  # progress: index of the tile being read
                row_tiles.append(datasets[y].read(x))
            # tiles of one mosaic row sit side by side -> join along the column axis
            mosaic_rows.append(np.concatenate(row_tiles, axis=1))

        # Mosaic rows are joined along the row axis; the leading length-1 axis
        # is the axis the bands are stacked along inside the HDF5 dataset.
        OneYrOcc = np.concatenate(mosaic_rows, axis=0)[np.newaxis]

        if x == 1:
            # First band creates the dataset; maxshape=None on every axis keeps it resizable.
            hf.create_dataset('Altiplano', data=OneYrOcc, dtype='u2', compression="gzip", chunks=True, maxshape=(None,None,None))
        else:
            # Grow the stack by one slot and write the new band into the last slot.
            hf["Altiplano"].resize(hf["Altiplano"].shape[0] + 1, axis=0)
            hf["Altiplano"][-1:] = OneYrOcc
# `hf` is closed at this point; reopen the file to read the stack back.

In [None]:
# The cell that writes AltiplanoV2.h5 closes `hf` when it finishes, so reopen
# the file before reading from it. Open it read-only: mode 'w' would truncate
# the file and wipe the stacked bands.
if 'hf' not in globals() or not hf:  # a closed h5py File is falsy
    hf = h5py.File('AltiplanoV2.h5', 'r')
d = hf['./Altiplano']          # Pointer on on-disk array

In [None]:
# Wrap the on-disk HDF5 dataset as a chunked dask array before reducing, so the
# chunking is chosen explicitly instead of being left to an implicit conversion.
stack = da.from_array(d, chunks='auto')
# Per-pixel maximum over axis 0 (the axis the bands were stacked along).
# The result is lazy: it is only evaluated when it is stored or computed.
OccIm = da.max(stack, axis=0)

In [None]:
from PIL import Image

# Export a single layer of the stack (index 15 along axis 0) as a TIFF.
layer = d[15]
im = Image.fromarray(layer)
im.save("your_file.tif")

In [None]:
# Output file for the reduced raster; mode 'a' opens it read/write and creates it if missing.
f = h5py.File('OccIm.hdf5', 'a')

In [None]:
# Allocate the on-disk target: same shape as OccIm, with automatically chosen chunks.
# NOTE(review): no dtype is passed, so h5py's default is used rather than OccIm's dtype; confirm this is intended.
target_shape = OccIm.shape
dset = f.create_dataset('/data', shape=target_shape, chunks=True)

In [None]:
# Evaluate the lazy reduction and write the result into the HDF5 dataset.
da.store(OccIm,dset)
#need to close to sync written data to disk
f.close()

In [3]:
# Open the four river-mask tiles (SD19, SD20, SE19, SE20).
# Reminder: the mask is built around the cropped data that was actually used,
# not around the full dataset imported earlier in the notebook.
data1, data2, data3, data4 = (
    rasterio.open(mask_path)
    for mask_path in (
        '/home/dylan/GlobalAvulsion/GlobalAvulsionData/RiverMaskData/TestRegionMaskData/SD19.tif',
        '/home/dylan/GlobalAvulsion/GlobalAvulsionData/RiverMaskData/TestRegionMaskData/SD20.tif',
        '/home/dylan/GlobalAvulsion/GlobalAvulsionData/RiverMaskData/TestRegionMaskData/SE19.tif',
        '/home/dylan/GlobalAvulsion/GlobalAvulsionData/RiverMaskData/TestRegionMaskData/SE20.tif',
    )
)

In [None]:
#get data shape and bounds
# `.shape` is (height, width) taken from the file metadata: the same tuple as
# data2.read(1).shape, but without reading the whole band into memory.
print(data2.shape)
print(data2.bounds)

In [4]:
#Source transforms for the 4 datasets.
# Taken from each file's own georeferencing instead of re-typed bounds and
# pixel counts, so every transform is guaranteed to match the raster it describes.
SrcTran1 = data1.transform
SrcTran2 = data2.transform
SrcTran3 = data3.transform
SrcTran4 = data4.transform

In [5]:
#create a transform for the 2 UTM zones into WGS84 lat long(EPSG:4326)
#UTM zone 19S id: EPSG:32719
crs19 = rasterio.crs.CRS.from_epsg(32719)
#UTM zone 20S id: EPSG:32720
crs20 = rasterio.crs.CRS.from_epsg(32720)
crsWGS = rasterio.crs.CRS.from_epsg(4326)


def _wgs84_grid(src, src_crs):
    """Work out the WGS84 output grid for one source raster.

    The raster's size and bounds are read from `src` itself, so they cannot
    drift out of sync with the file.

    Returns (transform, width, height, array): the destination affine
    transform, the destination size in pixels, and a zero-filled array of
    shape (height, width) to be used as the reprojection destination.
    """
    bounds = src.bounds
    transform, width, height = rasterio.warp.calculate_default_transform(
        src_crs, crsWGS, src.width, src.height,
        left=bounds.left, bottom=bounds.bottom, right=bounds.right, top=bounds.top)
    #make destination array based off the new width and height
    return transform, width, height, np.zeros((height, width))


# NOTE: despite the "UTM" prefix, these transforms describe the WGS84 output
# grids; the prefix records which UTM zone the source tile came from.
#transform for data1
UTM19tran1, newwidth1, newheight1, data1tran = _wgs84_grid(data1, crs19)

#transform for data2
UTM20tran1, newwidth2, newheight2, data2tran = _wgs84_grid(data2, crs20)

#transform for data3
UTM19tran2, newwidth3, newheight3, data3tran = _wgs84_grid(data3, crs19)

#transform for data4
UTM20tran2, newwidth4, newheight4, data4tran = _wgs84_grid(data4, crs20)

In [6]:
def _warp_to_wgs84(src, dest, src_transform, dst_transform, src_crs):
    """Warp all bands of `src` into `dest` (filled in place) on the WGS84 grid.

    Returns whatever rasterio.warp.reproject returns.
    """
    return rasterio.warp.reproject(
        src.read(),
        destination=dest,
        src_transform=src_transform,
        dst_transform=dst_transform,
        src_crs=src_crs,
        dst_crs=crsWGS,
    )


# tiles 1 and 3 are in UTM zone 19S, tiles 2 and 4 in UTM zone 20S
_warp_to_wgs84(data1, data1tran, SrcTran1, UTM19tran1, crs19)
_warp_to_wgs84(data2, data2tran, SrcTran2, UTM20tran1, crs20)
_warp_to_wgs84(data3, data3tran, SrcTran3, UTM19tran2, crs19)
# left as the last expression so the notebook still displays its result
_warp_to_wgs84(data4, data4tran, SrcTran4, UTM20tran2, crs20)

(array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 Affine(0.00027941101328185643, 0.0, -66.06942332184036,
        0.0, -0.00027941101328185643, -15.978193309871381))

In [7]:
#now rewrite reprojected data using your new transforms so that you can merge them. Only need to do this once!!!
def _write_wgs84_tiff(path, array, transform):
    """Write a 2-D array as a single-band WGS84 GeoTIFF at `path`.

    The context manager closes the file (syncing the data to disk) even if
    the write fails. The closed dataset object is returned.
    """
    with rasterio.open(path, 'w', driver='GTiff',
                       height=array.shape[0], width=array.shape[1], count=1,
                       dtype=array.dtype, crs=crsWGS, transform=transform) as dst:
        dst.write(array, 1)
    return dst


data1reproj = _write_wgs84_tiff(
    '/home/dylan/GlobalAvulsion/GlobalAvulsionData/RiverMaskData/TestRegionMaskData/SD19WGS84.tif',
    data1tran, UTM19tran1)

data2reproj = _write_wgs84_tiff(
    '/home/dylan/GlobalAvulsion/GlobalAvulsionData/RiverMaskData/TestRegionMaskData/SD20WGS84.tif',
    data2tran, UTM20tran1)

data3reproj = _write_wgs84_tiff(
    '/home/dylan/GlobalAvulsion/GlobalAvulsionData/RiverMaskData/TestRegionMaskData/SE19WGS84.tif',
    data3tran, UTM19tran2)

data4reproj = _write_wgs84_tiff(
    '/home/dylan/GlobalAvulsion/GlobalAvulsionData/RiverMaskData/TestRegionMaskData/SE20WGS84.tif',
    data4tran, UTM20tran2)

In [8]:
#now reopen synced data in 'r' mode and merge datasets
data1reproj = rasterio.open('/home/dylan/GlobalAvulsion/GlobalAvulsionData/RiverMaskData/TestRegionMaskData/SD19WGS84.tif')
data2reproj = rasterio.open('/home/dylan/GlobalAvulsion/GlobalAvulsionData/RiverMaskData/TestRegionMaskData/SD20WGS84.tif')
data3reproj = rasterio.open('/home/dylan/GlobalAvulsion/GlobalAvulsionData/RiverMaskData/TestRegionMaskData/SE19WGS84.tif')
data4reproj = rasterio.open('/home/dylan/GlobalAvulsion/GlobalAvulsionData/RiverMaskData/TestRegionMaskData/SE20WGS84.tif')

#cropping actually gets done in this line! The `bounds` argument allows you to set bounds that are different than the merged data bounds
#bounds of your cropped surface water data, ordered (left, bottom, right, top)
crop_bounds = (-68.6533413469566,-17.202782606653027,-64.611192063004,-13.160633322700418)
# All four reprojected tiles take part in the merge. Leaving SE20 out would
# leave the mask empty wherever SE20 is the only tile with data inside the
# crop window.
mergDat,mergedtransform = rasterio.merge.merge([data1reproj,data2reproj,data3reproj,data4reproj], bounds=crop_bounds)

In [9]:
# Resample the merged mask onto the exact target grid: the merged transform and
# the target transform differ by about .00001 in lat and long, so one more
# reprojection is used to make the scales match exactly.
crop_bounds = (-68.6533413469566, -17.202782606653027, -64.611192063004, -13.160633322700418)
grid_shape = (14999, 14999)  # (rows, columns) of the target grid
mergDatReProj = np.zeros(grid_shape)
mertran = rasterio.transform.from_bounds(*crop_bounds, 14999, 14999)
# source and destination share the same CRS; only the pixel grid changes
rasterio.warp.reproject(
    mergDat,
    destination=mergDatReProj,
    src_transform=mergedtransform,
    dst_transform=mertran,
    src_crs=crsWGS,
    dst_crs=crsWGS,
)

(array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 Affine(0.0002694945852358555, 0.0, -68.6533413469566,
        0.0, -0.00026949458523585636, -13.160633322700418))

In [11]:
# Extent, ordered (left, bottom, right, top), and the affine transform of a
# grid 26580 pixels wide and 37354 pixels high laid over it.
left, bottom, right, top = (
    -71.61791653184363,
    -23.22719931230798,
    -64.45475045627458,
    -13.1604985754078,
)
Tran = rasterio.transform.from_bounds(left, bottom, right, top, width=26580, height=37354)
Tran

Affine(0.000269494585235856, 0.0, -71.61791653184363,
       0.0, -0.0002694945852358564, -13.1604985754078)

In [12]:
#now save the georeferenced river mask for your study area!
# NOTE: this is an alias, not a copy, so the thresholding below also modifies
# mergDatReProj in place.
mergDatmask = mergDatReProj
# keep only the pixels equal to 255; every other value becomes 0
mergDatmask[mergDatmask != 255] = 0
# take the raster size from the array itself so it always matches the data written
mask_height, mask_width = mergDatmask.shape
# the context manager closes the file (syncing it to disk) even if the write fails
with rasterio.open('/home/dylan/GlobalAvulsion/GlobalAvulsionData/RiverMaskData/FinalTestRegionMask.tif',
                   'w', driver='GTiff', height=mask_height, width=mask_width, count=1,
                   dtype=mergDatmask.dtype, crs=crsWGS, transform=mertran) as regionmask:
    regionmask.write(mergDatmask,1)

In [14]:
# Display the (rows, columns) size of the resampled mask.
np.shape(mergDatReProj)

(14999, 14999)