In [1]:
import os
import matplotlib.pyplot as plt
import glob
import rasterio
from rasterio.merge import merge
from rasterio.plot import show
from rasterio.mask import mask
from rasterstats import zonal_stats
import fiona
import geopandas as gpd
import xarray as xr
import numpy as np
import rioxarray
import pandas as pd
import dask

# Create Land Cover Masks

In [None]:
def create_land_mask(in_dir, out_dir, lc_num):
    """Write a float32 presence mask of one land-cover class for every raster.

    Each file found in ``in_dir`` is read, cells equal to ``lc_num`` become 1
    and every other cell becomes NaN, and the result is written to
    ``out_dir`` as ``<source stem>_<lc_num>.tif``.

    Args:
        in_dir: directory containing the source land-cover rasters.
        out_dir: directory that receives the mask rasters.
        lc_num: land-cover class value to keep.
    """
    for filename in sorted(os.listdir(in_dir)):
        base = os.path.splitext(filename)[0]
        print(base)

        # Read the source completely before creating the output, so a failed
        # read cannot leave an empty output file behind, and so the source
        # handle is always released.
        with rasterio.open(os.path.join(in_dir, filename)) as src:
            meta = src.meta.copy()
            data = src.read().astype(np.float32)

        # NOTE(review): the profile declares nodata as -3.4e+38 while the
        # masked-out cells written below are NaN.
        meta.update(dtype='float32', nodata=-3.4e+38)

        mask = np.where(data == lc_num, np.float32(1), np.float32(np.nan))

        out_path = os.path.join(out_dir, base + '_' + str(lc_num) + '.tif')
        with rasterio.open(out_path, 'w', **meta) as dst:
            dst.write(mask)

In [None]:
# Inputs for create_land_mask: source rasters, destination for the masks,
# and the land-cover class value to keep.
landcover_in_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/MCD12C1/Original/'
landcover_out_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/MCD12C1/Savannas_Mask/'
# presumably class 9 is the savannas class, given the output folder name —
# TODO confirm against the MCD12C1 legend
lc_class = 9



In [None]:
# Write one <stem>_<lc_class>.tif mask into landcover_out_dir for every file
# in landcover_in_dir.
create_land_mask(landcover_in_dir, landcover_out_dir, lc_class)

In [None]:
# Spot-check the 2016 savannas mask: keep the dataset open as lc_test (it is
# reused by later cells), read all bands, and display the raster profile.
lc_test = rasterio.open('/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/MCD12C1/Savannas_Mask/MCD12C1_T1_2016_9.tif')

lc_array = lc_test.read()
lc_test.meta

In [None]:
# Same spot-check for the 2014 savannas mask; the dataset stays open because
# the following cells read from it.
savannas_test = rasterio.open('/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/MCD12C1/Savannas_Mask/MCD12C1_T1_2014_9.tif')
savannas_test.meta


In [None]:
# Display every band of the 2014 mask as an array (numpy abbreviates the repr).
savannas_test.read()

In [None]:
# Largest non-NaN value in the 2014 mask. np.nanmax ignores the NaN cells
# explicitly instead of relying on np.unique sorting NaN to the end before
# the builtin max() walks the values.
np.nanmax(savannas_test.read())

In [None]:
# Display the array read from the 2016 savannas mask (lc_test).
lc_array

In [None]:
# Plot the 2014 savannas mask.
show(savannas_test)

In [None]:
# NOTE(review): lst_test is only assigned further down the notebook, so this
# cell raises NameError on a fresh top-to-bottom run unless it is moved below
# that assignment.
show(lst_test)

In [None]:
# NOTE(review): chirps_test is only assigned further down the notebook, so
# this cell raises NameError on a fresh top-to-bottom run.
chirps_test.meta

In [None]:
# NOTE(review): depends on chirps_test, which is assigned in a later cell.
show(chirps_test)

In [None]:
# Land-cover mask directories, one per class.
grassland_mask_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/MCD12C1/Grassland_Mask/'
cropland_mask_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/MCD12C1/Cropland_Mask_1/'
cropnatveg_mask_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/MCD12C1/CropNatVeg_Mask/'
savannas_mask_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/MCD12C1/Savannas_Mask/'
woodysavannas_mask_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/MCD12C1/WoodySavannas_Mask/'
evergreenbroad_mask_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/MCD12C1/EvergreenBroad_Mask/'
deciduousbroad_mask_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/MCD12C1/DeciduousBroad_Mask/'
openshrublands_mask_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/MCD12C1/OpenShrublands_Mask/'
closedshrublands_mask_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/MCD12C1/ClosedShrublands_Mask/'

# Output directories for the variables after masking, one per class.
# Only the grassland and cropland paths carry a Version sub-folder.
grassland_masked_out_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/VariablesByLandCover/Variables_Grasslands/Version2/'
cropland_masked_out_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/VariablesByLandCover/Variables_Croplands/Version3/'
cropnatveg_masked_out_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/VariablesByLandCover/Variables_CropNatVeg/'
savannas_masked_out_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/VariablesByLandCover/Variables_Savannas/' 
woodysavannas_masked_out_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/VariablesByLandCover/Variables_WoodySavannas/'
evergreenbroad_masked_out_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/VariablesByLandCover/Variables_EvergreenBroad/'
deciduousbroad_masked_out_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/VariablesByLandCover/Variables_DeciduousBroad/'
openshrublands_masked_out_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/VariablesByLandCover/Variables_OpenShrublands/'
closedshrublands_masked_out_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/VariablesByLandCover/Variables_ClosedShrublands/'

# Input directories of the six variables to mask:
# CHIRPS, CHIRTS, eMODIS NDVI, FLDAS soil moisture, Hobbins ET and LST.
all_dirs = ['/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/CHIRPS/Resampled/', '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/CHIRTS/Dekads/', '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/eMODIS_NDVI/Resampled/', '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/FLDAS_SM/Dekads/', '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/Hobbins_ET/Resampled/', '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/LST/Resampled/']

In [None]:
def rastercalc_lcmask(lcmask_dir, param_dirs, out_dir, prefix='savannas_',
                      years=range(2002, 2017)):
    """Multiply every variable raster by the land-cover mask of the same year.

    For each year, every mask file in ``lcmask_dir`` whose name contains the
    year is paired with every file in each ``param_dirs`` folder whose name
    contains the same year. Band 1 of the variable is multiplied by band 1 of
    the mask and written to ``out_dir`` as ``<prefix><variable filename>``
    using the variable raster's own profile.

    Args:
        lcmask_dir: directory holding the yearly land-cover mask rasters.
        param_dirs: iterable of directories holding the variable rasters.
        out_dir: directory that receives the masked rasters.
        prefix: string prepended to each output filename. The default keeps
            the name this function always used.
        years: iterable of years to process; defaults to 2002-2016.
    """
    lcfiles = sorted(os.listdir(lcmask_dir))

    for yr in years:
        yr_tag = str(yr)
        # The year is matched anywhere in the filename, as before.
        for lc in (name for name in lcfiles if yr_tag in name):
            # Read the mask band once per mask file, not once per variable.
            with rasterio.open(os.path.join(lcmask_dir, lc)) as lc_src:
                lc_band = lc_src.read(1)

            # Use the param_dirs argument; the previous code ignored it and
            # read the notebook-level all_dirs instead.
            for folder in param_dirs:
                for var in sorted(os.listdir(folder)):
                    if yr_tag not in var:
                        continue
                    with rasterio.open(os.path.join(folder, var)) as var_src:
                        meta = var_src.meta
                        var_lcmask = var_src.read(1) * lc_band
                    # NOTE(review): the output keeps the variable's dtype from
                    # `meta`; confirm it can hold the mask's NaN cells.
                    out_path = os.path.join(out_dir, prefix + var)
                    with rasterio.open(out_path, 'w', **meta) as dst:
                        dst.write(var_lcmask, 1)

In [None]:
# Mask all six variables with the yearly savannas masks and write the results
# to the savannas output directory.
rastercalc_lcmask(savannas_mask_dir, all_dirs, savannas_masked_out_dir)

In [None]:
# Open one unmasked, resampled CHIRPS raster (2016.08.3) for comparison with
# the masked output.
chirps_2016 = rasterio.open('/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/CHIRPS/Resampled/rs_chirps-v2.0.2016.08.3.tif')


In [None]:
# Plot the unmasked CHIRPS raster.
show(chirps_2016)

In [None]:
# NOTE(review): the variable is named lst_2016 but the file opened is the
# 2002.081 LST raster — confirm which year was intended.
lst_2016 = rasterio.open('/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/LST/Resampled/rs_lst.2002.081.tif')

In [None]:
# Minimum over all bands. np.min propagates NaN, so this shows nan if the
# raster contains any NaN cell (the CHIRPS check below uses np.nanmin).
np.min(lst_2016.read())

In [None]:
# Smallest non-NaN value in band 1 of the unmasked CHIRPS raster.
np.nanmin(chirps_2016.read(1))

In [None]:
# Apply the 2016 savannas mask (lc_test) to CHIRPS by hand: this is the same
# band-1 product that rastercalc_lcmask writes to disk.
chirps_savanna = lc_test.read(1)*chirps_2016.read(1)

In [None]:
# Plot the hand-masked CHIRPS array.
show(chirps_savanna)

In [None]:
# Smallest non-NaN value left after masking.
np.nanmin(chirps_savanna)

In [None]:
# Open two of the rasters written by rastercalc_lcmask (savannas-masked
# CHIRPS and LST) to inspect the on-disk result. Both datasets stay open.
chirps_test = rasterio.open('/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/VariablesByLandCover/Variables_Savannas/savannas_rs_chirps-v2.0.2016.08.3.tif')
lst_test = rasterio.open('/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/VariablesByLandCover/Variables_Savannas/savannas_rs_lst.2002.081.tif')

In [None]:
# Display the masked LST dataset opened in the previous cell. The bare name
# `lst` was never defined in this notebook and raised NameError.
lst_test

In [None]:
# Read every band of the masked CHIRPS raster and plot it.
chirps_array = chirps_test.read()
show(chirps_array)

In [None]:
# Read every band of the masked LST raster and plot it.
lst_array = lst_test.read()
show(lst_array)

In [None]:
# Display the masked CHIRPS values (numpy abbreviates the repr).
chirps_array

# Reassign cloud mask value before calculating stats

In [2]:
def reassign_masks(in_dir):
    """Replace every negative cell with NaN, in place, in each GeoTIFF of a folder.

    Files are opened in update mode (``'r+'``) and overwritten, so the
    original values are not recoverable afterwards.

    Args:
        in_dir: directory containing the rasters to rewrite.
    """
    # Match on the real file extension. The previous regex test
    # (`str.contains('.tif')`) treated '.' as a wildcard and matched anywhere
    # in the name, so sidecars such as 'x.tif.aux.xml' were also opened for
    # writing. A plain list also copes with an empty directory.
    tif_names = [name for name in sorted(os.listdir(in_dir))
                 if name.lower().endswith(('.tif', '.tiff'))]

    for filename in tif_names:
        with rasterio.open(os.path.join(in_dir, filename), 'r+') as ds:
            print(filename)
            values = ds.read()  # all bands
            # assumes a floating-point raster; NaN cannot be stored in an
            # integer array — TODO confirm for every input
            values[values < 0] = np.nan
            ds.write(values)

In [None]:
# Directory whose rasters reassign_masks rewrites in place. The assignment
# previously had no right-hand side (a SyntaxError); the value restored here
# is the path that had been commented out beneath it.
variables_mask_in_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/VariablesByLandCover/Variables_Croplands/Version3/'



In [None]:
# Destructive: rewrites every raster in variables_mask_in_dir, replacing
# negative cells with NaN.
reassign_masks(variables_mask_in_dir)

In [None]:
# Open one cropland-masked LST raster from the Version3 folder and display
# its profile.
raster_test = rasterio.open('/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/VariablesByLandCover/Variables_Croplands/Version3/croplands_rs_lst.2003.083.tif')

raster_test.meta

In [None]:
# Plot the cropland-masked LST raster.
show(raster_test)

In [None]:
# Read every band and display the values.
raster_array = raster_test.read()
raster_array

#raster_test.meta

# Rasterstats with Admin Boundaries

In [None]:
def zone_stat(raster, band, polygon, stats):
    """
    Compute zonal statistics of one raster band for each polygon.

    NaN is passed to rasterstats as the nodata value.

    Args: raster = open raster dataset; its read() and meta['transform'] are used
          band = index of the band to read
          polygon = polygons to calc zonal stats over, passed straight to zonal_stats
          stats = stat(s) to calculate, e.g. 'mean'

    Returns: the value returned by rasterstats.zonal_stats
    """
    band_values = raster.read(band)
    affine = raster.meta['transform']
    return zonal_stats(
        polygon,
        band_values,
        affine=affine,
        nodata=np.nan,
        stats=stats,
    )

In [None]:
def var_poly_join(in_dir, gdf):
    """Append one zonal-mean column per GeoTIFF in ``in_dir`` to ``gdf``.

    For every raster, the band-1 mean inside each polygon of ``gdf`` is stored
    in a new column named ``Mean_<raster stem>``.

    Args:
        in_dir: directory containing the rasters to summarise.
        gdf: GeoDataFrame of polygons. It is modified in place.

    Returns:
        The same ``gdf`` object, with the new columns added.
    """
    # Match on the real extension; the previous regex test
    # (`str.contains('.tif')`) also matched sidecars such as 'x.tif.aux.xml'.
    tif_names = [name for name in sorted(os.listdir(in_dir))
                 if name.lower().endswith(('.tif', '.tiff'))]

    for filename in tif_names:
        print(filename)
        # Close each raster as soon as its statistics are computed.
        with rasterio.open(os.path.join(in_dir, filename)) as raster:
            stats = list(zone_stat(raster, 1, gdf, 'mean'))
        if stats:
            print(stats[-1])
        name = os.path.splitext(os.path.basename(filename))[0]
        # Assign by position. Assigning a DataFrame built from `stats` would
        # align on a fresh 0..n-1 index and give NaN whenever gdf's index is
        # not the default RangeIndex.
        gdf['Mean' + "_" + name] = [zone['mean'] for zone in stats]
    return gdf

In [None]:
# Land-cover-masked rasters to summarise; currently the cropland Version3 set.
lcmasked_in_dir = cropland_masked_out_dir 

In [None]:
# Administrative boundary layers, each loaded as a GeoDataFrame:
# an Africa-wide layer, an East Africa (GADM 3.6) layer, and Oromia.
adminbds = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/AdminBoundaries/Africa_zones_2019/g2008_af_1.shp'
bndry = gpd.read_file(adminbds)

ea_adminbds = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/AdminBoundaries/gadm36_EastAfrica.shp'
ea_bndry = gpd.read_file(ea_adminbds)

oromia = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/AdminBoundaries/Ethiopia/oromia.shp'
oromia_bndry = gpd.read_file(oromia)

In [None]:
# Display the Oromia boundary table.
oromia_bndry

In [None]:
# Add one zonal-mean column per masked raster to the Oromia polygons.
# var_poly_join adds the columns to oromia_bndry itself and returns it, so
# output_shp and oromia_bndry are the same object; re-running this cell
# recomputes and overwrites those columns.
output_shp = var_poly_join(lcmasked_in_dir, oromia_bndry)

In [None]:
# Drop the geometry column so the result is a plain table for CSV export.
output_df = pd.DataFrame(output_shp.drop(columns='geometry'))

In [None]:
# Preview the first rows of the zonal-mean table.
output_df.head()

In [None]:
# Save the Oromia zonal means (the DataFrame index is written too).
# The filename is hard-coded to croplands — update it if lcmasked_in_dir
# points at another land-cover class.
output_df.to_csv('/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/Final_TS/'+'oromia_croplands_1.csv')



# Trying dask delayed

In [None]:
@dask.delayed
def read_and_mean(filename, in_dir, admin_gdf):
    """
    Lazily compute the band-1 zonal mean of one raster for every polygon.

    Because of the dask.delayed decorator, calling this only builds a task;
    the raster is opened when the task is computed.

    input:
    filename - name of a raster file inside in_dir
    in_dir - directory of masked land cover type
    admin_gdf - administrative boundary layer as geodataframe

    returns: a one-entry dict mapping 'Mean_<raster stem>' to the zonal
    statistics returned by zone_stat
    """
    print(filename)
    # Close the dataset once the statistics are computed; each task used to
    # leave its file handle open.
    with rasterio.open(os.path.join(in_dir, filename)) as raster:
        mean = zone_stat(raster, 1, admin_gdf, 'mean')
    name = os.path.splitext(os.path.basename(filename))[0]
    return {'Mean' + "_" + name: mean}

In [None]:
# Build one lazy zonal-mean task per masked raster; nothing is computed here.
# `in_dir` was never defined at notebook level, so it is bound to the masked
# raster directory selected earlier (lcmasked_in_dir).
in_dir = lcmasked_in_dir

# Match on the real extension rather than the regex '.tif', which also
# matched sidecars such as 'x.tif.aux.xml'.
files = [name for name in sorted(os.listdir(in_dir))
         if name.lower().endswith(('.tif', '.tiff'))]

admin_mean_list = [read_and_mean(filename, in_dir, bndry) for filename in files]


In [None]:
# FIXME(review): this looks like a leftover tab-completion stub — presumably
# numpy has no attribute `nanx`, in which case the cell raises AttributeError.
# Confirm and delete.
np.nanx

In [None]:
# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top.
# Client() is created with no arguments and displayed as the cell output.
from dask.distributed import Client
client = Client()
client

In [None]:
# NOTE(review): client.compute presumably returns futures rather than the
# zonal means themselves, and the result is not stored; gather it (e.g. with
# client.gather) to use the values. Also confirm that `scheduler='processes'`
# has any effect when the work is submitted through a distributed Client.
client.compute(admin_mean_list,scheduler = 'processes')

In [None]:
#var_grass_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/Variables_Croplands/'
#year_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/Variables_Grasslands/2016/'

In [None]:
#output_df = output_df[output_df.columns.drop(list(output_df.filter(regex='2014')))] 
#if you want to reverse and remove a specific year from the appended dataframe

# Remove nan pixels from lst

In [None]:
# NOTE(review): from here on lst_test is a path string, not the open dataset
# assigned earlier in the notebook, and the file it names is a CHIRPS raster
# despite the variable name. Confirm the intended file.
lst_test = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/VariablesByLandCover/Variables_Clos/Version2/cropland_rs_chirps-v2-Copy1.0.2002.07.1.tif'

In [None]:
# lst_test holds a file path at this point, so open it before reading band 1.
# Calling .read() on the string raised AttributeError.
with rasterio.open(lst_test) as lst_src:
    lst_array = lst_src.read(1)

In [None]:
# Minimum of band 1 before any values are replaced.
lst_array.min()

In [None]:
# Destructive: opens the file in update mode and overwrites it.
with rasterio.open(lst_test, 'r+') as ds:
    a = ds.read()  # read all raster values (every band)
    #lst_array[np.where(lst_array<=0)]
    a[a < 0] = 0  # replace every negative value with 0
    ds.write(a)

In [None]:
# Re-open the rewritten file and check the minimum after the replacement.
lst_test2 = rasterio.open(lst_test)
lst_arr = lst_test2.read()
lst_arr.min()

In [None]:
# Number of cells that are still negative after the rewrite.
np.count_nonzero(lst_arr < 0)

In [None]:
# Plot the rewritten raster.
show(lst_test2)

In [None]:
# lst_test is a path string here, so read the band-1 mask from the dataset
# opened from that path (lst_test2). Calling .read_masks() on the string
# raised AttributeError.
lst_mask = lst_test2.read_masks(1)

In [None]:
# Display the band-1 mask array.
lst_mask

In [None]:
# Dimensions of the array held in lst_array.
lst_array.shape

In [None]:
# Values of the cells that are zero or negative.
lst_array[np.where(lst_array<=0)]

In [None]:
# Index arrays locating the zero-or-negative cells.
np.where(lst_array<=0)

In [None]:
# FIXME(review): lc_mask is not defined at notebook level (it only exists as
# a local inside rastercalc_lcmask), so this cell raises NameError as written.
# Destructive: the file is opened in update mode and overwritten with a 0/1
# mask for value 4.
with rasterio.open(lc_mask, 'r+') as ds:
    a = ds.read()  # read all raster values
    a[a!=4] = 0  # set every value other than 4 to 0 (presumably 4 = cropland here — TODO confirm)
    a[a==4] = 1  # then mark the cells equal to 4 with 1
    ds.write(a)

In [None]:
def create_land_mask(in_dir, out_dir, lc_num):
    """Write a float32 presence mask of one land-cover class for every raster.

    This redefinition shadows the earlier ``create_land_mask`` cell. The
    in-place attempt it replaces could not run (it called ``ds.meta()``,
    which is a dict, and stored NaN in the un-cast source array) and would
    have overwritten the source rasters while ignoring ``out_dir``. It now
    reads each source read-only and writes ``<source stem>_<lc_num>.tif``
    into ``out_dir`` with the same profile settings as the earlier
    definition, so outputs do not depend on which cell ran last.

    Args:
        in_dir: directory containing the source land-cover rasters.
        out_dir: directory that receives the mask rasters.
        lc_num: land-cover class value to keep (1 where equal, NaN elsewhere).
    """
    for filename in sorted(os.listdir(in_dir)):
        base = os.path.splitext(filename)[0]
        print(base)

        with rasterio.open(os.path.join(in_dir, filename)) as src:
            meta = src.meta.copy()  # .meta is a property, not a method
            rast = src.read().astype(np.float32)  # float so NaN can be stored

        # Compute the comparison once, before any cell is overwritten.
        keep = rast == lc_num
        rast[~keep] = np.nan
        rast[keep] = 1

        meta.update(dtype='float32', nodata=-3.4e+38)

        out_path = os.path.join(out_dir, base + '_' + str(lc_num) + '.tif')
        with rasterio.open(out_path, 'w', **meta) as dst:
            dst.write(rast)
                
                
                
#         rast = rasterio.open(in_dir+filename)
#         meta = rast.meta
#         meta.update(
#             dtype='float32')
#         #rast.close()
#         print(meta)
   
        
#         with rasterio.open(out_dir+base+'_' + str(lc_num) + '.tif', 'w', **meta) as dst:
#             #with rasterio.open(in_dir+filename) as src:
#                 data = rast.read()
#                 data[data!=lc_num] = -9999
#                 data[data==lc_num] = 1
#                 dst.write(data)