In [55]:
import os
import datetime
from pyprojroot import here
import pandas as pd
from osgeo import gdal
import numpy as np

In [113]:
# list all the ECOSTRESS files
file_list = [s for s in os.listdir(str(here("./data/raw/ECOSTRESS/"))) if s.endswith('.tif')]

In [143]:
str(here("./data/raw/ECOSTRESS/"))

'/Users/annaboser/Documents/GitHub/ET_agriculture/data/raw/ECOSTRESS'

In [147]:
# reproject each raster to the CA_grid's GeoTransform
CA_grid = gdal.Open(str(here("./data/intermediate/CA_grid.tif")))

# get the pixel size of the CA grid
CA_grid.GetGeoTransform() # https://svn.osgeo.org/gdal/trunk/autotest/alg/reproject.py

# make a new location for the reprojected images
outdir = str(here("./data/intermediate/ECOSTRESS/resampled/"))
if not os.path.exists(outdir):
    os.makedirs(outdir)

# define function to change pixel size
def CA_pixel(file):
    input_dir = str(here("./data/raw/ECOSTRESS/"))
    output_dir = str(here("./data/intermediate/ECOSTRESS/resampled/"))
    # command = "gdalwarp -tr 5 -5 inputRaster.tif outputRaster.tif"
    gdal.Warp(output_dir + "/" + file, input_dir + "/" + file, xRes=0.0006309954707866042, yRes=0.0006309954708085768) # https://stackoverflow.com/questions/7719651/how-can-i-change-the-resolution-of-a-raster-using-gdal
    return None

for file in file_list: 
    CA_pixel(file)

In [148]:
# function to parse the file names to return the timestamp or whether it's an ET or uncertainty measure
def parse_filename(file_string,result='timestamp'):
    (root_string,variable,method,source,raster_type,timestamp,end_string) = file_string.split('_')
    if result == 'timestamp':
        return datetime.datetime.strptime(timestamp[3:], "%Y%j%H%M%S")
    elif result == 'raster_type':
        return raster_type
    else:
        return None   

In [149]:
# create a dataframe with each file and time and type information
df = pd.DataFrame(file_list, columns=['name'])
df['timestamp'] = df['name'].apply(parse_filename)
df['raster_type'] = df['name'].apply(parse_filename, result='raster_type')
df['year'] = df['timestamp'].apply(lambda x: x.year)
df['doy'] = df['timestamp'].apply(lambda x: x.timetuple().tm_yday)
df['fullname'] = df['name'].apply(lambda x: str(here("./data/intermediate/ECOSTRESS/resampled/")) + "/" + x)
df['monthday'] = df['timestamp'].apply(lambda x: int(str(x.month)+str(x.day).zfill(2)))

bins = [0, 115, 315, 515, 715, 915, 1115, 1300]
labels = ['NDJ', 'JFM', 'MAM', 'MJJ', 'JAS', 'SAN', 'NDJ']
df['monthgroup'] = pd.cut(df.monthday, bins, labels = labels, include_lowest = True, ordered = False)

In [150]:
df.head()

Unnamed: 0,name,timestamp,raster_type,year,doy,fullname,monthday,monthgroup
0,ECO3ETPTJPL.001_EVAPOTRANSPIRATION_PT_JPL_ETin...,2019-01-28 01:06:58,ETinstUncertainty,2019,28,/Users/annaboser/Documents/GitHub/ET_agricultu...,128,JFM
1,ECO3ETPTJPL.001_EVAPOTRANSPIRATION_PT_JPL_ETin...,2019-02-11 18:42:30,ETinstUncertainty,2019,42,/Users/annaboser/Documents/GitHub/ET_agricultu...,211,JFM
2,ECO3ETPTJPL.001_EVAPOTRANSPIRATION_PT_JPL_ETin...,2019-02-02 21:34:59,ETinst,2019,33,/Users/annaboser/Documents/GitHub/ET_agricultu...,202,JFM
3,ECO3ETPTJPL.001_EVAPOTRANSPIRATION_PT_JPL_ETin...,2019-02-12 01:11:38,ETinst,2019,43,/Users/annaboser/Documents/GitHub/ET_agricultu...,212,JFM
4,ECO3ETPTJPL.001_EVAPOTRANSPIRATION_PT_JPL_ETin...,2019-02-12 17:50:55,ETinst,2019,43,/Users/annaboser/Documents/GitHub/ET_agricultu...,212,JFM


In [151]:
# create the folder to save the merged outputs if it doesn't exist yet
outdir = str(here("./data/intermediate/ECOSTRESS/monthgroup_averaged"))
if not os.path.exists(outdir):
    os.makedirs(outdir)

In [152]:
# get the unique days and raster types you need to create a merged raster for
years = df.year.unique()
monthgroups = df.monthgroup.unique()
raster_types = df.raster_type.unique()

In [169]:
# function to average all the files together using GDAL
def gdal_calc_files(file_list, calc, output=None):
    if output:
        file_string = " ".join(file_list)
        # print("Averaging {files}".format(files=file_string))
        print("Output {output}".format(output=output))
        command = "/usr/local/bin/gdal_calc.py -A {file_string} --outfile {output} --extent=union --debug --calc={calc}".format(
            output=output, 
            file_string=file_string, 
            calc = calc)
#         command = "gdal_merge.py -o {output} -of gtiff ".format(output=output) + file_string # this is a gdal command I know that works and it does create new files with this script. 
        print("Command: {command}".format(command=command))
    else:
        raise ValueError("Must provide output filename as argument")
    print(os.popen(command).read())
    return None

In [170]:
# take the average an pixel count
for year in years:
    for monthgroup in monthgroups:
        for raster_type in raster_types:

            # select the files with the correct time and raster type
            select_file_list = (df[
                (df['year'] == year) & 
                (df['monthgroup'] == monthgroup) & 
                (df['raster_type'] == raster_type)
            ].fullname.to_list())
            
            # define the name of the avg file to save to
            merged_output_filename_avg = "avg_{year}_{monthgroup}_{raster_type}.tif".format(
                raster_type=raster_type,
                monthgroup=monthgroup,
                year=year)
            
            calc = "'numpy.average(A, axis=0)'"
            
            # average and save
            gdal_calc_files(select_file_list, calc, output=outdir + "/" + merged_output_filename_avg)
            
#             # define the name of the count file to save to
#             merged_output_filename_count = "ncount_{year}_{monthgroup}_{raster_type}.tif".format(
#                 raster_type=raster_type,
#                 monthgroup=monthgroup,
#                 year=year)
            
#             calc = "'numpy.sum(A>-3.4e+38, axis=0)'"
            
#             # count and save
#             gdal_calc_files(select_file_list, calc, output=outdir + "/" + merged_output_filename_count)

Output /Users/annaboser/Documents/GitHub/ET_agriculture/data/intermediate/ECOSTRESS/monthgroup_averaged/ncount_2019_JFM_ETinstUncertainty.tif
Command: /usr/local/bin/gdal_calc.py -A /Users/annaboser/Documents/GitHub/ET_agriculture/data/intermediate/ECOSTRESS/resampled/ECO3ETPTJPL.001_EVAPOTRANSPIRATION_PT_JPL_ETinstUncertainty_doy2019028010658_aid0001.tif /Users/annaboser/Documents/GitHub/ET_agriculture/data/intermediate/ECOSTRESS/resampled/ECO3ETPTJPL.001_EVAPOTRANSPIRATION_PT_JPL_ETinstUncertainty_doy2019042184230_aid0001.tif /Users/annaboser/Documents/GitHub/ET_agriculture/data/intermediate/ECOSTRESS/resampled/ECO3ETPTJPL.001_EVAPOTRANSPIRATION_PT_JPL_ETinstUncertainty_doy2019029001615_aid0001.tif /Users/annaboser/Documents/GitHub/ET_agriculture/data/intermediate/ECOSTRESS/resampled/ECO3ETPTJPL.001_EVAPOTRANSPIRATION_PT_JPL_ETinstUncertainty_doy2019027002130_aid0001.tif /Users/annaboser/Documents/GitHub/ET_agriculture/data/intermediate/ECOSTRESS/resampled/ECO3ETPTJPL.001_EVAPOTRANSP

In [164]:
import subprocess
import shlex
import sys
merged_output_filename = "trial.tif"
output= "/Users/annaboser/Documents/GitHub/ET_agriculture/data/intermediate/ECOSTRESS/monthgroup_averaged_trial/" + merged_output_filename
file_string = "/Users/annaboser/Documents/GitHub/ET_agriculture/data/intermediate/ECOSTRESS/resampled/ECO3ETPTJPL.001_EVAPOTRANSPIRATION_PT_JPL_ETinst_doy2019010150436_aid0001.tif /Users/annaboser/Documents/GitHub/ET_agriculture/data/intermediate/ECOSTRESS/resampled/ECO3ETPTJPL.001_EVAPOTRANSPIRATION_PT_JPL_ETinst_doy2019026011306_aid0001.tif"
command = "/usr/local/bin/gdal_calc.py -A {file_string} --outfile {output} --extent=union --debug --calc='numpy.sum(A!=-3.4e+38, axis=0)'".format(
    output=output, 
    file_string=file_string)
print(command)
print(os.popen(command).read())
# args = command.split(' ')
# args2 = [sys.executable] + shlex.split(command)
# print(args2)
# # command = "gdal_merge.py -o {output} -of gtiff ".format(output=output) + file_string
# # command = "gdal_calc.py -A {file_string} -o {output} -of gtiff  --extent=fail --debug --calc=\"numpy.sum(!is.na(A),axis=0)\"".format(
# #     output=output, 
# #     file_string=file_string)
# #subprocess.call(args2)
# print(args2)
# proc = subprocess.Popen(args,  stdout=subprocess.PIPE,stderr=subprocess.PIPE)
# stdout,stderr=proc.communicate()
# print("STDOUT: ",stdout)
# print("STDERR:" ,stderr)

/usr/local/bin/gdal_calc.py -A /Users/annaboser/Documents/GitHub/ET_agriculture/data/intermediate/ECOSTRESS/resampled/ECO3ETPTJPL.001_EVAPOTRANSPIRATION_PT_JPL_ETinst_doy2019010150436_aid0001.tif /Users/annaboser/Documents/GitHub/ET_agriculture/data/intermediate/ECOSTRESS/resampled/ECO3ETPTJPL.001_EVAPOTRANSPIRATION_PT_JPL_ETinst_doy2019026011306_aid0001.tif --outfile /Users/annaboser/Documents/GitHub/ET_agriculture/data/intermediate/ECOSTRESS/monthgroup_averaged_trial/trial.tif --extent=union --debug --calc='numpy.average(A, axis=0)'
gdal_calc.py starting calculation ['numpy.average(A, axis=0)']
file A: /Users/annaboser/Documents/GitHub/ET_agriculture/data/intermediate/ECOSTRESS/resampled/ECO3ETPTJPL.001_EVAPOTRANSPIRATION_PT_JPL_ETinst_doy2019010150436_aid0001.tif, dimensions: 8579, 2096, type: Float32
file A: /Users/annaboser/Documents/GitHub/ET_agriculture/data/intermediate/ECOSTRESS/resampled/ECO3ETPTJPL.001_EVAPOTRANSPIRATION_PT_JPL_ETinst_doy2019026011306_aid0001.tif, dimensions

In [None]:
import matplotlib.pyplot as plt
img = rasterio.open(here("./data/intermediate/PET/PET_yeargrouped_avg.tif")) # NA values are -3.4e+38
array = np.array(img.read())
plt.figure()
plt.imshow(array[0,:,:])
plt.colorbar()
plt.show()

In [168]:
command = "/usr/local/bin/gdal_calc.py -A /Users/annaboser/Documents/GitHub/ET_agriculture/data/intermediate/ECOSTRESS/resampled/ECO3ETPTJPL.001_EVAPOTRANSPIRATION_PT_JPL_ETinst_doy2019207003845_aid0001.tif --outfile /Users/annaboser/Documents/GitHub/ET_agriculture/data/intermediate/ECOSTRESS/monthgroup_averaged/ncount_2019_JAS_ETinst.tif --extent=union --debug --calc='numpy.sum(A>-3.4e+38, axis=0)'"
print(os.popen(command).read())

gdal_calc.py starting calculation ['numpy.sum(A>-3.4e+38, axis=0)']
file A: /Users/annaboser/Documents/GitHub/ET_agriculture/data/intermediate/ECOSTRESS/resampled/ECO3ETPTJPL.001_EVAPOTRANSPIRATION_PT_JPL_ETinst_doy2019207003845_aid0001.tif, dimensions: 5602, 3869, type: Float32
Output file /Users/annaboser/Documents/GitHub/ET_agriculture/data/intermediate/ECOSTRESS/monthgroup_averaged/ncount_2019_JAS_ETinst.tif exists - filling in results into file
output file: /Users/annaboser/Documents/GitHub/ET_agriculture/data/intermediate/ECOSTRESS/monthgroup_averaged/ncount_2019_JAS_ETinst.tif, dimensions: 5602, 3869, type: Float32
using blocksize 5602 x 1
0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 0.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 1.. 2.. 2.. 2.. 2.. 2.. 2.. 2.. 2.. 2