In [None]:
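# This notebook modifies Tej's code to only include dates in the first half of the year.
# Previously: 23 16-day periods covering the full year.
# Intended: 11 16-day periods (176 days, until ~June 25).
# NOTE: as written, the code below builds 11 period *boundary* dates, which gives 10 complete 16-day periods
# (days 1-160, ending ~June 9 in a non-leap year). Covering 11 periods would need a 12th boundary date.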

In [1]:
import ee

# Connect to Earth Engine; if initialisation fails for any reason (presumably missing
# credentials), run the authentication flow once and initialise again.
try:
    ee.Initialize()
except Exception:
    ee.Authenticate()
    ee.Initialize()

*** Earth Engine *** FINAL DEADLINE: ee.Authenticate will fail after 2022-06-06. Please upgrade. https://developers.google.com/earth-engine/guides/python_install


In [2]:
import csv
import json
import os
from datetime import datetime, timedelta
from io import StringIO

import geopandas as gpd
import numpy as np
import pandas as pd

# import matplotlib.pyplot as plt 
# import folium
# import matplotlib as mpl
# import matplotlib.animation as animation
# from multiprocessing import Pool

In [3]:
# ee_afg=ee.FeatureCollection("FAO/GAUL/2015/level0").filter(ee.Filter.eq('ADM0_NAME', 'Afghanistan'))
# ee_hilmand=ee.FeatureCollection("FAO/GAUL_SIMPLIFIED_500m/2015/level1").filter(ee.Filter.eq('ADM1_NAME', 'Hilmand'))
# ee_hilmand_dist=ee.FeatureCollection("FAO/GAUL_SIMPLIFIED_500m/2015/level2").filter(ee.Filter.eq('ADM1_NAME', 'Hilmand'))

In [4]:
# afg_shp=gpd.read_file('district398/district398.shp')
# Load the tile grid shapefile into a GeoDataFrame. imagetobands() looks tiles up in this
# frame by row index, so this cell must run before any tile is processed.
# NOTE(review): the "025" in the file name presumably refers to the grid cell size — confirm.
tiles_25 = gpd.read_file('/data/afg_satellite/Grids/AFG_025_grid.shp')

In [5]:
def shp_to_fc(file)->ee.FeatureCollection:
    """
    Convert the simple polygons of a shapefile/GeoDataFrame to an Earth Engine FeatureCollection.

    Each geometry in ``file.geometry`` becomes one ``ee.Feature`` whose geometry is an
    ``ee.Geometry.Polygon`` built from the polygon's exterior ring only.

    Note: doesn't seem to work very well with disjoint polygons/ nested polygons.
    Interior rings (holes) are not transferred, and a geometry without an ``exterior``
    attribute (e.g. a multi-part geometry) raises AttributeError.

    Parameters
    ----------
    file : object exposing a ``geometry`` iterable of polygons (a GeoDataFrame in this notebook)

    Returns
    -------
    ee.FeatureCollection with one feature per input geometry, in input order.
    """
    features = []
    # Iterate the geometries directly: the previous version rebuilt the whole geometry
    # list on every iteration because its loop variable shadowed that list.
    for polygon in file.geometry:
        x, y = polygon.exterior.coords.xy
        # np.dstack gives shape (1, n_points, 2), i.e. a list holding a single ring of
        # [x, y] pairs, which is the nesting ee.Geometry.Polygon receives here.
        ring = np.dstack((x, y)).tolist()
        features.append(ee.Feature(ee.Geometry.Polygon(ring)))

    return ee.FeatureCollection(features)

In [6]:
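# Does all the work needed: takes a specific tile number and the year to request data for, and returns
# (1) an eeList of sampled pixels with the index of their peak NDVI timestep, (2) an eeList of the same
# samples with the peak NDVI value, and (3) the list of (start, end) date strings for each timestep.
# Takes about 8 seconds to run for a given tile and year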

def imagetobands(tile, year, n_periods=10):
    """
    Build per-pixel peak-NDVI samples for one grid tile and one year.

    For each of the first ``n_periods`` consecutive 16-day windows of ``year`` the first
    MODIS/061/MOD13Q1 NDVI image intersecting the tile is taken, masked pixels are filled
    with -9999, and the image is clipped to the tile and multiplied by 0.0001 (so filled
    pixels end up at -0.9999). The windows are stacked into one multi-band image; per pixel
    the index of the band with the largest value and that value are then sampled at
    scale 250 in projection 'SR-ORG:6974'.

    Parameters
    ----------
    tile : int
        Index label of the tile's row in the module-level ``tiles_25`` GeoDataFrame.
    year : int
        Calendar year; windows start on January 1 of this year.
    n_periods : int, optional
        Number of 16-day windows to use. The default of 10 reproduces the previously
        hard-coded schedule (11 boundary dates, days 1-160). Pass 11 to extend the
        schedule through day 176 (~June 25).

    Returns
    -------
    (pixellist, ndvilist, just_dates)
        pixellist : ee.List of sampled features whose 'max' property is the 0-based index
            of the window with the highest NDVI.
        ndvilist : ee.List of features sampled the same way from the peak-NDVI image
            (callers in this notebook read the value from the 'array' property).
        just_dates : list of ('YYYY-MM-DD', 'YYYY-MM-DD') start/end strings, one per window.

    Raises
    ------
    ValueError
        If ``n_periods`` is smaller than 1 or larger than the number of available band letters.
    IndexError
        If ``tile`` does not match any row of ``tiles_25``.
    """
    numtolet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    if not 1 <= n_periods <= len(numtolet):
        raise ValueError("n_periods must be between 1 and %d, got %r" % (len(numtolet), n_periods))

    tile_rows = tiles_25[tiles_25.index == tile]
    if tile_rows.empty:
        # The old (unused) tiles_25.iloc[tile] lookups raised IndexError for an unknown
        # tile; keep failing early instead of sending an empty geometry to Earth Engine.
        raise IndexError("tile %r not found in tiles_25" % (tile,))
    tilegeo = shp_to_fc(tile_rows).geometry()

    # n_periods windows need n_periods + 1 boundary dates, 16 days apart.
    year_start = datetime(year, 1, 1)
    boundaries = [year_start + timedelta(days=16 * k) for k in range(n_periods + 1)]
    just_dates = [
        (start.strftime('%Y-%m-%d'), (following - timedelta(days=1)).strftime('%Y-%m-%d'))
        for start, following in zip(boundaries[:-1], boundaries[1:])
    ]

    # update 8/29/22: version 6 has been updated to version 6.1: https://lpdaac.usgs.gov/products/mod13q1v061/
    collection = ee.ImageCollection('MODIS/061/MOD13Q1')
    clips = []
    for start, end in just_dates:
        # NOTE(review): ee.Filter.date treats the end date as exclusive, so each filter
        # window spans 15 days; presumably fine because one composite is expected per
        # window — confirm.
        ndvi = (collection
                .filter(ee.Filter.date(start, end))
                .filter(ee.Filter.bounds(tilegeo))
                .select("NDVI")
                .first()
                .unmask(-9999))
        clips.append(ndvi.clip(tilegeo).multiply(0.0001))

    # Rename the stacked bands server-side. The previous version called getInfo() once
    # per band (a blocking round trip each time) only to learn the current band names.
    newnames = ["Timestep " + numtolet[i] for i in range(n_periods)]
    afgyr = ee.Image.cat(clips).rename(newnames)

    afgyrarr = afgyr.toArray()
    # Per-pixel index of the window with the largest NDVI, exposed as band 'max'.
    timesyr = afgyrarr.arrayArgmax().arrayFlatten([['max']])
    # Per-pixel NDVI value at that index.
    maxndvis = afgyrarr.arrayGet(timesyr)

    pixsample = timesyr.sample(region=tilegeo,scale=250,projection='SR-ORG:6974',geometries=True, factor=1)
    ndvisample = maxndvis.sample(region=tilegeo,scale=250,projection='SR-ORG:6974',geometries=True, factor=1)
    pixsampsize = pixsample.size().getInfo()
    ndvisampsize = ndvisample.size().getInfo()
    pixellist = pixsample.toList(pixsampsize)
    ndvilist = ndvisample.toList(ndvisampsize)

    return pixellist, ndvilist, just_dates

In [7]:
# Functions to extract information from the images' getInfo dictionaries

def getlong(dicti):
    """Return the first entry of the 'coordinates' value (used as the longitude by the export loop)."""
    coordinates = dicti.get('coordinates')
    return coordinates[0]

def getlat(dicti):
    """Return the second entry of the 'coordinates' value (used as the latitude by the export loop)."""
    coordinates = dicti.get('coordinates')
    return coordinates[1]

def getmax(dicti):
    """Return the value stored under 'max' in a properties dict, or None when the key is absent."""
    peak_index = dicti.get('max')
    return peak_index

def getndvi(dicti):
    """Return the value stored under 'array' in a properties dict, or None when the key is absent."""
    ndvi_value = dicti.get('array')
    return ndvi_value

In [8]:
# Specifying the tiles and years for which data is to be gathered

# Tile numbers to process: 0 through 1126.
# NOTE(review): 1127 is presumably the number of rows in the tile grid — confirm against tiles_25.
alltiles = [*range(1127)]
# Years to process. Earlier runs covered 2014-2020; update 8/29/22: run latest years.
allyears = [2021, 2022]

In [9]:
# Gathers data for the specified tiles and years. Runtime is about 1 minute per tile. Data for tile XXX is
# saved as XXXdata.csv inside the folder given by `outdir` just below.

outdir = "/data/afg_satellite/bestdates/pixel_maxdata_real_June_21_22"
# outdir = "/data/afg_satellite/bestdates/pixel_maxdata_real_June"
# Create the folder up front so a missing directory does not discard a finished tile.
os.makedirs(outdir, exist_ok=True)

for tile in alltiles:
    # An empty frame instance (not the DataFrame class) so to_csv still works if allyears is empty.
    masterdf = pd.DataFrame()

    for count, year in enumerate(allyears):
        ps, ns, ds = imagetobands(tile, year)
        # Keep the JSON round trip through pd.read_json so values are parsed exactly as
        # before; wrapping the text in StringIO avoids passing a literal JSON string,
        # which newer pandas versions deprecate.
        dft = pd.read_json(StringIO(json.dumps(ps.getInfo())))
        dfnt = pd.read_json(StringIO(json.dumps(ns.getInfo())))

        # Index of the peak-NDVI window per pixel; computed once and reused for both dates.
        peaksteps = dft['properties'].apply(getmax)
        # NOTE(review): the NDVI column is taken from a separately sampled list and is
        # combined by row position; this assumes both samples come back in the same order.
        dff = pd.DataFrame({
            'Latitude': dft['geometry'].apply(getlat),
            'Longitude': dft['geometry'].apply(getlong),
            "Best Start Date " + str(year): peaksteps.apply(lambda tper: ds[tper][0]),
            "Best End Date " + str(year): peaksteps.apply(lambda tper: ds[tper][1]),
            "Peak NDVI " + str(year): dfnt['properties'].apply(getndvi),
        })

        if count == 0:
            masterdf = dff
        else:
            # One row per pixel; each additional year adds its own three columns.
            masterdf = masterdf.merge(dff, how="outer", on=["Latitude", "Longitude"])

    filename = os.path.join(outdir, str(tile) + "data.csv")
    masterdf.to_csv(filename, index=None)

In [14]:
# Scratch check of the original full-year schedule: boundary dates every 16 days starting
# on 2014-01-01. This yields 24 boundaries (23 periods); the last one falls in January 2015.
dates = [datetime(2014, 1, 1) + timedelta(days=offset) for offset in range(0, 369, 16)]
dates

[datetime.datetime(2014, 1, 1, 0, 0),
 datetime.datetime(2014, 1, 17, 0, 0),
 datetime.datetime(2014, 2, 2, 0, 0),
 datetime.datetime(2014, 2, 18, 0, 0),
 datetime.datetime(2014, 3, 6, 0, 0),
 datetime.datetime(2014, 3, 22, 0, 0),
 datetime.datetime(2014, 4, 7, 0, 0),
 datetime.datetime(2014, 4, 23, 0, 0),
 datetime.datetime(2014, 5, 9, 0, 0),
 datetime.datetime(2014, 5, 25, 0, 0),
 datetime.datetime(2014, 6, 10, 0, 0),
 datetime.datetime(2014, 6, 26, 0, 0),
 datetime.datetime(2014, 7, 12, 0, 0),
 datetime.datetime(2014, 7, 28, 0, 0),
 datetime.datetime(2014, 8, 13, 0, 0),
 datetime.datetime(2014, 8, 29, 0, 0),
 datetime.datetime(2014, 9, 14, 0, 0),
 datetime.datetime(2014, 9, 30, 0, 0),
 datetime.datetime(2014, 10, 16, 0, 0),
 datetime.datetime(2014, 11, 1, 0, 0),
 datetime.datetime(2014, 11, 17, 0, 0),
 datetime.datetime(2014, 12, 3, 0, 0),
 datetime.datetime(2014, 12, 19, 0, 0),
 datetime.datetime(2015, 1, 4, 0, 0)]