In [4]:
import pandas as pd
import numpy as np
import rasterio
import time
import random

In [5]:
# Seven all-NaN rows that follow every observed layer in the expanded frame,
# so consecutive observations end up 8 rows apart.
df_insert = pd.DataFrame({'NDVI_corn_2015': [np.nan]*7,
                    'NDVI_soy_2015': [np.nan]*7})
def dataframe_extraction(r1,r2,row,col):
    """Build the gap-padded NDVI series of one pixel from two raster stacks.

    Each layer of the stacks contributes one observed row, followed by
    ``len(df_insert)`` all-NaN rows. The result is re-indexed 1..N and only the
    first 313 rows are returned.

    inputs: r1, r2: 3-D arrays indexed as [layer, row, col]; r1 fills the
                    'NDVI_corn_2015' column and r2 the 'NDVI_soy_2015' column.
                    Both must have the same number of layers.
            row, col: position of the pixel to extract.
    returns: float DataFrame with columns 'NDVI_corn_2015' and 'NDVI_soy_2015'.
    raises: ValueError if the two stacks have different layer counts.
    """
    n_layers = r1.shape[0]
    if r2.shape[0] != n_layers:
        raise ValueError(
            "r1 and r2 must have the same number of layers "
            "(got %d and %d)" % (n_layers, r2.shape[0]))

    # Allocate the whole padded series once instead of concatenating one row
    # at a time (which re-copies the growing frame on every iteration).
    stride = len(df_insert) + 1
    values = np.full((n_layers * stride, 2), np.nan)
    values[::stride, 0] = r1[:n_layers, row, col]
    values[::stride, 1] = r2[:n_layers, row, col]

    df = pd.DataFrame(values,
                      columns=['NDVI_corn_2015', 'NDVI_soy_2015'],
                      index=range(1, 1 + len(values)))
    # NOTE(review): the 313-row cutoff is inherited as-is; confirm why the
    # series is truncated there.
    return df.iloc[:313]

In [6]:
# Disabled exploratory code, kept inside a bare string literal so none of it runs.
# The string is the only expression in this cell, so the notebook echoes it back
# as the cell's output instead of executing the rasterio calls it contains.
"""with rasterio.open('/Users/koutilya/Downloads/MOD09A1.h10v04.brdf_corrected/MOD09A1.A2015113.h10v04.brdf_product.02.01.tif') as src:
    print("CRS: ", src.crs)
    print("Band Count: ",src.count)   #Band count
    print("Indexes: ", src.indexes)
    print("Raster width:", src.width)
    print("Raster height:", src.height)
    print("DTypes of the Raster: ", src.dtypes)
    print("Extent of the Raster: ", src.bounds)
    print("Transformation from points to XYZ: ", src.transform)
    print("sample coordinate of the left uppermost point: ", src.transform * (0, 0))#row=0 column=0
    a=src.read()
    print("Total Count: ", (a.size))
    print(a.shape)
    print(a[0,1200,1200])
src=rasterio.open('MOD09A1.A2015113.h10v04.brdf_product.02.01.tif')
a=src.read()
print(a.shape)
k=rasterio.open('MOD09A1.A2016113.h10v04.brdf_product.02.01.tif')
b=k.read()
print(dataframe_extraction(a,b,1200,1200))"""

'with rasterio.open(\'/Users/koutilya/Downloads/MOD09A1.h10v04.brdf_corrected/MOD09A1.A2015113.h10v04.brdf_product.02.01.tif\') as src:\n    print("CRS: ", src.crs)\n    print("Band Count: ",src.count)   #Band count\n    print("Indexes: ", src.indexes)\n    print("Raster width:", src.width)\n    print("Raster height:", src.height)\n    print("DTypes of the Raster: ", src.dtypes)\n    print("Extent of the Raster: ", src.bounds)\n    print("Transformation from points to XYZ: ", src.transform)\n    print("sample coordinate of the left uppermost point: ", src.transform * (0, 0))#row=0 column=0\n    a=src.read()\n    print("Total Count: ", (a.size))\n    print(a.shape)\n    print(a[0,1200,1200])\nsrc=rasterio.open(\'MOD09A1.A2015113.h10v04.brdf_product.02.01.tif\')\na=src.read()\nprint(a.shape)\nk=rasterio.open(\'MOD09A1.A2016113.h10v04.brdf_product.02.01.tif\')\nb=k.read()\nprint(dataframe_extraction(a,b,1200,1200))'

In [15]:
import os
import pdb

import numpy as np
import pandas as pd
from scipy.optimize import leastsq


def model_fourier(params, agdd, n_harm):
    """
    Evaluate a truncated Fourier series on the time steps 1..len(agdd).

    :param params: flat coefficient vector: params[0] is the constant offset,
                   then four values per harmonic in the order
                   (cosine amplitude, cosine phase, sine amplitude, sine phase)
    :param agdd: only its length is used; it sets the number of time steps
                 and the period of the first harmonic
    :param n_harm: number of harmonics to sum
    :return: numpy array with one modelled value per time step
    """
    n_steps = len(agdd)
    steps = np.arange(1, n_steps + 1)
    fitted = steps * .0 + params[0]

    for harmonic in range(1, n_harm + 1):
        # Index of this harmonic's first coefficient inside ``params``.
        base = 4 * (harmonic - 1) + 1
        cos_term = params[base] * np.cos(
            2.0 * np.pi * harmonic * steps / n_steps + params[base + 1])
        sin_term = params[base + 2] * np.sin(
            2.0 * np.pi * harmonic * steps / n_steps + params[base + 3])
        fitted = fitted + cos_term + sin_term

    return fitted


def mismatch_function(params, func_phenology, ndvi, agdd):
    """
    The NDVI/Phenology model mismatch function (residuals for ``leastsq``).

    :param params: flat model parameter vector, laid out as one offset
                   followed by four values per harmonic
    :param func_phenology: model callable, invoked as
                           ``func_phenology(params, agdd, n_harm=...)``
    :param ndvi: observed NDVI values
    :param agdd: forwarded unchanged to ``func_phenology``
    :return: numpy array of ``ndvi - model``, with length-1 axes squeezed out
    """
    # Derive the harmonic count from the parameter vector instead of assuming
    # 8, so the model is always evaluated with every parameter being fitted
    # (33 parameters still give n_harm == 8).
    n_harm = (len(params) - 1) // 4

    residuals = ndvi - func_phenology(params, agdd, n_harm=n_harm)

    return np.array(residuals).squeeze()


def do_fourier(ndvi, gdd, n_harm=8, init_params=None):
    """
    Fit the Fourier model to an NDVI series by least squares and return the
    fitted (smoothed) curve.

    :param ndvi: observed NDVI values, one per time step
    :param gdd: sequence with one entry per time step, forwarded to the model
    :param n_harm: number of harmonics in the model
    :param init_params: optional starting vector of length ``1 + 4 * n_harm``;
                        defaults to 0.25 for every parameter
    :return: the model evaluated at the fitted parameters
    :raises ValueError: if ``init_params`` has the wrong length
    """
    n_params = 1 + n_harm * 4

    if init_params is None:
        init_params = [.25, ] * n_params
    elif len(init_params) != n_params:
        raise ValueError(
            "init_params must have %d entries for n_harm=%d, got %d"
            % (n_params, n_harm, len(init_params)))

    def _model(params, agdd, **_ignored):
        # mismatch_function passes its own n_harm keyword to the model; pin
        # the harmonic count here so the fit and the final evaluation below
        # always use the same model.
        return model_fourier(params, agdd, n_harm)

    # The fit runs whether or not init_params was supplied; previously it only
    # ran for the default, leaving the result undefined otherwise.
    xsol, _ier = leastsq(mismatch_function, init_params, args=(_model, ndvi, gdd), maxfev=1000000)

    return model_fourier(xsol, gdd, n_harm)


def _last_extreme_index(values, lower, upper, reducer):
    """Position of the last occurrence of reducer(window) inside values[lower:upper+1]."""
    positions = np.arange(len(values))[lower:upper + 1]
    if positions.size == 0:
        raise ValueError(
            "window [%s, %s] selects no elements of a series of length %d"
            % (lower, upper, len(values)))
    window = values[positions]
    return positions[np.flatnonzero(window == reducer(window))[-1]]


def get_PTD(df,gl,gu,sl,su):
    """
    Get phenological transition dates (greenup, senescence)

    :param df: frame whose columns are NDVI series; missing values are
               linearly interpolated and the columns averaged before smoothing
    :param gl: lower bound (inclusive) of the window searched for green-up
    :param gu: upper bound (inclusive) of the window searched for green-up
    :param sl: lower bound (inclusive) of the window searched for senescence
    :param su: upper bound (inclusive) of the window searched for senescence
    :return: (doy_green, doy_senesc), positions in the first difference of the
             smoothed curve: the steepest increase within [gl, gu] and the
             steepest decrease within [sl, su]
    :raises ValueError: if a window selects no elements
    """
    # Linearly interpolate dataframe columns to fill in missing values
    df = df.apply(pd.Series.interpolate)

    # Mean of all columns, smoothed with the Fourier fit. do_fourier's second
    # argument only needs one entry per row.
    arr_smooth = do_fourier(df.mean(axis=1), [8.0] * len(df))

    # Step-to-step change of the smoothed curve.
    diff_green = np.diff(arr_smooth[:365 + 1])

    # Search only inside each window, so an equal value elsewhere in the
    # series can no longer pull the result outside the requested range.
    doy_green = _last_extreme_index(diff_green, gl, gu, np.max)
    doy_senesc = _last_extreme_index(diff_green, sl, su, np.min)
    return doy_green, doy_senesc

In [8]:
#from pathos.multiprocessing import ProcessingPool as Pool
from multiprocessing import Pool
import matplotlib.pyplot as plt
%matplotlib inline
def initialize_rasters(path1,path2):
    """Read two raster stacks into memory and give them the same layer count.

    :param path1: path of the first raster stack; its width and height are reported
    :param path2: path of the second raster stack
    :return: (a, b, tot_rows, tot_cols) where a and b are the arrays read from
             path1 and path2, and tot_rows/tot_cols come from the first raster
    """
    # Context managers close the datasets once their pixels are in memory.
    with rasterio.open(path1) as raster1:
        tot_cols=raster1.width
        tot_rows=raster1.height
        a=raster1.read()
    print("Tot rows: ",tot_rows," Tot cols: ",tot_cols)
    with rasterio.open(path2) as raster2:
        b=raster2.read()
    if b.shape[0]!=a.shape[0]:
        print("determine what doy you are missing!!")
        # Hard-coded repair: for the dataset this was written for, DOY49 is the
        # missing layer, so a filler layer (first layer multiplied by 0) is
        # inserted as the 7th layer.
        # NOTE(review): only one layer is inserted and always at index 6;
        # confirm this for any other input.
        filler=(b[0]*0).reshape(1,tot_rows,tot_cols)
        b=np.concatenate((b[:6],filler,b[6:]),axis=0)
    return (a,b,tot_rows,tot_cols)
def ritvik_fn(df,gl,gu,sl,su):
    """Return the (green-up, senescence) pair that get_PTD computes for df.

    gl/gu and sl/su are the window bounds handed through to get_PTD unchanged.
    """
    # For quick dry runs this used to return random dates instead:
    #   (random.randint(1,50), random.randint(120,365))
    windows = (gl, gu, sl, su)
    return get_PTD(df, *windows)

def myfunction(index,rasters):
    """Compute (green-up, senescence) dates for one pixel, once per crop column.

    :param index: flat, row-major pixel index (row * tot_cols + col)
    :param rasters: pair (a, b) of 3-D raster stacks indexed [layer, row, col]
    :return: list with one (greenup, senescence) tuple per column of the
             extracted frame; (0, 0) when the column has no valid data or its
             peak value does not exceed 2000
    """
    # Search windows per column, as (gl, gu, sl, su).
    windows = {
        'NDVI_corn_2015': (140, 176, 213, 305),
        'NDVI_soy_2015': (156, 196, 227, 290),
    }

    a=rasters[0]
    b=rasters[1]
    tot_cols=a.shape[2]
    row,col=divmod(index,tot_cols)

    df=dataframe_extraction(a,b,row,col)
    # Negative values are treated as missing.
    df=df.mask(df<0)

    pairs=list()
    for name in df.columns:
        gl,gu,sl,su=windows[name]

        first_valid=df[name].first_valid_index()
        if first_valid is None:
            # Nothing usable in this column: report "no dates" rather than fail.
            pairs.append((0,0))
            continue

        # Ensure the first value of the column is not missing by copying the
        # first valid observation into the first row.
        df.loc[df.index[0],name]=df.loc[first_valid,name]

        clean=df[name].dropna().clip(lower=0,upper=10000)
        if int(clean.max())>2000:
            g,s=ritvik_fn(df[[name]],gl,gu,sl,su)
        else:
            g=0
            s=0
        pairs.append((g,s))

    return pairs

The DOY interval we need to consider for the dataframe passed into ritvik_fn changes from crop to crop and state to state. For example, for winter wheat in Kansas, green-up happens in DOYs 1:60 and senescence in DOYs 125:200.

In [9]:
# Inputs tried earlier and left here for reference:
##a,b,tot_rows,tot_cols=initialize_rasters("A2015_177_ndvi_480m.tif","A2015_185_ndvi_480m.tif")
#src=rasterio.open('NDVI_480m_stack_soy.tif')
#a,b,tot_rows,tot_cols=initialize_rasters('NDVI_480m_stack_corn.tif','NDVI_480m_stack_soy.tif')

# Dataset handle kept open; its profile is read later when writing results.
src = rasterio.open('MOD09Q1.A2012.NE.BRDF_ndvistack_corn.tif')
a, b, tot_rows, tot_cols = initialize_rasters(
    'MOD09Q1.A2012.NE.BRDF_ndvistack_corn.tif',
    'MOD09Q1.A2012.NE.BRDF_ndvistack_soy.tif',
)

# One (flat pixel index, (a, b)) task per pixel.
ind = range(tot_rows * tot_cols)
y = [(a, b)] * len(ind)
l = list(zip(ind, y))

# Empty output layers shaped like one raster layer.
greenup = a[1] * 0
sen = a[1] * 0
plant = (a[1] * 0).astype('int32')
har = (a[1] * 0).astype('int32')

Tot rows:  757  Tot cols:  1521


In [16]:
# Spot-check a single pixel: each entry of ``l`` is (index, (a, b)), which is
# exactly the argument list of myfunction.
index_test = 575698 + 23 + 3 + 5
myfunction(*l[index_test])

290
290
249
249


[(164, 290), (178, 249)]

In [70]:
# Flat pixel indices processed in this run (ind_start/ind_end are reused when
# the results are written back into rasters).
ind_start=575698
ind_end=ind_start+100

def _pixel_task(index):
    """Run myfunction for one pixel using the raster stacks already in memory."""
    return myfunction(index,(a,b))

start=time.time()
with Pool(processes=3) as pool:
    # Send only the pixel index to the workers. Previously every task argument
    # carried both full raster stacks, which had to be serialised to the
    # worker processes along with the tasks.
    # NOTE(review): this relies on the workers seeing the notebook globals
    # a, b and _pixel_task (fork start method) - confirm on your platform.
    pairs_crops=pool.map(_pixel_task,ind[ind_start:ind_end])
end=time.time()
print(end-start)
print(pairs_crops)

128.68490386009216
[[(0, 0), (0, 0)], [(0, 0), (0, 0)], [(0, 0), (0, 0)], [(0, 0), (0, 0)], [(162, 278), (0, 0)], [(157, 267), (0, 0)], [(159, 264), (0, 0)], [(0, 0), (0, 0)], [(0, 0), (0, 0)], [(161, 277), (0, 0)], [(161, 266), (0, 0)], [(0, 0), (0, 0)], [(0, 0), (0, 0)], [(162, 276), (0, 0)], [(0, 0), (0, 0)], [(0, 0), (0, 0)], [(0, 0), (0, 0)], [(0, 0), (0, 0)], [(175, 272), (0, 0)], [(0, 0), (0, 0)], [(0, 0), (0, 0)], [(0, 0), (0, 0)], [(0, 0), (0, 0)], [(161, 277), (154, 244)], [(162, 258), (0, 0)], [(161, 273), (0, 0)], [(158, 273), (154, 244)], [(159, 271), (154, 244)], [(165, 247), (154, 244)], [(163, 263), (167, 289)], [(166, 260), (165, 292)], [(164, 290), (178, 249)], [(165, 269), (188, 256)], [(160, 286), (177, 286)], [(159, 286), (126, 287)], [(159, 285), (126, 287)], [(0, 0), (0, 0)], [(0, 0), (0, 0)], [(0, 0), (0, 0)], [(164, 243), (0, 0)], [(161, 288), (0, 0)], [(0, 0), (0, 0)], [(0, 0), (0, 0)], [(0, 0), (0, 0)], [(0, 0), (0, 0)], [(159, 295), (178, 248)], [(160, 243),

In [131]:
start=time.time()
# Reuse the source raster's profile, but write single-band outputs.
profile=src.profile
profile.update(count=1)
print(profile)

# Each entry of pairs_crops holds one (greenup, senescence) pair per crop.
n_crops=len(pairs_crops[0]) if pairs_crops else 0
for j in range(n_crops):
    greenup=a[1]*0
    sen=a[1]*0
    for offset,pixel_pairs in enumerate(pairs_crops):
        row,col=divmod(ind_start+offset,tot_cols)
        # Take crop j's pair. Previously crop 0 was used on every pass, so all
        # output files contained the first crop's dates.
        doy_green,doy_senesc=pixel_pairs[j]
        greenup[row,col]=doy_green
        sen[row,col]=doy_senesc
        # Disabled: planting/harvest estimates (greenup-15, senescence+45),
        # clipped to 1..365 and stored in plant/har.

    with rasterio.open('greenup_'+str(j)+'.tif', 'w', **profile) as dst:
        dst.write(greenup.astype(rasterio.float64), 1)
    with rasterio.open('sen_'+str(j)+'.tif', 'w', **profile) as dst:
        dst.write(sen.astype(rasterio.float64), 1)
print(time.time()-start)

{'driver': 'GTiff', 'crs': CRS({'units': 'm', 'lon_0': -96, 'wktext': True, 'ellps': 'GRS80', 'no_defs': True, 'proj': 'aea', 'towgs84': '0,0,0,0,0,0,0', 'lat_0': 23, 'lat_1': 29.5, 'lat_2': 45.5, 'y_0': 0, 'x_0': 0}), 'nodata': -1.7e+308, 'dtype': 'float64', 'count': 1, 'transform': Affine(480.0, 0.0, -671140.962891044,
       0.0, -479.9999999999999, 2250662.64099545), 'tiled': False, 'interleave': 'pixel', 'height': 757, 'width': 1520}
0.14362692832946777


  import sys
  
