In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [6]:
import os
import pdb

import numpy as np
import pandas as pd
from scipy.optimize import leastsq


def model_fourier(params, agdd, n_harm):
    """
    Fourier model
    :param params:
    :param agdd:
    :param n_harm:
    :return:
    """
    integration_time = len(agdd)
    t = np.arange(1, integration_time + 1)
    result = t*.0 + params[0]
    w = 1

    for i in range(1, n_harm * 4, 4):
        result = result + params[i] * np.cos(2.0 * np.pi * w * t / integration_time + params[i+1]) \
                 + params[i+2]*np.sin(2.0 * np.pi * w * t / integration_time + params[i+3])
        w += 1

    return result


def mismatch_function(params, func_phenology, ndvi, agdd):
    """
    The NDVI/Phenology model mismatch function
    :param params:
    :param func_phenology:
    :param ndvi:
    :param agdd:
    :param years:
    :return:
    """
    # output stores the predictions
    output = []

    oot = ndvi - func_phenology(params, agdd, n_harm=8)

    [output.append(x) for x in oot]

    return np.array(output).squeeze()


def do_fourier(ndvi, gdd, n_harm=8, init_params=None):
    """
    :param ndvi:
    :param gdd:
    :param n_harm:
    :param init_params:
    :return:
    """
    n_params = 1 + n_harm * 4

    if init_params is None:
        init_params = [.25, ] * n_params
        (xsol, mesg) = leastsq(mismatch_function, init_params, args=(model_fourier, ndvi, gdd), maxfev=1000000)
        model_fitted = model_fourier(xsol, gdd, n_harm)

    return model_fitted


def get_PTD(df):
    """
    Get phenological transition dates (greenup, senescence)
    :param df:
    :return:
    """
    # Input dataframe has an index comprised of day of year and remaining columns signify NDVI
    # Linearly interpolate dataframe columns to fill in missing values
    #plot(df['lai'].tolist())
    df = df.apply(pd.Series.interpolate)
    #plot(df['lai'].tolist())
    # Now compute mean of all columns and get the smoothened NDVI
    arr_smooth = do_fourier(df.mean(axis=1), [8.0] * len(df))
    #plot(arr_smooth)
    return arr_smooth

In [7]:
def myfunction(path):
    r=pd.read_csv(path).drop('Unnamed: 0',axis=1)[['GEOID','DOY','lai']]
    r['lai_smoothed']=0
    geoids=list(set(r.GEOID.tolist()))

    x=[8]*45
    x.append(5) #to make the total of 365 days
    w=r.loc[np.repeat(r.index.values,x*93)].reset_index(drop=True) # repeat the same value for the next 8 days
    w.DOY=list(range(1,366,1))*93
    w=pd.merge(w, r,  how='left', left_on=['GEOID','DOY'], right_on = ['GEOID','DOY']).drop(['lai_x','lai_smoothed_x'],axis=1)
    w.columns=['GEOID', 'DOY', 'lai', 'lai_smoothed']
    
    for g in geoids :
        p=w.ix[w.GEOID==g].reset_index(drop=True)
        w.ix[w.GEOID==g,'lai_smoothed']=get_PTD(pd.Series.to_frame(p.loc[0:366]['lai']))
    w['lai_smoothed'] = w['lai_smoothed'].clip(lower=0)
    w.to_csv(path[:-4]+'_smoothed.csv')
    
myfunction('LAI_2012_corn.csv')
myfunction('LAI_2013_corn.csv')
myfunction('LAI_2015_corn.csv')
myfunction('LAI_2016_corn_updated.csv')

myfunction('LAI_2012_soy.csv')
myfunction('LAI_2013_soy.csv')
myfunction('LAI_2015_soy.csv')
myfunction('LAI_2016_soy_updated.csv')

myfunction('LAI_2012_wheat.csv')
myfunction('LAI_2013_wheat.csv')
myfunction('LAI_2015_wheat.csv')
myfunction('LAI_2016_wheat_updated.csv')


.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  


In [None]:
def plot(l):
    plt.plot(l)
    plt.xlabel("DOY")
    plt.ylabel("LAI")
    plt.title("LAI time series plot")
    plt.show()