In [107]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [130]:
import os
import pdb

import numpy as np
import pandas as pd
from scipy.optimize import leastsq


def model_fourier(params, agdd, n_harm):
    """
    Fourier model
    :param params:
    :param agdd:
    :param n_harm:
    :return:
    """
    integration_time = len(agdd)
    t = np.arange(1, integration_time + 1)
    result = t*.0 + params[0]
    w = 1

    for i in range(1, n_harm * 4, 4):
        result = result + params[i] * np.cos(2.0 * np.pi * w * t / integration_time + params[i+1]) \
                 + params[i+2]*np.sin(2.0 * np.pi * w * t / integration_time + params[i+3])
        w += 1

    return result


def mismatch_function(params, func_phenology, ndvi, agdd):
    """
    The NDVI/Phenology model mismatch function
    :param params:
    :param func_phenology:
    :param ndvi:
    :param agdd:
    :param years:
    :return:
    """
    # output stores the predictions
    output = []

    oot = ndvi - func_phenology(params, agdd, n_harm=8)

    [output.append(x) for x in oot]

    return np.array(output).squeeze()


def do_fourier(ndvi, gdd, n_harm=8, init_params=None):
    """
    :param ndvi:
    :param gdd:
    :param n_harm:
    :param init_params:
    :return:
    """
    n_params = 1 + n_harm * 4

    if init_params is None:
        init_params = [.25, ] * n_params
        (xsol, mesg) = leastsq(mismatch_function, init_params, args=(model_fourier, ndvi, gdd), maxfev=1000000)
        model_fitted = model_fourier(xsol, gdd, n_harm)

    return model_fitted


def get_PTD(df):
    """
    Get phenological transition dates (greenup, senescence)
    :param df:
    :return:
    """
    # Input dataframe has an index comprised of day of year and remaining columns signify NDVI
    # Linearly interpolate dataframe columns to fill in missing values
    #plot(df['lai'].tolist())
    df = df.apply(pd.Series.interpolate)
    #plot(df['lai'].tolist())
    # Now compute mean of all columns and get the smoothened NDVI
    arr_smooth = do_fourier(df.mean(axis=1), [8.0] * len(df))
    #plot(arr_smooth)
    return arr_smooth

In [136]:
def myfunction(path):
    r=pd.read_csv(path).drop('Unnamed: 0',axis=1)[['GEOID','DOY','lai']]
    r['lai_smoothed']=0
    geoids=list(set(r.GEOID.tolist()))

    x=[8]*45
    x.append(5)
    w=r.loc[np.repeat(r.index.values,x*93)].reset_index(drop=True)
    w.DOY=list(range(1,366,1))*93
    w=pd.merge(w, r,  how='left', left_on=['GEOID','DOY'], right_on = ['GEOID','DOY']).drop(['lai_x','lai_smoothed_x'],axis=1)
    w.columns=['GEOID', 'DOY', 'lai', 'lai_smoothed']
    
    for g in geoids :
        p=w.ix[w.GEOID==g].reset_index(drop=True)
        w.ix[w.GEOID==g,'lai_smoothed']=get_PTD(pd.Series.to_frame(p.loc[0:366]['lai']))
    print(w)
    w.to_csv(path[:-4]+'_smoothed.csv')
    
myfunction('LAI_2012_soy.csv')
myfunction('LAI_2013_soy.csv')
myfunction('LAI_2015_soy.csv')
myfunction('LAI_2016_soy_updated.csv')
myfunction('LAI_2012_wheat.csv')
myfunction('LAI_2013_wheat.csv')
myfunction('LAI_2015_wheat.csv')
myfunction('LAI_2016_wheat_updated.csv')

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate_ix
  


       GEOID  DOY      lai  lai_smoothed
0      31007    1  0.00000  4.683174e-15
1      31007    2      NaN  4.680409e-15
2      31007    3      NaN  4.677241e-15
3      31007    4      NaN  4.673765e-15
4      31007    5      NaN  4.670052e-15
5      31007    6      NaN  4.666169e-15
6      31007    7      NaN  4.662246e-15
7      31007    8      NaN  4.658336e-15
8      31007    9  0.00000  4.654514e-15
9      31007   10      NaN  4.650855e-15
10     31007   11      NaN  4.647392e-15
11     31007   12      NaN  4.644194e-15
12     31007   13      NaN  4.641293e-15
13     31007   14      NaN  4.638745e-15
14     31007   15      NaN  4.636543e-15
15     31007   16      NaN  4.634741e-15
16     31007   17  0.00000  4.633307e-15
17     31007   18      NaN  4.632267e-15
18     31007   19      NaN  4.631590e-15
19     31007   20      NaN  4.631342e-15
20     31007   21      NaN  4.631346e-15
21     31007   22      NaN  4.631718e-15
22     31007   23      NaN  4.632430e-15
23     31007   2

       GEOID  DOY      lai  lai_smoothed
0      31007    1  0.38133      0.253078
1      31007    2      NaN      0.258406
2      31007    3      NaN      0.264647
3      31007    4      NaN      0.271725
4      31007    5      NaN      0.279555
5      31007    6      NaN      0.288041
6      31007    7      NaN      0.297082
7      31007    8      NaN      0.306567
8      31007    9  0.39610      0.316384
9      31007   10      NaN      0.326417
10     31007   11      NaN      0.336547
11     31007   12      NaN      0.346658
12     31007   13      NaN      0.356634
13     31007   14      NaN      0.366363
14     31007   15      NaN      0.375740
15     31007   16      NaN      0.384664
16     31007   17  0.39895      0.393045
17     31007   18      NaN      0.400799
18     31007   19      NaN      0.407853
19     31007   20      NaN      0.414147
20     31007   21      NaN      0.419631
21     31007   22      NaN      0.424267
22     31007   23      NaN      0.428031
23     31007   2

In [38]:
def plot(l):
    plt.plot(l)
    plt.xlabel("DOY")
    plt.ylabel("LAI")
    plt.title("LAI time series plot")
    plt.show()