# Paddy rice yield estimation over Thai Binh, Vietnam

In [1]:
import os
import sys
import itertools
import glob
# from collections import OrderedDict

import numpy as np
import pandas as pd
from osgeo import gdal, gdal_array, osr, ogr

# from scipy import ndimage
# import scipy

import matplotlib as mpl
%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits import axes_grid1

import seaborn as sns

In [2]:
# Global seaborn look for every matplotlib figure in this notebook.
sns.set_context("paper")
sns.set_style("whitegrid")
# Resolution setting for figures (not referenced by the cells visible here).
dpi = 300

In [3]:
import plotly.plotly as pply
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Run once at the start of every notebook that uses plotly.offline: this
# injects the plotly.js source files into the notebook.
init_notebook_mode(connected=False)
import plotly.tools

In [4]:
# Register all raster drivers known to GDAL before any file is opened.
gdal.AllRegister()

In [5]:
import geo_ts as gt

In [6]:
# Root folder that holds every input of this project; change it here when the
# data are moved.
project_dir = "/projectnb/echidna/lidar/zhanli86/workspace/data/projects/kaiyu-adb-crop"

# Field crop-cutting measurements and the classification map
crop_yield_field_file = project_dir + "/vietnam-field/Thai_Binh_select_variables_crop_cutting.xlsx"
cls_map_file = project_dir + "/vietnam-cls/vietnam_thai_bin_cls_rf_lsat_scenes.img"

# ALOS PALSAR data: one (order folder, scene id, acquisition date yymmdd)
# triple per acquisition; HH and HV file names differ only in the
# polarization tag.
alos_scenes = [
    ("0000073935", "ALOS2058563200", "150624"),
    ("0000073936", "ALOS2062703200", "150722"),
    ("0000073937", "ALOS2070983200", "150916"),
    ("0000073938", "ALOS2075123200", "151014"),
]
alos_name_fmt = "{0}/vietnam-alos/{1}/IMG-{2}-{3}-{4}-WBDR2.1GUD.UTM48N.tif"
alos_hh_imgfiles = [alos_name_fmt.format(project_dir, _order, "HH", _scene, _date)
                    for _order, _scene, _date in alos_scenes]
alos_hv_imgfiles = [alos_name_fmt.format(project_dir, _order, "HV", _scene, _date)
                    for _order, _scene, _date in alos_scenes]

In [7]:
# Load the first sheet of the field workbook; its third column becomes the
# row index.
crop_yield_field = pd.read_excel(crop_yield_field_file, sheetname=0, index_col=2)

In [8]:
# Give the coordinate and yield columns short, code-friendly names.
# Work on a copy: ``columns.values`` exposes the array that backs the
# (immutable) column Index, so it must not be modified in place.
colname = crop_yield_field.columns.values.copy()
for position, new_name in [(2, "lat_m"),         # mesh lat
                           (3, "lon_m"),         # mesh lon
                           (4, "lat_s"),         # subplot lat
                           (5, "lon_s"),         # subplot lon
                           (7, "yield_field")]:  # field measured yield
    colname[position] = new_name
crop_yield_field.columns = colname

In [9]:
# Summary statistics of the field-measured yield.
crop_yield_field['yield_field'].describe()

count     256.000000
mean     3240.097873
std       685.090059
min       995.587832
25%      2851.264670
50%      3248.038368
75%      3708.969943
max      4911.539109
Name: yield_field, dtype: float64

In [10]:
# Peek at the first rows to check the renamed columns.
crop_yield_field.head()

Unnamed: 0_level_0,mesh_id,plot_id,lat_m,lon_m,lat_s,lon_s,variety,yield_field
full_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
004622-p02,4622,p02,20.70122,106.32945,20.700968,106.327124,Nep,2398.245052
004622-p04,4622,p04,20.70122,106.32945,20.701456,106.327954,BC15,3473.247317
004622-p08,4622,p08,20.70122,106.32945,20.701401,106.32843,Nep,1641.749412
004622-p10,4622,p10,20.70122,106.32945,20.701609,106.328702,BC15,2588.439197
011504-p01,11504,p01,20.65982,106.29705,20.660043,106.296676,Thien Uu,1381.913932


In [11]:
# Geographic location (lon, lat) of every subplot, keyed like crop_yield_field.
geo_points = crop_yield_field.loc[:, ['lon_s', 'lat_s']].rename(columns={'lon_s': 'lon', 'lat_s': 'lat'})

In [12]:
def _convert_points(converter, raster_file, column_names, **converter_kw):
    """Apply ``converter`` to every subplot location in ``geo_points``.

    ``converter`` is called once per row as
    ``converter(raster_file, lon, lat, **converter_kw)``; items 0 and 1 of its
    result are stored under ``column_names[0]`` and ``column_names[1]``.
    Returns a DataFrame indexed like ``geo_points``.
    """
    converted = pd.DataFrame(np.zeros((len(geo_points), 2)), index=geo_points.index, columns=column_names)
    for key in converted.index:
        pair = converter(raster_file, geo_points.loc[key, 'lon'], geo_points.loc[key, 'lat'], **converter_kw)
        converted.loc[key, column_names[0]] = pair[0]
        converted.loc[key, column_names[1]] = pair[1]
    return converted


# Subplot locations converted against the classification map
# (presumably projected x/y for geo2Proj -- confirm in geo_ts).
proj_points = _convert_points(gt.geo2Proj, cls_map_file, ['x', 'y'])
# Fractional (sample, line) image coordinates in the classification map ...
img_points = _convert_points(gt.geo2Pixel, cls_map_file, ['sample', 'line'], ret_int=False)
# ... and in the first ALOS HH image.
alos_img_points = _convert_points(gt.geo2Pixel, alos_hh_imgfiles[0], ['sample', 'line'], ret_int=False)

In [13]:
def _to_pixel_index(points, sample_shift, line_shift):
    """Integer (sample, line) pixel index of shifted fractional image points.

    ``sample_shift`` is added to the 'sample' column and ``line_shift`` is
    subtracted from the 'line' column before conversion.  Coordinates are
    floored rather than truncated toward zero, so a point slightly left of or
    above the image (coordinate in (-1, 0)) becomes pixel -1 and is rejected
    by the extent test instead of silently landing in pixel 0.  A new frame
    is built so the columns are integer-typed regardless of how pandas
    handles ``.loc[:, col] = ...`` assignment.
    """
    return pd.DataFrame({'sample': np.floor(points.loc[:, 'sample'] + sample_shift).astype(int),
                         'line': np.floor(points.loc[:, 'line'] - line_shift).astype(int)},
                        columns=['sample', 'line'])


# Subplot edge length expressed in classification-map pixels
# (presumably a 2.5 m subplot over 30 m pixels -- confirm).
subp_imgsize = 2.5/30
# img_points is treated as the lower-left corner; the other corners and the
# centre are offset by the subplot size (sample grows, line shrinks).
img_points_ur = _to_pixel_index(img_points, subp_imgsize, subp_imgsize)
img_points_ll = _to_pixel_index(img_points, 0, 0)
img_points_ul = _to_pixel_index(img_points, 0, subp_imgsize)
img_points_lr = _to_pixel_index(img_points, subp_imgsize, 0)
img_points_ctr = _to_pixel_index(img_points, 0.5*subp_imgsize, 0.5*subp_imgsize)

# Same subplot size in ALOS pixels (presumably 25 m pixels -- confirm).
alos_subp_imgsize = 2.5/25
alos_img_points_ctr = _to_pixel_index(alos_img_points, 0.5*alos_subp_imgsize, 0.5*alos_subp_imgsize)

## Select field subplots falling into our image extent

In [14]:
# Raster metadata of the classification map; its RasterXSize / RasterYSize
# entries are read in the next cell.
cls_map_meta = gt.getRasterMetaGdal(cls_map_file)

In [15]:
# Width (columns) and height (rows) of the classification map in pixels.
ncols, nrows = cls_map_meta['RasterXSize'], cls_map_meta['RasterYSize']

In [16]:
def _within_cls_map(points):
    """Boolean Series: True where (sample, line) lies inside the classification map."""
    return ((points.loc[:, 'sample'] >= 0) & (points.loc[:, 'sample'] < ncols)
            & (points.loc[:, 'line'] >= 0) & (points.loc[:, 'line'] < nrows))


# A subplot is kept only when both its upper-right and lower-left pixels are
# inside the image.
in_subp_flag = _within_cls_map(img_points_ur) & _within_cls_map(img_points_ll)

In [17]:
# Subplots that fall outside the classification map extent.
crop_yield_field.loc[~in_subp_flag, :]

Unnamed: 0_level_0,mesh_id,plot_id,lat_m,lon_m,lat_s,lon_s,variety,yield_field
full_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
017850-p10,17850,p10,20.62202,106.37985,20.621892,106,BC15,3609.709322


In [18]:
# Keep only the subplots inside the classification map, in every table that
# is keyed by subplot.  (The ALOS tables are not filtered here; they are
# addressed by subplot key later on.)
crop_yield_field = crop_yield_field.loc[in_subp_flag, :]

geo_points = geo_points.loc[in_subp_flag, :]
proj_points = proj_points.loc[in_subp_flag, :]
img_points = img_points.loc[in_subp_flag, :]
img_points_ur = img_points_ur.loc[in_subp_flag, :]
img_points_ll = img_points_ll.loc[in_subp_flag, :]
# Each corner table is filtered from ITSELF: taking the upper-left corners
# from img_points_ur and the lower-right corners from img_points_ll would make
# the four-corner crop test look at two corners only.
img_points_ul = img_points_ul.loc[in_subp_flag, :]
img_points_lr = img_points_lr.loc[in_subp_flag, :]
img_points_ctr = img_points_ctr.loc[in_subp_flag, :]

In [19]:
# True for subplots whose upper-right and lower-left corners share one pixel.
same_sample = img_points_ur.loc[:, 'sample'] == img_points_ll.loc[:, 'sample']
same_line = img_points_ur.loc[:, 'line'] == img_points_ll.loc[:, 'line']
single_pix_subp_flag = same_sample & same_line

## Read classification map

In [20]:
from classify_image import ImageClassifier

In [21]:
ic = ImageClassifier()
# Keep element [0][0] of what readRaster returns (presumably the first band
# of the raster -- confirm in classify_image); it is indexed as
# cls_map[line, sample] below.
cls_map = ic.readRaster(cls_map_file)[0][0]

In [22]:
def _is_crop_pixel(points):
    """Boolean array: True where the class-map pixel at (line, sample) has value 1."""
    return cls_map[points.loc[:, 'line'], points.loc[:, 'sample']] == 1


# Select subplots with four corners all in crop class pixels
crop_pix_flag = (_is_crop_pixel(img_points_ll) & _is_crop_pixel(img_points_lr)
                 & _is_crop_pixel(img_points_ur) & _is_crop_pixel(img_points_ul))

In [23]:
# Class label at the lower-left corner pixel of every subplot.
cls_map[img_points_ll.loc[:, 'line'], img_points_ll.loc[:, 'sample']]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1,
       1, 1, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1, 1, 4, 1,
       1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 4, 1, 1, 1, 3, 3, 2, 1, 1, 1, 1, 1,
       1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1,
       1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 3, 4, 1, 1, 3, 3, 1, 1, 1, 1, 1, 1, 1,
       1, 1], dtype=uint8)

In [24]:
# Class label at the upper-right corner pixel of every subplot.
cls_map[img_points_ur.loc[:, 'line'], img_points_ur.loc[:, 'sample']]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1,
       1, 1, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 3, 1, 1, 3, 3, 2, 3, 1, 1, 1, 1,
       1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1,
       1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 3, 4, 1, 3, 3, 1, 1, 1, 1, 1, 4, 1, 1,
       1, 1], dtype=uint8)

In [25]:
# Class label at the centre pixel of every subplot.
cls_map[img_points_ctr.loc[:, 'line'], img_points_ctr.loc[:, 'sample']]

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 1, 3, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1,
       1, 1, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 1,
       1, 1, 1, 1, 4, 3, 1, 1, 1, 1, 1, 4, 1, 1, 1, 3, 3, 2, 1, 1, 1, 1, 1,
       1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1,
       1, 3, 1, 1, 1, 1, 1, 1, 1, 3, 3, 4, 1, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1,
       1, 1], dtype=uint8)

## Time series data

In [26]:
# Time series produced by fusing Landsat and MODIS, BEFORE TIMESAT smoothing
ts_ndvi_fused_file = "/projectnb/echidna/lidar/zhanli86/workspace/data/projects/kaiyu-adb-crop/vietnam-fusion-ts/predicted_NDVI"
# The fused data after the SG filter of TIMESAT, one file per day of year
fitsg_ndvi_imgfiles = glob.glob("/projectnb/echidna/lidar/zhanli86/workspace/data/projects/kaiyu-adb-crop/vietnam-ts-sg/fitSG_NDVI_126046.2015[0-9][0-9][0-9]")
fitsmn_ndvi_imgfiles = glob.glob("/projectnb/echidna/lidar/zhanli86/workspace/data/projects/kaiyu-adb-crop/vietnam-fusion-ts/mat-files/raster-files/smoothn_predicted_ndvi_b[0-9][0-9][0-9].bin")

# New fusion data combining both Aqua and Terra.  Two file families exist per
# day of year: "plndsr_500" and "psub_lndsr".
plndsr_name_fmt = "/projectnb/echidna/lidar/zhanli86/workspace/data/projects/kaiyu-adb-crop/vietnam-fusion-ts-new/aqua-and-terra/plndsr_500.126046.2015{0:03d}.ndvi.bin"
psub_name_fmt = "/projectnb/echidna/lidar/zhanli86/workspace/data/projects/kaiyu-adb-crop/vietnam-fusion-ts-new/aqua-and-terra/psub_lndsr.126046.2015{0:03d}.ndvi.bin"
ts_ndvi_fused_new_files = np.array(
    glob.glob("/projectnb/echidna/lidar/zhanli86/workspace/data/projects/kaiyu-adb-crop/vietnam-fusion-ts-new/aqua-and-terra/plndsr_500.126046.2015[0-9][0-9][0-9].ndvi.bin")
    + glob.glob("/projectnb/echidna/lidar/zhanli86/workspace/data/projects/kaiyu-adb-crop/vietnam-fusion-ts-new/aqua-and-terra/psub_lndsr.126046.2015[0-9][0-9][0-9].ndvi.bin"))
# Day of year = last three digits of the third '.'-separated token
ts_ndvi_fused_new_doy = np.array([int(imgf.split(".")[2][-3:]) for imgf in ts_ndvi_fused_new_files])

# For a day of year present in both families, drop the plndsr_500 file and
# remember the psub_lndsr one as a "non-fused" date.
unique_doy, doy_count = np.unique(ts_ndvi_fused_new_doy, return_counts=True)
keep_flag = np.ones(len(ts_ndvi_fused_new_files), dtype=np.bool_)
nonfuse_flag = np.zeros(len(ts_ndvi_fused_new_files), dtype=np.bool_)
for dup_doy in unique_doy[doy_count > 1]:
    keep_flag[ts_ndvi_fused_new_files == plndsr_name_fmt.format(dup_doy)] = False
    nonfuse_flag[ts_ndvi_fused_new_files == psub_name_fmt.format(dup_doy)] = True
nonfuse_doy = np.array([int(imgf.split(".")[2][-3:]) for imgf in ts_ndvi_fused_new_files[nonfuse_flag]])
ts_ndvi_fused_new_files = ts_ndvi_fused_new_files[keep_flag]

In [27]:
# Read the TIMESAT-SG NDVI value at the centre pixel of every subplot from
# each daily image.
fnames, sg_values = gt.getTsFromImgs(fitsg_ndvi_imgfiles, img_points_ctr.loc[:, 'sample'], img_points_ctr.loc[:, 'line'])
# Day of year = characters after the 4-digit year in the file "extension"
sg_ndvi_doy = np.array([int(imgf.split(".")[1][4:]) for imgf in fnames])
# Sort the data points according to doy.  Columns are reordered by position
# (iloc) rather than with the deprecated ``reindex_axis``, which is also
# unavailable in newer pandas releases.
sort_ind = np.argsort(sg_ndvi_doy)
sg_ndvi_ts_data = pd.DataFrame(sg_values, columns=sg_ndvi_doy, index=img_points_ctr.index).iloc[:, sort_ind]
sg_ndvi_doy = sg_ndvi_doy[sort_ind]

In [28]:
# Read the smoothn NDVI value at the centre pixel of every subplot from each
# daily image.
fnames, smn_values = gt.getTsFromImgs(fitsmn_ndvi_imgfiles, img_points_ctr.loc[:, 'sample'], img_points_ctr.loc[:, 'line'])
# Day of year = last three characters before the first '.'
smn_ndvi_doy = np.array([int(imgf.split(".")[0][-3:]) for imgf in fnames])
# Sort the data points according to doy.  Columns are reordered by position
# (iloc) rather than with the deprecated ``reindex_axis``, which is also
# unavailable in newer pandas releases.
sort_ind = np.argsort(smn_ndvi_doy)
smn_ndvi_ts_data = pd.DataFrame(smn_values, columns=smn_ndvi_doy, index=img_points_ctr.index).iloc[:, sort_ind]
smn_ndvi_doy = smn_ndvi_doy[sort_ind]

In [29]:
# Read the fused (pre-smoothing) NDVI series at the centre pixel of every
# subplot; the result has one column per band of the file.
fused_values = gt.readPixelsGdal(ts_ndvi_fused_file, img_points_ctr.loc[:, 'sample'], img_points_ctr.loc[:, 'line'])
# Bands are labelled 1..n and used as day of year
ts_ndvi_fused_doy = np.arange(fused_values.shape[1])+1
# Sort the data points according to doy.  Columns are reordered by position
# (iloc) rather than with the deprecated ``reindex_axis``, which is also
# unavailable in newer pandas releases.
sort_ind = np.argsort(ts_ndvi_fused_doy)
ts_ndvi_fused_data = pd.DataFrame(fused_values, columns=ts_ndvi_fused_doy, index=img_points_ctr.index).iloc[:, sort_ind]
ts_ndvi_fused_doy = ts_ndvi_fused_doy[sort_ind]

In [30]:
# Read the Aqua+Terra fusion NDVI value at the centre pixel of every subplot
# from each remaining file.
fnames, fused_new_values = gt.getTsFromImgs(ts_ndvi_fused_new_files, img_points_ctr.loc[:, 'sample'], img_points_ctr.loc[:, 'line'])
# Day of year = last three digits of the third '.'-separated token
ts_ndvi_fused_new_doy = np.array([int(imgf.split(".")[2][-3:]) for imgf in fnames])
# Sort the data points according to doy.  Columns are reordered by position
# (iloc) rather than with the deprecated ``reindex_axis``, which is also
# unavailable in newer pandas releases.
sort_ind = np.argsort(ts_ndvi_fused_new_doy)
ts_ndvi_fused_new_data = pd.DataFrame(fused_new_values, columns=ts_ndvi_fused_new_doy, index=img_points_ctr.index).iloc[:, sort_ind]
ts_ndvi_fused_new_doy = ts_ndvi_fused_new_doy[sort_ind]

In [31]:
no_data = -9999


def _has_valid_value(ts_data):
    """Boolean Series: False for rows where every value is no_data or negative."""
    return ~((ts_data == no_data) | (ts_data < 0)).all(axis=1)


# Flag the time series that have at least one valid value
valid_sg_ts_flag = _has_valid_value(sg_ndvi_ts_data)
valid_smn_ts_flag = _has_valid_value(smn_ndvi_ts_data)

In [32]:
def fnameToDoy(fname):
    """Return the day of year encoded in an image file name.

    The second-to-last '-'-separated token of ``fname`` is read as ``yymmdd``
    with the two-digit year counted from 2000, e.g.
    ``IMG-HH-ALOS2058563200-150624-WBDR2.1GUD.UTM48N.tif`` gives 175.
    """
    import datetime
    yymmdd = fname.split('-')[-2]
    acquired = datetime.date(2000 + int(yymmdd[:2]), int(yymmdd[2:4]), int(yymmdd[4:]))
    # 1-based offset from 1 January of the acquisition year
    return acquired.toordinal() - datetime.date(acquired.year, 1, 1).toordinal() + 1

In [33]:
def _read_alos_ts(imgfiles):
    """Read one ALOS polarization at the subplot centres, sorted by day of year.

    Returns ``(doy, data)``: the sorted day-of-year array and a DataFrame with
    one row per subplot and one column per acquisition.  The day of year is
    parsed from the file names returned by ``gt.getTsFromImgs`` (as the NDVI
    cells do), so columns stay correctly labelled even if the reader returns
    the images in a different order than it was given.  Columns are reordered
    by position (iloc) rather than with the deprecated ``reindex_axis``.
    """
    fnames, values = gt.getTsFromImgs(imgfiles, alos_img_points_ctr.loc[:, 'sample'], alos_img_points_ctr.loc[:, 'line'])
    doy = np.array([fnameToDoy(imgf) for imgf in fnames])
    order = np.argsort(doy)
    data = pd.DataFrame(values, columns=doy, index=alos_img_points_ctr.index).iloc[:, order]
    return doy[order], data


ts_alos_hh_doy, ts_alos_hh_data = _read_alos_ts(alos_hh_imgfiles)
ts_alos_hv_doy, ts_alos_hv_data = _read_alos_ts(alos_hv_imgfiles)

In [34]:
# Scale every subplot's ALOS series by its own maximum over the acquisitions,
# so the largest value of each row becomes 1.
ts_alos_hh_data_norm = ts_alos_hh_data.astype(float).div(ts_alos_hh_data.max(axis='columns'), axis='index')
ts_alos_hv_data_norm = ts_alos_hv_data.astype(float).div(ts_alos_hv_data.max(axis='columns'), axis='index')

## Define the phenology period

In [35]:
# Day-of-year window (inclusive) over which the NDVI metrics are computed
# (names suggest start / end of season -- confirm).
sos_doy, eos_doy = 183, 365

In [36]:
# Set up a dataframe to store NDVI metrics; rows without a valid time series
# keep the initial value 0.
sg_ndvi_metrics = pd.DataFrame(np.zeros((len(geo_points), 3)),
                               columns=['ndvi_max', 'ndvi_mean', 'ndvi_sum'],
                               index=geo_points.index)

# Columns (days of year) inside the phenology window
ts_flag = np.logical_and(sg_ndvi_doy >= sos_doy, sg_ndvi_doy <= eos_doy)
# Keys of the VALID series only.  Indexing with ``valid_sg_ts_flag.index``
# alone selects every row and silently ignores the validity mask.
valid_sg_keys = valid_sg_ts_flag.index[valid_sg_ts_flag.values]
sg_season_data = sg_ndvi_ts_data.loc[valid_sg_keys, ts_flag]
sg_ndvi_metrics.loc[valid_sg_keys, 'ndvi_max'] = sg_season_data.max(axis=1)
sg_ndvi_metrics.loc[valid_sg_keys, 'ndvi_sum'] = sg_season_data.sum(axis=1)
sg_ndvi_metrics.loc[valid_sg_keys, 'ndvi_mean'] = sg_season_data.mean(axis=1)

In [37]:
# NDVI metrics from the smoothn series; rows without a valid time series keep
# the initial value 0.
smn_ndvi_metrics = pd.DataFrame(np.zeros((len(geo_points), 3)),
                                columns=['ndvi_max', 'ndvi_mean', 'ndvi_sum'],
                                index=geo_points.index)

# Columns (days of year) inside the phenology window
ts_flag = np.logical_and(smn_ndvi_doy >= sos_doy, smn_ndvi_doy <= eos_doy)
# Keys of the VALID series only.  Indexing with ``valid_smn_ts_flag.index``
# alone selects every row and silently ignores the validity mask.
valid_smn_keys = valid_smn_ts_flag.index[valid_smn_ts_flag.values]
smn_season_data = smn_ndvi_ts_data.loc[valid_smn_keys, ts_flag]
smn_ndvi_metrics.loc[valid_smn_keys, 'ndvi_max'] = smn_season_data.max(axis=1)
smn_ndvi_metrics.loc[valid_smn_keys, 'ndvi_sum'] = smn_season_data.sum(axis=1)
smn_ndvi_metrics.loc[valid_smn_keys, 'ndvi_mean'] = smn_season_data.mean(axis=1)

## Select proper field subplots we want to use

In [38]:
# Keys of subplots that sit in a single pixel AND pass the crop-class test.
select_keys = single_pix_subp_flag.index[np.logical_and(single_pix_subp_flag, crop_pix_flag)]

In [39]:
# Show the selected subplot keys and how many there are.
select_keys

Index([u'004622-p02', u'004622-p08', u'004622-p10', u'011504-p01',
       u'011504-p02', u'011504-p04', u'011504-p05', u'012748-p01',
       u'012748-p02', u'012748-p04',
       ...
       u'065186-p06', u'065186-p10', u'065186-p18', u'065326-p08',
       u'068333-p01', u'068333-p05', u'072557-p02', u'072557-p04',
       u'072557-p10', u'072557-p13'],
      dtype='object', name=u'full_id', length=188)

## Visualize some time series

In [40]:
# Draw a few subplots to plot.  A dedicated, seeded generator makes the sample
# the same on every run without touching numpy's global random state; change
# rnd_seed to look at a different sample.
n_rnd = 10
rnd_seed = 0
rnd_keys = np.random.RandomState(rnd_seed).choice(select_keys, size=n_rnd, replace=False)

In [41]:
# List the sampled keys, one per line.
for key in rnd_keys:
    print(key)

025236-p04
020292-p02
043402-p20
061053-p08
038849-p04
052026-p03
042489-p23
034300-p21
033985-p01
027474-p05


In [42]:
# One entry per plotted series: its days of year, its values for the sampled
# subplots, and its legend label.
ts_doy_list = [ts_ndvi_fused_doy, ts_ndvi_fused_new_doy, sg_ndvi_doy, smn_ndvi_doy,
               ts_alos_hh_doy, ts_alos_hv_doy]
ts_data_list = [ts_ndvi_fused_data.loc[rnd_keys, :],
                ts_ndvi_fused_new_data.loc[rnd_keys, :],
                sg_ndvi_ts_data.loc[rnd_keys, :],
                smn_ndvi_ts_data.loc[rnd_keys, :],
                ts_alos_hh_data_norm.loc[rnd_keys, :],
                ts_alos_hv_data_norm.loc[rnd_keys, :]]
ts_label_list = ["NDVI Predicted (fused TerraOnly+ LC8)",
                 "NDVI Predicted (fused AquaTerra + LC8)",
                 "NDVI TIMESAT SG",
                 'NDVI Smoothn',
                 'ALOS HH',
                 'ALOS HV']
gt.plotTs(ts_doy_list, ts_data_list, geo_points.loc[rnd_keys, :],
          plot_kw_dict_list=[dict(label=ts_label) for ts_label in ts_label_list],
          ax_kw_dict=dict(ylim=(0, 1.1)),
          style_list=['.r', '.b', '-k', '--k', '^c', '^m'], use_plotly=True, save_fig=False)

## Explore the relationship between crop yield and NDVI metrics

In [43]:
# Subplots usable with the SG series: single pixel, crop class, valid series.
sg_select_keys = single_pix_subp_flag.index[single_pix_subp_flag & crop_pix_flag & valid_sg_ts_flag]
y = crop_yield_field.loc[sg_select_keys, 'yield_field']

# One scatter plot of field yield against each SG NDVI metric.
for metric_name in sg_ndvi_metrics.columns.values:
    fig, ax = plt.subplots(figsize=(8, 6))
    x = sg_ndvi_metrics.loc[sg_select_keys, metric_name]
    # sanity check: number of positions where x and y keys disagree (expect 0)
    print(np.sum(x.index != y.index))
    ax.plot(x, y, '.k', label=metric_name)
    ax.set_xlabel(metric_name.encode('string_escape').replace("_", " "))
    ax.set_ylabel(r"crop_yield_field".replace("_", " "))

    plotly_fig = plotly.tools.mpl_to_plotly(fig)
    # hover text: the subplot key of every point
    plotly_fig['data'][0]['text'] = sg_select_keys

    plotly_fig['layout']['showlegend'] = True
    plotly_fig['layout']['legend'] = dict(orientation="h")
    iplot(plotly_fig)

0


0


0


In [44]:
# Subplots usable with the smoothn series: single pixel, crop class, valid series.
smn_select_keys = single_pix_subp_flag.index[single_pix_subp_flag & crop_pix_flag & valid_smn_ts_flag]
y = crop_yield_field.loc[smn_select_keys, 'yield_field']

# One scatter plot of field yield against each smoothn NDVI metric.
for metric_name in smn_ndvi_metrics.columns.values:
    fig, ax = plt.subplots(figsize=(8, 6))
    x = smn_ndvi_metrics.loc[smn_select_keys, metric_name]
    # sanity check: number of positions where x and y keys disagree (expect 0)
    print(np.sum(x.index != y.index))
    ax.plot(x, y, '.k', label=metric_name)
    ax.set_xlabel(metric_name.encode('string_escape').replace("_", " "))
    ax.set_ylabel(r"crop_yield_field".replace("_", " "))

    plotly_fig = plotly.tools.mpl_to_plotly(fig)
    # hover text: the subplot key of every point
    plotly_fig['data'][0]['text'] = smn_select_keys

    plotly_fig['layout']['showlegend'] = True
    plotly_fig['layout']['legend'] = dict(orientation="h")
    iplot(plotly_fig)

0


0


0


In [46]:
# Subplots used with the ALOS data: single pixel and crop class.
alos_select_keys = single_pix_subp_flag.index[single_pix_subp_flag & crop_pix_flag]
y = crop_yield_field.loc[alos_select_keys, 'yield_field']

# One scatter plot of field yield against HH backscatter per acquisition
# (the column label is the acquisition's day of year).
for metric_name in ts_alos_hh_data.columns.values:
    fig, ax = plt.subplots(figsize=(8, 6))
    x = ts_alos_hh_data.loc[alos_select_keys, metric_name]
    # sanity check: number of positions where x and y keys disagree (expect 0)
    print(np.sum(x.index != y.index))
    ax.plot(x, y, '.k', label=metric_name)
    ax.set_xlabel("HH DOY = {0:d}".format(metric_name))
    ax.set_ylabel(r"crop_yield_field".replace("_", " "))

    plotly_fig = plotly.tools.mpl_to_plotly(fig)
    # hover text: the subplot key of every point
    plotly_fig['data'][0]['text'] = alos_select_keys

    plotly_fig['layout']['showlegend'] = True
    plotly_fig['layout']['legend'] = dict(orientation="h")
    iplot(plotly_fig)
    # pply.image.save_as(plotly_fig, "../figures/crop_yield_vs_alos_hh_doy{0:03d}.png".format(metric_name))

0


0


0


0


In [47]:
# Subplots used with the ALOS data: single pixel and crop class.  These keys
# are used throughout this cell, matching the HH plots; selecting with
# sg_select_keys would additionally (and unintentionally) require a valid SG
# NDVI series.
alos_select_keys = single_pix_subp_flag.index[single_pix_subp_flag & crop_pix_flag]
y = crop_yield_field.loc[alos_select_keys, 'yield_field']

# One scatter plot of field yield against HV backscatter per acquisition
# (the column label is the acquisition's day of year).
for metric_name in ts_alos_hv_data.columns.values:
    fig, ax = plt.subplots(figsize=(8, 6))
    x = ts_alos_hv_data.loc[alos_select_keys, metric_name]
    # sanity check: number of positions where x and y keys disagree (expect 0)
    print(np.sum(np.logical_not(x.index == y.index)))
    ax.plot(x, y, '.k', label=metric_name)
    plt.setp(ax, xlabel="HV DOY = {0:d}".format(metric_name),
             ylabel=r"crop_yield_field".replace("_", " "))

    plotly_fig = plotly.tools.mpl_to_plotly(fig)
    # hover text: the subplot key of every point
    plotly_fig['data'][0]['text'] = alos_select_keys

    plotly_fig['layout']['showlegend'] = True
    plotly_fig['layout']['legend'] = dict(orientation="h")
    iplot(plotly_fig)
    # Also keep a standalone HTML copy and a PNG export of each figure.
    plot(plotly_fig, filename="../figures/crop_yield_vs_alos_hv_doy{0:03d}.html".format(metric_name), auto_open=False)
    pply.image.save_as(plotly_fig, "../figures/crop_yield_vs_alos_hv_doy{0:03d}.png".format(metric_name))

0


0


0


0


In [48]:
# Subplots with a high field yield (> 3500) but only a moderate seasonal mean
# SG NDVI (between 0.6 and 0.7, exclusive).
high_yield_flag = crop_yield_field.loc[in_subp_flag, 'yield_field'].gt(3500)
sg_ndvi_mean = sg_ndvi_metrics.loc[:, 'ndvi_mean']
prob_idx = single_pix_subp_flag.index[high_yield_flag & sg_ndvi_mean.gt(0.6) & sg_ndvi_mean.lt(0.7)]

In [49]:
# Fused and SG-smoothed NDVI series of the subplots picked out in prob_idx.
prob_doy_list = [ts_ndvi_fused_doy, sg_ndvi_doy]
prob_data_list = [ts_ndvi_fused_data.loc[prob_idx, :], sg_ndvi_ts_data.loc[prob_idx, :]]
gt.plotTs(prob_doy_list, prob_data_list, geo_points.loc[prob_idx, :],
          plot_kw_dict_list=[dict(label="NDVI Predicted (fused + LC8)"), dict(label="NDVI TIMESAT SG")],
          style_list=['.r', '-k'], use_plotly=True, save_fig=False)

In [50]:
# Keys of the high-yield / moderate-NDVI subplots.
prob_idx

Index([u'016284-p02', u'016668-p03', u'020376-p23', u'042489-p19',
       u'065186-p02'],
      dtype='object', name=u'full_id')

In [51]:
# SG NDVI metrics of those subplots.
sg_ndvi_metrics.loc[prob_idx, :]

Unnamed: 0_level_0,ndvi_max,ndvi_mean,ndvi_sum
full_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
016284-p02,0.821309,0.616564,112.831188
016668-p03,0.831008,0.626079,114.57239
020376-p23,0.830928,0.621924,113.812082
042489-p19,0.798519,0.629297,115.161315
065186-p02,0.774194,0.602479,110.253705


In [52]:
# Upper-right pixel index of those subplots in the classification map.
img_points_ur.loc[prob_idx, :]

Unnamed: 0_level_0,sample,line
full_id,Unnamed: 1_level_1,Unnamed: 2_level_1
016284-p02,684,449
016668-p03,1209,455
020376-p23,1884,522
042489-p19,1344,1023
065186-p02,702,1531


In [53]:
# Show six significant decimals so coordinates can be read off the tables.
pd.set_option('display.precision', 6)

In [54]:
# Everything known about one subplot: location, SG metrics and the fractional
# / lower-left / upper-right / centre pixel coordinates.
inspect_key = '052699-p05'
(crop_yield_field.loc[inspect_key, 'lat_s'], crop_yield_field.loc[inspect_key, 'lon_s'],
 sg_ndvi_metrics.loc[inspect_key, :],
 img_points.loc[inspect_key, :], img_points_ll.loc[inspect_key, :],
 img_points_ur.loc[inspect_key, :], img_points_ctr.loc[inspect_key, :])

(20.413207, 106.468557, ndvi_max      0.825694
 ndvi_mean     0.506499
 ndvi_sum     92.689291
 Name: 052699-p05, dtype: float64, sample    1410.723254
 line      1248.403864
 Name: 052699-p05, dtype: float64, sample    1410
 line      1248
 Name: 052699-p05, dtype: int64, sample    1410
 line      1248
 Name: 052699-p05, dtype: int64, sample    1410
 line      1248
 Name: 052699-p05, dtype: int64)

In [55]:
from adaptive_cubic_spline import Spline

In [119]:
# Subplots used to try the adaptive cubic spline.  A dedicated, seeded
# generator makes the sample the same on every run without touching numpy's
# global random state; change test_seed to look at a different sample.
test_seed = 119
test_key = np.random.RandomState(test_seed).choice(select_keys, 10, replace=False)
print(test_key)

[u'038849-p36' u'015361-p36' u'027202-p15' u'024847-p07' u'024847-p28'
 u'060116-p16' u'034019-p11' u'020190-p07' u'024019-p01' u'045840-p11']


In [120]:
acsobj = Spline()
# Factor applied to y before fitting and removed from the prediction
test_scale = 1
# Fitted series, pre-filled with no_data
yfit_test = pd.DataFrame(np.full((len(test_key), len(ts_ndvi_fused_new_doy)), no_data, dtype=float),
                         index=test_key, columns=ts_ndvi_fused_new_doy)
for key in test_key:
    series = ts_ndvi_fused_new_data.loc[key, :]
    # positions of the usable observations: not no_data and strictly positive
    valid_pos = np.where(np.logical_and(series != no_data, series > 0))[0]
    x = ts_ndvi_fused_new_doy[valid_pos]
    y = np.squeeze(series.iloc[valid_pos]).values
    # number of observations that go into this fit
    print(len(x))
    acsobj.fit(x, y*test_scale)
    yfit_test.loc[key, :] = acsobj.predict(ts_ndvi_fused_new_doy) / test_scale

# Compare the raw fusion series, its non-fused dates, the TIMESAT SG series and
# the spline fit for the test subplots.
fit_doy_list = [ts_ndvi_fused_new_doy, nonfuse_doy, sg_ndvi_doy, ts_ndvi_fused_new_doy]
fit_data_list = [ts_ndvi_fused_new_data.loc[test_key, :],
                 ts_ndvi_fused_new_data.loc[test_key, nonfuse_doy],
                 sg_ndvi_ts_data.loc[test_key, :],
                 yfit_test]
fit_label_list = ["NDVI Predicted (fused AquaTerra + LC8)",
                  "NDVI Predicted (LC8)",
                  "NDVI TIMESAT SG",
                  "NDVI Adaptive Cubic Spline"]
gt.plotTs(fit_doy_list, fit_data_list, geo_points.loc[test_key, :],
          plot_kw_dict_list=[dict(label=fit_label) for fit_label in fit_label_list],
          ax_kw_dict=dict(ylim=(0, 1.1)),
          style_list=['.b', '.r', '-k', '--k'], use_plotly=True, save_fig=False)

45
55
62
63
60
58
53
57
56
58


In [118]:
# Spline behaviour on a reference series: fit a random subsample that leaves
# a gap, then refit after pushing some of the samples downwards.
x0 = np.load('acc-chen/2012_nir_x.npy')
y0 = np.load('acc-chen/2012_nir_y.npy')

# Dedicated, seeded generator: the subsample and the noise are the same on
# every run and numpy's global random state is left untouched.
demo_rng = np.random.RandomState(118)

# Draw ntotal samples uniformly from the points OUTSIDE the open interval
# (85, 175) of x0, which leaves a gap in the fitted data.
ntotal = 60
p = np.zeros(len(x0))
tmp_flag = np.logical_not(np.logical_and(x0>85, x0<175))
p[tmp_flag] = 1./np.sum(tmp_flag)
tmp_ind = np.sort(demo_rng.choice(np.arange(len(x0)), size=ntotal, replace=False, p=p))
x = x0[tmp_ind]
y = y0[tmp_ind]

acsobj.fit(x, y)
xfit = np.arange(x.min(), x.max() + 1)
yfit = acsobj.predict(xfit)

# Lower nbad randomly chosen samples by a uniform amount in [0, 1e4*0.1)
nbad = 15
bad_ind = demo_rng.choice(np.arange(len(x)), size=nbad, replace=False)
ybad = np.copy(y)
# ybad[bad_ind] = y[bad_ind] + (np.random.rand(nbad)-0.5)*2*1e4*0.1
ybad[bad_ind] = y[bad_ind] - demo_rng.rand(nbad)*1e4*0.1
acsobj.fit(x, ybad)
yfitbad = acsobj.predict(xfit)

fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(x0, y0, '.k', label="original data")
ax.plot(x, ybad, '.r', label="data+noise")
ax.plot(x, y, '.c', label="data")
ax.plot(xfit, yfit, '--k', label="fitting")
ax.plot(xfit, yfitbad, '-k', label="fitting noisy data")
plotly_fig = plotly.tools.mpl_to_plotly(fig)
plotly_fig['layout']['showlegend'] = True
plotly_fig['layout']['legend'] = dict(orientation="h")
iplot(plotly_fig)