* Extract time-series data for the field subplots where crop yield was measured
* Save the extracted time-series data to csv files

In [2]:
import os
import sys
import itertools
import glob

import numpy as np
import pandas as pd

In [3]:
import geo_ts as gt

In [3]:
# Names of the variables to extract; each name is substituted into the
# template below to give one output csv file per variable.
var_names = ['ndvi', 'evi', 'blue', 'green', 'red', 'nir', 'swir1', 'swir2']
csv_name_template = '../results/thai_binh_field_subp_ts_{0:s}_fuse_at.csv'
csv_files = [csv_name_template.format(varn) for varn in var_names]
# ts_ndvi_fuse_at_csv_file = "../results/thai_binh_field_subplots_ts_ndvi_fuse_at.csv"
# ts_evi_fuse_at_csv_file = "../results/thai_binh_field_subplots_ts_ndvi_fuse_at.csv"

## Read field data of crop yield

In [4]:
# Input files: the field crop-cutting table, and the classification map that
# is passed to the geo_ts helpers to convert lon/lat into map and pixel
# coordinates.
crop_yield_field_file = "/projectnb/echidna/lidar/zhanli86/workspace/data/projects/kaiyu-adb-crop/vietnam-field/Thai_Binh_select_variables_crop_cutting.xlsx"
cls_map_file = "/projectnb/echidna/lidar/zhanli86/workspace/data/projects/kaiyu-adb-crop/vietnam-cls/vietnam_thai_bin_cls_rf_lsat_scenes.img"

# Read the first sheet (the read_excel default) using the third column as
# index. The sheet selector is left at its default because its keyword was
# renamed from `sheetname` to `sheet_name` between pandas releases.
crop_yield_field = pd.read_excel(crop_yield_field_file, index_col=2)

# Rename selected columns by position. A list copy is edited instead of
# `columns.values`, which would modify the Index's backing array in place.
colname = crop_yield_field.columns.tolist()
colname[2] = "lat_m" # mesh lat
colname[3] = "lon_m" # mesh lon
colname[4] = "lat_s" # subplot lat
colname[5] = "lon_s" # subplot lon
colname[7] = "yield_field" # field measured yield
crop_yield_field.columns = colname

# Geographic coordinates of the subplots, one row per field record.
geo_points = crop_yield_field.loc[:, ['lon_s', 'lat_s']].copy()
geo_points.columns = ['lon', 'lat']

# Rows are visited by position (not by index label) so that a repeated index
# label, if the table has any, cannot make a lookup return several rows.
lonlat_pairs = list(zip(geo_points['lon'], geo_points['lat']))

# Map (projected) coordinates of each subplot.
proj_xy = [gt.geo2Proj(cls_map_file, lon, lat) for lon, lat in lonlat_pairs]
proj_points = pd.DataFrame([(p[0], p[1]) for p in proj_xy],
                           index=geo_points.index, columns=['x', 'y'], dtype=float)

# Fractional image coordinates (sample, line) of each subplot.
img_xy = [gt.geo2Pixel(cls_map_file, lon, lat, ret_int=False) for lon, lat in lonlat_pairs]
img_points = pd.DataFrame([(p[0], p[1]) for p in img_xy],
                          index=geo_points.index, columns=['sample', 'line'], dtype=float)


def _subplot_pixel(points, sample_shift, line_shift):
    """Return integer pixel indices of *points* shifted by the given offsets.

    np.floor is used instead of a plain integer cast: a cast truncates toward
    zero, so a coordinate in (-1, 0) would become pixel 0 and wrongly pass a
    `>= 0` image-extent test. For non-negative coordinates both give the same
    result.
    """
    return pd.DataFrame(
        {'sample': np.floor(points.loc[:, 'sample'] + sample_shift).astype(int),
         'line': np.floor(points.loc[:, 'line'] + line_shift).astype(int)},
        columns=['sample', 'line'])


# Subplot side length in pixel units.
# NOTE(review): presumably a 2.5 m subplot on a 30 m pixel grid -- confirm.
subp_imgsize = 2.5/30

# Pixels containing the subplot corners and centre. The surveyed point is
# used as the lower-left corner; "right" adds to sample, "upper" subtracts
# from line.
img_points_ur = _subplot_pixel(img_points, subp_imgsize, -subp_imgsize)
img_points_ll = _subplot_pixel(img_points, 0, 0)
img_points_ul = _subplot_pixel(img_points, 0, -subp_imgsize)
img_points_lr = _subplot_pixel(img_points, subp_imgsize, 0)
img_points_ctr = _subplot_pixel(img_points, 0.5*subp_imgsize, -0.5*subp_imgsize)

## Select field subplots falling into our image extent

In [5]:
# Raster dimensions of the classification map; a subplot is kept only when
# both its upper-right and lower-left pixels lie inside this grid.
cls_map_meta = gt.getRasterMetaGdal(cls_map_file)

ncols = cls_map_meta['RasterXSize']
nrows = cls_map_meta['RasterYSize']

# Element-wise AND of all eight bound checks. `&` on boolean Series works
# the same on Python 2 and 3 (the builtin `reduce` exists only on Python 2).
in_subp_flag = (
    (img_points_ur.loc[:, 'sample'] >= 0) & (img_points_ur.loc[:, 'sample'] < ncols)
    & (img_points_ur.loc[:, 'line'] >= 0) & (img_points_ur.loc[:, 'line'] < nrows)
    & (img_points_ll.loc[:, 'sample'] >= 0) & (img_points_ll.loc[:, 'sample'] < ncols)
    & (img_points_ll.loc[:, 'line'] >= 0) & (img_points_ll.loc[:, 'line'] < nrows)
)
# Show the field records that are dropped because they fall outside the image.
print(crop_yield_field.loc[~in_subp_flag, :])

crop_yield_field = crop_yield_field.loc[in_subp_flag, :]

# Apply the same row selection to every derived table. Each table is
# filtered from itself: the upper-left / lower-right tables were previously
# overwritten with the (already filtered) upper-right / lower-left ones.
geo_points = geo_points.loc[in_subp_flag, :]
proj_points = proj_points.loc[in_subp_flag, :]
img_points = img_points.loc[in_subp_flag, :]
img_points_ur = img_points_ur.loc[in_subp_flag, :]
img_points_ll = img_points_ll.loc[in_subp_flag, :]
img_points_ul = img_points_ul.loc[in_subp_flag, :]
img_points_lr = img_points_lr.loc[in_subp_flag, :]
img_points_ctr = img_points_ctr.loc[in_subp_flag, :]

# True where the upper-right and lower-left corners share one pixel, i.e.
# the whole subplot lies within a single pixel.
single_pix_subp_flag = ((img_points_ur.loc[:, 'sample'] == img_points_ll.loc[:, 'sample'])
                        & (img_points_ur.loc[:, 'line'] == img_points_ll.loc[:, 'line']))

## Read time series data of field subplots and write to csv

In [11]:
# # new fusion data by combining both Aqua and Terra
# ts_ndvi_fused_new_files = glob.glob("/projectnb/echidna/lidar/zhanli86/workspace/data/projects/kaiyu-adb-crop/vietnam-fusion-ts-new/aqua-and-terra/plndsr_500.126046.2015[0-9][0-9][0-9].ndvi.bin") \
#                           + glob.glob("/projectnb/echidna/lidar/zhanli86/workspace/data/projects/kaiyu-adb-crop/vietnam-fusion-ts-new/aqua-and-terra/psub_lndsr.126046.2015[0-9][0-9][0-9].ndvi.bin")
# ts_ndvi_fused_new_files = np.array(ts_ndvi_fused_new_files)
# ts_ndvi_fused_new_doy = np.array([int(imgf.split(".")[2][-3:]) for imgf in ts_ndvi_fused_new_files])
# tmp, cnt = np.unique(ts_ndvi_fused_new_doy, return_counts=True)
# tmp_flag = np.ones(len(ts_ndvi_fused_new_files), dtype=np.bool_)
# nonfuse_flag = np.zeros(len(ts_ndvi_fused_new_files), dtype=np.bool_)
# for d in tmp[cnt>1]:
#     tmp_ind = np.where(ts_ndvi_fused_new_files == "/projectnb/echidna/lidar/zhanli86/workspace/data/projects/kaiyu-adb-crop/vietnam-fusion-ts-new/aqua-and-terra/plndsr_500.126046.2015{0:03d}.ndvi.bin".format(d))[0]
#     tmp_flag[tmp_ind] = False
#     tmp_ind = np.where(ts_ndvi_fused_new_files == "/projectnb/echidna/lidar/zhanli86/workspace/data/projects/kaiyu-adb-crop/vietnam-fusion-ts-new/aqua-and-terra/psub_lndsr.126046.2015{0:03d}.ndvi.bin".format(d))[0]
#     nonfuse_flag[tmp_ind] = True
# nonfuse_doy = np.array([int(imgf.split(".")[2][-3:]) for imgf in ts_ndvi_fused_new_files[nonfuse_flag]])
# ts_ndvi_fused_new_files = ts_ndvi_fused_new_files[tmp_flag]

# fnames, tmp = gt.getTsFromImgs(ts_ndvi_fused_new_files, img_points_ctr.loc[:, 'sample'], img_points_ctr.loc[:, 'line'])
# ts_ndvi_fused_new_doy = np.array([int(imgf.split(".")[2][-3:]) for imgf in fnames])
# ts_ndvi_fused_new_data = pd.DataFrame(tmp, columns=ts_ndvi_fused_new_doy, index=img_points_ctr.index)
# # sort the data points according to doy
# sort_ind = np.argsort(ts_ndvi_fused_new_doy)
# ts_ndvi_fused_new_data = ts_ndvi_fused_new_data.reindex_axis(ts_ndvi_fused_new_data.columns[sort_ind], axis='columns', copy=False)
# ts_ndvi_fused_new_doy = ts_ndvi_fused_new_doy[sort_ind]

In [12]:
# glob patterns (prefix + variable name + suffix) of the per-date image
# files: `plndsr_500.*` files are treated as fusion results and `sub_lndsr.*`
# files as actual observations by ts_files_to_df.
fuse_file_prefix = "/projectnb/echidna/lidar/zhanli86/workspace/data/projects/kaiyu-adb-crop/vietnam-fusion-ts-new/aqua-and-terra/plndsr_500.126046.2015[0-9][0-9][0-9]."
fuse_file_suffix = ".bin"
obs_file_prefix = "/projectnb/echidna/lidar/zhanli86/workspace/data/projects/kaiyu-adb-crop/vietnam-fusion-ts-new/aqua-and-terra/sub_lndsr.126046.2015[0-9][0-9][0-9]."
obs_file_suffix = ".bin"

# One list of image files per variable: fusion files followed by observation
# files. glob returns matches in arbitrary order, so the files are not
# sorted by date here.
ts_files_list = [glob.glob("{0:s}{1:s}{2:s}".format(fuse_file_prefix, varn, fuse_file_suffix)) \
                 + glob.glob("{0:s}{1:s}{2:s}".format(obs_file_prefix, varn, obs_file_suffix)) \
                 for varn in var_names]


def _doy_from_filename(fstr):
    """Return the last three digits of the third dot-separated name field.

    For a name such as `sub_lndsr.126046.2015123.ndvi.bin` this gives 123
    (used as the `doy` label). Only the base name is split, so dots in the
    directory part of the path cannot shift the field position.
    """
    return int(os.path.basename(fstr).split(".")[2][-3:])


# The same parser applies to every variable; one entry per variable keeps the
# list parallel to var_names / ts_files_list.
doy_func_list = [_doy_from_filename for varn in var_names]

In [13]:
def ts_files_to_df(file_list, doy_func, img_points):
    """Read pixel values from a list of image files into a labelled table.

    Parameters
    ----------
    file_list : sequence of str
        Image files to read.
    doy_func : callable
        Maps a file name to the value stored in the 'doy' column level.
    img_points : pandas.DataFrame
        Pixel locations; must have 'sample' and 'line' columns. Its index
        becomes the row index of the result.

    Returns
    -------
    pandas.DataFrame
        Values returned by gt.getTsFromImgs, with a two-level column index
        ('type', 'doy'). 'type' is 'obs' for files whose name contains
        'sub_lndsr' (actual Landsat observations) and 'fuse' otherwise
        (fusion results).
    """
    file_list = np.array(file_list)

    fnames, ts_values = gt.getTsFromImgs(file_list, img_points.loc[:, 'sample'], img_points.loc[:, 'line'])

    # Label the columns from the file names returned together with the data
    # rather than from the input list, as the other cells that call
    # getTsFromImgs do.
    # NOTE(review): this assumes `fnames` lists the files in the column order
    # of the returned data -- confirm against geo_ts.getTsFromImgs.
    colname_tuples = [('obs' if 'sub_lndsr' in fstr else 'fuse', doy_func(fstr))
                      for fstr in fnames]
    ts_mlevel_colname = pd.MultiIndex.from_tuples(colname_tuples, names=['type', 'doy'])

    ts_data = pd.DataFrame(ts_values, columns=ts_mlevel_colname, index=img_points.index)
    return ts_data

In [14]:
# Extract the time series of every variable at the subplot centre pixels and
# write one csv file per variable. A plain loop is used because the calls are
# made for their side effect only; builtin zip works on Python 2 and 3.
for ts_files, doy_func, csvf in zip(ts_files_list, doy_func_list, csv_files):
    ts_files_to_df(ts_files, doy_func, img_points_ctr).to_csv(csvf)

In [8]:
# time series file by fusion of Landsat and MODIS BEFORE TIMESAT smoothing
ts_ndvi_fused_file = "/projectnb/echidna/lidar/zhanli86/workspace/data/projects/kaiyu-adb-crop/vietnam-fusion-ts/predicted_NDVI"
# time series data by SG filter of TIMESAT to the fused data
fitsg_ndvi_imgfiles = glob.glob("/projectnb/echidna/lidar/zhanli86/workspace/data/projects/kaiyu-adb-crop/vietnam-ts-sg/fitSG_NDVI_126046.2015[0-9][0-9][0-9]")

In [10]:
# Read the SG-filtered NDVI images at the subplot centre pixels.
fnames, sg_values = gt.getTsFromImgs(fitsg_ndvi_imgfiles, img_points_ctr.loc[:, 'sample'], img_points_ctr.loc[:, 'line'])
# File names look like `fitSG_NDVI_126046.2015DDD`: the field after the dot
# is split off and its first four characters (the year) are dropped. Only the
# base name is parsed, so dots in the directory part of the path cannot shift
# the field position.
sg_ndvi_doy = np.array([int(os.path.basename(imgf).split(".")[1][4:]) for imgf in fnames])
# Two-level column index: every column is of type 'fit'.
colname_tuples = [('fit', doy) for doy in sg_ndvi_doy]
ts_mlevel_colname = pd.MultiIndex.from_tuples(colname_tuples, names=['type', 'doy'])
ts_sg_ndvi = pd.DataFrame(sg_values, columns=ts_mlevel_colname, index=img_points_ctr.index)

In [16]:
# Read the fused NDVI file at the subplot centre pixels.
fused_values = gt.readPixelsGdal(ts_ndvi_fused_file, img_points_ctr.loc[:, 'sample'], img_points_ctr.loc[:, 'line'])
n_fused_steps = fused_values.shape[1]
# Columns are labelled 1..N in the order they are returned.
# NOTE(review): this presumably treats column i as day of year i+1 -- confirm.
ts_ndvi_fused_doy = np.arange(n_fused_steps)+1
fuse_labels = np.array(['fuse'] * n_fused_steps)
fused_column_pairs = list(zip(fuse_labels, ts_ndvi_fused_doy))
ts_ndvi_fused = pd.DataFrame(
    fused_values,
    columns=pd.MultiIndex.from_tuples(fused_column_pairs, names=['type', 'doy']),
    index=img_points_ctr.index)

In [17]:
# Save the SG-filtered and the fused NDVI time series, in that order.
for ts_table, csv_path in ((ts_sg_ndvi, '../results/thai_binh_field_subp_ts_ndvi_sg.csv'),
                           (ts_ndvi_fused, '../results/thai_binh_field_subp_ts_ndvi_fuse_terra.csv')):
    ts_table.to_csv(csv_path)