In [None]:
import numpy as np
import gdal
from joblib import Parallel, delayed
import datetime
import bucket_util as bu

In [None]:
def divide_image(img,first,step,num):
    """Split a band-stacked image into ``num`` consecutive chunks along axis 2.

    Args:
        img: 3-D array whose last axis holds the stacked bands.
        first: Index of the first band of the first chunk.
        step: Number of bands in every chunk except the last one.
        num: Number of chunks to return (one per year in this notebook).

    Returns:
        A list of slices of ``img``. The first ``num - 1`` entries each span
        ``step`` bands; the final entry takes every band that is left, so its
        width may differ from ``step``.
    """
    full_chunks = max(num - 1, 0)
    starts = [first + k * step for k in range(full_chunks)]
    pieces = [img[:, :, lo:lo + step] for lo in starts]
    # Whatever follows the fixed-width chunks becomes the last chunk.
    pieces.append(img[:, :, first + full_chunks * step:])
    return pieces

In [None]:
def extend_mask(img,num):
    """Grow a mask stack along axis 2 by appending ``num`` extra bands.

    Each pass appends a copy of the band that is currently second from the
    end (``img[:, :, -2:-1]``), so the appended bands alternate between the
    last two bands of the input. With a single-band input that slice is empty
    and nothing is appended.

    NOTE(review): if the intent is to repeat the most recent mask, the slice
    would need to be ``img[:, :, -1:]`` -- confirm before changing.

    Args:
        img: 3-D mask array with bands on the last axis.
        num: Number of append passes to perform.

    Returns:
        The extended array; the input array itself is not modified.
    """
    extended = img
    for _ in range(num):
        second_to_last = extended[:, :, -2:-1]
        extended = np.concatenate((extended, second_to_last), axis=2)
    return extended

In [None]:
def merge_image(MODIS_img_list,MODIS_temperature_img_list,img_bands=7,temperature_bands=2):
    """Interleave image and temperature bands, one time step at a time.

    For every pair of arrays the output stacks, per time step, ``img_bands``
    image bands followed by ``temperature_bands`` temperature bands.

    Args:
        MODIS_img_list: List of 3-D image arrays (bands on axis 2).
        MODIS_temperature_img_list: List of 3-D temperature arrays, indexed in
            parallel with ``MODIS_img_list``.
        img_bands: Image bands per time step (default 7).
        temperature_bands: Temperature bands per time step (default 2).

    Returns:
        A list of float arrays, one per input pair, whose band count is the
        sum of the two inputs' band counts. Bands that do not belong to a
        complete time step are left as zeros.

    Raises:
        IndexError: If the temperature list is shorter than the image list.
        ValueError: If a temperature array runs out of bands before the
            matching image array does.
    """
    merged_bands = img_bands + temperature_bands
    MODIS_list = []
    for i, img in enumerate(MODIS_img_list):
        temperature = MODIS_temperature_img_list[i]
        img_shape = img.shape
        total_bands = img_shape[2] + temperature.shape[2]
        # Zero-filled rather than np.empty: when the band counts are not exact
        # multiples of the per-step sizes, the tail is never written and would
        # otherwise hold arbitrary memory contents.
        merge = np.zeros((img_shape[0], img_shape[1], total_bands))
        for j in range(img_shape[2] // img_bands):
            img_step = img[:, :, j * img_bands:(j + 1) * img_bands]
            temperature_step = temperature[:, :, j * temperature_bands:(j + 1) * temperature_bands]
            merge[:, :, j * merged_bands:(j + 1) * merged_bands] = np.concatenate(
                (img_step, temperature_step), axis=2)
        MODIS_list.append(merge)
    return MODIS_list

In [None]:
def mask_image(MODIS_list,MODIS_mask_img_list):
    """Multiply every image stack by its mask, repeated across the bands.

    Args:
        MODIS_list: List of 3-D image arrays (bands on axis 2).
        MODIS_mask_img_list: List of 3-D mask arrays, indexed in parallel with
            ``MODIS_list``; each mask is tiled along axis 2 once per image band.

    Returns:
        A new list holding ``image * tiled_mask`` for every image.
    """
    def _masked(idx, image):
        repeats = (1, 1, image.shape[2])
        return image * np.tile(MODIS_mask_img_list[idx], repeats)

    return [_masked(idx, image) for idx, image in enumerate(MODIS_list)]

In [None]:
# Point bucket_util at the directory holding the input rasters. The raw-string
# prefix keeps the Windows backslashes literal: '\c' is not a valid escape
# sequence and recent Python versions warn about it in normal strings.
bu.setBucketLocation(r'D:\crop-yield-prediction-project-master\clean_data\images')
# Enumerate the ('data', 'image_full') entries; later cells unpack each entry
# as a (prefix, datatype, file) triple.
files = bu.walk('data', 'image_full')

In [None]:
# Materialise the walk results so they can be indexed and displayed.
# NOTE(review): a later cell rebinds `file` to a single file name when it
# unpacks `fileinfo`, so the list is no longer reachable under this name
# after that cell has run.
file = list(files)
file

In [None]:
# Use the second walked entry as the sample for the exploratory cells below.
fileinfo = file[1]

In [None]:
# Unpack the sample entry. This rebinds `file` from the list of walked
# entries to this entry's file name.
prefix, datatype, file = fileinfo

In [None]:
# Load the yield table as floats. The save cell compares its first three
# columns against (year, loc1, loc2).
data_yield = np.genfromtxt('yield_final.csv', delimiter=',', dtype=float)

In [None]:
# Resolve the full path of the sample image, then of the entries obtained by
# swapping its datatype for 'temperature' and 'mask'.
MODIS_path = bu.getFullPath(fileinfo)
MODIS_temperature_path = bu.getFullPath(bu.replaceDatatype(fileinfo, 'temperature'))
MODIS_mask_path = bu.getFullPath(bu.replaceDatatype(fileinfo, 'mask'))

In [None]:
# Split the file name on '_' and '.'; the first two tokens are parsed as the
# integer location codes used to look the image up in the yield table.
raw = file.replace('_',' ').replace('.',' ').split()
loc1 = int(raw[0])
loc2 = int(raw[1])

In [None]:
# Read the image raster as uint16 and move its first axis to the end.
# NOTE(review): when a ValueError is raised the message is printed and
# execution continues, leaving MODIS_img unassigned (or holding a value from
# an earlier run) for the cells below.
try:
    MODIS_img = np.transpose(np.array(gdal.Open(MODIS_path).ReadAsArray(), dtype='uint16'),axes=(1,2,0))
except ValueError as msg:
    print (msg)

In [None]:
MODIS_img.shape # rows x columns x (46 images (approx) * 7 bands * 14 years)

In [None]:
# Read the temperature raster as uint16 and move its first axis to the end.
MODIS_temperature_img = np.transpose(np.array(gdal.Open(MODIS_temperature_path).ReadAsArray(), dtype='uint16'),axes=(1,2,0))

In [None]:
MODIS_temperature_img.shape # rows x columns x (46 images (approx) * 2 bands * 14 years)

In [None]:
# Convert to float before shifting: the raster was read as uint16, and
# subtracting 12000 in unsigned arithmetic wraps readings below 12000 around
# to large positive values instead of making them negative, so they would end
# up clipped to 5000 rather than 0.
# Re-running this cell without reloading the raster applies the shift again.
MODIS_temperature_img = MODIS_temperature_img.astype(np.float64)
# shift
MODIS_temperature_img = MODIS_temperature_img-12000
# scale
MODIS_temperature_img = MODIS_temperature_img*1.25
# clean: clamp to the [0, 5000] range
MODIS_temperature_img = np.clip(MODIS_temperature_img, 0, 5000)

In [None]:
# Read the mask raster as uint16 and move its first axis to the end.
MODIS_mask_img = np.transpose(np.array(gdal.Open(MODIS_mask_path).ReadAsArray(), dtype='uint16'),axes=(1,2,0))
# Reduce the mask to 0/1 (value 12 -> 1, everything else -> 0), as the batch
# loop at the end of this notebook does. Without this step mask_image()
# multiplies the pixels by the raw mask values instead of zeroing the
# masked-out ones.
MODIS_mask_img[MODIS_mask_img != 12] = 0
MODIS_mask_img[MODIS_mask_img == 12] = 1

In [None]:
# Inspect the mask dimensions; its last axis is extended and then split into
# 14 single-band chunks in the next cell.
MODIS_mask_img.shape

In [None]:
# Split each stack into 14 chunks along the band axis: 46 * 7 bands per chunk
# for the image and 46 * 2 for the temperature (the last chunk takes whatever
# remains).
MODIS_img_list=divide_image(MODIS_img, 0, 46 * 7, 14)
MODIS_temperature_img_list = divide_image(MODIS_temperature_img, 0, 46 * 2, 14)
# Append three bands to the mask before splitting it one band per chunk.
# Re-running this cell appends three more bands each time.
MODIS_mask_img = extend_mask(MODIS_mask_img, 3)
MODIS_mask_img_list = divide_image(MODIS_mask_img, 0, 1, 14)

In [None]:
# Sanity check: divide_image was asked for 14 chunks.
len(MODIS_img_list)

In [None]:
# Shape of the first image chunk, before the temperature bands are merged in.
img_shape=MODIS_img_list[0].shape
img_shape

In [None]:
# Interleave the image and temperature bands of every chunk.
MODIS_list = merge_image(MODIS_img_list,MODIS_temperature_img_list)

In [None]:
# merge_image returns one merged array per input chunk.
len(MODIS_list)

In [None]:
# Shape of the first merged chunk; this overwrites the img_shape set above.
img_shape=MODIS_list[0].shape
img_shape

In [None]:
# Multiply every merged chunk by its mask.
MODIS_list_masked = mask_image(MODIS_list,MODIS_mask_img_list)

In [None]:
# Rebuild the tiled mask for the first chunk, the same way mask_image does,
# to inspect its shape.
mask = np.tile(MODIS_mask_img_list[0],(1,1,MODIS_list[0].shape[2]))
mask.shape

In [None]:
# The masked chunk keeps the shape of the merged chunk it was built from.
MODIS_list_masked[0].shape

In [None]:
# Element count of a 120 x 103 x 414 array -- presumably the chunk shape shown
# above (TODO confirm) -- for comparison with the non-zero count below.
120*103*414

In [None]:
# Number of non-zero values left in the twelfth masked chunk.
np.count_nonzero(MODIS_list_masked[11])

In [None]:
# One masked array per chunk; the save loop below indexes entries 0..13.
len(MODIS_list_masked)

In [None]:
# Chunk index 0 corresponds to this year in the save loop.
year_start = 2003
# Switch bucket_util to the output directory. The raw-string prefix keeps the
# backslashes literal: '\i' and '\c' are not valid escape sequences and
# recent Python versions warn about them in normal strings.
bu.setBucketLocation(r'D:\itd\cleandata')

In [None]:
# Write one .npy file per year, but only for (year, loc1, loc2) combinations
# that appear in the first three columns of the yield table.
for i in range(14):
    year = year_start + i
    key = np.array([year, loc1, loc2])
    if not np.any(np.all(data_yield[:, 0:3] == key, axis=1)):
        continue
    ## 1 save original file
    filename = bu.getFullPath((prefix, 'output_full', str(year)+'_'+str(loc1)+'_'+str(loc2)+'.npy'))
    np.save(filename, MODIS_list_masked[i])
    print(datetime.datetime.now())
    print(filename, ':written ')

In [None]:
# Batch version of the exploratory cells above: build the masked per-year
# stacks for every walked image and save the years that have a yield record.
INPUT_BUCKET = r'D:\crop-yield-prediction-project-master\clean_data\images'
OUTPUT_BUCKET = r'D:\itd\cleandata'
year_start = 2003
num_years = 14
steps_per_year = 46


def _read_bands(path):
    """Read a raster with GDAL as uint16 and move its first axis to the end."""
    dataset = gdal.Open(path)
    if dataset is None:
        # Unless GDAL exceptions are enabled, Open() signals failure with None.
        raise ValueError('could not open raster: ' + str(path))
    return np.transpose(np.array(dataset.ReadAsArray(), dtype='uint16'), axes=(1, 2, 0))


# The yield table is the same for every image, so read it once.
data_yield = np.genfromtxt('yield_final.csv', delimiter=',', dtype=float)

# List the inputs afresh rather than iterating `file`: the unpacking cell
# above rebinds that name to a single file name, so iterating it would walk
# over characters instead of (prefix, datatype, file) entries.
bu.setBucketLocation(INPUT_BUCKET)
image_entries = list(bu.walk('data', 'image_full'))

for f in image_entries:
    prefix, datatype, image_name = f

    # Resolve input paths against the image bucket on every iteration,
    # including the first one (the previous cell leaves the output bucket set).
    bu.setBucketLocation(INPUT_BUCKET)
    MODIS_path = bu.getFullPath(f)
    MODIS_temperature_path = bu.getFullPath(bu.replaceDatatype(f, 'temperature'))
    MODIS_mask_path = bu.getFullPath(bu.replaceDatatype(f, 'mask'))

    # The first two '_'/'.'-separated tokens of the file name are the
    # integer location codes.
    raw = image_name.replace('_', ' ').replace('.', ' ').split()
    loc1 = int(raw[0])
    loc2 = int(raw[1])

    try:
        MODIS_img = _read_bands(MODIS_path)
    except ValueError as msg:
        # Skip this image: carrying on would reuse the previous file's array
        # and save it under this file's name.
        print(msg)
        continue

    MODIS_temperature_img = _read_bands(MODIS_temperature_path)
    # Shift, scale, then clamp to [0, 5000]. Cast to float first: subtracting
    # in uint16 wraps readings below 12000 to large positive values.
    MODIS_temperature_img = np.clip(
        (MODIS_temperature_img.astype(np.float64) - 12000) * 1.25, 0, 5000)

    MODIS_mask_img = _read_bands(MODIS_mask_path)
    # Non-crop = 0, crop = 1
    MODIS_mask_img = (MODIS_mask_img == 12).astype('uint16')

    MODIS_img_list = divide_image(MODIS_img, 0, steps_per_year * 7, num_years)
    MODIS_temperature_img_list = divide_image(MODIS_temperature_img, 0, steps_per_year * 2, num_years)
    MODIS_mask_img = extend_mask(MODIS_mask_img, 3)
    MODIS_mask_img_list = divide_image(MODIS_mask_img, 0, 1, num_years)
    MODIS_list = merge_image(MODIS_img_list, MODIS_temperature_img_list)
    # Do the mask job
    MODIS_list_masked = mask_image(MODIS_list, MODIS_mask_img_list)

    # Output paths are resolved against the output bucket.
    bu.setBucketLocation(OUTPUT_BUCKET)
    for i in range(num_years):
        year = year_start + i
        key = np.array([year, loc1, loc2])
        # Only years with a matching (year, loc1, loc2) yield row are saved.
        if not np.any(np.all(data_yield[:, 0:3] == key, axis=1)):
            continue
        ## 1 save original file
        filename = bu.getFullPath((prefix, 'output_full', str(year)+'_'+str(loc1)+'_'+str(loc2)+'.npy'))
        np.save(filename, MODIS_list_masked[i])
        print(datetime.datetime.now())
        print(filename, ':written ')

# Leave the bucket pointing at the inputs, as the original loop did.
bu.setBucketLocation(INPUT_BUCKET)