In [1]:
from pyEDM import *
import pandas as pd
import numpy as np
import matplotlib as mpl
from matplotlib import pyplot as plt
import os
import geopandas as gpd
from shapely.geometry import Polygon
import rasterio
import xarray as xr
from mpl_toolkits.axes_grid1 import make_axes_locatable
import fiona
import rasterio.mask
from descartes import PolygonPatch
from rasterio.plot import show
from tqdm import tqdm
import datetime
import random
from sklearn.preprocessing import StandardScaler
import scipy.stats
from datetime import datetime

In [2]:
# Input directories holding the anomaly rasters, one per environmental
# variable (precipitation from CHIRPS, land-surface temperature, eMODIS NDVI).
# The trailing slash is kept on each path.
precip_anom_dir, temp_anom_dir, ndvi_anom_dir = (
    '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/CHIRPS/Anomaly/',
    '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/LST/Anomaly/',
    '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/eMODIS_NDVI/Anomaly/',
)

In [3]:
# Output directory for CSV exports (the per-chunk simplex predictions are
# written here further down the notebook).
# NOTE(review): this path is rooted at /home/rgreen/DroughtEDM while the input
# rasters live under /home/rgreen/tana-spin/rgreen/DroughtEDM - confirm the
# difference is intentional.
data_volumes = '/home/rgreen/DroughtEDM/Data/data_volumes/'

In [4]:
def generate_dekads(start, end):
    '''
    Generates the start dates of all dekads touched by a date range.

    A dekad is a third of a month: days 1-10, days 11-20, and day 21 through
    the end of the month. Every calendar day between `start` and `end`
    (inclusive) is mapped to the first day of its dekad and the distinct
    dekad start dates are returned.

    Parameters
    ----------
    start : datetime-like (e.g. pd.Timestamp or datetime.datetime)
        First day of the range
    end : datetime-like (e.g. pd.Timestamp or datetime.datetime)
        Last day of the range (inclusive)

    Returns
    -------
    pd.DatetimeIndex
        Sorted, unique dekad start dates. If `start` falls inside a dekad,
        the first entry is that dekad's start and therefore precedes `start`.
        Empty when `end` is before `start`.
    '''

    days = pd.date_range(start, end)
    day_of_month = days.day.to_numpy()

    # Zero-based dekad within the month; days 21-31 all fall in dekad 2.
    dekad_in_month = np.minimum((day_of_month - 1) // 10, 2)

    # Days elapsed since the first day of the dekad (0 on the 1st/11th/21st).
    offset = (day_of_month - dekad_in_month * 10 - 1).astype("timedelta64[D]")

    # Snap every day back to its dekad start, then drop the repeats
    # (np.unique also returns the dates sorted).
    dekad_starts = days.values - offset

    return pd.DatetimeIndex(np.unique(dekad_starts))

In [5]:
# Study period covered by the raster time series.
# pd.Timestamp is used because the `pd.datetime` alias was deprecated in
# pandas 1.0 and removed in pandas 2.0.
start = pd.Timestamp(2002, 7, 1)
end = pd.Timestamp(2019, 4, 30)

# One timestamp per dekad in the study period; used below as the shared
# time axis of every pixel's time series.
dekads = generate_dekads(start, end)

In [6]:
def pixelwise_ts_table(in_dir, keyword):

    '''
    Builds a pixel-by-time table from a directory of single-variable rasters.

    The directory listing is sorted by file name, files whose name matches
    `keyword` are kept, and band 1 of each remaining raster is flattened
    (row-major) into one row of the output.

    In the output dataframe, columns are the pixels of the raster (read
    sequentially into a list) and rows are the rasters in sorted file-name
    order (presumably one dekad per file, going down - TODO confirm the file
    naming sorts chronologically).

    Args:
    - in_dir: path to the input directory
    - keyword: string that is unique to the environmental variable; it is
        matched with pandas `str.contains`, i.e. as a regular expression
        options: ['precip', 'temp', 'ndvi']

    Returns:
    - pd.DataFrame with one row per matching raster and one column per pixel
      (empty if no file name matches)
    '''

    names = sorted(os.listdir(in_dir))

    # Explicit object dtype keeps the .str accessor usable even when the
    # directory is empty (an empty default Series is not string-typed).
    matches = pd.Series(names, dtype=object).str.contains(keyword)
    raster_names = [name for name, keep in zip(names, matches) if keep]

    pixelwise_TS = []

    for filename in tqdm(raster_names):

        # rasterio is read directly (xarray's open_rasterio wrapper has been
        # removed from recent xarray releases); the context manager closes
        # the dataset handle as soon as the band has been read.
        with rasterio.open(os.path.join(in_dir, filename)) as src:
            band = src.read(1)

        pixelwise_TS.append(band.ravel().tolist())

    return pd.DataFrame(pixelwise_TS)

    

In [7]:
# Load every anomaly raster stack into a (rasters x pixels) table,
# one table per environmental variable.
precip_table_anom = pixelwise_ts_table(in_dir=precip_anom_dir, keyword='precip')
temp_table_anom = pixelwise_ts_table(in_dir=temp_anom_dir, keyword='temp')
ndvi_table_anom = pixelwise_ts_table(in_dir=ndvi_anom_dir, keyword='ndvi')

100%|██████████| 606/606 [00:09<00:00, 62.46it/s]
100%|██████████| 606/606 [00:08<00:00, 68.88it/s]
100%|██████████| 606/606 [00:09<00:00, 64.93it/s]


In [None]:
#precip_table_anom.to_csv(data_volumes + 'precip_table_anom.csv')

In [None]:
#temp_table_anom.to_csv(data_volumes + 'temp_table_anom.csv')

In [None]:
#ndvi_table_anom.to_csv(data_volumes + 'ndvi_table_anom.csv')

In [8]:
# Tag each table with the metadata the analysis functions read from it:
#   label     - variable name, used as the column name in per-pixel frames
#   rs_rows   - raster height (674 * 583 = 392942 pixel columns per table)
#   rs_cols   - raster width
#   n_samples - number of rasters (rows) in the table
# These are plain Python attributes on the DataFrame objects; they are not
# carried over to copies or slices of the tables.
for _table, _label in (
    (precip_table_anom, 'precip'),
    (ndvi_table_anom, 'ndvi'),
    (temp_table_anom, 'temp'),
):
    _table.label = _label
    _table.rs_rows = 674
    _table.rs_cols = 583
    _table.n_samples = 606
del _table, _label

table_list_anom = [precip_table_anom, ndvi_table_anom, temp_table_anom]

In [9]:
# Preview the NDVI anomaly table: one row per raster, one column per pixel
# (pandas truncates the display of the wide frame).
ndvi_table_anom

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,392932,392933,392934,392935,392936,392937,392938,392939,392940,392941
0,1.101671,1.267218,0.938137,1.100588,1.324199,1.315054,1.165266,1.113739,0.995424,1.331126,...,,,,,,,,,,
1,0.981070,1.093294,0.873698,1.124495,1.246212,1.232029,1.013628,0.881350,0.986223,1.310523,...,,,,,,,,,,
2,0.620074,0.497290,0.323125,0.514614,1.014546,0.785064,0.773919,0.555548,0.556693,0.720380,...,,,,,,,,,,
3,1.039260,0.837557,0.609005,0.773013,1.242413,1.024316,1.137690,0.987828,1.089278,1.286157,...,,,,,,,,,,
4,0.971371,0.751863,0.467435,0.481555,1.118889,0.952852,1.045681,0.991793,0.930235,0.796915,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
601,0.208073,0.026296,-0.284296,0.197938,-0.367827,-0.628079,-0.157488,-0.532811,-0.979176,-0.508893,...,,,,,,,,,,
602,0.522922,0.110877,-0.002413,0.636307,-0.054164,-0.157637,0.066254,-0.048598,-0.132028,0.265398,...,,,,,,,,,,
603,0.331863,-0.081993,-0.378869,-0.353615,-0.445762,-0.467402,-0.187034,-0.058811,0.460797,0.651812,...,,,,,,,,,,
604,0.306670,-0.136034,-0.296636,0.203851,-0.584455,-0.485288,-0.248363,-0.311625,-0.056771,0.484504,...,,,,,,,,,,


In [10]:
def multi_pixelwise_simplex(tables, target_label, train, test, ed, pi, starting_column, num_columns, simplex_out=None, max_nans=20):

    '''
    Runs a multivariate pyEDM Simplex projection for a range of pixels and
    collects the predictions, one output column per pixel.

    For every pixel (column index) in the requested range, a dataframe is
    assembled with a 'Time' column (the module-level `dekads`) and one column
    per input table. Pixels with too many missing values are stored as an
    all-NaN column; otherwise the remaining gaps are filled with the mean of
    the same calendar month and Simplex is run with all table labels as
    embedding columns.

    Relies on the module-level names `dekads`, `generate_dekads`, `Simplex`
    and `tqdm`.

    Args:
    - tables: list of input tables from pixelwise_ts_table, where every table
        has columns with the time series of each pixel in a raster and carries
        a `.label` attribute naming its variable
    - target_label: text label for the table to be used as the target (e.g. 'ndvi')
    - train: portion of data library used to train (string of range with space between values)
    - test: portion of data library used to test (string of range with space between values)
    - ed: embedding dimension (E)
    - pi: prediction interval (Tp)
    - starting_column: index of the first pixel column to process
    - num_columns: number of consecutive pixel columns to process; the range
        is clipped at the last column available in every table
    - simplex_out: optional dataframe to append the results to; it is modified
        in place. A new dataframe is created when omitted
    - max_nans: a pixel is treated as no-data (e.g. water) when any of its
        variables has more than this many missing values

    Returns:
    - simplex_out: dataframe with one column per processed pixel (named by the
      pixel's column index) holding the `Predictions` column of the Simplex
      result, indexed by prediction time
    '''

    if simplex_out is None:
        simplex_out = pd.DataFrame()

    # pyEDM takes the embedding columns as one space-separated string.
    labels = ' '.join([table.label for table in tables]) # Assumes that all tables have labels

    # Dekads that follow the observed record, used to timestamp forecasts.
    # A dekad is at most 11 days long, so 11 * (pi + 1) days past the last
    # observed dekad always contains the next `pi` dekad starts.
    future_dekads = None
    if pi > 0:
        last_dekad = dekads[-1]
        horizon = last_dekad + pd.Timedelta(days=11 * (pi + 1))
        future_dekads = generate_dekads(last_dekad, horizon)[1:pi + 1]

    # Clip the requested range so a final, partial chunk does not index past
    # the last pixel.
    n_pixels = min(table.shape[1] for table in tables)
    stop_column = min(starting_column + num_columns, n_pixels)

    # Iterate through columns (pixels)
    for col_idx in tqdm(range(starting_column, stop_column)):

        # PIXEL DATAFRAME
        # Columns correspond to variables and rows to timestamps (images).
        pixel_series = {'Time': dekads}
        for table in tables:
            pixel_series[table.label] = table.iloc[:, col_idx]
        interim_df = pd.DataFrame(pixel_series).set_index('Time', drop=False)

        # If any variable has lots of nans (water), store an all-nan column.
        # A scalar is broadcast over whatever index simplex_out already has,
        # so the placeholder can never disagree with the prediction index.
        if interim_df.isnull().sum().max() > max_nans:
            simplex_out[col_idx] = np.nan
            continue

        # Fill the few remaining gaps with the mean of the same calendar
        # month. Assigning the whole column back (rather than writing through
        # a chained `.loc`) guarantees interim_df itself is updated.
        months = np.asarray(interim_df.index.month)
        for table in tables:
            series = interim_df[table.label]
            if series.isnull().any():
                monthly_mean = series.groupby(months).transform('mean')
                interim_df[table.label] = series.fillna(monthly_mean)

        # Run simplex
        simplex_result = Simplex(dataFrame = interim_df,
                                 lib = train,
                                 pred = test,
                                 E = ed, Tp = pi,
                                 columns = labels, target = target_label, showPlot = False)

        # Build the datetime index and stamp the forecast rows with the real
        # upcoming dekad dates.
        # NOTE(review): assumes `test` runs to the end of the record, so that
        # the last `pi` rows of the result are forecasts beyond the data.
        times = pd.to_datetime(simplex_result.Time).to_numpy().copy()
        if pi > 0:
            times[-pi:] = future_dekads.to_numpy()
        simplex_result['Time'] = times
        simplex_result.index = pd.DatetimeIndex(times, name='Time')

        # Aligned on the time index with the columns already in simplex_out.
        simplex_out[col_idx] = simplex_result.Predictions

    return simplex_out
    


In [None]:
# Run the NDVI simplex projection over every pixel in chunks and write each
# chunk of predictions to its own CSV, so partial results survive an
# interrupted run.
# NOTE: simplex_df accumulates the predictions of all chunks in memory.
simplex_df = pd.DataFrame()
num_col_in_chunk = 1000

# Derive the pixel count from the tables instead of hard-coding it.
total_columns = min(table.shape[1] for table in table_list_anom)

for starting_col in range(0, total_columns, num_col_in_chunk):
    # The final chunk is usually shorter than num_col_in_chunk; asking for a
    # full chunk there would index past the last pixel column.
    cols_in_chunk = min(num_col_in_chunk, total_columns - starting_col)

    ndvi_ea_simplex = multi_pixelwise_simplex(
        table_list_anom, 'ndvi',
        train = '1 606', test = '1 606',
        ed = 6, pi = 1,
        starting_column = starting_col,
        num_columns = cols_in_chunk,
        simplex_out = simplex_df,
    )

    # The columns of the current chunk are the last ones appended.
    new_ndvi_ea_simplex = ndvi_ea_simplex.iloc[:, -cols_in_chunk:]
    new_ndvi_ea_simplex.to_csv(data_volumes+'ndvi_ea_simplex_ed6_pi1_0' + str(starting_col) + '.csv')

100%|██████████| 1000/1000 [00:48<00:00, 20.44it/s]
100%|██████████| 1000/1000 [00:51<00:00, 19.53it/s]
100%|██████████| 1000/1000 [00:49<00:00, 20.41it/s]
100%|██████████| 1000/1000 [00:50<00:00, 19.86it/s]
100%|██████████| 1000/1000 [00:50<00:00, 19.72it/s]
100%|██████████| 1000/1000 [00:52<00:00, 19.08it/s]
100%|██████████| 1000/1000 [00:50<00:00, 19.89it/s]
100%|██████████| 1000/1000 [00:50<00:00, 19.66it/s]
100%|██████████| 1000/1000 [00:52<00:00, 19.06it/s]
100%|██████████| 1000/1000 [00:50<00:00, 19.94it/s]
100%|██████████| 1000/1000 [00:51<00:00, 19.43it/s]
  6%|▌         | 60/1000 [00:03<00:50, 18.49it/s]

In [None]:
# NOTE(review): `x` is not defined anywhere in the visible notebook, so
# running this cell raises NameError. It looks like a leftover scratch cell
# (or a deliberate stop for "Run All") - confirm and delete if unneeded.
x