In [1]:
from pyEDM import *
import pandas as pd
import numpy as np
import matplotlib as mpl
from matplotlib import pyplot as plt
import os
import geopandas as gpd
from shapely.geometry import Polygon
import rasterio
import xarray as xr
from mpl_toolkits.axes_grid1 import make_axes_locatable
import fiona
import rasterio.mask
from descartes import PolygonPatch
from rasterio.plot import show
from tqdm import tqdm
import datetime
import random
from sklearn.preprocessing import StandardScaler
import scipy.stats
from datetime import datetime

In [None]:
precip_anom_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/CHIRPS/Anomaly/'
temp_anom_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/LST/Anomaly/'
ndvi_anom_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/eMODIS_NDVI/Anomaly/'

In [3]:
data_volumes = '/home/rgreen/DroughtEDM/Data/data_volumes/'

In [4]:
def generate_dekads(start, end):
    '''
    Generates list of timestamps of dekads between two dates
    
    Parameters
    ----------
    start : pd.datetime object
        Start date of list
    end : pd.datetime object
        end date of list
    '''
    
    dtrange = pd.date_range(start, end)
    days = list(range(len(dtrange))) #length of dtrange
    daysDF = pd.DataFrame({'Days': days}, index=dtrange)
    d = daysDF.index.day - np.clip((daysDF.index.day-1) // 10, 0, 2)*10 - 1 
    dekaddates = daysDF.index.values - np.array(d, dtype="timedelta64[D]")
    dekads = daysDF.groupby(dekaddates).mean()
    dekads = dekads.index
    
    return dekads

In [5]:
start = pd.datetime(2002,7,1)
end = pd.datetime(2019,4,30)

dekads = generate_dekads(start, end)

In [6]:
def pixelwise_ts_table(in_dir, keyword):
    
    '''
    This function searches through the file directory and creates a dataframe of pixel values over time from rasters
    based on a given keyword describing the environmental variable of interest to generate a raster time series stack 
    
   In the output dataframe, columns are each pixel extracted from the raster (read squentially into a list)
   and rows are the same pixel over time (dekads going down)
    
    Args:
    - in_dir: path to the input directory
    -keyword: string that is unique to the environmental variable
        options: ['precip', 'temp', 'ndvi']
    '''
    
    
    files=np.array(sorted(os.listdir(in_dir)))
    tifs = pd.Series(files).str.contains(keyword)
    files = files[tifs]
        
    pixelwise_TS = []
    
    for filename in tqdm(files): 
        
        
            open_file = xr.open_rasterio(in_dir+filename).sel(band=1)
            array = open_file.values
            
            pixel_list = array.ravel().tolist()
            
            pixelwise_TS.append(pixel_list)

    return pd.DataFrame(pixelwise_TS)

    

In [7]:
precip_table_anom = pixelwise_ts_table(precip_anom_dir, 'precip')
temp_table_anom = pixelwise_ts_table(temp_anom_dir, 'temp')
ndvi_table_anom = pixelwise_ts_table(ndvi_anom_dir, 'ndvi')

100%|██████████| 606/606 [00:09<00:00, 62.46it/s]
100%|██████████| 606/606 [00:08<00:00, 68.88it/s]
100%|██████████| 606/606 [00:09<00:00, 64.93it/s]


In [None]:
#precip_table_anom.to_csv(data_volumes + 'precip_table_anom.csv')

In [None]:
#temp_table_anom.to_csv(data_volumes + 'temp_table_anom.csv')

In [None]:
#ndvi_table_anom.to_csv(data_volumes + 'ndvi_table_anom.csv')

In [8]:
precip_table_anom.label = 'precip'
precip_table_anom.rs_rows = 674
precip_table_anom.rs_cols = 583
precip_table_anom.n_samples = 606
ndvi_table_anom.label = 'ndvi'
ndvi_table_anom.rs_rows = 674
ndvi_table_anom.rs_cols = 583
ndvi_table_anom.n_samples = 606
temp_table_anom.label = 'temp'
temp_table_anom.rs_rows = 674
temp_table_anom.rs_cols = 583
temp_table_anom.n_samples = 606

table_list_anom = [precip_table_anom, ndvi_table_anom, temp_table_anom]

In [9]:
ndvi_table_anom

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,392932,392933,392934,392935,392936,392937,392938,392939,392940,392941
0,1.101671,1.267218,0.938137,1.100588,1.324199,1.315054,1.165266,1.113739,0.995424,1.331126,...,,,,,,,,,,
1,0.981070,1.093294,0.873698,1.124495,1.246212,1.232029,1.013628,0.881350,0.986223,1.310523,...,,,,,,,,,,
2,0.620074,0.497290,0.323125,0.514614,1.014546,0.785064,0.773919,0.555548,0.556693,0.720380,...,,,,,,,,,,
3,1.039260,0.837557,0.609005,0.773013,1.242413,1.024316,1.137690,0.987828,1.089278,1.286157,...,,,,,,,,,,
4,0.971371,0.751863,0.467435,0.481555,1.118889,0.952852,1.045681,0.991793,0.930235,0.796915,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
601,0.208073,0.026296,-0.284296,0.197938,-0.367827,-0.628079,-0.157488,-0.532811,-0.979176,-0.508893,...,,,,,,,,,,
602,0.522922,0.110877,-0.002413,0.636307,-0.054164,-0.157637,0.066254,-0.048598,-0.132028,0.265398,...,,,,,,,,,,
603,0.331863,-0.081993,-0.378869,-0.353615,-0.445762,-0.467402,-0.187034,-0.058811,0.460797,0.651812,...,,,,,,,,,,
604,0.306670,-0.136034,-0.296636,0.203851,-0.584455,-0.485288,-0.248363,-0.311625,-0.056771,0.484504,...,,,,,,,,,,


In [60]:
def multi_pixelwise_simplex(tables, target_label, train, test, ed, pi, starting_column, num_columns, simplex_out=None): #year month_list, 
    
    '''
    This function computes the simplex prediction skill for a given embedding dimensions, prediction interval, land cover and environmental variable
    across pixels given multiple tables of different variables of pixel values across time for a region from the function pixelwise_ts_table 

    The result is an np.array of simplex error values reshaped back into the shape of the original raster image
    
    Args:
    - table: list of input tables, where every table has columns with the time series of each pixel in a raster
    - target_label: text label for the table to be used as the target (e.g. 'ndvi')
    - train: portion of data library used to train (string of range with space between values)
    - test: portion of data libary used to test (string of range with space between values) 
    - ed: embedding dimension
    - pi: prediction interval (Tp) 
    - rs_rows: number of rows to reshape list into original raster size
    - rs_cols: number of columns to reshape list into original raster size
   
    '''

    table_dict = {'Time': dekads }
    
    if simplex_out is None:
        simplex_out = pd.DataFrame()
        
    labels = ' '.join([table.label for table in tables]) # Assumes that all tables have labels

    # Generate future dekads
    dekads_fut = generate_dekads(dekads[-1],pd.datetime(2020,4,30))
    
   
    
    # Iterate through columns (pixels)
    for col_idx in tqdm((range(starting_column, starting_column+num_columns))):
        
        now=datetime.now()
        # PIXEL DATAFRAME
        # Generate dataframe for pixel where columns correspond to variables and rows to timestamp (image)
        [table_dict.update({table.label: table.iloc[:,col_idx]}) for i, table in enumerate(tables)]
        #interim_df = pd.DataFrame(table_dict, index = dekads)
        interim_df = pd.DataFrame(table_dict)

        interim_df.set_index('Time',inplace=True, drop=False)
        print(interim_df)
        
                # If column has lots of nans (water), append nan
        if max([len(interim_df[interim_df[col_idx].isnull()]) for col_idx in interim_df.columns]) > 20:
            
            simplex_out[col_idx] = [np.nan]*(len(interim_df)-ed+pi+1)
        
        else:

            # Check whether column has a few nans
            if interim_df.temp.isnull().values.any():
                # If so, iterate through nan rows and replace with monthly mean
                for t in interim_df[interim_df.temp.isnull()].index:
                    interim_df.temp.loc[t] = interim_df.temp[interim_df.index.month == t.month].mean()
                    #print(interim_df)
            if interim_df.ndvi.isnull().values.any():
            # If so, iterate through nan rows and replace with monthly mean
                for t in interim_df[interim_df.ndvi.isnull()].index:
                    interim_df.ndvi.loc[t] = interim_df.ndvi[interim_df.index.month == t.month].mean()
            if interim_df.precip.isnull().values.any():
            # If so, iterate through nan rows and replace with monthly mean
                for t in interim_df[interim_df.precip.isnull()].index:
                    interim_df.precip.loc[t] = interim_df.precip[interim_df.index.month == t.month].mean()
            #print(interim_df)
            
            # Run simplex
            
            simplex_result = Simplex(dataFrame = interim_df,
                                     lib = train,
                                     pred = test,
                                     E = ed, Tp = pi,
                                     columns = labels, target = target_label, showPlot = False)

            
            
            # Add datetime index + fix dekad values for future values
            simplex_result.Time = pd.to_datetime(simplex_result.Time)
            simplex_result.set_index(simplex_result.Time, inplace=True)
            
            
            # Fix datetime index for future dekads
            simplex_result[-pi:].Time = dekads_fut[1:pi+1]
            
            #simplex_result.dropna()
            simplex_out[col_idx] = simplex_result.Predictions
            #print(simplex_out[i])
            delta=datetime.now()-now
            #print(delta)
    return simplex_out
    


In [61]:
simplex_df = pd.DataFrame()
num_col_in_chunk = 1000
for starting_col in range(0, 392942, num_col_in_chunk):#392942
    ndvi_ea_simplex= multi_pixelwise_simplex(table_list_anom, 'ndvi', train = '1 606', test = '1 606',ed = 6, pi = 1, starting_column = starting_col, num_columns = num_col_in_chunk, simplex_out=simplex_df)
    new_ndvi_ea_simplex = ndvi_ea_simplex.iloc[:,-num_col_in_chunk:]
    new_ndvi_ea_simplex.to_csv(data_volumes+'ndvi_ea_simplex_ed6_pi1_0' + str(starting_col) + '.csv')

  0%|          | 3/1000 [00:00<01:38, 10.10it/s]

                 Time    precip      ndvi      temp
Time                                               
2002-07-01 2002-07-01  0.230411  1.101671  0.936865
2002-07-11 2002-07-11  0.456207  0.981070  1.254471
2002-07-21 2002-07-21  0.341468  0.620074  0.939283
2002-08-01 2002-08-01 -0.602614  1.039260  1.219083
2002-08-11 2002-08-11 -0.778704  0.971371  1.219083
...               ...       ...       ...       ...
2019-03-11 2019-03-11 -0.449023  0.208073 -0.716793
2019-03-21 2019-03-21 -0.257558  0.522922 -0.975344
2019-04-01 2019-04-01 -0.119228  0.331863 -0.023442
2019-04-11 2019-04-11  0.067158  0.306670  0.151567
2019-04-21 2019-04-21  0.649589  0.452332  0.255217

[606 rows x 4 columns]
                 Time    precip      ndvi      temp
Time                                               
2002-07-01 2002-07-01  0.104117  1.267218  0.931827
2002-07-11 2002-07-11  0.288625  1.093294  1.250847
2002-07-21 2002-07-21  0.195664  0.497290  0.934252
2002-08-01 2002-08-01 -0.590533  0.83755

  1%|          | 9/1000 [00:00<01:14, 13.35it/s]

                 Time    precip      ndvi      temp
Time                                               
2002-07-01 2002-07-01  0.167321  1.315054  0.933264
2002-07-11 2002-07-11  0.372021  1.232029  1.223881
2002-07-21 2002-07-21  0.270916  0.785064  0.940019
2002-08-01 2002-08-01 -0.567385  1.024316  1.210575
2002-08-11 2002-08-11 -0.723387  0.952852  1.210575
...               ...       ...       ...       ...
2019-03-11 2019-03-11 -0.501739 -0.628079 -0.911280
2019-03-21 2019-03-21 -0.335796 -0.157637 -0.918751
2019-04-01 2019-04-01 -0.212879 -0.467402  0.085202
2019-04-11 2019-04-11 -0.037195 -0.485288  0.154720
2019-04-21 2019-04-21  0.537711 -0.441156  0.284604

[606 rows x 4 columns]
                 Time    precip      ndvi      temp
Time                                               
2002-07-01 2002-07-01  0.233497  1.165266  0.959568
2002-07-11 2002-07-11  0.450516  1.013628  1.269042
2002-07-21 2002-07-21  0.343799  0.773919  0.947982
2002-08-01 2002-08-01 -0.542095  1.13769

  2%|▏         | 15/1000 [00:01<01:06, 14.88it/s]

                 Time    precip      ndvi      temp
Time                                               
2002-07-01 2002-07-01  0.496607  1.329238  0.915819
2002-07-11 2002-07-11  0.840212  1.307524  1.222635
2002-07-21 2002-07-21  0.767770  1.241685  0.998795
2002-08-01 2002-08-01 -0.316851  1.436970  1.265535
2002-08-11 2002-08-11 -0.595801  0.860072  1.265535
...               ...       ...       ...       ...
2019-03-11 2019-03-11 -0.282353 -0.193779 -0.599720
2019-03-21 2019-03-21  0.175433 -0.533777 -0.942858
2019-04-01 2019-04-01 -0.082313 -0.480860  0.109244
2019-04-11 2019-04-11  0.045937 -0.475836 -0.018615
2019-04-21 2019-04-21  0.505503 -0.433525  0.311775

[606 rows x 4 columns]
                 Time    precip      ndvi      temp
Time                                               
2002-07-01 2002-07-01  0.479061  1.023623  0.920436
2002-07-11 2002-07-11  1.550110  1.021885  1.262913
2002-07-21 2002-07-21  0.782036  0.857924  1.006377
2002-08-01 2002-08-01 -0.179790  1.24770

  2%|▏         | 19/1000 [00:01<01:02, 15.70it/s]

                 Time    precip      ndvi      temp
Time                                               
2002-07-01 2002-07-01  1.252521  1.053997  0.870787
2002-07-11 2002-07-11  1.861264  0.983687  1.258777
2002-07-21 2002-07-21  2.033837  0.923182  0.974806
2002-08-01 2002-08-01  1.179195  1.101123  1.279471
2002-08-11 2002-08-11  0.981474  0.720215  1.279471
...               ...       ...       ...       ...
2019-03-11 2019-03-11  0.450465 -1.028213 -0.834572
2019-03-21 2019-03-21  0.707679 -0.766935 -1.083033
2019-04-01 2019-04-01  0.700703 -0.732511  0.058165
2019-04-11 2019-04-11  0.726523 -0.991975 -0.001306
2019-04-21 2019-04-21  0.544883 -0.950347  0.322304

[606 rows x 4 columns]
                 Time    precip      ndvi      temp
Time                                               
2002-07-01 2002-07-01  1.068551  1.256579  0.862959
2002-07-11 2002-07-11  1.642128  1.238086  1.243058
2002-07-21 2002-07-21  1.859209  1.222077  0.964142
2002-08-01 2002-08-01  1.241159  1.30204

  2%|▎         | 25/1000 [00:01<01:00, 16.06it/s]

                 Time    precip      ndvi      temp
Time                                               
2002-07-01 2002-07-01  0.419944  1.289538  0.929444
2002-07-11 2002-07-11  0.760817  1.313953  1.194834
2002-07-21 2002-07-21  1.014087  1.096085  1.012427
2002-08-01 2002-08-01  1.073822  1.211442  1.320872
2002-08-11 2002-08-11  1.114095  1.144408  1.320872
...               ...       ...       ...       ...
2019-03-11 2019-03-11 -0.792109 -1.219838 -0.491224
2019-03-21 2019-03-21 -0.668212 -0.815047 -0.789385
2019-04-01 2019-04-01 -0.233358 -0.432058 -0.010840
2019-04-11 2019-04-11 -0.053956 -0.574336  0.035112
2019-04-21 2019-04-21  0.156540 -0.607760  0.278568

[606 rows x 4 columns]
                 Time    precip      ndvi      temp
Time                                               
2002-07-01 2002-07-01  0.402471  1.536188  0.881630
2002-07-11 2002-07-11  0.744729  1.656644  1.187461
2002-07-21 2002-07-21  1.028971  1.255022  1.025387
2002-08-01 2002-08-01  1.189269  1.23903

  3%|▎         | 29/1000 [00:01<00:59, 16.23it/s]

                 Time    precip      ndvi      temp
Time                                               
2002-07-01 2002-07-01  0.657748  1.167858  0.924146
2002-07-11 2002-07-11  1.085140  1.189731  1.114311
2002-07-21 2002-07-21  1.584156  1.063419  1.233873
2002-08-01 2002-08-01  2.244735  1.061843  1.279581
2002-08-11 2002-08-11  2.471701  0.734579  1.279581
...               ...       ...       ...       ...
2019-03-11 2019-03-11 -0.755538 -1.125660 -0.701673
2019-03-21 2019-03-21 -0.603352 -0.083836 -0.799675
2019-04-01 2019-04-01  0.006504  0.012728 -0.155698
2019-04-11 2019-04-11  0.184009 -0.742353  0.107064
2019-04-21 2019-04-21  0.088180 -0.707142  0.348244

[606 rows x 4 columns]
                 Time    precip      ndvi      temp
Time                                               
2002-07-01 2002-07-01  0.610620  1.088126  0.932917
2002-07-11 2002-07-11  1.040971  1.182535  1.120514
2002-07-21 2002-07-21  1.578445  0.878455  1.225594
2002-08-01 2002-08-01  2.355072  0.98937

  4%|▎         | 35/1000 [00:02<00:58, 16.55it/s]

                 Time    precip      ndvi      temp
Time                                               
2002-07-01 2002-07-01  0.442294  1.271333  0.868624
2002-07-11 2002-07-11  0.783449  1.350889  1.056342
2002-07-21 2002-07-21  1.347088  1.060266  1.215056
2002-08-01 2002-08-01  2.378680  0.830834  1.270779
2002-08-11 2002-08-11  2.691409  0.982389  1.270779
...               ...       ...       ...       ...
2019-03-11 2019-03-11 -0.654131  0.296215 -0.808604
2019-03-21 2019-03-21 -0.513523  0.628761 -0.945569
2019-04-01 2019-04-01  0.071976  1.297653 -0.235454
2019-04-11 2019-04-11  0.219927  1.308857  0.024136
2019-04-21 2019-04-21  0.029432  1.257521  0.304555

[606 rows x 4 columns]
                 Time    precip      ndvi      temp
Time                                               
2002-07-01 2002-07-01  0.394003  1.727795  0.955823
2002-07-11 2002-07-11  0.735222  1.748436  1.104729
2002-07-21 2002-07-21  1.318625  1.252504  1.162629
2002-08-01 2002-08-01  2.421031  1.28144

  4%|▍         | 40/1000 [00:02<01:01, 15.55it/s]

                 Time    precip      ndvi      temp
Time                                               
2002-07-01 2002-07-01  0.250148  0.792323  0.897026
2002-07-11 2002-07-11  0.539512  0.365058  1.047741
2002-07-21 2002-07-21  1.162052  0.316087  1.236041
2002-08-01 2002-08-01  2.514736  0.543027  1.249785
2002-08-11 2002-08-11  2.936324  0.538209  1.249785
...               ...       ...       ...       ...
2019-03-11 2019-03-11 -0.611919  0.417069 -0.525909
2019-03-21 2019-03-21 -0.502090  0.346957 -0.764422
2019-04-01 2019-04-01 -0.049422  0.738067 -0.207204
2019-04-11 2019-04-11  0.077967  0.927542 -0.110809
2019-04-21 2019-04-21 -0.049422  0.933389  0.355416

[606 rows x 4 columns]
                 Time    precip      ndvi      temp
Time                                               
2002-07-01 2002-07-01  0.239410  1.501028  0.917749
2002-07-11 2002-07-11  0.527368  1.560650  1.097256
2002-07-21 2002-07-21  1.163622  1.198104  1.227639
2002-08-01 2002-08-01  2.577816  1.37848




KeyboardInterrupt: 

In [None]:
x