In [1]:
from pyEDM import *
import pandas as pd
import numpy as np
import matplotlib as mpl
from matplotlib import pyplot as plt
import os
import geopandas as gpd
from shapely.geometry import Polygon
import rasterio
import xarray as xr
from mpl_toolkits.axes_grid1 import make_axes_locatable
import fiona
import rasterio.mask
from descartes import PolygonPatch
from rasterio.plot import show
from tqdm import tqdm
import datetime
import random
from sklearn.preprocessing import StandardScaler
import scipy.stats
from datetime import datetime


In [44]:
import warnings
warnings.filterwarnings("ignore")

In [69]:
from dask.distributed import Client

client = Client(threads_per_worker=6, n_workers=1)

In [70]:
client

0,1
Client  Scheduler: tcp://127.0.0.1:37340  Dashboard: http://127.0.0.1:44329/status,Cluster  Workers: 1  Cores: 6  Memory: 135.08 GB


In [51]:
from dask import delayed

In [38]:
import dask.dataframe as dd

In [2]:
precip_anom_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/CHIRPS/Anomaly/'
temp_anom_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/LST/Anomaly/'
ndvi_anom_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/eMODIS_NDVI/Anomaly/'

In [2]:
precip_orig_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/CHIRPS/EA/'
temp_orig_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/LST/EA/'
ndvi_orig_dir = '/home/rgreen/tana-spin/rgreen/DroughtEDM/Data/eMODIS_NDVI/EA/'

In [3]:
data_volumes = '/home/rgreen/DroughtEDM/Data/data_volumes/'

In [3]:
def generate_dekads(start, end):
    '''
    Generates list of timestamps of dekads between two dates
    
    Parameters
    ----------
    start : pd.datetime object
        Start date of list
    end : pd.datetime object
        end date of list
    '''
    
    dtrange = pd.date_range(start, end)
    days = list(range(len(dtrange))) #length of dtrange
    daysDF = pd.DataFrame({'Days': days}, index=dtrange)
    d = daysDF.index.day - np.clip((daysDF.index.day-1) // 10, 0, 2)*10 - 1 
    dekaddates = daysDF.index.values - np.array(d, dtype="timedelta64[D]")
    dekads = daysDF.groupby(dekaddates).mean()
    dekads = dekads.index
    
    return dekads

In [4]:
start = pd.datetime(2002,7,1)
end = pd.datetime(2019,4,30)

dekads = generate_dekads(start, end)

In [128]:
start = pd.datetime(2002,8, 21)
end = pd.datetime(2019,5,1)

dekads_pi1 = generate_dekads(start, end)

In [129]:
dekads_pi1

DatetimeIndex(['2002-08-21', '2002-09-01', '2002-09-11', '2002-09-21',
               '2002-10-01', '2002-10-11', '2002-10-21', '2002-11-01',
               '2002-11-11', '2002-11-21',
               ...
               '2019-02-01', '2019-02-11', '2019-02-21', '2019-03-01',
               '2019-03-11', '2019-03-21', '2019-04-01', '2019-04-11',
               '2019-04-21', '2019-05-01'],
              dtype='datetime64[ns]', length=602, freq=None)

In [5]:
def pixelwise_ts_table(in_dir, keyword):
    
    '''
    This function searches through the file directory and creates a dataframe of pixel values over time from rasters
    based on a given keyword describing the environmental variable of interest to generate a raster time series stack 
    
   In the output dataframe, columns are each pixel extracted from the raster (read squentially into a list)
   and rows are the same pixel over time (dekads going down)
    
    Args:
    - in_dir: path to the input directory
    -keyword: string that is unique to the environmental variable
        options: ['precip', 'temp', 'ndvi']
    '''
    
    
    files=np.array(sorted(os.listdir(in_dir)))
    tifs = pd.Series(files).str.contains(keyword)
    files = files[tifs]
        
    pixelwise_TS = []
    
    for filename in tqdm(files): 
        
        
            open_file = xr.open_rasterio(in_dir+filename).sel(band=1)
            array = open_file.values
            
            pixel_list = array.ravel().tolist()
            
            pixelwise_TS.append(pixel_list)

    return pd.DataFrame(pixelwise_TS)

    

In [7]:
precip_table_anom = pixelwise_ts_table(precip_anom_dir, 'precip')
temp_table_anom = pixelwise_ts_table(temp_anom_dir, 'temp')
ndvi_table_anom = pixelwise_ts_table(ndvi_anom_dir, 'ndvi')

100%|██████████| 606/606 [00:09<00:00, 65.94it/s]
100%|██████████| 606/606 [00:08<00:00, 67.76it/s]
100%|██████████| 606/606 [00:08<00:00, 67.46it/s]


In [132]:
precip_table_anom.T.to_csv('/home/rgreen/tana-crunch/rgreen/crunch/DroughtEDM/Data/observations_tables/precip_table_anomT.csv')

In [133]:
ndvi_table_anom.T.to_csv('/home/rgreen/tana-crunch/rgreen/crunch/DroughtEDM/Data/observations_tables/ndvi_table_anomT.csv')

In [134]:
temp_table_anom.T.to_csv('/home/rgreen/tana-crunch/rgreen/crunch/DroughtEDM/Data/observations_tables/temp_table_anomT.csv')

In [6]:
precip_table_orig = pixelwise_ts_table(precip_orig_dir, 'precip')
temp_table_orig = pixelwise_ts_table(temp_orig_dir, 'temp')
ndvi_table_orig = pixelwise_ts_table(ndvi_orig_dir, 'ndvi')

100%|██████████| 606/606 [00:15<00:00, 40.18it/s]
100%|██████████| 606/606 [00:15<00:00, 38.93it/s]
100%|██████████| 606/606 [00:16<00:00, 37.87it/s]


In [7]:
precip_table_orig.T.to_csv('/home/rgreen/tana-crunch/rgreen/crunch/DroughtEDM/Data/observations_tables/precip_table_origT.csv')
ndvi_table_orig.T.to_csv('/home/rgreen/tana-crunch/rgreen/crunch/DroughtEDM/Data/observations_tables/ndvi_table_origT.csv')
temp_table_orig.T.to_csv('/home/rgreen/tana-crunch/rgreen/crunch/DroughtEDM/Data/observations_tables/temp_table_origT.csv')

In [105]:
precip_table_anom.label = 'precip'
precip_table_anom.rs_rows = 674
precip_table_anom.rs_cols = 583
precip_table_anom.n_samples = 606
ndvi_table_anom.label = 'ndvi'
ndvi_table_anom.rs_rows = 674
ndvi_table_anom.rs_cols = 583
ndvi_table_anom.n_samples = 606
temp_table_anom.label = 'temp'
temp_table_anom.rs_rows = 674
temp_table_anom.rs_cols = 583
temp_table_anom.n_samples = 606

table_list_anom = [precip_table_anom, ndvi_table_anom, temp_table_anom]

In [11]:
observations_tables = '/home/rgreen/DroughtEDM/Data/observations_tables/'

In [12]:
ndvi_table_anom_lc = pd.read_csv(observations_tables + 'ndvi_table_anom_lct_80000_84000.csv')
precip_table_anom_lc = pd.read_csv(observations_tables + 'precip_table_anom_lct_80000_84000.csv')
temp_table_anom_lc = pd.read_csv(observations_tables + 'temp_table_anom_lct_80000_84000.csv')

In [58]:
ndvi_table_anom_lct = ndvi_table_anom_lc.T.iloc[2:,:]
precip_table_anom_lct = precip_table_anom_lc.T.iloc[2:,:]
temp_table_anom_lct = temp_table_anom_lc.T.iloc[2:,:]

In [59]:
ndvi_table_anom_lct.columns = ndvi_table_anom_lct.iloc[0]
precip_table_anom_lct.columns = precip_table_anom_lct.iloc[0]
temp_table_anom_lct.columns = temp_table_anom_lct.iloc[0]

In [60]:
ndvi_table_anom_lct = ndvi_table_anom_lct.reset_index(drop=True)
precip_table_anom_lct = precip_table_anom_lct.reset_index(drop=True)
temp_table_anom_lct = temp_table_anom_lct.reset_index(drop=True)

In [15]:
temp_table_anom_lct

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3990,3991,3992,3993,3994,3995,3996,3997,3998,3999
0,-2.81368,-1.22337,-1.54951,-1.88898,-1.44481,-0.211662,-1.05201,-1.12097,-1.78194,-1.09324,...,-0.127298,-0.0156791,-0.704583,-0.794811,-0.24542,-0.17133,0.319564,-0.50478,-0.128677,-0.368369
1,-0.408533,-0.508778,-0.511472,-0.350533,-0.902775,0.188521,-0.552875,-0.743305,-1.09282,-0.877802,...,-1.4675,-1.45322,-1.79148,-1.75557,-1.66054,-1.6413,-1.3136,-1.44054,-1.43007,-1.3436
2,-0.321216,-0.276452,-0.119439,0.00214906,-0.152341,0.364686,-0.217244,-0.198759,-0.31549,-0.122007,...,-1.63901,-1.80784,-2.10103,-2.00157,-1.81117,-1.8838,-1.59932,-1.58412,-1.54603,-1.49075
3,-0.799134,-0.624247,-0.621431,-0.623051,-0.868344,0.545957,-0.701324,-0.280825,-0.641679,-0.775133,...,-1.79332,-1.77095,-1.93469,-1.95677,-1.98049,-1.90578,-1.68822,-1.80468,-1.61091,-1.08542
4,-0.799134,-0.624247,-0.621431,-0.623051,-0.868344,0.545957,-0.701324,-0.280825,-0.641679,-0.775133,...,-1.79332,-1.77095,-1.93469,-1.95677,-1.98049,-1.90578,-1.68822,-1.80468,-1.61091,-1.08542
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
601,0.663769,0.833922,1.15133,1.23666,1.33144,1.1542,0.965158,0.98796,0.932944,0.916844,...,1.31836,1.3338,1.37683,1.31869,1.28627,1.20894,1.13015,1.39572,1.48799,1.01583
602,0.912824,1.28464,2.01701,2.08331,1.91916,1.53369,1.12621,1.28215,0.93639,0.52778,...,1.42433,1.46572,1.62918,1.52015,1.34849,1.3103,1.19948,1.37359,1.38359,0.962126
603,1.13144,0.888477,0.264573,0.334488,0.868679,1.34192,0.927947,1.10818,1.11473,0.813045,...,1.5461,1.6736,1.67175,1.66751,1.50342,1.41676,1.42823,1.56439,1.53457,1.04348
604,1.21234,1.13331,1.09204,1.09708,1.23481,1.82914,1.09794,1.29348,1.08087,1.07033,...,1.19174,1.21644,1.25421,0.968125,1.50082,1.22104,1.19779,1.19043,1.37183,0.789118


In [61]:
temp_table_anom_lct = temp_table_anom_lct.astype(float)
ndvi_table_anom_lct = ndvi_table_anom_lct.astype(float)
precip_table_anom_lct = precip_table_anom_lct.astype(float)

In [34]:
precip_table_anom_lct

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3990,3991,3992,3993,3994,3995,3996,3997,3998,3999
0,-0.487665,-0.456052,-0.493472,-0.494614,-0.491822,-0.620685,-0.497999,-0.500617,-0.478876,-0.461698,...,0.245794,0.173310,0.335890,0.338354,0.362732,0.301465,0.419315,0.502287,0.935008,0.650995
1,-0.487665,-0.456052,-0.493472,-0.494614,-0.491822,-0.659595,-0.492448,-0.496633,-0.478876,-0.461698,...,2.105916,1.977371,2.040100,2.101758,2.094982,2.076340,1.923800,1.847087,1.661063,1.192369
2,-0.487665,-0.456052,-0.493472,-0.494614,-0.491822,-0.600639,-0.494050,-0.497939,-0.478876,-0.461698,...,2.926425,2.812612,2.864779,2.848457,3.035506,3.003277,2.914677,2.873978,2.977706,2.742235
3,-0.487665,-0.456052,-0.493472,-0.494614,-0.491822,-0.604319,-0.514979,-0.510658,-0.478876,-0.461698,...,0.812463,0.983313,0.926416,0.909243,0.839340,0.779261,0.868789,0.930792,1.087332,1.641198
4,-0.487665,-0.456052,-0.493472,-0.494614,-0.491822,-0.642755,-0.518604,-0.512819,-0.478876,-0.461698,...,1.735287,1.743790,1.974602,1.872466,1.818978,1.741924,1.977928,1.925377,1.966063,2.147840
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
601,-0.369789,-0.332648,-0.428543,-0.432737,-0.434248,-0.690428,-0.443841,-0.438202,-0.369707,-0.352867,...,-0.721672,-0.713191,-0.713361,-0.717122,-0.710210,-0.715502,-0.706995,-0.698011,-0.682170,-0.619284
602,0.177799,0.172231,0.023775,-0.018818,0.008789,-0.315428,0.126901,0.106665,0.102685,0.094623,...,-0.704748,-0.697786,-0.699550,-0.704166,-0.697513,-0.703976,-0.690114,-0.681565,-0.661596,-0.605596
603,-0.383571,-0.333739,-0.128067,-0.128486,-0.164874,-0.453824,-0.452269,-0.401140,-0.385493,-0.334734,...,-0.671615,-0.680397,-0.675740,-0.680602,-0.669337,-0.672772,-0.674163,-0.659366,-0.622132,-0.590052
604,-0.406395,-0.394417,-0.212884,-0.213081,-0.208303,-0.480108,-0.458699,-0.413761,-0.407296,-0.395727,...,-0.634341,-0.645323,-0.634513,-0.640781,-0.631111,-0.635554,-0.641941,-0.621564,-0.552329,-0.508031


In [39]:
ndvi_table_anom_lct = dd.from_pandas(ndvi_table_anom_lct, npartitions = 4)
precip_table_anom_lct = dd.from_pandas(precip_table_anom_lct, npartitions = 4)
temp_table_anom_lct = dd.from_pandas(temp_table_anom_lct, npartitions = 4)

In [106]:
precip_table_anom_lct.label = 'precip'
precip_table_anom_lct.rs_rows = 674
precip_table_anom_lct.rs_cols = 583
precip_table_anom_lct.n_samples = 606
ndvi_table_anom_lct.label = 'ndvi'
ndvi_table_anom_lct.rs_rows = 674
ndvi_table_anom_lct.rs_cols = 583
ndvi_table_anom_lct.n_samples = 606
temp_table_anom_lct.label = 'temp'
temp_table_anom_lct.rs_rows = 674
temp_table_anom_lct.rs_cols = 583
temp_table_anom_lct.n_samples = 606

table_list_anom_lct = [precip_table_anom_lct, ndvi_table_anom_lct, temp_table_anom_lct]

In [None]:
#table_list = [precip_table, ndvi_table, temp_table]
labels = 'precip ndvi temp'
target_label = 'ndvi'
train = '1 606'
test = '1 606'

ed = 2
pi = 18
table_list = table_list_anom_lct

table_dict = {'Time': dekads }
[table_dict.update({table.label: table.iloc[:,0]}) for i, table in enumerate(table_list)]

#interim_df = pd.DataFrame(table_dict, index = dekads)
interim_df = pd.DataFrame(table_dict)
#print(type(interim_df.loc[0])

interim_df.set_index('Time',inplace=True, drop=False)

dekads_fut = generate_dekads(dekads[-1],pd.datetime(2020,4,30))

# Check whether column has a few nans
if interim_df.temp.isnull().values.any():
    # If so, iterate through nan rows and replace with monthly mean
    for t in interim_df[interim_df.temp.isnull()].index:
        interim_df.temp.loc[t] = interim_df.temp[interim_df.index.month == t.month].mean()
if interim_df.ndvi.isnull().values.any():
    # If so, iterate through nan rows and replace with monthly mean
    for t in interim_df[interim_df.ndvi.isnull()].index:
        interim_df.ndvi.loc[t] = interim_df.ndvi[interim_df.index.month == t.month].mean()
if interim_df.precip.isnull().values.any():
    # If so, iterate through nan rows and replace with monthly mean
    for t in interim_df[interim_df.precip.isnull()].index:
        interim_df.precip.loc[t] = interim_df.precip[interim_df.index.month == t.month].mean()
        
#interim_df.isnull().values.any()

simplex_result = Simplex(dataFrame = interim_df,
                         lib = train,
                         pred = test,
                         E = ed, Tp = pi,
                         columns = labels, target = target_label, showPlot = False)

simplex_result.Time = pd.to_datetime(simplex_result.Time)
simplex_result.set_index(simplex_result.Time, inplace=True)

# Fix datetime index for future dekads
simplex_result[-pi:].Time = dekads_fut[1:pi+1]


# Select desired months
#simp_season = simplex_result[simplex_result.index.month == month_list]
#print(simp_season)
#simp_year = simplex_result[simplex_result.index.year == 2003]
#print(simp_year)
error = ComputeError(list(simplex_result.Observations), list(simplex_result.Predictions))
rho = list(error.values())[2]
print(rho)
simplex_out.append(rho)

In [126]:

def multi_pixelwise_simplex(tables, target_label, train, test, ed, pi): #year month_list, 
    
    '''
    This function computes the simplex prediction skill for a given embedding dimensions, prediction interval, land cover and environmental variable
    across pixels given multiple tables of different variables of pixel values across time for a region from the function pixelwise_ts_table 

    The result is an np.array of simplex error values reshaped back into the shape of the original raster image
    
    Args:
    - table: list of input tables, where every table has columns with the time series of each pixel in a raster
    - target_label: text label for the table to be used as the target (e.g. 'ndvi')
    - train: portion of data library used to train (string of range with space between values)
    - test: portion of data libary used to test (string of range with space between values) 
    - ed: embedding dimension
    - pi: prediction interval (Tp) 
    - rs_rows: number of rows to reshape list into original raster size
    - rs_cols: number of columns to reshape list into original raster size
   
    '''

    table_dict = {'Time': dekads }
    
#     if simplex_out is None:
#         simplex_out = pd.DataFrame()
    simplex_out= []
    labels = ' '.join([table.label for table in tables]) # Assumes that all tables have labels

    # Generate future dekads
    dekads_fut = generate_dekads(dekads[-1],pd.datetime(2020,4,30))
    
   
    
    # Iterate through columns (pixels)
    for col_idx in tqdm(range(len(tables[0].columns[:10]))):
        
        now=datetime.now()
        # PIXEL DATAFRAME
        # Generate dataframe for pixel where columns correspond to variables and rows to timestamp (image)
        [table_dict.update({table.label: table.iloc[:,col_idx]}) for i, table in enumerate(tables)]
        #interim_df = pd.DataFrame(table_dict, index = dekads)
        interim_df = pd.DataFrame(table_dict)

        interim_df.set_index('Time',inplace=True, drop=False)
        
                # If column has lots of nans (water), append nan
        if max([len(interim_df[interim_df[col_idx].isnull()]) for col_idx in interim_df.columns]) > 20:
            nans = [np.nan]*(len(interim_df)-ed+pi+1)
            #simplex_out[col_idx] = [np.nan]*(len(interim_df)-ed+pi+1)
            simplex_out.append(nans)
        else:

            # Check whether column has a few nans
            if interim_df.temp.isnull().values.any():
                # If so, iterate through nan rows and replace with monthly mean
                for t in interim_df[interim_df.temp.isnull()].index:
                    interim_df.temp.loc[t] = interim_df.temp[interim_df.index.month == t.month].mean()
                    #print(interim_df)
            if interim_df.ndvi.isnull().values.any():
            # If so, iterate through nan rows and replace with monthly mean
                for t in interim_df[interim_df.ndvi.isnull()].index:
                    interim_df.ndvi.loc[t] = interim_df.ndvi[interim_df.index.month == t.month].mean()
            if interim_df.precip.isnull().values.any():
            # If so, iterate through nan rows and replace with monthly mean
                for t in interim_df[interim_df.precip.isnull()].index:
                    interim_df.precip.loc[t] = interim_df.precip[interim_df.index.month == t.month].mean()
            #print(interim_df)
            
            # Run simplex
            
            simplex_result = Simplex(dataFrame = interim_df,
                                     lib = train,
                                     pred = test,
                                     E = ed, Tp = pi,
                                     columns = labels, target = target_label, showPlot = False)

            
            # Add datetime index + fix dekad values for future values
            simplex_result.Time = pd.to_datetime(simplex_result.Time)
            simplex_result.set_index(simplex_result.Time, inplace=True)
            
            
            # Fix datetime index for future dekads
            simplex_result[-pi:].Time = dekads_fut[1:pi+1]
            print(simplex_result)
            
            #simplex_result.dropna()
            #simplex_out[col_idx] = simplex_result.Predictions
            #print(simplex_result.Predictions.tolist())
            
            predictions = simplex_result.Predictions.tolist()
            simplex_out.append(predictions)
            
            #print(simplex_out[i])
            delta=datetime.now()-now
            print(delta)
    return pd.DataFrame(simplex_out).T
    


In [121]:
func = delayed(multi_pixelwise_simplex)

In [122]:
x = func(table_list_anom_lct, 'ndvi', train = '1 606', test = '1 606',ed = 6, pi = 1)

In [None]:
x.compute()

In [137]:
ndvi_ea_simplex_ed6_pi2 = multi_pixelwise_simplex(table_list_anom, 'ndvi', train = '1 606', test = '1 606',ed = 6, pi = 1)

 30%|███       | 3/10 [00:00<00:00, 10.48it/s]

                 Time  Observations  Predictions  Pred_Variance
Time                                                           
2002-08-21 2002-08-21      1.218332          NaN            NaN
2002-09-01 2002-09-01      1.314077     0.946811       0.047296
2002-09-11 2002-09-11      0.860038     0.945524       0.083529
2002-09-21 2002-09-21      0.436575     0.832504       0.660186
2002-10-01 2002-10-01     -0.923461    -0.036171       0.447613
...               ...           ...          ...            ...
2019-03-21 2019-03-21      0.522922     0.494470       0.136372
2019-04-01 2019-04-01      0.331863     0.468478       0.091264
2019-04-11 2019-04-11      0.306670     0.567551       0.126001
2019-04-21 2019-04-21      0.452332     0.377132       0.094231
2019-05-01 2019-05-01           NaN     0.841261       0.281016

[602 rows x 4 columns]
0:00:00.108553
                 Time  Observations  Predictions  Pred_Variance
Time                                                           
2

 70%|███████   | 7/10 [00:00<00:00, 12.98it/s]

                 Time  Observations  Predictions  Pred_Variance
Time                                                           
2002-08-21 2002-08-21      1.298449          NaN            NaN
2002-09-01 2002-09-01      1.357547     0.240625       0.528031
2002-09-11 2002-09-11      0.583769     0.468045       0.711919
2002-09-21 2002-09-21      0.628330     0.028462       0.451993
2002-10-01 2002-10-01     -0.148798     0.507106       0.319292
...               ...           ...          ...            ...
2019-03-21 2019-03-21     -0.054164     0.197984       0.383514
2019-04-01 2019-04-01     -0.445762     0.408363       0.304896
2019-04-11 2019-04-11     -0.584455     0.183816       0.598027
2019-04-21 2019-04-21     -0.573427     0.214312       0.683981
2019-05-01 2019-05-01           NaN     0.051942       0.407115

[602 rows x 4 columns]
0:00:00.058382
                 Time  Observations  Predictions  Pred_Variance
Time                                                           
2

100%|██████████| 10/10 [00:00<00:00, 15.20it/s]


                 Time  Observations  Predictions  Pred_Variance
Time                                                           
2002-08-21 2002-08-21      1.071128          NaN            NaN
2002-09-01 2002-09-01      0.999675     0.815104       0.163086
2002-09-11 2002-09-11      0.386379     0.990763       0.093072
2002-09-21 2002-09-21      0.268390     0.598996       0.331354
2002-10-01 2002-10-01     -0.660950    -0.032759       0.368909
...               ...           ...          ...            ...
2019-03-21 2019-03-21     -0.132028    -0.380669       0.375267
2019-04-01 2019-04-01      0.460797     0.022260       0.156191
2019-04-11 2019-04-11     -0.056771     0.439933       0.202869
2019-04-21 2019-04-21      0.524811     0.622146       0.491489
2019-05-01 2019-05-01           NaN     0.636490       0.599362

[602 rows x 4 columns]
0:00:00.059172
                 Time  Observations  Predictions  Pred_Variance
Time                                                           
2



In [119]:
ndvi_ea_simplex_ed6_pi1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,,,,,,,,,,
1,0.946811,0.914877,0.797512,0.594871,0.240625,0.570968,0.724815,0.794612,0.815104,0.863327
2,0.945524,1.025262,0.727074,0.824794,0.468045,0.813933,0.973104,1.038043,0.990763,1.012515
3,0.832504,0.311903,0.290384,0.479303,0.028462,0.122248,0.266806,0.354354,0.598996,0.446858
4,-0.036171,0.242015,0.247698,0.597825,0.507106,0.578202,0.152151,0.127195,-0.032759,0.087431
...,...,...,...,...,...,...,...,...,...,...
597,0.494470,0.338419,0.387397,0.334896,0.197984,0.199564,-0.053598,0.005826,-0.380669,0.174347
598,0.468478,0.271786,0.512485,0.558958,0.408363,0.368130,0.235156,0.548281,0.022260,0.656614
599,0.567551,0.270379,0.741047,0.308590,0.183816,0.581590,0.412117,0.557788,0.439933,0.801484
600,0.377132,0.259359,0.340158,0.382132,0.214312,0.463818,0.228379,0.593213,0.622146,0.925211


In [None]:
ndvi_ea_simplex_ed6_pi2 = multi_pixelwise_simplex(table_list_anom, 'ndvi', train = '1 606', test = '1 606',ed = 6, pi = 2)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
 18%|█▊        | 69194/392942 [52:41<2:12:40, 40.67it/s]

In [42]:
ndvi_ea_simplex_ed6_pi1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,,,,,,,,,,
1,0.946811,0.914877,0.797512,0.594871,0.240625,0.570968,0.724815,0.794612,0.815104,0.863327
2,0.945524,1.025262,0.727074,0.824794,0.468045,0.813933,0.973104,1.038043,0.990763,1.012515
3,0.832504,0.311903,0.290384,0.479303,0.028462,0.122248,0.266806,0.354354,0.598996,0.446858
4,-0.036171,0.242015,0.247698,0.597825,0.507106,0.578202,0.152151,0.127195,-0.032759,0.087431
...,...,...,...,...,...,...,...,...,...,...
597,0.494470,0.338419,0.387397,0.334896,0.197984,0.199564,-0.053598,0.005826,-0.380669,0.174347
598,0.468478,0.271786,0.512485,0.558958,0.408363,0.368130,0.235156,0.548281,0.022260,0.656614
599,0.567551,0.270379,0.741047,0.308590,0.183816,0.581590,0.412117,0.557788,0.439933,0.801484
600,0.377132,0.259359,0.340158,0.382132,0.214312,0.463818,0.228379,0.593213,0.622146,0.925211


In [None]:
simplex_df = pd.DataFrame()
num_col_in_chunk = 1000
for starting_col in range(0, 392942, num_col_in_chunk):#392942
    ndvi_ea_simplex= multi_pixelwise_simplex(table_list_anom, 'ndvi', train = '1 606', test = '1 606',ed = 6, pi = 1, starting_column = starting_col, num_columns = num_col_in_chunk, simplex_out=simplex_df)
    new_ndvi_ea_simplex = ndvi_ea_simplex.iloc[:,-num_col_in_chunk:]
    new_ndvi_ea_simplex.to_csv(data_volumes+'ndvi_ea_simplex_ed6_pi1_0' + str(starting_col) + '.csv')

In [12]:
df = DataFrame(np.random.randn(300000,600))

In [13]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,590,591,592,593,594,595,596,597,598,599
0,0.089729,0.453758,-0.227077,1.086760,0.429158,-0.267277,-0.586962,-0.291149,0.610582,-0.677323,...,0.954878,0.041511,-1.375701,-0.988672,1.035983,-1.124558,-0.239706,1.111770,0.625992,-0.826514
1,-2.356091,1.201864,0.747935,-0.620006,-0.803996,0.650603,0.854943,0.640250,-0.750479,0.867959,...,0.426309,2.774478,-0.953031,-0.399365,-0.363743,1.257300,1.981312,-0.435598,0.069492,0.832684
2,2.593216,0.897086,0.542531,0.407485,1.143427,0.145874,-1.135798,0.319879,-1.022585,-1.866101,...,1.302528,1.080571,-1.013649,-0.054470,-0.528769,0.980161,-0.199724,-0.105143,-1.204524,-0.476064
3,0.853982,-0.469263,-1.182209,0.437694,-1.529014,-1.468944,0.424029,-1.374118,-0.226055,2.710123,...,1.580888,-0.438513,-0.203751,-0.157685,1.415128,0.349609,0.349413,-2.277077,-0.177368,-0.863924
4,-0.028378,-0.124003,0.459031,0.986843,-1.041047,-0.589072,-1.330099,-0.115318,-0.552186,0.303430,...,0.694026,-2.879508,-0.531474,0.605278,0.402023,2.876967,-0.567868,0.123081,0.086449,-0.046321
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
299995,-1.692810,0.888814,-0.694874,1.251194,-1.756056,0.344140,0.427033,1.722951,-0.954262,1.133018,...,-0.838315,0.991573,0.358765,-0.249567,0.945794,1.935669,0.569591,0.133618,-0.790139,-1.621846
299996,1.396363,-0.244527,-1.287964,0.722180,0.403257,0.161471,0.387789,-0.516893,0.069343,0.483551,...,-0.174783,-2.105806,-0.358293,1.096476,-0.070699,1.501764,0.481007,1.599794,0.442363,-0.594189
299997,2.112641,1.274941,-1.335623,-0.524763,-0.606644,-0.333212,1.466827,1.328934,-0.218637,-0.170698,...,0.896251,-0.292122,0.942140,-1.558474,-0.535838,-0.536876,1.004867,0.184847,-0.154542,-1.700770
299998,-1.199137,0.551833,0.757334,-1.308611,-1.778348,0.226848,2.733152,1.054062,0.190829,-1.163540,...,1.363647,-0.470022,-0.325498,0.113593,1.652439,-0.808996,-0.031348,0.231624,1.744659,0.006439


In [15]:
def f2():
    result = []
    for i in range(10):
        result.append(df)
    return pd.concat(result)

In [18]:
from dask.distributed import Client

client = Client(n_workers=4)

In [22]:
from time import sleep

def inc(x):
    sleep(1)
    return x + 1

def add(x, y):
    sleep(1)
    return x + y

In [23]:
data = [1, 2, 3, 4, 5, 6, 7, 8]


In [24]:
%%time 
# Sequential code

results = []
for x in data:
    y = inc(x)
    results.append(y)
    
total = sum(results)

CPU times: user 200 ms, sys: 92 ms, total: 292 ms
Wall time: 8.01 s


In [25]:
total

44

In [26]:
from dask import delayed

In [27]:
# This runs immediately, all it does is build a graph

x = delayed(inc)(1)
y = delayed(inc)(2)
z = delayed(add)(x, y)

In [28]:
%%time
# This actually runs our computation using a local thread pool

z.compute()

CPU times: user 48 ms, sys: 40 ms, total: 88 ms
Wall time: 2.02 s


5

In [32]:
z.compute()

5