In [1]:
import rioxarray
from pyprojroot import here
import pandas as pd
from osgeo import gdal
import numpy as np
import pickle

In [2]:
# flatten other rasters and add them to the dataset
def add_columns(file, name, df=None):
    """Flatten a raster and store it as one column of a table.

    The raster is opened with GDAL, read fully into memory and flattened in
    C (row-major) order. A 2-D array yields ``rows * cols`` values; a 3-D
    array yields ``dim0 * rows * cols`` values, one leading-axis slice after
    another.

    Parameters
    ----------
    file : str or path-like
        Raster to read; converted with ``str()`` before being handed to GDAL.
    name : str
        Label of the column to create (or overwrite).
    df : optional
        Table that receives the column. When omitted, the notebook-level
        ``dataframe`` is used, so existing two-argument calls are unchanged.

    Raises
    ------
    RuntimeError
        If GDAL cannot open ``file`` or cannot read its pixels.
    ValueError
        If the array read from the raster is neither 2-D nor 3-D.
    """
    dataset = gdal.Open(str(file))
    if dataset is None:
        # Unless gdal.UseExceptions() is active, Open() reports failure by
        # returning None; fail here with the offending path instead of with
        # an AttributeError on the next line.
        raise RuntimeError(f"GDAL could not open raster: {file}")

    ar = dataset.ReadAsArray()
    if ar is None:
        raise RuntimeError(f"GDAL could not read raster data from: {file}")

    if ar.ndim not in (2, 3):
        # ValueError is still an Exception, so broad handlers keep working.
        raise ValueError(f"Unexpected number of dimensions: {ar.ndim}")

    # reshape(-1) is the same C-order flatten the 2-D and 3-D cases need.
    target = dataframe if df is None else df
    target[name] = ar.reshape(-1)


In [4]:
# Base table: one row per grid cell; raster-derived columns are appended to it below
grid_csv_path = str(here("./data/for_analysis/full_grid_time_invariant_cv.csv"))
dataframe = pd.read_csv(grid_csv_path)

In [5]:
# Add the grouped PET rasters as columns PET0 ... PET5.
# The group index selects both the input file (PET_grouped_<i>_cv.tif) and
# the column name (PET<i>), so one loop replaces six copy-pasted calls.
for group in range(6):
    add_columns(str(here(f"./data/intermediate/PET/PET_grouped_{group}_cv.tif")),
                f"PET{group}")


In [6]:
# Add the ET_mean rasters as columns ET0 ... ET5.
# The group index selects both the input file (<i>.tif) and the column name
# (ET<i>), so one loop replaces six copy-pasted calls.
for group in range(6):
    add_columns(str(here(f"./data/intermediate/ECOSTRESS_cv/ET_mean/{group}.tif")),
                f"ET{group}")


In [7]:
# Preview the first rows to check that the PET*/ET* columns are present.
# NOTE(review): the -3.4e+38 values in the output look like a raster nodata
# fill value - confirm, and mask them before any analysis.
dataframe.head()

Unnamed: 0,y,x,agriculture,counterfactual,elevation,aspect,slope,soil,PET0,PET1,PET2,PET3,PET4,PET5,ET0,ET1,ET2,ET3,ET4,ET5
0,40.753141,-122.911442,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38
1,40.753141,-122.910811,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38
2,40.753141,-122.91018,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38
3,40.753141,-122.909549,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38
4,40.753141,-122.908918,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38,-3.4000000000000003e+38


In [13]:
# Show the (rows, columns) size of the assembled table.
dataframe.shape

(62779780, 20)

In [14]:
# Write the complete grid, now including the PET*/ET* columns
full_grid_csv = str(here("./data/for_analysis/full_grid_not_tidy_cv.csv"))
dataframe.to_csv(full_grid_csv, index=False)

# Rows where agriculture == 1 go to their own file
agriculture_csv = str(here("./data/for_analysis/agriculture_not_tidy_cv.csv"))
agriculture = dataframe.query('agriculture==1')
agriculture.to_csv(agriculture_csv, index=False)

# Rows where counterfactual == 1 go to their own file
counterfactual_csv = str(here("./data/for_analysis/counterfactual_not_tidy_cv.csv"))
counterfactual = dataframe.query('counterfactual==1')
counterfactual.to_csv(counterfactual_csv, index=False)
