/
file_paths.R
180 lines (141 loc) · 9.64 KB
/
file_paths.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# This script contains all of the dataset locations and a brief description of each
# This script can be sourced in order to not put absolute paths in any other script.
# Format:
# Dataset description
# Location retrieved or script used to create
# Name it is stored as <- location
# Anna Boser 2022
library(here)
# Return the directory portion of a "/"-separated file path.
# The last "/"-delimited component is stripped; a path that contains no "/"
# or that ends in "/" is returned unchanged. Vectorised over `filepath`.
directory_path <- function(filepath) {
  trailing_component <- "[/][^/]+$"
  sub(trailing_component, "", filepath)
}
# Naming convention visible in this file: variables ending in `_path` point to
# a directory, variables ending in `_loc` point to a single file.
#
# OpenET 2016-2021 (raw download directory)
# From Google Earth Engine.
#   Data:            https://developers.google.com/earth-engine/datasets/catalog/OpenET_ENSEMBLE_CONUS_GRIDMET_MONTHLY_v2_0
#   Download script: https://code.earthengine.google.com/cb078c9f8933fb4814619c7982b8e615
raw_openet_path <- here("data", "1_raw", "OpenET")
# Study area (shapefile)
# A 10km buffer around agricultural lands in the Central Valley.
# Available in the data folder.
study_area_loc <- here("data", "1_raw", "study_area", "study_area.shp")
# Consistent grid for the study area (raster)
# A 70m grid generated in 1_generate_datasets/1_intermediate/OpenET.R.
# Available in the data folder. Note that although it is generated by a
# script, it is stored under 1_raw.
grid_loc <- here("data", "1_raw", "study_area", "grid.tif")
# Tifs of monthly OpenET data (directory)
# Generated in 1_generate_datasets/1_intermediate/OpenET.R
intermediate_openet_path <- here("data", "2_intermediate", "OpenET")
# Raw LandIQ/DWR agriculture data (directory)
# From https://data.cnra.ca.gov/dataset/6c3d65e3-35bb-49e1-a51e-49d5a2cf09a9/resource/1da7b37a-dd97-4b69-a86a-fe824a252eaf/download/i15_crop_mapping_2019.zip
# All years available at: https://data.cnra.ca.gov/dataset/statewide-crop-mapping
raw_dwr_path <- here("data", "1_raw", "DWR")
# Flat shapefile of where agriculture is in CA
# Created in 1_generate_datasets/1_intermediate/agriculture.R
# NOTE(review): the directory component is spelled "flag_ag" while the file is
# "flat_ag.shp" -- possibly a typo. Confirm against the folder on disk (and the
# script that writes it) before changing, since this string is the real path.
flat_ag_shp_loc <- here("data", "2_intermediate", "agriculture", "flag_ag", "flat_ag.shp")
# Raster of where agriculture is in CA
# Created in 1_generate_datasets/1_intermediate/agriculture.R
CA_ag_rast_loc <- here("data", "2_intermediate", "agriculture", "CA_ag.tif")
# Raster of where agriculture is in the study area
# Created in 1_generate_datasets/1_intermediate/agriculture.R
ag_rast_loc <- here("data", "2_intermediate", "agriculture", "ag.tif")
# Shapefile of California
# Available in the data folder.
CA_shp_loc <- here("data", "1_raw", "study_area", "california", "california.shp")
# Cropland Data Layer (CDL)
# Can be downloaded at https://www.nass.usda.gov/Research_and_Science/Cropland/Release/index.php
# CDL_path is the directory holding all CDL inputs; the CDL<year>_loc variables
# are the individual 2019, 2020 and 2021 .img files inside it.
CDL_path <- here("data", "1_raw", "CDL")
CDL2019_loc <- here("data", "1_raw", "CDL", "CDL2019", "2019_30m_cdls.img")
CDL2020_loc <- here("data", "1_raw", "CDL", "CDL2020", "2020_30m_cdls.img")
CDL2021_loc <- here("data", "1_raw", "CDL", "CDL2021", "2021_30m_cdls.img")
# CDL code dictionary (csv)
# Created using CDL metadata and available in the data folder.
cdl_code_dict_loc <- here("data", "1_raw", "CDL", "CDL_code_dictionary.csv")
# Raw PET data (directory)
# Download at https://data.bris.ac.uk/data/dataset/qb8ujazzda0s2aykkv0oq0ctp
raw_PET_path <- here("data", "1_raw", "PET")
# Intermediate PET data (directories)
# Created in 1_generate_datasets/1_intermediate/PET.R
# The folder names are numbered 1-3: means, resampled, study-area means.
# NOTE(review): the numbering presumably reflects processing order -- confirm in PET.R.
PET_means_path <- here("data", "2_intermediate", "PET", "1_means")
PET_resampled_path <- here("data", "2_intermediate", "PET", "2_resampled")
PET_study_area_path <- here("data", "2_intermediate", "PET", "3_study_area_means")
# Soils -- California Storie index raw data
# From gSSURGO and STATSGO2, resampled to the grid of gNATSGO.
# Obtained through personal correspondence with the CA State Soil Scientist:
# Campbell, Steve - FPAC-NRCS, Portland, OR <steve.campbell@usda.gov>
# gNATSGO_grid_loc is the map unit raster; storie_index_loc is the Storie index table (csv).
gNATSGO_grid_loc <- here("data", "1_raw", "CA_storie", "CA_gNATSGO_MuRaster_tif", "MapunitRaster_10m.tif")
storie_index_loc <- here("data", "1_raw", "CA_storie", "CA_all_NASIS_Storie_Index_SSURGO_STATSGO2.csv") # available in the data folder
# Intermediate soil data (rasters)
# Created in 1_generate_datasets/1_intermediate/5_soils.R and 5.5_soils.py
# NOTE(review): which of the two scripts writes which of these files is not
# stated here -- confirm in the scripts.
gNATSGO_storie_loc <- here("data", "2_intermediate", "CA_storie", "gNATSGO_storie.tif")
CA_storie_resampled_loc <- here("data", "2_intermediate", "CA_storie", "CA_storie_index.tif")
storie_loc <- here("data", "2_intermediate", "CA_storie", "storie_index.tif") # available in the data folder
# DEM: 90m resolution western US elevation data
# From https://www.sciencebase.gov/catalog/item/542aebf9e4b057766eed286a
dem_loc <- here("data", "1_raw", "DEM", "dem90_hf.tif")
# Processed topography data: elevation, slope, and aspect rasters
# (the topographic wetness index files are listed separately further down).
# Created in 1_generate_datasets/1_intermediate/6_topography.R
# NOTE(review): the ca_* files are presumably California-wide and the
# unprefixed ones clipped to the study area -- confirm in 6_topography.R.
ca_elevation_loc <- here("data", "2_intermediate", "topography", "ca_elevation.tif")
ca_slope_loc <- here("data", "2_intermediate", "topography", "ca_slope.tif")
ca_aspect_loc <- here("data", "2_intermediate", "topography", "ca_aspect.tif")
elevation_loc <- here("data", "2_intermediate", "topography", "elevation.tif")
slope_loc <- here("data", "2_intermediate", "topography", "slope.tif")
aspect_loc <- here("data", "2_intermediate", "topography", "aspect.tif")
# Slope (in radians) and DEM rasters to feed into QGIS to get the TWI
# Created in 1_generate_datasets/1_intermediate/7_topographic_wetness_index.R
dem_clip_loc <- here("data", "2_intermediate", "twi", "dem_hydrologic_regions.tif")
slope_clip_loc <- here("data", "2_intermediate", "twi", "slope_hydrologic_regions.tif")
# Topographic Wetness Index (TWI) calculated in QGIS
# following the tutorial https://www.youtube.com/watch?v=aHCLCUwg3O0
# using files created in 1_generate_datasets/1_intermediate/7_topographic_wetness_index.R
# NOTE(review): the variable and file are named for upslope area rather than
# TWI -- presumably this is the QGIS output that TWI is derived from; confirm.
upslope_area_loc <- here("data", "2_intermediate", "twi", "upslope_area_hydrologic_regions.tif")
# TWI resampled and clipped to the study area
# Created in 1_generate_datasets/1_intermediate/7_topographic_wetness_index.R
# NOTE(review): "resmapled" in the file name looks like a misspelling of
# "resampled"; it is the real on-disk name as far as this file shows, so do not
# change it without also renaming the file and updating the script that writes it.
twi_resampled_loc <- here("data", "2_intermediate", "twi", "twi_hydrologic_regions_resmapled.tif")
twi_loc <- here("data", "2_intermediate", "twi", "twi.tif")
# County shapefile
# From https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_county_500k.zip
counties_loc <- here("data", "1_raw", "counties", "cb_2018_us_county_500k.shp")
# Table with the location of a pixel and the county it belongs to
# Created in 1_generate_datasets/1_intermediate/additional_data/1_counties.R
county_table_loc <- here("data", "4_for_analysis", "additional_data", "counties.csv")
# Subbasin shapefile
# Available at https://geodata.lib.utexas.edu/catalog/stanford-kt110sd4399 and in the data folder.
basin_loc <- here("data", "1_raw", "subbasins", "basin.shp")
# Table with the location of a pixel and the basin it belongs to
# Created in 1_generate_datasets/1_intermediate/additional_data/5_basin.R
basin_table_loc <- here("data", "4_for_analysis", "additional_data", "basin.csv")
# Table with the location of a pixel and its crop type, sub crop type, and
# whether multiple crops are grown on that pixel. Pure pixels only.
# Created in 1_generate_datasets/1_intermediate/additional_data/2_crops.R
# crops_dwr_table_path is the directory for the per-year tables;
# crops_dwr_table_loc is the single combined csv.
crops_dwr_table_path <- here("data", "4_for_analysis", "additional_data") # for the separate years
crops_dwr_table_loc <- here("data", "4_for_analysis", "additional_data", "crops_all_dwr_fallow.csv")
# Table of fallow fields according to CDL
# Created in 1_generate_datasets/1_intermediate/additional_data/2.5_crops.R
# fallow_CDL_table_path is the same directory as crops_dwr_table_path.
fallow_CDL_table_path <- here("data", "4_for_analysis", "additional_data") # for the separate years
fallow_CDL_table_loc <- here("data", "4_for_analysis", "additional_data", "fallow_CDL.csv")
# Datasets processed for training the ML model (directory)
# Contains CDL, CPAD, and FVEG natural pixels.
# Created in 1_generate_datasets/3_data_aggregation/datasets_for_ml.R
natural_data_path <- here("data", "3_for_counterfactual", "training_data")
# Dataset processed for getting ML model predictions (csv)
# Contains DWR agricultural pixels.
# Created in 1_generate_datasets/3_data_aggregation/datasets_for_ml.R
ag_data_loc <- here("data", "3_for_counterfactual", "agriculture", "agriculture.csv")
# Same as ag_data_loc but only the fallow fields
# Created in 1_generate_datasets/3_data_aggregation/3_add_crop_county_data.R
fallow_data_loc <- here("data", "3_for_counterfactual", "agriculture", "fallow.csv")
# Same as fallow_data_loc but only the fallow fields that were also fallow in the CDL
# Created in 1_generate_datasets/3_data_aggregation/3_add_crop_county_data.R
fallow_cdl_data_loc <- here("data", "3_for_counterfactual", "agriculture", "fallow_cdl.csv")
# Directory of the distributions of distances between 500 random ag pixels and
# natural land, for the different natural test sets
# Created in 3_analysis/1_additional_data_manipulation/2_ag_natural_distance.R
distance_distribution_path <- here("data", "4_for_analysis", "distances")
# USGS irrigation data (directory)
# 2015: https://waterdata.usgs.gov/ca/nwis/water_use?wu_year=2015&wu_area=County&wu_county=ALL&wu_category=IC&submitted_form=introduction&wu_county_nms=--ALL+Counties--&wu_category_nms=Irrigation%2C+Crop
# 2010: https://waterdata.usgs.gov/ca/nwis/water_use?wu_year=2010&wu_area=County&wu_county=ALL&wu_category=IC&submitted_form=introduction&wu_county_nms=--ALL+Counties--&wu_category_nms=Irrigation%2C+Crop
usgs_irr_path <- here("data", "1_raw", "USGS_waterdata")
# CalSIMETAW water use comparisons (csv)
# From https://data.ca.gov/dataset/cal-simetaw-unit-values
calsimetaw_loc <- here("data", "1_raw", "CalSIMETAW", "calsimetaw.csv")
# Fallow train, validation, and test locations (directories)
# Splits created in 5_clean_and_split_fallow.R
# NOTE(review): val_data_path and test_data_path are the same directory
# (data/3_for_counterfactual/agriculture), while training_data_path lives under
# training_data/train. Presumably the validation and test files are told apart
# by file name -- confirm in 5_clean_and_split_fallow.R.
training_data_path <- here("data", "3_for_counterfactual", "training_data", "train")
val_data_path <- here("data", "3_for_counterfactual", "agriculture")
test_data_path <- here("data", "3_for_counterfactual", "agriculture")
# The agriculture dataset restricted to 2016, 2018, and 2019 (csv)
# Created in 5_clean_and_split_fallow.R
ag_dwr_years_loc <- here("data", "3_for_counterfactual", "agriculture", "agriculture_dwr_years.csv")