-
Notifications
You must be signed in to change notification settings - Fork 5
/
foss-oracle-r.qmd
250 lines (215 loc) · 7.55 KB
/
foss-oracle-r.qmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
---
title: Access via Oracle and R (AFSC only)
number-sections: true
---
```{r setup, include=FALSE}
#| file: functions.R
```
If the user has access to the AFSC `Oracle` database, the user can use `SQL developer` to view and pull the FOSS public data directly from the `GAP_PRODUCTS` `Oracle` schema.
### Connect to Oracle from R
Many users will want to access the data from `Oracle` using `R`. The user will need to install the `RODBC` `R` package and ask OFIS (IT) connect `R` to `Oracle`. Then, use the following code in `R` to establish a connection from `R` to `Oracle`:
Here, the user can write in their username and password directly into the `RODBC` connect function. Never save usernames or passwords in scripts that may be intentionally or unintentionally shared with others. If no username and password is entered in the function, pop-ups will appear on the screen asking for the username and password.
```{r oracle-connect-2, echo = TRUE, eval = FALSE}
library(gapindex)
channel <- gapindex::get_connected()
```
### Ex. Wholesale download data and join data in R
```{r}
#| label: test-1a
#| tbl-cap: "Wholesale download data and join data in R. "
#| message: false
#| warning: false
#| echo: true
#| eval: false
locations <- c(
"GAP_PRODUCTS.FOSS_CATCH",
"GAP_PRODUCTS.FOSS_HAUL",
"GAP_PRODUCTS.FOSS_SPECIES"
)
print(Sys.Date())
error_loading <- c() # log if any tables are unable to download
for (i in 1:length(locations)){
print(locations[i])
a <- RODBC::sqlQuery(channel, paste0("SELECT * FROM ", locations[i], "; "))
if (is.null(nrow(a))) { # if an error in downloading has occurred
error_loading <- c(error_loading, locations[i])
} else { # if no error in downloading has occurred
write.csv(x = a,
# change file name to be more computer file storage friendly
here::here(paste0(tolower(gsub(
pattern = '.',
replacement = "_",
x = locations[i],
fixed = TRUE)),
".csv")))
}
}
error_loading
```
Join downloaded files into presence-only table
```{r}
#| label: test-1b
#| tbl-cap: "Presence-only join data in R. "
#| message: false
#| warning: false
#| echo: true
#| eval: false
# Load data
library(dplyr)
library(here)
library(readr)
catch <- readr::read_csv(file = here::here("data/gap_products_foss_catch.csv"))[,-1] # remove "row number" column
haul <- readr::read_csv(file = here::here("data/gap_products_foss_haul.csv"))[,-1] # remove "row number" column
species <- readr::read_csv(file = here::here("data/gap_products_foss_species.csv"))[,-1] # remove "row number" column
dat <-
# join haul and catch data to unique species by survey table
dplyr::left_join(haul, catch) %>%
# join species data to unique species by survey table
dplyr::left_join(species) %>%
# modify zero-filled rows
dplyr::mutate(
CPUE_KGKM2 = ifelse(is.null(CPUE_KGKM2), 0, CPUE_KGKM2), # just in case
CPUE_KGHA = CPUE_KGKM2/100, # Hectares
CPUE_NOKM2 = ifelse(is.null(CPUE_NOKM2), 0, CPUE_NOKM2), # just in case
CPUE_NOHA = CPUE_NOKM2/100, # Hectares
COUNT = ifelse(is.null(COUNT), 0, COUNT),
WEIGHT_KG = ifelse(is.null(WEIGHT_KG), 0, WEIGHT_KG) )
```
Join downloaded files into zero-filled table
```{r}
#| label: test-1c
#| tbl-cap: "Zero-fill join data in R. "
#| message: false
#| warning: false
#| echo: true
#| eval: false
# Load data
library(dplyr)
library(here)
library(readr)
catch <- readr::read_csv(file = here::here("data/gap_products_foss_catch.csv"))[,-1] # remove "row number" column
haul <- readr::read_csv(file = here::here("data/gap_products_foss_haul.csv"))[,-1] # remove "row number" column
species <- readr::read_csv(file = here::here("data/gap_products_foss_species.csv"))[,-1] # remove "row number" column
# come up with full combination of what species should be listed for what hauls/surveys
# for zero-filled data, all species caught in a survey need to have zero or non-zero row entries for a haul
comb <- dplyr::full_join(
x = dplyr::left_join(catch, haul, by = "HAULJOIN") %>%
dplyr::select(SURVEY_DEFINITION_ID, SPECIES_CODE) %>%
dplyr::distinct(),
y = haul %>%
dplyr::select(SURVEY_DEFINITION_ID, HAULJOIN) %>%
dplyr::distinct(),
by = "SURVEY_DEFINITION_ID",
relationship = "many-to-many"
)
# Join data to make a full zero-filled CPUE dataset
dat <- comb %>%
# add species data to unique species by survey table
dplyr::left_join(species, "SPECIES_CODE") %>%
# add catch data
dplyr::full_join(catch, c("SPECIES_CODE", "HAULJOIN")) %>%
# add haul data
dplyr::full_join(haul) %>% # , c("SURVEY_DEFINITION_ID", "HAULJOIN")
# modify zero-filled rows
dplyr::mutate(
CPUE_KGKM2 = ifelse(is.null(CPUE_KGKM2), 0, CPUE_KGKM2),
CPUE_KGHA = CPUE_KGKM2/100, # Hectares
CPUE_NOKM2 = ifelse(is.null(CPUE_NOKM2), 0, CPUE_NOKM2),
CPUE_NOHA = CPUE_NOKM2/100, # Hectares
COUNT = ifelse(is.null(COUNT), 0, COUNT),
WEIGHT_KG = ifelse(is.null(WEIGHT_KG), 0, WEIGHT_KG) )
```
### Ex. Join data using Oracle
To join these tables in Oracle, you may use a variant of the following code:
```{sql test-4, eval = FALSE, echo = TRUE}
SELECT
hh.YEAR,
hh.SRVY,
hh.SURVEY,
hh.SURVEY_DEFINITION_ID,
hh.SURVEY_NAME,
hh.CRUISE,
hh.CRUISEJOIN,
hh.HAUL,
hh.HAULJOIN,
hh.STRATUM,
hh.STATION,
hh.VESSEL_ID,
hh.VESSEL_NAME,
hh.DATE_TIME,
hh.LATITUDE_DD_START,
hh.LONGITUDE_DD_START,
hh.LATITUDE_DD_END,
hh.LONGITUDE_DD_END,
hh.BOTTOM_TEMPERATURE_C,
hh.SURFACE_TEMPERATURE_C,
hh.DEPTH_M,
cc.SPECIES_CODE,
ss.ITIS,
ss.WORMS,
ss.COMMON_NAME,
ss.SCIENTIFIC_NAME,
ss.ID_RANK,
CASE WHEN cc.CPUE_KGKM2 IS NULL THEN 0 ELSE cc.CPUE_KGKM2 END AS CPUE_KGKM2,
CASE WHEN cc.CPUE_NOKM2 IS NULL THEN 0 ELSE cc.CPUE_NOKM2 END AS CPUE_NOKM2,
CASE WHEN cc.COUNT IS NULL THEN 0 ELSE cc.COUNT END AS COUNT,
CASE WHEN cc.WEIGHT_KG IS NULL THEN 0 ELSE cc.WEIGHT_KG END AS WEIGHT_KG,
CASE WHEN cc.TAXON_CONFIDENCE IS NULL THEN NULL ELSE cc.TAXON_CONFIDENCE END AS TAXON_CONFIDENCE,
hh.AREA_SWEPT_KM2,
hh.DISTANCE_FISHED_KM,
hh.DURATION_HR,
hh.NET_WIDTH_M,
hh.NET_HEIGHT_M,
hh.PERFORMANCE
FROM GAP_PRODUCTS.FOSS_SURVEY_SPECIES sv
FULL OUTER JOIN GAP_PRODUCTS.FOSS_SPECIES ss
ON sv.SPECIES_CODE = ss.SPECIES_CODE
FULL OUTER JOIN GAP_PRODUCTS.FOSS_HAUL hh
ON sv.SURVEY_DEFINITION_ID = hh.SURVEY_DEFINITION_ID
FULL OUTER JOIN GAP_PRODUCTS.FOSS_CATCH cc
ON sv.SPECIES_CODE = cc.SPECIES_CODE
AND hh.HAULJOIN = cc.HAULJOIN
```
### Ex. Subset data
Here, we are pulling EBS Pacific cod from 2010 - 2021:
```{r test-2, echo = TRUE, eval = TRUE}
# Pull data
data <- RODBC::sqlQuery(
channel = channel,
query =
"SELECT * FROM GAP_PRODUCTS.FOSS_CATCH cc
JOIN GAP_PRODUCTS.FOSS_HAUL hh
ON cc.HAULJOIN = hh.HAULJOIN
WHERE SRVY = 'EBS'
AND SPECIES_CODE = 21720 -- 'Pacific cod'
AND YEAR >= 2010
AND YEAR < 2021")
flextable::flextable(data[1:3,]) %>%
flextable::theme_zebra()
```
### Ex. Find all species found in the eastern Bering Sea (EBS) survey in 2023
```{r test-3, echo = TRUE, eval = TRUE}
# Pull data
data <- RODBC::sqlQuery(
channel = channel,
query =
"SELECT DISTINCT
ss.COMMON_NAME,
ss.SCIENTIFIC_NAME,
ss.ID_RANK,
ss.WORMS
FROM GAP_PRODUCTS.FOSS_CATCH cc -- get species codes
LEFT JOIN GAP_PRODUCTS.FOSS_SPECIES ss -- get species info
ON cc.SPECIES_CODE = ss.SPECIES_CODE
LEFT JOIN GAP_PRODUCTS.FOSS_HAUL hh -- filter by year and survey
ON cc.HAULJOIN = hh.HAULJOIN
WHERE hh.YEAR = 2023
AND hh.SURVEY_DEFINITION_ID = 98 -- EBS survey
ORDER BY COMMON_NAME")
flextable::flextable(data[1:3,]) %>%
# flextable::fit_to_width(max_width = 6) %>%
flextable::theme_zebra()
```
```{r rodbc-close}
RODBC::odbcCloseAll()
```