# Iteratively running random forest models on WA HUC units, using subsets of the training data and raster stacks from the full WA state

In [1]:
library(terra)
library(tidyterra)
library(dplyr)
library(randomForest)
library(caret)
set.seed(11)

terra 1.7.71


Attaching package: ‘tidyterra’


The following object is masked from ‘package:stats’:

    filter



Attaching package: ‘dplyr’


The following objects are masked from ‘package:terra’:

    intersect, union


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


randomForest 4.7-1.1

Type rfNews() to see new features/changes/bug fixes.


Attaching package: ‘randomForest’


The following object is masked from ‘package:dplyr’:

    combine




In [5]:
# Load the two GeoPackage vector layers used below: the HUC8 units and the
# state outline (layer contents inferred from the file names -- confirm).
wa_hucs <- terra::vect("data/Hydrography/WA_HUC8.gpkg")
wa_shp <- terra::vect("data/wa_shp_diss.gpkg")

In [14]:
# Per-HUC data directory, anchored at the current working directory.
datapath <- file.path(getwd(), "data", "huc_data")
datapath

In [15]:
# Preview the first HUC name with its spaces stripped; the same expression is
# used further down as the file-name pattern for each HUC.
wa_hucs$name[[1]] |>
  paste0() |>
  gsub(pattern = " ", replacement = "")


In [18]:
# Files in the data directory whose names match "Skykomish".
list_rasts <- datapath |>
  list.files(pattern = "Skykomish", full.names = TRUE, include.dirs = FALSE) |>
  as.list()
list_rasts

# Same lookup inside the `points` subdirectory.
list_pts <- file.path(datapath, "points") |>
  list.files(pattern = "Skykomish", full.names = TRUE, include.dirs = FALSE) |>
  as.list()
list_pts

In [26]:
# First listed path that contains "terr"; shown for inspection.
terr_rast <- list_rasts[grepl("terr", list_rasts)][[1]]
terr_rast

# Evaluated at top level, substitute() leaves the symbol untouched, so this
# returns the variable's name as a string rather than its value.
deparse(substitute(terr_rast))

In [84]:
#' Fit and evaluate one random forest classifier per HUC unit.
#'
#' For every entry of `hucs` the function looks up the training-point file and
#' the `terr`, `spec` and `clim` raster files whose names match the HUC name
#' (spaces removed), extracts the raster values at the points, fits a
#' `randomForest` classifier of `class` on a random training split and scores
#' it on the held-out rows with `caret::confusionMatrix()`.
#'
#' Files written to `<path>/RF_Models/` for a HUC named `<name>`:
#' * `<name>rf_df.csv`            - extracted table with NA rows removed
#' * `<name>rf_model_testCM.csv`  - overall and by-class test statistics
#' * `<name>rf_model.RData`       - the fitted model, saved as `rf_model`
#'
#' HUCs whose input files cannot be found, or whose split leaves no `"WET"`
#' training rows or no test rows, are skipped with a warning so that one bad
#' unit does not abort the whole batch.
#'
#' @param hucs Object with one `name` entry per HUC and a matching `length()`
#'   (the notebook passes a terra SpatVector).
#' @param path Directory containing the per-HUC raster files and a `points`
#'   subdirectory with the training points. Outputs go to `path/RF_Models`.
#' @param train_frac Fraction of rows used for training; the rest is the test
#'   set. Defaults to the original hard-coded 0.7.
#' @param mtry,ntree Passed to `randomForest()`. Defaults are the original
#'   hard-coded values.
#' @return `NULL`, invisibly. Called for its side effects.
wip_rf_func <- function(hucs, path, train_frac = 0.7, mtry = 10, ntree = 1000) {
  stopifnot(
    is.character(path), length(path) == 1L,
    is.numeric(train_frac), train_frac > 0, train_frac < 1
  )
  if (!dir.exists(path)) {
    stop("Data directory does not exist: ", path, call. = FALSE)
  }

  # write.csv() and save() fail when the output directory is missing.
  out_dir <- file.path(path, "RF_Models")
  dir.create(out_dir, showWarnings = FALSE)

  # First file whose *base name* contains `tag`, or NA when there is none.
  # Matching the base name instead of the full path keeps a directory such as
  # ".../climate_project/..." from turning every file into a "clim" match.
  first_match <- function(files, tag) {
    hits <- files[grepl(tag, basename(files), fixed = TRUE)]
    if (length(hits) == 0L) NA_character_ else hits[[1L]]
  }

  # seq_len() yields an empty loop for zero HUCs, where 1:length() gives c(1, 0).
  for (i in seq_len(length(hucs))) {
    huc_name <- gsub(" ", "", paste0(hucs$name[[i]]))

    # NOTE(review): `pattern` is a regular expression matched anywhere in the
    # file name, so a HUC whose name is contained in another HUC's name would
    # also pick up that HUC's files -- confirm the naming scheme rules it out.
    rast_files <- list.files(
      path = path, pattern = huc_name,
      full.names = TRUE, include.dirs = FALSE
    )
    pts_files <- list.files(
      path = file.path(path, "points"), pattern = huc_name,
      full.names = TRUE, include.dirs = FALSE
    )

    terr_file <- first_match(rast_files, "terr")
    spec_file <- first_match(rast_files, "spec")
    clim_file <- first_match(rast_files, "clim")

    if (length(pts_files) == 0L || anyNA(c(terr_file, spec_file, clim_file))) {
      warning(
        "Skipping HUC '", huc_name, "': training points or a terr/spec/clim ",
        "raster was not found under ", path,
        call. = FALSE
      )
      next
    }

    training_pts <- terra::vect(pts_files[[1L]])
    terr_rast <- terra::rast(terr_file)
    spec_rast <- terra::rast(spec_file)
    clim_rast <- terra::rast(clim_file)

    df_file <- file.path(out_dir, paste0(huc_name, "rf_df.csv"))
    cm_file <- file.path(out_dir, paste0(huc_name, "rf_model_testCM.csv"))
    model_file <- file.path(out_dir, paste0(huc_name, "rf_model.RData"))

    # The points are piped in as `y`; `bind = TRUE` attaches the extracted
    # values to the points so the three extractions accumulate columns.
    extracted <- training_pts |>
      terra::extract(x = terr_rast, bind = TRUE) |>
      terra::extract(x = spec_rast, bind = TRUE) |>
      terra::extract(x = clim_rast, bind = TRUE) |>
      as.data.frame() |>
      dplyr::mutate(class = as.factor(class)) |>
      na.exclude()
    write.csv(extracted, df_file)

    # Model on the table exactly as saved: read.csv() makes the column names
    # syntactic and resets the row names. `X` is the row-name column that
    # write.csv() added.
    pts_ext <- read.csv(df_file, stringsAsFactors = TRUE) |>
      dplyr::select(-X)

    n_obs <- nrow(pts_ext)
    train_index <- sample.int(n_obs, floor(train_frac * n_obs), replace = FALSE)
    train <- pts_ext[train_index, ]
    test <- pts_ext[-train_index, ]

    n_wet <- sum(train$class == "WET", na.rm = TRUE)
    if (n_wet == 0L || nrow(test) == 0L) {
      warning(
        "Skipping HUC '", huc_name, "': the split left no \"WET\" training ",
        "rows or no test rows (", n_obs, " usable points)",
        call. = FALSE
      )
      next
    }

    # Each tree draws as many rows as there are "WET" rows in the training
    # split (presumably to limit the dominance of the other class -- confirm).
    rf_model <- randomForest::randomForest(
      as.factor(class) ~ .,
      data = train,
      mtry = mtry,
      sampsize = n_wet,
      replace = TRUE,
      nodesize = 1,
      ntree = ntree,
      na.action = na.omit,
      importance = TRUE
    )

    test_predict <- predict(rf_model, newdata = test, type = "response")
    cm <- caret::confusionMatrix(test_predict, as.factor(test$class))

    # One row holding the overall statistics followed by the by-class ones.
    cmtocsv <- data.frame(cbind(t(cm$overall), t(cm$byClass)))
    write.csv(cmtocsv, file = cm_file)

    # Stored under the object name `rf_model`, which is what load() restores.
    save(rf_model, file = model_file)
  }

  invisible(NULL)
}


In [79]:
# Show the saved Willapa Bay training table without the row-name column `X`.
dplyr::select(read.csv("data/huc_data/RF_Models/WillapaBayrf_df.csv"), -X)

class,NASADEM_HGT_n49w125,wa_dev27,wa_dev3,wa_dev81,wa_dinfsca,wa_planc,wa_profc,wa_rugged,wa_slope,wa_twi,tree_canopy_cover,NDVI_median,MNDWI_median,EVI_median,SAVI_median,NDYI_median,PRISM_ppt_30yr_normal_800mM4_annual_asc,PRISM_tmean_30yr_normal_800mM4_annual_asc,PRISM_vpdmax_30yr_normal_800mM5_annual_asc
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
UPL,133.61749,-0.448040277,0.19003369,-0.10893796,23.64304,-38.921513,-5.4683523,1.4579096,3.9171553,5.844370,58,0.9066304,-0.4665410,0.5598455,0.5354946,0.4637725,2244.611,9.544688,8.143576
UPL,155.20795,0.283215314,-0.03954102,0.08954266,42.30442,34.893261,0.1698989,10.3180866,27.1091290,4.414465,58,0.9055085,-0.5126817,0.6726913,0.6137024,0.4106536,2039.214,10.140944,8.966765
UPL,68.80382,0.236647010,-0.01541678,0.76339841,23.64304,27.640148,3.2159915,1.3018215,3.2960603,6.017464,61,0.8884072,-0.4432457,0.5422187,0.5195574,0.4252861,2121.223,10.415134,6.795833
UPL,32.91496,-0.120091982,-0.04649486,-0.09224981,72.85970,66.251053,-7.1380959,4.3396287,11.9590702,5.840601,71,0.8774161,-0.4258616,0.5184364,0.4993690,0.4277071,2089.159,10.507970,6.645146
UPL,22.90202,0.321765453,-0.29542065,1.16392243,89.44859,54.192123,5.2654095,0.8310365,2.1920457,7.756568,67,0.8282877,-0.4125210,0.4203092,0.4080107,0.2758917,1929.628,10.383741,5.549780
UPL,256.57687,-0.309701890,-0.49268711,-0.23131222,309.67090,153.939667,11.5871830,2.6753097,6.3780303,7.926735,57,0.8722496,-0.5633449,0.5910003,0.5511211,0.3923204,2151.212,9.730947,8.968331
UPL,151.17870,0.327484548,0.38052917,0.74337298,47.28609,-179.600876,-6.5610490,3.2281992,8.0087433,5.817367,55,0.9094237,-0.4819886,0.6323696,0.5843077,0.4121353,2154.360,9.861394,7.962495
UPL,116.61768,-0.219294921,-0.09542012,-0.13739616,95.91204,-40.148834,15.1597271,3.4367521,9.1677504,6.387381,65,0.8802097,-0.4592847,0.5131344,0.4975280,0.4035520,2347.451,10.169700,8.382439
UPL,133.21484,0.496566951,0.39702713,0.97854477,338.70337,-191.009308,-8.6069202,2.5426164,6.2305126,8.039941,50,0.9144223,-0.4717989,0.6952219,0.6305297,0.4334228,2275.680,9.886416,7.745292
UPL,329.32935,0.076593921,0.05333271,-0.19032928,23.64304,7.657891,-4.9765115,6.1524467,16.7396412,4.364483,66,0.9053593,-0.4705703,0.5502989,0.5277288,0.4970152,3413.536,9.273639,8.310424


In [None]:
# Run the per-HUC workflow over every HUC unit.
wip_rf_func(hucs = wa_hucs, path = datapath)

In [82]:
# load() restores the saved object and returns its name; get() then fetches
# that object so it can be bound to `test`.
test <- "data/huc_data/RF_Models/WillapaBayrf_model.RData" |>
  load() |>
  get()

In [83]:
# Variable-importance matrix stored on the fitted model.
test[["importance"]]

Unnamed: 0,UPL,WET,MeanDecreaseAccuracy,MeanDecreaseGini
NASADEM_HGT_n49w125,0.094063428,0.3287113,0.156548526,439.00482
wa_dev27,0.014888491,0.01484818,0.0148791582,42.39771
wa_dev3,0.00534987,0.001145942,0.0042301004,19.4766
wa_dev81,0.025702194,0.05791476,0.0342833176,85.76355
wa_dinfsca,0.000189483,0.001061476,0.0004215264,10.03711
wa_planc,0.002674844,-0.0006500168,0.0017893549,19.35994
wa_profc,0.000767237,0.009241985,0.0030233495,23.50643
wa_rugged,0.015339858,0.02734251,0.0185418327,73.34279
wa_slope,0.011614965,0.02377309,0.0148523691,50.07008
wa_twi,0.002355124,0.01488189,0.0056911828,18.18619
