# Iteratively running random forest on WA HUC units using subsets of training data and raster stacks from full WA state

In [1]:
library(terra)
library(tidyterra)
library(dplyr)
library(randomForest)
library(caret)
set.seed(11)

terra 1.7.71


Attaching package: ‘tidyterra’


The following object is masked from ‘package:stats’:

    filter



Attaching package: ‘dplyr’


The following objects are masked from ‘package:terra’:

    intersect, union


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


randomForest 4.7-1.1

Type rfNews() to see new features/changes/bug fixes.


Attaching package: ‘randomForest’


The following object is masked from ‘package:dplyr’:

    combine




In [5]:
wa_shp <- vect("data/wa_shp_diss.gpkg")
wa_hucs <- vect("data/Hydrography/WA_HUC8.gpkg")

In [14]:
datapath <- paste0(getwd(), "/data/huc_data")
datapath

In [15]:
gsub(" ", "", paste0(wa_hucs$name[[1]]))


In [18]:
list_rasts <- as.list(list.files(path = datapath, 
                                 full.names = TRUE, 
                                 pattern = "Skykomish",
                                 include.dirs = FALSE))
list_rasts

list_pts <- as.list(list.files(path = paste0(datapath, "/points"), 
                                 full.names = TRUE, 
                                 pattern = "Skykomish",
                                 include.dirs = FALSE))
list_pts

In [26]:
terr_rast <- (list_rasts[grep('terr', (list_rasts))])[[1]]
(terr_rast)

deparse(substitute(terr_rast))

In [84]:
wip_rf_func <- function(hucs, path) {
    for(i in 1:length(hucs)){
        rasts <- as.list(list.files(path = path, 
                                     full.names = TRUE, 
                                     pattern = gsub(" ", "", paste0(hucs$name[[i]])),
                                     include.dirs = FALSE))
    
        pts <- as.list(list.files(path = paste0(path, "/points"), 
                                     full.names = TRUE, 
                                     pattern = gsub(" ", "", paste0(hucs$name[[i]])),
                                     include.dirs = FALSE))
      
        training_pts <- vect(pts[[1]])
        terr_rast <- rast((rasts[grep('terr', (rasts))])[[1]])
        spec_rast <- rast((rasts[grep('spec', (rasts))])[[1]])
        clim_rast <- rast((rasts[grep('clim', (rasts))])[[1]])
        
        pts_ext <-  training_pts |>
                terra::extract(x = terr_rast, bind = T) |> 
                terra::extract(x = spec_rast, bind = T) |> 
                terra::extract(x = clim_rast, bind = T) |> 
                as.data.frame() |> 
                dplyr::mutate(class = as.factor(class)) |>
                na.exclude() |>
                write.csv(paste0(path, "/RF_Models/", 
                                 gsub(" ", "", paste0(hucs$name[[i]])), "rf_df", ".csv"))
        
        pts_ext <- read.csv(paste0(path, "/RF_Models/", 
                                 gsub(" ", "", paste0(hucs$name[[i]])), "rf_df", ".csv"),
                            stringsAsFactors = T) |>
                    select(-X)

        train.index <- as.vector(sample(c(1:nrow(pts_ext)), 0.7*nrow(pts_ext), replace=F))
        train <- pts_ext[train.index, ]
        test <- pts_ext[-train.index, ]

        rf_model <- randomForest(as.factor(class) ~ ., mtry = 10, 
                         sampsize = nrow(train[train$class == "WET",]),
                         replace = TRUE, #weights = wetwt, 
                         nodesize =1,
                         ntree = 1000, na.action = na.omit,
                         importance = TRUE, data = train)
        
        test_predict <- predict(rf_model, newdata = test, type = "response") 
        cm <- caret::confusionMatrix(test_predict, as.factor(test$class))
        
        cmtocsv <- data.frame(cbind(t(cm$overall),t(cm$byClass)))
        write.csv(cmtocsv,file=paste0(path, "/RF_Models/",
                                          gsub(" ", "", paste0(hucs$name[[i]])), "rf_model_testCM", ".csv"))
        
        save(rf_model, file = paste0(path, "/RF_Models/",
                                          gsub(" ", "", paste0(hucs$name[[i]])), "rf_model", ".RData"))
        
        
    }
}


In [79]:
(read.csv("data/huc_data/RF_Models/WillapaBayrf_df.csv")) |> select(-X)

class,NASADEM_HGT_n49w125,wa_dev27,wa_dev3,wa_dev81,wa_dinfsca,wa_planc,wa_profc,wa_rugged,wa_slope,wa_twi,tree_canopy_cover,NDVI_median,MNDWI_median,EVI_median,SAVI_median,NDYI_median,PRISM_ppt_30yr_normal_800mM4_annual_asc,PRISM_tmean_30yr_normal_800mM4_annual_asc,PRISM_vpdmax_30yr_normal_800mM5_annual_asc
<fct>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
UPL,133.61749,-0.448040277,0.19003369,-0.10893796,23.64304,-38.921513,-5.4683523,1.4579096,3.9171553,5.844370,58,0.9066304,-0.4665410,0.5598455,0.5354946,0.4637725,2244.611,9.544688,8.143576
UPL,155.20795,0.283215314,-0.03954102,0.08954266,42.30442,34.893261,0.1698989,10.3180866,27.1091290,4.414465,58,0.9055085,-0.5126817,0.6726913,0.6137024,0.4106536,2039.214,10.140944,8.966765
UPL,68.80382,0.236647010,-0.01541678,0.76339841,23.64304,27.640148,3.2159915,1.3018215,3.2960603,6.017464,61,0.8884072,-0.4432457,0.5422187,0.5195574,0.4252861,2121.223,10.415134,6.795833
UPL,32.91496,-0.120091982,-0.04649486,-0.09224981,72.85970,66.251053,-7.1380959,4.3396287,11.9590702,5.840601,71,0.8774161,-0.4258616,0.5184364,0.4993690,0.4277071,2089.159,10.507970,6.645146
UPL,22.90202,0.321765453,-0.29542065,1.16392243,89.44859,54.192123,5.2654095,0.8310365,2.1920457,7.756568,67,0.8282877,-0.4125210,0.4203092,0.4080107,0.2758917,1929.628,10.383741,5.549780
UPL,256.57687,-0.309701890,-0.49268711,-0.23131222,309.67090,153.939667,11.5871830,2.6753097,6.3780303,7.926735,57,0.8722496,-0.5633449,0.5910003,0.5511211,0.3923204,2151.212,9.730947,8.968331
UPL,151.17870,0.327484548,0.38052917,0.74337298,47.28609,-179.600876,-6.5610490,3.2281992,8.0087433,5.817367,55,0.9094237,-0.4819886,0.6323696,0.5843077,0.4121353,2154.360,9.861394,7.962495
UPL,116.61768,-0.219294921,-0.09542012,-0.13739616,95.91204,-40.148834,15.1597271,3.4367521,9.1677504,6.387381,65,0.8802097,-0.4592847,0.5131344,0.4975280,0.4035520,2347.451,10.169700,8.382439
UPL,133.21484,0.496566951,0.39702713,0.97854477,338.70337,-191.009308,-8.6069202,2.5426164,6.2305126,8.039941,50,0.9144223,-0.4717989,0.6952219,0.6305297,0.4334228,2275.680,9.886416,7.745292
UPL,329.32935,0.076593921,0.05333271,-0.19032928,23.64304,7.657891,-4.9765115,6.1524467,16.7396412,4.364483,66,0.9053593,-0.4705703,0.5502989,0.5277288,0.4970152,3413.536,9.273639,8.310424


In [86]:
wip_rf_func(wa_hucs, datapath)

ERROR: Error in randomForest.default(m, y, ...): Need at least two classes to do classification.


In [87]:
test <- get(load("data/huc_data/RF_Models/WillapaBayrf_model.RData"))

In [88]:
test$importance

Unnamed: 0,UPL,WET,MeanDecreaseAccuracy,MeanDecreaseGini
NASADEM_HGT_n49w125,0.1002631975,0.3188382592,0.1596982278,459.18111
wa_dev27,0.0142798008,0.0173829764,0.0151229424,45.70231
wa_dev3,0.0043111442,0.0015163093,0.0035499356,20.12409
wa_dev81,0.0225123058,0.0611261281,0.0330131763,91.24544
wa_dinfsca,0.0001342427,0.001189427,0.0004209791,10.47788
wa_planc,0.0023218519,-0.0003667533,0.0015906154,20.08155
wa_profc,0.0002654276,0.0082915619,0.0024466569,23.4205
wa_rugged,0.0128132212,0.0236382001,0.0157540597,67.69128
wa_slope,0.0110536107,0.02938999,0.0160443028,48.8278
wa_twi,0.0026412402,0.0131051087,0.0054862797,18.04212


Combine all test confusion matrix dataframes and compare accuracies

In [91]:
read.csv("data/huc_data/RF_Models/WillapaBayrf_model_testCM.csv") 

X,Accuracy,Kappa,AccuracyLower,AccuracyUpper,AccuracyNull,AccuracyPValue,McnemarPValue,Sensitivity,Specificity,Pos.Pred.Value,Neg.Pred.Value,Precision,Recall,F1,Prevalence,Detection.Rate,Detection.Prevalence,Balanced.Accuracy
<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.9199668,0.7910422,0.9119483,0.9274725,0.7402032,7.738717e-224,0.4451772,0.9481793,0.839585,0.9439487,0.8504446,0.9439487,0.9481793,0.9460593,0.7402032,0.7018453,0.7435206,0.8938821


In [100]:
list_testacc <- as.list(list.files(path = paste0(datapath, "/RF_Models"), 
                                 full.names = TRUE, 
                                 pattern = "CM.csv",
                                 include.dirs = FALSE))

list_testacc

In [109]:
empty <- list()
for(i in 1:length(list_testacc)){
    csv <- read.csv(list_testacc[[i]])
    csv$X <- substr(list_testacc[[i]], 51, 60)
    empty[[i]] <- csv
}

In [112]:
testacc <- bind_rows(empty)
testacc

X,Accuracy,Kappa,AccuracyLower,AccuracyUpper,AccuracyNull,AccuracyPValue,McnemarPValue,Sensitivity,Specificity,Pos.Pred.Value,Neg.Pred.Value,Precision,Recall,F1,Prevalence,Detection.Rate,Detection.Prevalence,Balanced.Accuracy
<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
BanksLaker,0.9605374,0.6051966,0.9519224,0.9679947,0.9315701,1.035719e-09,1.028686e-14,0.9959441,0.4785276,0.962963,0.8965517,0.962963,0.9959441,0.9791759,0.9315701,0.9277918,0.9634761,0.7372359
ChiefJosep,0.9441784,0.7805233,0.9380959,0.9498284,0.8437655,2.62232e-130,1.053849e-05,0.9749462,0.7780127,0.9595454,0.8518519,0.9595454,0.9749462,0.9671845,0.8437655,0.8226259,0.857308,0.8764794
Colvillerf,0.9437934,0.8477297,0.9370706,0.9499957,0.7563059,5.231164e-279,0.8120554,0.9621849,0.8867156,0.9634499,0.8831169,0.9634499,0.9621849,0.962817,0.7563059,0.7277061,0.7553128,0.9244502
Duwamishrf,0.9034797,0.7174895,0.8909981,0.914972,0.7738194,1.182493e-62,0.018352,0.9475375,0.7527473,0.9291339,0.8074656,0.9291339,0.9475375,0.9382454,0.7738194,0.7332229,0.7891466,0.8501424
GraysHarbo,0.878309,0.671122,0.8649355,0.8908,0.7305413,1.188102e-73,1.975301e-13,0.9518659,0.6788856,0.889338,0.8387681,0.889338,0.9518659,0.9195402,0.7305413,0.6953773,0.7819044,0.8153758
Hangmanrf_,0.9222077,0.787787,0.9105003,0.932826,0.7461973,2.3516830000000002e-105,2.843681e-05,0.9644729,0.7979452,0.9334837,0.8842505,0.9334837,0.9644729,0.9487253,0.7461973,0.7196871,0.7709691,0.8812091
Hoh-Quilla,0.8956841,0.6513291,0.8874746,0.9034876,0.8041237,6.539836e-79,1.384797e-09,0.9513255,0.6672614,0.9214902,0.7695473,0.9214902,0.9513255,0.9361702,0.8041237,0.7649834,0.830159,0.8092934
Klickitatr,0.9165961,0.7671293,0.9099151,0.922932,0.7507762,2.695856e-284,1.452845e-20,0.9657895,0.7684032,0.9262665,0.8817414,0.9262665,0.9657895,0.9456152,0.7507762,0.7250917,0.7828112,0.8670963
LakeChelan,0.9816316,0.7697458,0.976771,0.9857087,0.9540789,1.0657189999999999e-19,0.0001703879,0.9949037,0.7058824,0.9859708,0.8695652,0.9859708,0.9949037,0.9904171,0.9540789,0.9492166,0.9627229,0.850393
LakeWashin,0.9399424,0.6491842,0.9297499,0.9490581,0.8893459,4.78491e-18,6.231088e-11,0.9847364,0.5799257,0.9495986,0.8253968,0.9495986,0.9847364,0.9668483,0.8893459,0.8757713,0.9222542,0.782331
