Before the dataset was loaded into R, it was pre-processed in Excel. Each raw elemental intensity was divided by the value in the Rh column, which scales every element relative to Rh, and the result was stored in a new column for each element (e.g. `Ag.Rh`). Then, all amounts less than or equal to 0 were replaced with 0.0001 to avoid any issues with NaNs in the dataset.

# Load libraries

In [3]:
# Package setup for this notebook.
# Each package is installed only when it cannot be loaded, so re-running this
# cell does not download everything again. requireNamespace() attempts to load
# the namespace, so an installation that fails to load is still reinstalled.
# An explicit HTTPS CRAN mirror is passed to every install.packages() call so
# the cell also works in sessions where no default mirror has been configured.
cran_repo <- "https://cloud.r-project.org"

# vctrs was giving errors, so install it fresh whenever it fails to load
if (!requireNamespace("vctrs", quietly = TRUE)) {
  install.packages("vctrs", dependencies = TRUE, repos = cran_repo)
}
library(vctrs)

# diceR is used to run ensemble clustering, which generates interim labels
# for unknown samples
if (!requireNamespace("diceR", quietly = TRUE)) {
  install.packages("diceR", dependencies = TRUE, repos = cran_repo)
}
library(diceR)

# Biobase was added after error messages appeared without it; it is a
# Bioconductor package, so it is installed through BiocManager.
# NOTE(review): presumably required by one of diceR's dependencies - confirm.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager", repos = cran_repo)
}
if (!requireNamespace("Biobase", quietly = TRUE)) {
  # update/ask are disabled so the call never waits for interactive input
  BiocManager::install("Biobase", update = FALSE, ask = FALSE)
}
library(Biobase)

# randomForest is the classification system which identifies unknown samples
if (!requireNamespace("randomForest", quietly = TRUE)) {
  install.packages("randomForest", dependencies = TRUE, repos = cran_repo)
}
library(randomForest)


The downloaded binary packages are in
	/var/folders/pr/t9901z0n4z7dbkstysncwp0m0000gn/T//Rtmpy7T7Wb/downloaded_packages

The downloaded binary packages are in
	/var/folders/pr/t9901z0n4z7dbkstysncwp0m0000gn/T//Rtmpy7T7Wb/downloaded_packages


Bioconductor version 3.11 (BiocManager 1.30.10), R 4.0.2 (2020-06-22)

Installing package(s) 'Biobase'

also installing the dependency ‘BiocGenerics’





The downloaded binary packages are in
	/var/folders/pr/t9901z0n4z7dbkstysncwp0m0000gn/T//Rtmpy7T7Wb/downloaded_packages


Old packages: 'backports', 'callr', 'covr', 'devtools', 'DT', 'fs', 'glue',
  'IRkernel', 'jsonlite', 'knitr', 'MASS', 'mgcv', 'nlme', 'openssl',
  'pkgbuild', 'processx', 'ps', 'RcppArmadillo', 'remotes', 'stringi',
  'survival', 'sys', 'usethis', 'withr', 'xfun'

Loading required package: BiocGenerics

Loading required package: parallel


Attaching package: ‘BiocGenerics’


The following objects are masked from ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB


The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs


The following objects are masked from ‘package:base’:

    anyDuplicated, append, as.data.frame, basename, cbind, colnames,
    dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
    grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
    order, paste, pmax, pmax.int,


The downloaded binary packages are in
	/var/folders/pr/t9901z0n4z7dbkstysncwp0m0000gn/T//Rtmpy7T7Wb/downloaded_packages


randomForest 4.6-14

Type rfNews() to see new features/changes/bug fixes.


Attaching package: ‘randomForest’


The following object is masked from ‘package:Biobase’:

    combine


The following object is masked from ‘package:BiocGenerics’:

    combine




# Load train dataset

In [4]:
# Read the pre-processed sample table; read.csv() treats the first row as the
# column names by default
train <- read.csv(file = "AllSamples.csv")
# Show the first rows to check the import
head(train, n = 6L)

Unnamed: 0_level_0,X,is_known,Vis,Ag,Al,As,Au,Ca,Cu,Fe,⋯,Si.Rh,Sn.Rh,Sr.Rh,Th.Rh,Ti.Rh,U.Rh,V.Rh,Y.Rh,Zn.Rh,Zr.Rh
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<int>,<dbl>,<dbl>,<dbl>,<int>,<int>,<int>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,71.11.1-89.01,Guess,Gravel Cherts,1368,1,0.0001,54,1515,331,5335,⋯,0.0623453,0.030244431,0.13691213,0.0232828,0.015702351,0.027769183,0.005491955,0.00363552,0.090423886,7.74e-09
2,71.11.1-89.02,Guess,Gravel Cherts,1269,15,55.0,25,682,300,2670,⋯,0.06373444,0.019170124,0.18995851,0.01294606,0.013609959,0.016431535,0.006887967,8.3e-05,0.07186722,8.3e-09
3,71.11.1-89.03,Guess,Gravel Cherts,1165,32,4.0,46,607,523,6119,⋯,0.05545573,0.010944136,0.09882391,0.007350539,0.018703038,0.008330611,0.005308723,0.004573669,0.039692911,0.002531852
4,71.11.1-89.04,Guess,Gravel Cherts,1299,40,42.0,45,338,410,5307,⋯,0.06011742,0.010667328,0.04589432,0.01174233,0.021582734,0.024559663,0.001901927,0.011080791,0.032746217,0.004796163
5,71.11.1-89.05,Guess,Gravel Cherts,1724,35,0.0001,51,201,301,1745,⋯,0.07111721,0.002502422,0.08411366,0.006780756,0.010251857,0.010494026,0.005247013,0.012431385,0.009283177,8.07e-09
6,71.11.1-89.06,Guess,Gravel Cherts,1246,139,159.0,18,188,292,4858,⋯,0.07108405,0.009646556,0.09575393,7.91e-09,0.008381434,0.006167471,0.001897683,7.91e-05,0.014627975,0.00585119


In [5]:
# Keep the three leading metadata columns (sample id, is_known, Vis) together
# with columns 32 to 58, which hold the elemental intensities used for
# clustering
train_df <- train[, c(1:3, 32:58)]
# The sample id column is called "X" after import; give it a descriptive name
colnames(train_df)[1] <- "sample_id"
# Preview the reduced table
head(train_df, n = 6L)

Unnamed: 0_level_0,sample_id,is_known,Ag.Rh,Al.Rh,As.Rh,Au.Rh,Ca.Rh,Cu.Rh,Fe.Rh,K.Rh,⋯,Si.Rh,Sn.Rh,Sr.Rh,Th.Rh,Ti.Rh,U.Rh,V.Rh,Y.Rh,Zn.Rh,Zr.Rh
Unnamed: 0_level_1,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,71.11.1-89.01,Guess,0.10581683,7.74e-05,7.74e-09,0.00417698,0.1171875,0.02560334,0.4126702,0.010597153,⋯,0.0623453,0.030244431,0.13691213,0.0232828,0.015702351,0.027769183,0.005491955,0.00363552,0.090423886,7.74e-09
2,71.11.1-89.02,Guess,0.1053112,0.001244813,0.004564315,0.002074689,0.05659751,0.02489627,0.2215768,0.005394191,⋯,0.06373444,0.019170124,0.18995851,0.01294606,0.013609959,0.016431535,0.006887967,8.3e-05,0.07186722,8.3e-09
3,71.11.1-89.03,Guess,0.09514864,0.002613525,0.000326691,0.003756942,0.0495753,0.0427148,0.499755,0.015436132,⋯,0.05545573,0.010944136,0.09882391,0.007350539,0.018703038,0.008330611,0.005308723,0.004573669,0.039692911,0.002531852
4,71.11.1-89.04,Guess,0.10741751,0.003307699,0.003473084,0.003721161,0.02795005,0.03390391,0.4388489,0.013644257,⋯,0.06011742,0.010667328,0.04589432,0.01174233,0.021582734,0.024559663,0.001901927,0.011080791,0.032746217,0.004796163
5,71.11.1-89.05,Guess,0.13916694,0.002825315,8.07e-09,0.004116887,0.01622538,0.02429771,0.1408621,8.07e-05,⋯,0.07111721,0.002502422,0.08411366,0.006780756,0.010251857,0.010494026,0.005247013,0.012431385,0.009283177,8.07e-09
6,71.11.1-89.06,Guess,0.09852139,0.010990749,0.01257215,0.001423262,0.01486518,0.02308848,0.3841227,0.001265122,⋯,0.07108405,0.009646556,0.09575393,7.91e-09,0.008381434,0.006167471,0.001897683,7.91e-05,0.014627975,0.00585119


# Run ensemble clustering on train dataset

For this portion we used diceR to run consensus clustering with the following settings:
- nk = 2:15, i.e. every number of clusters from 2 to 15, to be sure we captured all possible groupings
- reps = 1, because we found that repeating the same clustering did not change the result significantly
- The algorithms are split across several calls to reduce processing time, so at most four algorithm names are passed in each command (note that "nmf" is run as two variants, NMF_Brunet and NMF_Lee).

In [6]:
# First batch of clusterings: NMF, hierarchical, DIANA and k-means on the
# intensity columns (4 to 30) for every k from 2 to 15, with p.item = 1 and a
# single repetition
cluster1 <- consensus_cluster(
  data = train_df[, 4:30],
  nk = 2:15,
  p.item = 1,
  reps = 1,
  algorithms = c("nmf", "hc", "diana", "km"),
  scale = FALSE
)

New names:
* Ag.Rh -> Ag.Rh...1
* Al.Rh -> Al.Rh...2
* As.Rh -> As.Rh...3
* Au.Rh -> Au.Rh...4
* Ca.Rh -> Ca.Rh...5
* ...


Clustering Algorithm 1 of 5: NMF_Brunet (k = 3) [--------------]   3% eta:  6m

Clustering Algorithm 1 of 5: NMF_Brunet (k = 4) [>-------------]   4% eta: 12m

Clustering Algorithm 1 of 5: NMF_Brunet (k = 5) [>-------------]   6% eta: 13m

Clustering Algorithm 1 of 5: NMF_Brunet (k = 6) [>-------------]   7% eta: 13m

Clustering Algorithm 1 of 5: NMF_Brunet (k = 7) [>-------------]   9% eta: 14m

Clustering Algorithm 1 of 5: NMF_Brunet (k = 8) [>-------------]  10% eta: 14m

Clustering Algorithm 1 of 5: NMF_Brunet (k = 9) [=>------------]  11% eta: 14m

Clustering Algorithm 1 of 5: NMF_Brunet (k = 10) [=>-----------]  13% eta: 15m

Clustering Algorithm 1 of 5: NMF_Brunet (k = 11) [=>-----------]  14% eta: 15m

Clustering Algorithm 1 of 5: NMF_Brunet (k = 12) [=>-----------]  16% eta: 15m

Clustering Algorithm 1 of 5: NMF_Brunet (k = 13) [=>-----------]  17% eta: 1

In [7]:
# Second batch of clusterings: PAM, affinity propagation and spectral
# clustering on the intensity columns (4 to 30) for every k from 2 to 15, with
# p.item = 1 and a single repetition
cluster2 <- consensus_cluster(
  data = train_df[, 4:30],
  nk = 2:15,
  p.item = 1,
  reps = 1,
  algorithms = c("pam", "ap", "sc"),
  scale = FALSE
)


Clustering Algorithm 1 of 3: PAM_Euclidean (k = 4) [>----------]   7% eta:  4s

Clustering Algorithm 1 of 3: PAM_Euclidean (k = 5) [>----------]  10% eta:  5s

Clustering Algorithm 1 of 3: PAM_Euclidean (k = 6) [>----------]  12% eta:  5s

Clustering Algorithm 1 of 3: PAM_Euclidean (k = 7) [=>---------]  14% eta:  6s

Clustering Algorithm 1 of 3: PAM_Euclidean (k = 8) [=>---------]  17% eta:  7s

Clustering Algorithm 1 of 3: PAM_Euclidean (k = 9) [=>---------]  19% eta:  8s

Clustering Algorithm 1 of 3: PAM_Euclidean (k = 10) [=>--------]  21% eta: 10s

Clustering Algorithm 1 of 3: PAM_Euclidean (k = 11) [=>--------]  24% eta: 11s

Clustering Algorithm 1 of 3: PAM_Euclidean (k = 12) [==>-------]  26% eta: 12s

Clustering Algorithm 1 of 3: PAM_Euclidean (k = 13) [==>-------]  29% eta: 15s

Clustering Algorithm 1 of 3: PAM_Euclidean (k = 14) [==>-------]  31% eta: 19s

Clustering Algorithm 1 of 3: PAM_Euclidean (k = 15) [==>-------]  33% eta: 22s
































In [8]:
# Third batch of clusterings: biclustering (block), self-organising map and
# fuzzy c-means on the intensity columns (4 to 30) for every k from 2 to 15,
# with p.item = 1 and a single repetition
cluster3 <- consensus_cluster(
  data = train_df[, 4:30],
  nk = 2:15,
  p.item = 1,
  reps = 1,
  algorithms = c("block", "som", "cmeans"),
  scale = FALSE
)


Clustering Algorithm 1 of 3: BLOCK (k = 3) [>------------------]   5% eta:  5s

Clustering Algorithm 1 of 3: BLOCK (k = 4) [>------------------]   7% eta:  7s

Clustering Algorithm 1 of 3: BLOCK (k = 5) [=>-----------------]  10% eta:  9s

Clustering Algorithm 1 of 3: BLOCK (k = 6) [=>-----------------]  12% eta: 11s

Clustering Algorithm 1 of 3: BLOCK (k = 7) [==>----------------]  14% eta: 16s

Clustering Algorithm 1 of 3: BLOCK (k = 8) [==>----------------]  17% eta: 23s

Clustering Algorithm 1 of 3: BLOCK (k = 9) [===>---------------]  19% eta: 33s

Clustering Algorithm 1 of 3: BLOCK (k = 10) [===>--------------]  21% eta: 44s

Clustering Algorithm 1 of 3: BLOCK (k = 11) [===>--------------]  24% eta:  1m

Clustering Algorithm 1 of 3: BLOCK (k = 12) [====>-------------]  26% eta:  1m

Clustering Algorithm 1 of 3: BLOCK (k = 13) [====>-------------]  29% eta:  1m

Clustering Algorithm 1 of 3: BLOCK (k = 14) [=====>------------]  31% eta:  1m

Clustering Algorithm 1 of 3: BLOCK (k =

# Choose a clustering method

In this section we examine the results of each clustering algorithm and choose the one that best keeps each of our 'known' groups together while separating the groups from one another. Before this can be done, however, the results of the clustering step need to be turned into readable data frames.

In [54]:
# Slice one label table per algorithm out of the clustering results.
# The third index selects the algorithm by name and the fourth keeps all 14
# cluster counts (k = 2 to 15).

# algorithms run in cluster1
brunet <- cluster1[, , "NMF_Brunet", seq_len(14)]
lee <- cluster1[, , "NMF_Lee", seq_len(14)]
hc <- cluster1[, , "HC_Euclidean", seq_len(14)]
diana <- cluster1[, , "DIANA_Euclidean", seq_len(14)]
km <- cluster1[, , "KM", seq_len(14)]

# algorithms run in cluster2
pam <- cluster2[, , "PAM_Euclidean", seq_len(14)]
ap <- cluster2[, , "AP", seq_len(14)]
sc <- cluster2[, , "SC", seq_len(14)]

# algorithms run in cluster3
block <- cluster3[, , "BLOCK", seq_len(14)]
som <- cluster3[, , "SOM", seq_len(14)]
cmeans <- cluster3[, , "CMEANS", seq_len(14)]

In [55]:
# Inspect the first rows of the NMF_Brunet label table
head(brunet, n = 6L)

Unnamed: 0,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1,2,1,1,5,5,5,5,5,1,1,5,1,1,12
2,2,1,1,5,5,5,5,5,1,1,1,2,9,1
3,2,1,1,5,5,5,5,8,1,1,1,1,1,1
4,2,1,1,5,5,5,5,5,1,1,1,2,9,1
5,2,1,1,5,5,5,5,5,1,1,1,2,9,1
6,2,1,1,5,5,5,5,5,1,1,1,2,9,1


In [56]:
# Every cell of these tables is the number of the group a sample was assigned
# to. Prefixing the numbers with "Group_" turns them into strings, so that the
# crosstabs below count them instead of summing them.
brunet[, 1:14] <- paste0("Group_", brunet[, 1:14])
lee[, 1:14] <- paste0("Group_", lee[, 1:14])
hc[, 1:14] <- paste0("Group_", hc[, 1:14])
diana[, 1:14] <- paste0("Group_", diana[, 1:14])
km[, 1:14] <- paste0("Group_", km[, 1:14])
pam[, 1:14] <- paste0("Group_", pam[, 1:14])
ap[, 1:14] <- paste0("Group_", ap[, 1:14])
sc[, 1:14] <- paste0("Group_", sc[, 1:14])
block[, 1:14] <- paste0("Group_", block[, 1:14])
som[, 1:14] <- paste0("Group_", som[, 1:14])
cmeans[, 1:14] <- paste0("Group_", cmeans[, 1:14])
head(brunet, n = 6L)

Unnamed: 0,2,3,4,5,6,7,8,9,10,11,12,13,14,15
1,Group_2,Group_1,Group_1,Group_5,Group_5,Group_5,Group_5,Group_5,Group_1,Group_1,Group_5,Group_1,Group_1,Group_12
2,Group_2,Group_1,Group_1,Group_5,Group_5,Group_5,Group_5,Group_5,Group_1,Group_1,Group_1,Group_2,Group_9,Group_1
3,Group_2,Group_1,Group_1,Group_5,Group_5,Group_5,Group_5,Group_8,Group_1,Group_1,Group_1,Group_1,Group_1,Group_1
4,Group_2,Group_1,Group_1,Group_5,Group_5,Group_5,Group_5,Group_5,Group_1,Group_1,Group_1,Group_2,Group_9,Group_1
5,Group_2,Group_1,Group_1,Group_5,Group_5,Group_5,Group_5,Group_5,Group_1,Group_1,Group_1,Group_2,Group_9,Group_1
6,Group_2,Group_1,Group_1,Group_5,Group_5,Group_5,Group_5,Group_5,Group_1,Group_1,Group_1,Group_2,Group_9,Group_1


In [57]:
# Convert every label matrix into a data frame under the same name, so that
# extra columns can be attached to it afterwards
for (label_name in c("brunet", "lee", "hc", "diana", "km", "pam", "ap", "sc",
                     "block", "som", "cmeans")) {
  assign(label_name, as.data.frame(get(label_name)))
}
# drop the loop variable so the workspace holds the same objects as before
rm(label_name)

head(brunet, n = 6L)

Unnamed: 0_level_0,2,3,4,5,6,7,8,9,10,11,12,13,14,15
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,Group_2,Group_1,Group_1,Group_5,Group_5,Group_5,Group_5,Group_5,Group_1,Group_1,Group_5,Group_1,Group_1,Group_12
2,Group_2,Group_1,Group_1,Group_5,Group_5,Group_5,Group_5,Group_5,Group_1,Group_1,Group_1,Group_2,Group_9,Group_1
3,Group_2,Group_1,Group_1,Group_5,Group_5,Group_5,Group_5,Group_8,Group_1,Group_1,Group_1,Group_1,Group_1,Group_1
4,Group_2,Group_1,Group_1,Group_5,Group_5,Group_5,Group_5,Group_5,Group_1,Group_1,Group_1,Group_2,Group_9,Group_1
5,Group_2,Group_1,Group_1,Group_5,Group_5,Group_5,Group_5,Group_5,Group_1,Group_1,Group_1,Group_2,Group_9,Group_1
6,Group_2,Group_1,Group_1,Group_5,Group_5,Group_5,Group_5,Group_5,Group_1,Group_1,Group_1,Group_2,Group_9,Group_1


In [61]:
# Attach the is_known flag and the visual identification (Vis) from the
# original table to every label data frame; they become columns 15 and 16
for (label_name in c("brunet", "lee", "hc", "diana", "km", "pam", "ap", "sc",
                     "block", "som", "cmeans")) {
  label_table <- get(label_name)
  label_table$is_known <- train$is_known
  label_table$vis <- train$Vis
  assign(label_name, label_table)
}
# drop the loop helpers so the workspace holds the same objects as before
rm(label_name, label_table)

head(brunet, n = 6L)

Unnamed: 0_level_0,2,3,4,5,6,7,8,9,10,11,12,13,14,15,is_known,vis
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,Group_2,Group_1,Group_1,Group_5,Group_5,Group_5,Group_5,Group_5,Group_1,Group_1,Group_5,Group_1,Group_1,Group_12,Guess,Gravel Cherts
2,Group_2,Group_1,Group_1,Group_5,Group_5,Group_5,Group_5,Group_5,Group_1,Group_1,Group_1,Group_2,Group_9,Group_1,Guess,Gravel Cherts
3,Group_2,Group_1,Group_1,Group_5,Group_5,Group_5,Group_5,Group_8,Group_1,Group_1,Group_1,Group_1,Group_1,Group_1,Guess,Gravel Cherts
4,Group_2,Group_1,Group_1,Group_5,Group_5,Group_5,Group_5,Group_5,Group_1,Group_1,Group_1,Group_2,Group_9,Group_1,Guess,Gravel Cherts
5,Group_2,Group_1,Group_1,Group_5,Group_5,Group_5,Group_5,Group_5,Group_1,Group_1,Group_1,Group_2,Group_9,Group_1,Guess,Gravel Cherts
6,Group_2,Group_1,Group_1,Group_5,Group_5,Group_5,Group_5,Group_5,Group_1,Group_1,Group_1,Group_2,Group_9,Group_1,Guess,Gravel Cherts


In [74]:
# Choose the best performing option by crosstab.
# Count the "Known" samples per k = 2 NMF_Brunet group (column 1) and visual
# identification (column 16), as a long table with one row per pair.
brunet_2_raw <- as.data.frame(
  ftable(brunet[brunet$is_known == "Known", c(1, 16)])
)
brunet_2_raw

X2,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,1
Group_2,Alibates,22
Group_1,Edwards Plateau Chert,13
Group_2,Edwards Plateau Chert,59
Group_1,El Sauz Chert,4
Group_2,El Sauz Chert,271
Group_1,Knife River Flint,5
Group_2,Knife River Flint,7
Group_1,Oman Chert,1
Group_2,Oman Chert,20


In [76]:
# Count the "Known" samples per k = 3 NMF_Brunet group (column 2) and visual
# identification (column 16), as a long table with one row per pair.
brunet_3_raw <- as.data.frame(
  ftable(brunet[brunet$is_known == "Known", c(2, 16)])
)
brunet_3_raw

X3,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,22
Group_2,Alibates,0
Group_3,Alibates,1
Group_1,Edwards Plateau Chert,53
Group_2,Edwards Plateau Chert,6
Group_3,Edwards Plateau Chert,13
Group_1,El Sauz Chert,147
Group_2,El Sauz Chert,121
Group_3,El Sauz Chert,7
Group_1,Knife River Flint,7


In [77]:
# Count the "Known" samples per k = 4 NMF_Brunet group (column 3) and visual
# identification (column 16), as a long table with one row per pair.
brunet_4_raw <- as.data.frame(
  ftable(brunet[brunet$is_known == "Known", c(3, 16)])
)
brunet_4_raw

X4,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,21
Group_2,Alibates,0
Group_3,Alibates,1
Group_4,Alibates,1
Group_1,Edwards Plateau Chert,51
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,11
Group_4,Edwards Plateau Chert,10
Group_1,El Sauz Chert,144
Group_2,El Sauz Chert,114


In [78]:
# Count the "Known" samples per k = 5 NMF_Brunet group (column 4) and visual
# identification (column 16), as a long table with one row per pair.
brunet_5_raw <- as.data.frame(
  ftable(brunet[brunet$is_known == "Known", c(4, 16)])
)
brunet_5_raw

X5,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,2
Group_4,Alibates,0
Group_5,Alibates,21
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,12
Group_4,Edwards Plateau Chert,8
Group_5,Edwards Plateau Chert,52


In [79]:
# Count the "Known" samples per k = 6 NMF_Brunet group (column 5) and visual
# identification (column 16), as a long table with one row per pair.
brunet_6_raw <- as.data.frame(
  ftable(brunet[brunet$is_known == "Known", c(5, 16)])
)
brunet_6_raw

X6,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,1
Group_2,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,1
Group_5,Alibates,20
Group_6,Alibates,1
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,13
Group_4,Edwards Plateau Chert,7


In [80]:
# Count the "Known" samples per k = 7 NMF_Brunet group (column 6) and visual
# identification (column 16), as a long table with one row per pair.
brunet_7_raw <- as.data.frame(
  ftable(brunet[brunet$is_known == "Known", c(6, 16)])
)
brunet_7_raw

X7,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,1
Group_4,Alibates,1
Group_5,Alibates,20
Group_7,Alibates,1
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,13
Group_4,Edwards Plateau Chert,8


In [81]:
# Count the "Known" samples per k = 8 NMF_Brunet group (column 7) and visual
# identification (column 16), as a long table with one row per pair.
brunet_8_raw <- as.data.frame(
  ftable(brunet[brunet$is_known == "Known", c(7, 16)])
)
brunet_8_raw

X8,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,1
Group_3,Alibates,1
Group_4,Alibates,0
Group_5,Alibates,20
Group_6,Alibates,1
Group_7,Alibates,0
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,12


In [82]:
# Count the "Known" samples per k = 9 NMF_Brunet group (column 8) and visual
# identification (column 16), as a long table with one row per pair.
brunet_9_raw <- as.data.frame(
  ftable(brunet[brunet$is_known == "Known", c(8, 16)])
)
brunet_9_raw

X9,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,0
Group_5,Alibates,19
Group_6,Alibates,3
Group_7,Alibates,0
Group_8,Alibates,1
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0


In [83]:
# Count the "Known" samples per k = 10 NMF_Brunet group (column 9) and visual
# identification (column 16), as a long table with one row per pair.
brunet_10_raw <- as.data.frame(
  ftable(brunet[brunet$is_known == "Known", c(9, 16)])
)
brunet_10_raw

X10,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,17
Group_10,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,1
Group_4,Alibates,0
Group_5,Alibates,2
Group_6,Alibates,2
Group_7,Alibates,0
Group_9,Alibates,1
Group_1,Edwards Plateau Chert,47


In [84]:
# Count the "Known" samples per k = 11 NMF_Brunet group (column 10) and visual
# identification (column 16), as a long table with one row per pair.
brunet_11_raw <- as.data.frame(
  ftable(brunet[brunet$is_known == "Known", c(10, 16)])
)
brunet_11_raw

X11,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,16
Group_10,Alibates,0
Group_11,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,0
Group_5,Alibates,5
Group_6,Alibates,1
Group_7,Alibates,0
Group_8,Alibates,1


In [85]:
# Count the "Known" samples per k = 12 NMF_Brunet group (column 11) and visual
# identification (column 16), as a long table with one row per pair.
brunet_12_raw <- as.data.frame(
  ftable(brunet[brunet$is_known == "Known", c(11, 16)])
)
brunet_12_raw

X12,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,12
Group_10,Alibates,0
Group_11,Alibates,0
Group_12,Alibates,0
Group_2,Alibates,0
Group_4,Alibates,1
Group_5,Alibates,9
Group_6,Alibates,0
Group_7,Alibates,1
Group_8,Alibates,0


In [86]:
# Count the "Known" samples per k = 13 NMF_Brunet group (column 12) and visual
# identification (column 16), as a long table with one row per pair.
brunet_13_raw <- as.data.frame(
  ftable(brunet[brunet$is_known == "Known", c(12, 16)])
)
brunet_13_raw

X13,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,7
Group_10,Alibates,1
Group_11,Alibates,0
Group_12,Alibates,0
Group_13,Alibates,1
Group_2,Alibates,10
Group_4,Alibates,0
Group_5,Alibates,2
Group_6,Alibates,2
Group_7,Alibates,0


In [87]:
# Count the "Known" samples per k = 14 NMF_Brunet group (column 13) and visual
# identification (column 16), as a long table with one row per pair.
brunet_14_raw <- as.data.frame(
  ftable(brunet[brunet$is_known == "Known", c(13, 16)])
)
brunet_14_raw

X14,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,13
Group_10,Alibates,0
Group_11,Alibates,0
Group_12,Alibates,0
Group_13,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,4
Group_5,Alibates,0
Group_6,Alibates,0
Group_7,Alibates,1


In [88]:
# Count the "Known" samples per k = 15 NMF_Brunet group (column 14) and visual
# identification (column 16), as a long table with one row per pair.
brunet_15_raw <- as.data.frame(
  ftable(brunet[brunet$is_known == "Known", c(14, 16)])
)
brunet_15_raw

X15,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,11
Group_10,Alibates,0
Group_11,Alibates,0
Group_12,Alibates,4
Group_13,Alibates,0
Group_14,Alibates,0
Group_2,Alibates,1
Group_4,Alibates,6
Group_5,Alibates,0
Group_6,Alibates,0


In [74]:
# Choose the best performing option by crosstab.
# Count the "Known" samples per k = 2 NMF_Lee group (column 1) and visual
# identification (column 16), as a long table with one row per pair.
lee_2_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(1, 16)])
)
lee_2_raw

X2,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,1
Group_2,Alibates,22
Group_1,Edwards Plateau Chert,13
Group_2,Edwards Plateau Chert,59
Group_1,El Sauz Chert,4
Group_2,El Sauz Chert,271
Group_1,Knife River Flint,5
Group_2,Knife River Flint,7
Group_1,Oman Chert,1
Group_2,Oman Chert,20


In [76]:
# Count the "Known" samples per k = 3 NMF_Lee group (column 2) and visual
# identification (column 16), as a long table with one row per pair.
lee_3_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(2, 16)])
)
lee_3_raw

X3,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,22
Group_2,Alibates,0
Group_3,Alibates,1
Group_1,Edwards Plateau Chert,53
Group_2,Edwards Plateau Chert,6
Group_3,Edwards Plateau Chert,13
Group_1,El Sauz Chert,147
Group_2,El Sauz Chert,121
Group_3,El Sauz Chert,7
Group_1,Knife River Flint,7


In [77]:
# Count the "Known" samples per k = 4 NMF_Lee group (column 3) and visual
# identification (column 16), as a long table with one row per pair.
lee_4_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(3, 16)])
)
lee_4_raw

X4,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,21
Group_2,Alibates,0
Group_3,Alibates,1
Group_4,Alibates,1
Group_1,Edwards Plateau Chert,51
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,11
Group_4,Edwards Plateau Chert,10
Group_1,El Sauz Chert,144
Group_2,El Sauz Chert,114


In [78]:
# Count the "Known" samples per k = 5 NMF_Lee group (column 4) and visual
# identification (column 16), as a long table with one row per pair.
lee_5_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(4, 16)])
)
lee_5_raw

X5,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,2
Group_4,Alibates,0
Group_5,Alibates,21
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,12
Group_4,Edwards Plateau Chert,8
Group_5,Edwards Plateau Chert,52


In [79]:
# Count the "Known" samples per k = 6 NMF_Lee group (column 5) and visual
# identification (column 16), as a long table with one row per pair.
lee_6_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(5, 16)])
)
lee_6_raw

X6,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,1
Group_2,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,1
Group_5,Alibates,20
Group_6,Alibates,1
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,13
Group_4,Edwards Plateau Chert,7


In [80]:
# Count the "Known" samples per k = 7 NMF_Lee group (column 6) and visual
# identification (column 16), as a long table with one row per pair.
lee_7_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(6, 16)])
)
lee_7_raw

X7,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,1
Group_4,Alibates,1
Group_5,Alibates,20
Group_7,Alibates,1
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,13
Group_4,Edwards Plateau Chert,8


In [81]:
# Count the "Known" samples per k = 8 NMF_Lee group (column 7) and visual
# identification (column 16), as a long table with one row per pair.
lee_8_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(7, 16)])
)
lee_8_raw

X8,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,1
Group_3,Alibates,1
Group_4,Alibates,0
Group_5,Alibates,20
Group_6,Alibates,1
Group_7,Alibates,0
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,12


In [82]:
# Count the "Known" samples per k = 9 NMF_Lee group (column 8) and visual
# identification (column 16), as a long table with one row per pair.
lee_9_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(8, 16)])
)
lee_9_raw

X9,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,0
Group_5,Alibates,19
Group_6,Alibates,3
Group_7,Alibates,0
Group_8,Alibates,1
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0


In [83]:
# Count the "Known" samples per k = 10 NMF_Lee group (column 9) and visual
# identification (column 16), as a long table with one row per pair.
lee_10_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(9, 16)])
)
lee_10_raw

X10,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,17
Group_10,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,1
Group_4,Alibates,0
Group_5,Alibates,2
Group_6,Alibates,2
Group_7,Alibates,0
Group_9,Alibates,1
Group_1,Edwards Plateau Chert,47


In [84]:
# Count the "Known" samples per k = 11 NMF_Lee group (column 10) and visual
# identification (column 16), as a long table with one row per pair.
lee_11_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(10, 16)])
)
lee_11_raw

X11,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,16
Group_10,Alibates,0
Group_11,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,0
Group_5,Alibates,5
Group_6,Alibates,1
Group_7,Alibates,0
Group_8,Alibates,1


In [85]:
# Count the "Known" samples per k = 12 NMF_Lee group (column 11) and visual
# identification (column 16), as a long table with one row per pair.
lee_12_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(11, 16)])
)
lee_12_raw

X12,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,12
Group_10,Alibates,0
Group_11,Alibates,0
Group_12,Alibates,0
Group_2,Alibates,0
Group_4,Alibates,1
Group_5,Alibates,9
Group_6,Alibates,0
Group_7,Alibates,1
Group_8,Alibates,0


In [86]:
# Count the "Known" samples per k = 13 NMF_Lee group (column 12) and visual
# identification (column 16), as a long table with one row per pair.
lee_13_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(12, 16)])
)
lee_13_raw

X13,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,7
Group_10,Alibates,1
Group_11,Alibates,0
Group_12,Alibates,0
Group_13,Alibates,1
Group_2,Alibates,10
Group_4,Alibates,0
Group_5,Alibates,2
Group_6,Alibates,2
Group_7,Alibates,0


In [87]:
# Count the "Known" samples per k = 14 NMF_Lee group (column 13) and visual
# identification (column 16), as a long table with one row per pair.
lee_14_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(13, 16)])
)
lee_14_raw

X14,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,13
Group_10,Alibates,0
Group_11,Alibates,0
Group_12,Alibates,0
Group_13,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,4
Group_5,Alibates,0
Group_6,Alibates,0
Group_7,Alibates,1


In [88]:
# Count the "Known" samples per k = 15 NMF_Lee group (column 14) and visual
# identification (column 16), as a long table with one row per pair.
lee_15_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(14, 16)])
)
lee_15_raw

X15,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,11
Group_10,Alibates,0
Group_11,Alibates,0
Group_12,Alibates,4
Group_13,Alibates,0
Group_14,Alibates,0
Group_2,Alibates,1
Group_4,Alibates,6
Group_5,Alibates,0
Group_6,Alibates,0


In [74]:
# Choose the best performing option by crosstab.
# Count the "Known" samples per k = 2 HC_Euclidean group (column 1) and visual
# identification (column 16), as a long table with one row per pair.
hc_2_raw <- as.data.frame(
  ftable(hc[hc$is_known == "Known", c(1, 16)])
)
hc_2_raw

X2,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,1
Group_2,Alibates,22
Group_1,Edwards Plateau Chert,13
Group_2,Edwards Plateau Chert,59
Group_1,El Sauz Chert,4
Group_2,El Sauz Chert,271
Group_1,Knife River Flint,5
Group_2,Knife River Flint,7
Group_1,Oman Chert,1
Group_2,Oman Chert,20


In [76]:
# Count the "Known" samples per k = 3 HC_Euclidean group (column 2) and visual
# identification (column 16), as a long table with one row per pair.
hc_3_raw <- as.data.frame(
  ftable(hc[hc$is_known == "Known", c(2, 16)])
)
hc_3_raw

X3,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,22
Group_2,Alibates,0
Group_3,Alibates,1
Group_1,Edwards Plateau Chert,53
Group_2,Edwards Plateau Chert,6
Group_3,Edwards Plateau Chert,13
Group_1,El Sauz Chert,147
Group_2,El Sauz Chert,121
Group_3,El Sauz Chert,7
Group_1,Knife River Flint,7


In [77]:
# Count the "Known" samples per k = 4 HC_Euclidean group (column 3) and visual
# identification (column 16), as a long table with one row per pair.
hc_4_raw <- as.data.frame(
  ftable(hc[hc$is_known == "Known", c(3, 16)])
)
hc_4_raw

X4,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,21
Group_2,Alibates,0
Group_3,Alibates,1
Group_4,Alibates,1
Group_1,Edwards Plateau Chert,51
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,11
Group_4,Edwards Plateau Chert,10
Group_1,El Sauz Chert,144
Group_2,El Sauz Chert,114


In [78]:
# Count the "Known" samples per k = 5 HC_Euclidean group (column 4) and visual
# identification (column 16), as a long table with one row per pair.
hc_5_raw <- as.data.frame(
  ftable(hc[hc$is_known == "Known", c(4, 16)])
)
hc_5_raw

X5,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,2
Group_4,Alibates,0
Group_5,Alibates,21
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,12
Group_4,Edwards Plateau Chert,8
Group_5,Edwards Plateau Chert,52


In [79]:
# Count the "Known" samples per k = 6 HC_Euclidean group (column 5) and visual
# identification (column 16), as a long table with one row per pair.
hc_6_raw <- as.data.frame(
  ftable(hc[hc$is_known == "Known", c(5, 16)])
)
hc_6_raw

X6,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,1
Group_2,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,1
Group_5,Alibates,20
Group_6,Alibates,1
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,13
Group_4,Edwards Plateau Chert,7


In [80]:
# Count the "Known" samples per k = 7 HC_Euclidean group (column 6) and visual
# identification (column 16), as a long table with one row per pair.
hc_7_raw <- as.data.frame(
  ftable(hc[hc$is_known == "Known", c(6, 16)])
)
hc_7_raw

X7,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,1
Group_4,Alibates,1
Group_5,Alibates,20
Group_7,Alibates,1
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,13
Group_4,Edwards Plateau Chert,8


In [81]:
# Count the "Known" samples per k = 8 HC_Euclidean group (column 7) and visual
# identification (column 16), as a long table with one row per pair.
hc_8_raw <- as.data.frame(
  ftable(hc[hc$is_known == "Known", c(7, 16)])
)
hc_8_raw

X8,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,1
Group_3,Alibates,1
Group_4,Alibates,0
Group_5,Alibates,20
Group_6,Alibates,1
Group_7,Alibates,0
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,12


In [82]:
# Count the "Known" samples per k = 9 HC_Euclidean group (column 8) and visual
# identification (column 16), as a long table with one row per pair.
hc_9_raw <- as.data.frame(
  ftable(hc[hc$is_known == "Known", c(8, 16)])
)
hc_9_raw

X9,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,0
Group_5,Alibates,19
Group_6,Alibates,3
Group_7,Alibates,0
Group_8,Alibates,1
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0


In [83]:
# Count the "Known" samples per k = 10 HC_Euclidean group (column 9) and
# visual identification (column 16), as a long table with one row per pair.
hc_10_raw <- as.data.frame(
  ftable(hc[hc$is_known == "Known", c(9, 16)])
)
hc_10_raw

X10,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,17
Group_10,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,1
Group_4,Alibates,0
Group_5,Alibates,2
Group_6,Alibates,2
Group_7,Alibates,0
Group_9,Alibates,1
Group_1,Edwards Plateau Chert,47


In [84]:
# Count the "Known" samples per k = 11 HC_Euclidean group (column 10) and
# visual identification (column 16), as a long table with one row per pair.
hc_11_raw <- as.data.frame(
  ftable(hc[hc$is_known == "Known", c(10, 16)])
)
hc_11_raw

X11,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,16
Group_10,Alibates,0
Group_11,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,0
Group_5,Alibates,5
Group_6,Alibates,1
Group_7,Alibates,0
Group_8,Alibates,1


In [85]:
# Count the "Known" samples per k = 12 HC_Euclidean group (column 11) and
# visual identification (column 16), as a long table with one row per pair.
hc_12_raw <- as.data.frame(
  ftable(hc[hc$is_known == "Known", c(11, 16)])
)
hc_12_raw

X12,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,12
Group_10,Alibates,0
Group_11,Alibates,0
Group_12,Alibates,0
Group_2,Alibates,0
Group_4,Alibates,1
Group_5,Alibates,9
Group_6,Alibates,0
Group_7,Alibates,1
Group_8,Alibates,0


In [86]:
# Count the "Known" samples per k = 13 HC_Euclidean group (column 12) and
# visual identification (column 16), as a long table with one row per pair.
hc_13_raw <- as.data.frame(
  ftable(hc[hc$is_known == "Known", c(12, 16)])
)
hc_13_raw

X13,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,7
Group_10,Alibates,1
Group_11,Alibates,0
Group_12,Alibates,0
Group_13,Alibates,1
Group_2,Alibates,10
Group_4,Alibates,0
Group_5,Alibates,2
Group_6,Alibates,2
Group_7,Alibates,0


In [87]:
# Tally the "Known" rows of `hc` by column 13 (printed as `X14`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
hc_14_raw <- as.data.frame(
  ftable(hc[hc$is_known == "Known", c(13, 16)])
)
hc_14_raw

X14,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,13
Group_10,Alibates,0
Group_11,Alibates,0
Group_12,Alibates,0
Group_13,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,4
Group_5,Alibates,0
Group_6,Alibates,0
Group_7,Alibates,1


In [88]:
# Tally the "Known" rows of `hc` by column 14 (printed as `X15`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
hc_15_raw <- as.data.frame(
  ftable(hc[hc$is_known == "Known", c(14, 16)])
)
hc_15_raw

X15,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,11
Group_10,Alibates,0
Group_11,Alibates,0
Group_12,Alibates,4
Group_13,Alibates,0
Group_14,Alibates,0
Group_2,Alibates,1
Group_4,Alibates,6
Group_5,Alibates,0
Group_6,Alibates,0


In [74]:
# Pick the best-performing option by cross-tabulation: tally the "Known" rows
# of `diana` by column 1 (printed as `X2`) and column 16 (`vis`), then flatten
# the contingency table into one `Freq` row per pair.
# NOTE(review): the rows printed below are identical to those printed for the
# matching km and lee cells, and the cell counter repeats -- confirm this cell
# was re-run after being copied.
diana_2_raw <- as.data.frame(
  ftable(diana[diana$is_known == "Known", c(1, 16)])
)
diana_2_raw

X2,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,1
Group_2,Alibates,22
Group_1,Edwards Plateau Chert,13
Group_2,Edwards Plateau Chert,59
Group_1,El Sauz Chert,4
Group_2,El Sauz Chert,271
Group_1,Knife River Flint,5
Group_2,Knife River Flint,7
Group_1,Oman Chert,1
Group_2,Oman Chert,20


In [76]:
# Tally the "Known" rows of `diana` by column 2 (printed as `X3`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
diana_3_raw <- as.data.frame(
  ftable(diana[diana$is_known == "Known", c(2, 16)])
)
diana_3_raw

X3,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,22
Group_2,Alibates,0
Group_3,Alibates,1
Group_1,Edwards Plateau Chert,53
Group_2,Edwards Plateau Chert,6
Group_3,Edwards Plateau Chert,13
Group_1,El Sauz Chert,147
Group_2,El Sauz Chert,121
Group_3,El Sauz Chert,7
Group_1,Knife River Flint,7


In [77]:
# Tally the "Known" rows of `diana` by column 3 (printed as `X4`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
diana_4_raw <- as.data.frame(
  ftable(diana[diana$is_known == "Known", c(3, 16)])
)
diana_4_raw

X4,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,21
Group_2,Alibates,0
Group_3,Alibates,1
Group_4,Alibates,1
Group_1,Edwards Plateau Chert,51
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,11
Group_4,Edwards Plateau Chert,10
Group_1,El Sauz Chert,144
Group_2,El Sauz Chert,114


In [78]:
# Tally the "Known" rows of `diana` by column 4 (printed as `X5`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
diana_5_raw <- as.data.frame(
  ftable(diana[diana$is_known == "Known", c(4, 16)])
)
diana_5_raw

X5,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,2
Group_4,Alibates,0
Group_5,Alibates,21
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,12
Group_4,Edwards Plateau Chert,8
Group_5,Edwards Plateau Chert,52


In [79]:
# Tally the "Known" rows of `diana` by column 5 (printed as `X6`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
diana_6_raw <- as.data.frame(
  ftable(diana[diana$is_known == "Known", c(5, 16)])
)
diana_6_raw

X6,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,1
Group_2,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,1
Group_5,Alibates,20
Group_6,Alibates,1
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,13
Group_4,Edwards Plateau Chert,7


In [80]:
# Tally the "Known" rows of `diana` by column 6 (printed as `X7`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
diana_7_raw <- as.data.frame(
  ftable(diana[diana$is_known == "Known", c(6, 16)])
)
diana_7_raw

X7,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,1
Group_4,Alibates,1
Group_5,Alibates,20
Group_7,Alibates,1
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,13
Group_4,Edwards Plateau Chert,8


In [81]:
# Tally the "Known" rows of `diana` by column 7 (printed as `X8`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
diana_8_raw <- as.data.frame(
  ftable(diana[diana$is_known == "Known", c(7, 16)])
)
diana_8_raw

X8,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,1
Group_3,Alibates,1
Group_4,Alibates,0
Group_5,Alibates,20
Group_6,Alibates,1
Group_7,Alibates,0
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,12


In [82]:
# Tally the "Known" rows of `diana` by column 8 (printed as `X9`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
diana_9_raw <- as.data.frame(
  ftable(diana[diana$is_known == "Known", c(8, 16)])
)
diana_9_raw

X9,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,0
Group_5,Alibates,19
Group_6,Alibates,3
Group_7,Alibates,0
Group_8,Alibates,1
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0


In [83]:
# Tally the "Known" rows of `diana` by column 9 (printed as `X10`) and column
# 16 (`vis`), then flatten the contingency table into one `Freq` row per pair.
diana_10_raw <- as.data.frame(
  ftable(diana[diana$is_known == "Known", c(9, 16)])
)
diana_10_raw

X10,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,17
Group_10,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,1
Group_4,Alibates,0
Group_5,Alibates,2
Group_6,Alibates,2
Group_7,Alibates,0
Group_9,Alibates,1
Group_1,Edwards Plateau Chert,47


In [84]:
# Tally the "Known" rows of `diana` by column 10 (printed as `X11`) and column
# 16 (`vis`), then flatten the contingency table into one `Freq` row per pair.
diana_11_raw <- as.data.frame(
  ftable(diana[diana$is_known == "Known", c(10, 16)])
)
diana_11_raw

X11,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,16
Group_10,Alibates,0
Group_11,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,0
Group_5,Alibates,5
Group_6,Alibates,1
Group_7,Alibates,0
Group_8,Alibates,1


In [85]:
# Tally the "Known" rows of `diana` by column 11 (printed as `X12`) and column
# 16 (`vis`), then flatten the contingency table into one `Freq` row per pair.
diana_12_raw <- as.data.frame(
  ftable(diana[diana$is_known == "Known", c(11, 16)])
)
diana_12_raw

X12,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,12
Group_10,Alibates,0
Group_11,Alibates,0
Group_12,Alibates,0
Group_2,Alibates,0
Group_4,Alibates,1
Group_5,Alibates,9
Group_6,Alibates,0
Group_7,Alibates,1
Group_8,Alibates,0


In [86]:
# Tally the "Known" rows of `diana` by column 12 (printed as `X13`) and column
# 16 (`vis`), then flatten the contingency table into one `Freq` row per pair.
diana_13_raw <- as.data.frame(
  ftable(diana[diana$is_known == "Known", c(12, 16)])
)
diana_13_raw

X13,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,7
Group_10,Alibates,1
Group_11,Alibates,0
Group_12,Alibates,0
Group_13,Alibates,1
Group_2,Alibates,10
Group_4,Alibates,0
Group_5,Alibates,2
Group_6,Alibates,2
Group_7,Alibates,0


In [87]:
# Tally the "Known" rows of `diana` by column 13 (printed as `X14`) and column
# 16 (`vis`), then flatten the contingency table into one `Freq` row per pair.
diana_14_raw <- as.data.frame(
  ftable(diana[diana$is_known == "Known", c(13, 16)])
)
diana_14_raw

X14,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,13
Group_10,Alibates,0
Group_11,Alibates,0
Group_12,Alibates,0
Group_13,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,4
Group_5,Alibates,0
Group_6,Alibates,0
Group_7,Alibates,1


In [88]:
# Tally the "Known" rows of `diana` by column 14 (printed as `X15`) and column
# 16 (`vis`), then flatten the contingency table into one `Freq` row per pair.
diana_15_raw <- as.data.frame(
  ftable(diana[diana$is_known == "Known", c(14, 16)])
)
diana_15_raw

X15,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,11
Group_10,Alibates,0
Group_11,Alibates,0
Group_12,Alibates,4
Group_13,Alibates,0
Group_14,Alibates,0
Group_2,Alibates,1
Group_4,Alibates,6
Group_5,Alibates,0
Group_6,Alibates,0


In [74]:
# Pick the best-performing option by cross-tabulation: tally the "Known" rows
# of `km` by column 1 (printed as `X2`) and column 16 (`vis`), then flatten
# the contingency table into one `Freq` row per pair.
# NOTE(review): the rows printed below are identical to those printed for the
# matching diana and lee cells, and the cell counter repeats -- confirm this
# cell was re-run after being copied.
km_2_raw <- as.data.frame(
  ftable(km[km$is_known == "Known", c(1, 16)])
)
km_2_raw

X2,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,1
Group_2,Alibates,22
Group_1,Edwards Plateau Chert,13
Group_2,Edwards Plateau Chert,59
Group_1,El Sauz Chert,4
Group_2,El Sauz Chert,271
Group_1,Knife River Flint,5
Group_2,Knife River Flint,7
Group_1,Oman Chert,1
Group_2,Oman Chert,20


In [76]:
# Tally the "Known" rows of `km` by column 2 (printed as `X3`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
km_3_raw <- as.data.frame(
  ftable(km[km$is_known == "Known", c(2, 16)])
)
km_3_raw

X3,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,22
Group_2,Alibates,0
Group_3,Alibates,1
Group_1,Edwards Plateau Chert,53
Group_2,Edwards Plateau Chert,6
Group_3,Edwards Plateau Chert,13
Group_1,El Sauz Chert,147
Group_2,El Sauz Chert,121
Group_3,El Sauz Chert,7
Group_1,Knife River Flint,7


In [77]:
# Tally the "Known" rows of `km` by column 3 (printed as `X4`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
km_4_raw <- as.data.frame(
  ftable(km[km$is_known == "Known", c(3, 16)])
)
km_4_raw

X4,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,21
Group_2,Alibates,0
Group_3,Alibates,1
Group_4,Alibates,1
Group_1,Edwards Plateau Chert,51
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,11
Group_4,Edwards Plateau Chert,10
Group_1,El Sauz Chert,144
Group_2,El Sauz Chert,114


In [78]:
# Tally the "Known" rows of `km` by column 4 (printed as `X5`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
km_5_raw <- as.data.frame(
  ftable(km[km$is_known == "Known", c(4, 16)])
)
km_5_raw

X5,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,2
Group_4,Alibates,0
Group_5,Alibates,21
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,12
Group_4,Edwards Plateau Chert,8
Group_5,Edwards Plateau Chert,52


In [79]:
# Tally the "Known" rows of `km` by column 5 (printed as `X6`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
km_6_raw <- as.data.frame(
  ftable(km[km$is_known == "Known", c(5, 16)])
)
km_6_raw

X6,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,1
Group_2,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,1
Group_5,Alibates,20
Group_6,Alibates,1
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,13
Group_4,Edwards Plateau Chert,7


In [80]:
# Tally the "Known" rows of `km` by column 6 (printed as `X7`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
km_7_raw <- as.data.frame(
  ftable(km[km$is_known == "Known", c(6, 16)])
)
km_7_raw

X7,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,1
Group_4,Alibates,1
Group_5,Alibates,20
Group_7,Alibates,1
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,13
Group_4,Edwards Plateau Chert,8


In [81]:
# Tally the "Known" rows of `km` by column 7 (printed as `X8`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
km_8_raw <- as.data.frame(
  ftable(km[km$is_known == "Known", c(7, 16)])
)
km_8_raw

X8,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,1
Group_3,Alibates,1
Group_4,Alibates,0
Group_5,Alibates,20
Group_6,Alibates,1
Group_7,Alibates,0
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,12


In [82]:
# Tally the "Known" rows of `km` by column 8 (printed as `X9`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
km_9_raw <- as.data.frame(
  ftable(km[km$is_known == "Known", c(8, 16)])
)
km_9_raw

X9,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,0
Group_5,Alibates,19
Group_6,Alibates,3
Group_7,Alibates,0
Group_8,Alibates,1
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0


In [83]:
# Tally the "Known" rows of `km` by column 9 (printed as `X10`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
km_10_raw <- as.data.frame(
  ftable(km[km$is_known == "Known", c(9, 16)])
)
km_10_raw

X10,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,17
Group_10,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,1
Group_4,Alibates,0
Group_5,Alibates,2
Group_6,Alibates,2
Group_7,Alibates,0
Group_9,Alibates,1
Group_1,Edwards Plateau Chert,47


In [84]:
# Tally the "Known" rows of `km` by column 10 (printed as `X11`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
km_11_raw <- as.data.frame(
  ftable(km[km$is_known == "Known", c(10, 16)])
)
km_11_raw

X11,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,16
Group_10,Alibates,0
Group_11,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,0
Group_5,Alibates,5
Group_6,Alibates,1
Group_7,Alibates,0
Group_8,Alibates,1


In [85]:
# Tally the "Known" rows of `km` by column 11 (printed as `X12`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
km_12_raw <- as.data.frame(
  ftable(km[km$is_known == "Known", c(11, 16)])
)
km_12_raw

X12,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,12
Group_10,Alibates,0
Group_11,Alibates,0
Group_12,Alibates,0
Group_2,Alibates,0
Group_4,Alibates,1
Group_5,Alibates,9
Group_6,Alibates,0
Group_7,Alibates,1
Group_8,Alibates,0


In [86]:
# Tally the "Known" rows of `km` by column 12 (printed as `X13`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
km_13_raw <- as.data.frame(
  ftable(km[km$is_known == "Known", c(12, 16)])
)
km_13_raw

X13,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,7
Group_10,Alibates,1
Group_11,Alibates,0
Group_12,Alibates,0
Group_13,Alibates,1
Group_2,Alibates,10
Group_4,Alibates,0
Group_5,Alibates,2
Group_6,Alibates,2
Group_7,Alibates,0


In [87]:
# Tally the "Known" rows of `km` by column 13 (printed as `X14`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
km_14_raw <- as.data.frame(
  ftable(km[km$is_known == "Known", c(13, 16)])
)
km_14_raw

X14,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,13
Group_10,Alibates,0
Group_11,Alibates,0
Group_12,Alibates,0
Group_13,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,4
Group_5,Alibates,0
Group_6,Alibates,0
Group_7,Alibates,1


In [88]:
# Tally the "Known" rows of `km` by column 14 (printed as `X15`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
km_15_raw <- as.data.frame(
  ftable(km[km$is_known == "Known", c(14, 16)])
)
km_15_raw

X15,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,11
Group_10,Alibates,0
Group_11,Alibates,0
Group_12,Alibates,4
Group_13,Alibates,0
Group_14,Alibates,0
Group_2,Alibates,1
Group_4,Alibates,6
Group_5,Alibates,0
Group_6,Alibates,0


In [74]:
# Pick the best-performing option by cross-tabulation: tally the "Known" rows
# of `lee` by column 1 (printed as `X2`) and column 16 (`vis`), then flatten
# the contingency table into one `Freq` row per pair.
# NOTE(review): the rows printed below are identical to those printed for the
# matching diana and km cells, and the cell counter repeats -- confirm this
# cell was re-run after being copied.
lee_2_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(1, 16)])
)
lee_2_raw

X2,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,1
Group_2,Alibates,22
Group_1,Edwards Plateau Chert,13
Group_2,Edwards Plateau Chert,59
Group_1,El Sauz Chert,4
Group_2,El Sauz Chert,271
Group_1,Knife River Flint,5
Group_2,Knife River Flint,7
Group_1,Oman Chert,1
Group_2,Oman Chert,20


In [76]:
# Tally the "Known" rows of `lee` by column 2 (printed as `X3`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
lee_3_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(2, 16)])
)
lee_3_raw

X3,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,22
Group_2,Alibates,0
Group_3,Alibates,1
Group_1,Edwards Plateau Chert,53
Group_2,Edwards Plateau Chert,6
Group_3,Edwards Plateau Chert,13
Group_1,El Sauz Chert,147
Group_2,El Sauz Chert,121
Group_3,El Sauz Chert,7
Group_1,Knife River Flint,7


In [77]:
# Tally the "Known" rows of `lee` by column 3 (printed as `X4`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
lee_4_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(3, 16)])
)
lee_4_raw

X4,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,21
Group_2,Alibates,0
Group_3,Alibates,1
Group_4,Alibates,1
Group_1,Edwards Plateau Chert,51
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,11
Group_4,Edwards Plateau Chert,10
Group_1,El Sauz Chert,144
Group_2,El Sauz Chert,114


In [78]:
# Tally the "Known" rows of `lee` by column 4 (printed as `X5`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
lee_5_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(4, 16)])
)
lee_5_raw

X5,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,2
Group_4,Alibates,0
Group_5,Alibates,21
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,12
Group_4,Edwards Plateau Chert,8
Group_5,Edwards Plateau Chert,52


In [79]:
# Tally the "Known" rows of `lee` by column 5 (printed as `X6`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
lee_6_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(5, 16)])
)
lee_6_raw

X6,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,1
Group_2,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,1
Group_5,Alibates,20
Group_6,Alibates,1
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,13
Group_4,Edwards Plateau Chert,7


In [80]:
# Tally the "Known" rows of `lee` by column 6 (printed as `X7`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
lee_7_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(6, 16)])
)
lee_7_raw

X7,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,1
Group_4,Alibates,1
Group_5,Alibates,20
Group_7,Alibates,1
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,13
Group_4,Edwards Plateau Chert,8


In [81]:
# Tally the "Known" rows of `lee` by column 7 (printed as `X8`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
lee_8_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(7, 16)])
)
lee_8_raw

X8,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,1
Group_3,Alibates,1
Group_4,Alibates,0
Group_5,Alibates,20
Group_6,Alibates,1
Group_7,Alibates,0
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0
Group_3,Edwards Plateau Chert,12


In [82]:
# Tally the "Known" rows of `lee` by column 8 (printed as `X9`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
lee_9_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(8, 16)])
)
lee_9_raw

X9,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,0
Group_5,Alibates,19
Group_6,Alibates,3
Group_7,Alibates,0
Group_8,Alibates,1
Group_1,Edwards Plateau Chert,0
Group_2,Edwards Plateau Chert,0


In [83]:
# Tally the "Known" rows of `lee` by column 9 (printed as `X10`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
lee_10_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(9, 16)])
)
lee_10_raw

X10,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,17
Group_10,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,1
Group_4,Alibates,0
Group_5,Alibates,2
Group_6,Alibates,2
Group_7,Alibates,0
Group_9,Alibates,1
Group_1,Edwards Plateau Chert,47


In [84]:
# Tally the "Known" rows of `lee` by column 10 (printed as `X11`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
lee_11_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(10, 16)])
)
lee_11_raw

X11,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,16
Group_10,Alibates,0
Group_11,Alibates,0
Group_2,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,0
Group_5,Alibates,5
Group_6,Alibates,1
Group_7,Alibates,0
Group_8,Alibates,1


In [85]:
# Tally the "Known" rows of `lee` by column 11 (printed as `X12`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
lee_12_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(11, 16)])
)
lee_12_raw

X12,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,12
Group_10,Alibates,0
Group_11,Alibates,0
Group_12,Alibates,0
Group_2,Alibates,0
Group_4,Alibates,1
Group_5,Alibates,9
Group_6,Alibates,0
Group_7,Alibates,1
Group_8,Alibates,0


In [86]:
# Tally the "Known" rows of `lee` by column 12 (printed as `X13`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
lee_13_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(12, 16)])
)
lee_13_raw

X13,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,7
Group_10,Alibates,1
Group_11,Alibates,0
Group_12,Alibates,0
Group_13,Alibates,1
Group_2,Alibates,10
Group_4,Alibates,0
Group_5,Alibates,2
Group_6,Alibates,2
Group_7,Alibates,0


In [87]:
# Tally the "Known" rows of `lee` by column 13 (printed as `X14`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
lee_14_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(13, 16)])
)
lee_14_raw

X14,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,13
Group_10,Alibates,0
Group_11,Alibates,0
Group_12,Alibates,0
Group_13,Alibates,0
Group_3,Alibates,0
Group_4,Alibates,4
Group_5,Alibates,0
Group_6,Alibates,0
Group_7,Alibates,1


In [88]:
# Tally the "Known" rows of `lee` by column 14 (printed as `X15`) and column 16
# (`vis`), then flatten the contingency table into one `Freq` row per pair.
lee_15_raw <- as.data.frame(
  ftable(lee[lee$is_known == "Known", c(14, 16)])
)
lee_15_raw

X15,vis,Freq
<fct>,<fct>,<int>
Group_1,Alibates,11
Group_10,Alibates,0
Group_11,Alibates,0
Group_12,Alibates,4
Group_13,Alibates,0
Group_14,Alibates,0
Group_2,Alibates,1
Group_4,Alibates,6
Group_5,Alibates,0
Group_6,Alibates,0


## Random Forests

In this section we will build two random forest classifiers. The first is trained on the labels generated by visual classification, which we know are not optimal. The second is trained on the labels generated by the clustering algorithm selected in the previous section.

1. RF built with vis labels
2. RF built with chosen cluster labels

Load test dataset

In [None]:
# Read Test.csv from the working directory into `test`; the first row of the
# file is treated as the column names.
test <- read.csv(file = "Test.csv", header = TRUE)