# Class data #

First we will create separate subsets for Monday and Tuesday, then develop separate linear regression models to compare. 

In [None]:
library(ggplot2)
library(cowplot)

# Load the class tree measurements.
tree_data <- read.csv("bees1041_tree_data_2022.csv")

# Diameter check: derive a diameter from the measured circumference
# (d = C / pi) and keep only rows where the recorded DBH is within
# 10 cm of that value. Rows whose error is NA are dropped as well.
dbh_tolerance_cm <- 10
circumference_diameter <- tree_data$Circumference_cm / pi
tree_data$dbh_calc <- circumference_diameter
tree_data$dbh_error <- abs(tree_data$dbh_cm - circumference_diameter)
dbh_ok <- which(tree_data$dbh_error < dbh_tolerance_cm)
tree_data <- tree_data[dbh_ok, , drop = FALSE]

# Height check: recompute the height from eye height, distance and the
# measured angle (converted from degrees to radians), then keep only rows
# where the recorded height is within 5 m of that value.
height_tolerance_m <- 5
degrees_to_radians <- (pi / 180)
tree_data$Angle_radians <- tree_data$Angle * degrees_to_radians
height_above_eye <- tree_data$Distance_m * tan(tree_data$Angle_radians)
tree_data$tree_height_calc <- tree_data$Eye_height_m + height_above_eye
tree_data$height_error <- abs(
  tree_data$Tree_height_m - tree_data$tree_height_calc
)
height_ok <- which(tree_data$height_error < height_tolerance_m)
tree_data <- tree_data[height_ok, , drop = FALSE]

# Restrict the analysis to a single species.
is_grandis <- which(tree_data$Tree_type == "Eucalyptus grandis")
tree_data <- tree_data[is_grandis, , drop = FALSE]

# Split the cleaned data by the day recorded in the Day column.
tree_mon <- tree_data[which(tree_data$Day == "Monday"), , drop = FALSE]
tree_tues <- tree_data[which(tree_data$Day == "Tuesday"), , drop = FALSE]

# Fit height ~ diameter separately for each day, print each model summary,
# and store the fitted heights alongside the observations so the regression
# line can be drawn from the same data frame as the points.

# Monday
mon_model <- lm(Tree_height_m ~ dbh_cm, data = tree_mon)
summary(mon_model)
tree_mon[["predicted_height"]] <- predict(mon_model)

# Tuesday
tues_model <- lm(Tree_height_m ~ dbh_cm, data = tree_tues)
summary(tues_model)
tree_tues[["predicted_height"]] <- predict(tues_model)

# Axis limits shared by both panels so the two days can be compared directly.
# The limits are taken over BOTH days (lowest value up to 2% above the
# highest value); using one day's minimum and the other day's maximum can
# push points from either day outside the visible area.
shared_limits <- function(...) {
  values <- c(...)
  c(min(values), 1.02 * max(values))
}
dbh_limits <- shared_limits(tree_mon$dbh_cm, tree_tues$dbh_cm)
height_limits <- shared_limits(tree_mon$Tree_height_m, tree_tues$Tree_height_m)

# Scatter plot of height against diameter for one day's data, with that
# day's fitted regression line (column `predicted_height`) drawn in red.
plot_day <- function(day_data, panel_title) {
  ggplot(day_data, aes(x = dbh_cm, y = Tree_height_m)) +
    geom_point() +
    labs(x = "Diameter (cm)", y = "Height (m)", title = panel_title) +
    theme_bw() +
    geom_line(aes(y = predicted_height), size = 1, color = "red") +
    coord_cartesian(xlim = dbh_limits, ylim = height_limits)
}

monday <- plot_day(tree_mon, "(A) Monday")
tuesday <- plot_day(tree_tues, "(B) Tuesday")

# Notebook figure size (inches), then draw the two panels side by side.
options(repr.plot.width = 5.5, repr.plot.height = 3)
plot_grid(monday, tuesday, ncol = 2, align = "h")


# Ausplot forests data #

First, I will load the Ausplots Forests data and create the same subset of Eucalypt species we had in the exercise.

The last line of code prints out which species have the most trees in the data subset.

In [None]:
library(tidyverse)

# Load the Ausplots Forests tree measurements.
ausplots_data <- read.csv("ausplot_forest_data.csv")

# Keep rows measured at a point of measurement of 1.3 with positive diameter
# and height and a tree status of "A" (presumably "alive" -- confirm against
# the dataset documentation). Rows where any of these tests is NA are dropped.
ausplots_subset <- subset(ausplots_data,
                          (Point_Of_Measurement == 1.3) &
                          (Diameter > 0) &
                          (Height > 0) &
                          (Tree_Status == "A"))

# Split "Genus species ..." into separate Genus and Species columns, keeping
# the original Genus_Species column. Extra words are discarded and a missing
# species becomes NA.
ausplots_subset <- separate(ausplots_subset, Genus_Species,
                            c("Genus", "Species"),
                            sep = " ", extra = "drop",
                            remove = FALSE, fill = "right")

# Restrict to the eucalypt genera.
eucalypt_genera <- c("Eucalyptus", "Angophora", "Corymbia")
ausplots_subset <- subset(ausplots_subset, Genus %in% eucalypt_genera)

# Height ~ diameter model across every species in the subset; its fitted
# values are stored so the overall line can be drawn on later plots.
ausplot_model <- lm(Height ~ Diameter, data = ausplots_subset)
ausplots_subset$predicted_height <- predict(ausplot_model)

# Species with the most trees in the subset, most frequent first.
# table() counts rows per species whether the column is character or factor;
# summary() on a character column reports only Length/Class/Mode.
head(sort(table(ausplots_subset$Genus_Species), decreasing = TRUE))

Now I will make plots for each of these species, adding a line for the regression model, and including the $R^2$ value. I also add a dashed line for the regression model we created from the whole dataset. Once I had a good plot for the first species I copied and pasted for the other graphs, just changing the species name.

In [None]:
# Notebook figure size (inches) for the per-species plots.
options(repr.plot.width = 3, repr.plot.height = 3)

# Every species plot uses the same axis limits, taken from the full subset,
# so the panels are directly comparable.
diameter_limits <- c(min(ausplots_subset$Diameter),
                     1.02 * max(ausplots_subset$Diameter))
height_limits <- c(min(ausplots_subset$Height),
                   1.02 * max(ausplots_subset$Height))

# Build the plot for one species.
#   species_data : rows for the species, including a `predicted_height`
#                  column from the species-specific model
#   species_name : used as the plot title
#   r_squared    : R^2 of the species-specific model, printed on the plot
# Solid red line  = species-specific regression.
# Dashed red line = regression fitted to the whole subset (taken from
#                   ausplots_subset$predicted_height).
plot_species_fit <- function(species_data, species_name, r_squared) {
  ggplot(species_data, aes(x = Diameter, y = Height)) +
    geom_point() +
    labs(x = "Diameter (cm)", y = "Height (m)", title = species_name) +
    geom_line(aes(y = predicted_height), size = 1, color = "red") +
    geom_line(data = ausplots_subset,
              aes(x = Diameter, y = predicted_height),
              size = 1, color = "red", linetype = 2) +
    theme_bw() +
    # annotate() draws the label once; geom_text() with a data-bound aes()
    # would draw one copy per row and overplot them.
    annotate("text", x = 25, y = 80,
             label = format(r_squared, digits = 2), family = "mono") +
    coord_cartesian(xlim = diameter_limits, ylim = height_limits)
}

species_to_plot <- c("Eucalyptus diversicolor",
                     "Eucalyptus obliqua",
                     "Eucalyptus grandis",
                     "Eucalyptus regnans",
                     "Eucalyptus delegatensis",
                     "Eucalyptus fastigata")

# Fit and plot one model per species. The working objects (species,
# euc_subset, model, rsquared) are assigned at top level, so after the loop
# they hold the values for the last species plotted.
for (species in species_to_plot) {
  euc_subset <- subset(ausplots_subset, Genus_Species == species)

  # A regression line needs at least two trees; skip rather than stop.
  if (nrow(euc_subset) < 2) {
    warning("Skipping '", species, "': fewer than two trees in the subset.",
            call. = FALSE)
    next
  }

  model <- lm(Height ~ Diameter, data = euc_subset)
  rsquared <- summary(model)$r.squared
  euc_subset$predicted_height <- predict(model)

  # print() is required for ggplot objects created inside a loop.
  print(plot_species_fit(euc_subset, species, rsquared))
}