Load the required libraries, read in the grass data, and check the column names and number of rows

In [None]:
library(ggplot2)

# Load the measurements; the path is resolved relative to the working directory.
grass_data <- read.csv(file = "grass_height_biomass.csv")

# Sanity checks: which columns are available and how many rows were read.
names(grass_data)
nrow(grass_data)

Create a linear regression model to predict biomass from height

In [None]:
# Simple linear regression of dry_biomass_g on drone_mean_height_cm.
biomass_model <- lm(
  formula = dry_biomass_g ~ drone_mean_height_cm,
  data = grass_data
)

# Coefficient estimates, standard errors and overall fit statistics.
summary(biomass_model)

Create a scatter plot of the data including the linear regression model

In [None]:
# Fitted values for every row of the data frame. Passing `newdata` keeps the
# result aligned with `grass_data`: a row with a missing height yields NA
# instead of being dropped, which would otherwise make this column assignment
# fail with a length mismatch.
grass_data$predicted_biomass <- predict(biomass_model, newdata = grass_data)

# Square figure for the notebook output.
options(repr.plot.width = 6, repr.plot.height = 6)

# Observations as points, fitted regression as a dashed red line.
# `linewidth` is used for the line thickness because `size` is deprecated for
# line geoms since ggplot2 3.4.0.
ggplot(grass_data, aes(x = drone_mean_height_cm, y = dry_biomass_g)) +
  geom_point(size = 3) +
  geom_line(
    aes(y = predicted_biomass),
    linewidth = 1, color = "red", linetype = "dashed"
  ) +
  labs(x = "Drone mean height (cm)", y = "Dry biomass (g)") +
  theme_bw(base_size = 18)

Include the global biomass model on the scatter plot to compare slopes

In [None]:
# Slope of the global biomass model. The model has no intercept term, so its
# line passes through the origin.
global_slope <- 28.98
grass_data$global_biomass <- global_slope * grass_data$drone_mean_height_cm

# Same scatter plot as before with both models overlaid:
#   red  = regression fitted to this data set (predicted_biomass)
#   blue = global model (global_biomass)
# `linewidth` is used for the line thickness because `size` is deprecated for
# line geoms since ggplot2 3.4.0.
ggplot(grass_data, aes(x = drone_mean_height_cm, y = dry_biomass_g)) +
  geom_point(size = 3) +
  geom_line(
    aes(y = predicted_biomass),
    linewidth = 1, color = "red", linetype = "dashed"
  ) +
  geom_line(
    aes(y = global_biomass),
    linewidth = 1, color = "blue", linetype = "dashed"
  ) +
  labs(x = "Drone mean height (cm)", y = "Dry biomass (g)") +
  theme_bw(base_size = 18)

Look for patterns in the residual plot

In [None]:
# Wider figure for the diagnostic plot.
options(repr.plot.width = 10, repr.plot.height = 6)

# Diagnostic plot 1 of plot.lm: residuals against fitted values.
plot(biomass_model, which = 1L)

Transform the predictor variable with the natural logarithm, fit a new linear regression model, and check its residual plot

In [None]:
# Wider figure for the diagnostic plot drawn at the end of this cell.
options(repr.plot.width = 10, repr.plot.height = 6)

# Regress dry biomass on the natural log of height; the transformation is
# applied inside the formula, so no extra column is needed.
log_model <- lm(
  formula = dry_biomass_g ~ log(drone_mean_height_cm),
  data = grass_data
)
summary(log_model)

# Residuals against fitted values for the log model.
plot(log_model, which = 1L)

Create a scatter plot of the log-transformed data and model

In [None]:
# Fitted values of the log model for every row of the data frame. Passing
# `newdata` keeps the result aligned with `grass_data`: rows the model had to
# drop (e.g. a missing height) yield NA instead of shortening the vector and
# breaking this column assignment.
grass_data$predicted_biomass_log <- predict(log_model, newdata = grass_data)

# Square figure for the notebook output.
options(repr.plot.width = 6, repr.plot.height = 6)

# Plot against log(height) so the fitted model appears as a straight line.
# `linewidth` is used for the line thickness because `size` is deprecated for
# line geoms since ggplot2 3.4.0.
ggplot(grass_data, aes(x = log(drone_mean_height_cm), y = dry_biomass_g)) +
  geom_point(size = 3) +
  geom_line(
    aes(y = predicted_biomass_log),
    linewidth = 1, color = "red", linetype = "dashed"
  ) +
  labs(x = "Log of drone mean height (cm)", y = "Dry biomass (g)") +
  theme_bw(base_size = 18)

To force the linear model through the origin, specify a zero intercept (`0 +`) in the formula passed to the lm() function

In [None]:
# Refit the height model without an intercept (`0 +` in the formula).
# NOTE: this reassigns `biomass_model`, replacing the model fitted with an
# intercept earlier in the notebook.
biomass_model <- lm(
  formula = dry_biomass_g ~ 0 + drone_mean_height_cm,
  data = grass_data
)

# Slope estimate and fit statistics for the through-origin model.
summary(biomass_model)