In [1]:
# Load the assembly-index table from the CSV file
data <- read.csv("Assembly-10000.csv")

# Drop the first column, which is treated as not useful here.
# drop = FALSE guarantees the result stays a data frame even if only a
# single column remains (the default would collapse it to a bare vector).
data <- data[, -1, drop = FALSE]

# Preview the first few rows of the remaining columns
head(data)

Unnamed: 0_level_0,integer,assemblyindex
Unnamed: 0_level_1,<int>,<int>
1,1,0
2,2,1
3,3,2
4,4,2
5,5,3
6,6,3


In [9]:
library(ggplot2)

# Plot the assembly index of each integer together with two reference
# curves proportional to log2(integer), then save the figure as a PDF.

# Keep only the two source columns so that re-running this cell starts from
# a clean table (previously derived columns are discarded).
data <- data[c("integer", "assemblyindex")]

# Multiplier applied to log2(integer) for the upper reference curve
upper_bound_factor <- 1.45

# Derived columns used for the reference curves
data$log2_integer <- log2(data$integer)
data$upper_bound <- upper_bound_factor * data$log2_integer
data$lower_bound <- data$log2_integer

# Hand-picked label anchors: x/y are in data coordinates, angle in degrees
mid_x_upper <- 50
mid_y_upper <- 10
angle_upper <- 40

mid_x_lower <- 120
mid_y_lower <- 5
angle_lower <- 25

# Build the plot: points for the data, red/green lines for the two curves,
# and a plotmath label next to each curve.
p <- ggplot(data, aes(x = integer)) +
  geom_point(aes(y = assemblyindex), shape = 4, color = "black") +
  scale_x_log10(name = "Integer (Log Scale)") +
  ylab("Assembly Index") +
  geom_line(aes(y = upper_bound), color = "red", linewidth = 1) +
  geom_line(aes(y = lower_bound), color = "green", linewidth = 1) +
  annotate(
    "text",
    x = mid_x_upper, y = mid_y_upper,
    label = "paste(A %~~% 1.45 %*% log[2](I))", parse = TRUE,
    angle = angle_upper, size = 5, color = "black"
  ) +
  annotate(
    "text",
    x = mid_x_lower, y = mid_y_lower,
    label = "paste(A %~~% log[2](I))", parse = TRUE,
    angle = angle_lower, size = 5, color = "black"
  ) +
  theme_minimal()

# Save the figure. The output directory is created first because ggsave()
# does not reliably create missing directories on its own.
output_path <- "figs/12_assembly-of-integers.pdf"
dir.create(dirname(output_path), showWarnings = FALSE, recursive = TRUE)
ggsave(
  filename = output_path,
  plot = p,
  width = 80,
  height = 70,
  units = "mm"
)