In [None]:
# Read the ARG statistics table from disk and preview its first rows
args <- read.csv(file = "ARGs_stats.csv")
head(x = args)

In [None]:
library(stringr)

### Extract the ARG gene

In [None]:
# ARG gene columns: every third column of the main data frame,
# starting at column 1 (positions 1, 4, 7, ..., 94)
colgene <- seq(from = 1, to = 96, by = 3)
args_gene <- args[, colgene]

# Drop the first "_GENE" occurrence from each column name
gene_colnames <- names(args_gene)
gene_colnames2 <- str_replace(
  string = gene_colnames,
  pattern = "_GENE",
  replacement = ""
)

# Apply the cleaned names and preview the result
names(args_gene) <- gene_colnames2
head(args_gene)

### Extract plasmid genes

In [None]:
# pGENE columns: every third column of the main data frame,
# starting at column 2 (positions 2, 5, 8, ..., 95)
colpgene <- seq(from = 2, to = 96, by = 3)
args_pgene <- args[, colpgene]

# Drop the first "_pGENE" occurrence from each column name
pgene_colnames <- names(args_pgene)
pgene_colnames2 <- str_replace(
  string = pgene_colnames,
  pattern = "_pGENE",
  replacement = ""
)

# Apply the cleaned names and preview the result
names(args_pgene) <- pgene_colnames2
head(args_pgene)

### Extract vGENE

In [None]:
# vGENE columns: every third column of the main data frame,
# starting at column 3 (positions 3, 6, 9, ..., 96)
colvgene <- seq(from = 3, to = 96, by = 3)
args_vgene <- args[, colvgene]

# Drop the first "_vGENE" occurrence from each column name
vgene_colnames <- names(args_vgene)
vgene_colnames2 <- str_replace(
  string = vgene_colnames,
  pattern = "_vGENE",
  replacement = ""
)

# Apply the cleaned names and preview the result
names(args_vgene) <- vgene_colnames2
head(args_vgene)


## Count the gene number

In [None]:
library(dplyr)
library(tidyr)

In [None]:
# Tally how often each value occurs in each ARG gene column.
# Result: one row per distinct value ("Gene"), one count column per
# original column, with 0 where a value never occurs in that column.
gene <- local({
  stacked <- pivot_longer(
    args_gene,
    cols = everything(),
    values_to = "Gene",
    values_drop_na = TRUE
  )
  tallied <- count(stacked, name, Gene)
  pivot_wider(
    tallied,
    names_from = name,
    values_from = n,
    values_fill = 0
  )
})
head(gene)

In [None]:
# Tally how often each value occurs in each pGENE column.
# Result: one row per distinct value ("Gene"), one count column per
# original column, with 0 where a value never occurs in that column.
p_gene <- local({
  stacked <- pivot_longer(
    args_pgene,
    cols = everything(),
    values_to = "Gene",
    values_drop_na = TRUE
  )
  tallied <- count(stacked, name, Gene)
  pivot_wider(
    tallied,
    names_from = name,
    values_from = n,
    values_fill = 0
  )
})
head(p_gene)

In [None]:
# Tally how often each value occurs in each vGENE column.
# Result: one row per distinct value ("Gene"), one count column per
# original column, with 0 where a value never occurs in that column.
v_gene <- local({
  stacked <- pivot_longer(
    args_vgene,
    cols = everything(),
    values_to = "Gene",
    values_drop_na = TRUE
  )
  tallied <- count(stacked, name, Gene)
  pivot_wider(
    tallied,
    names_from = name,
    values_from = n,
    values_fill = 0
  )
})

head(v_gene)

## Create heatmap

In [None]:
# Set the repr plot width, height and resolution options for the heatmaps
options(
  repr.plot.width = 7,
  repr.plot.height = 6,
  repr.plot.res = 600
)

In [None]:
# For ARG gene

# Convert the count tibble to a plain data frame so row names can be set
gene2 <- as.data.frame(gene)

# Remove the row that tallies blank cells. The row is identified by its
# empty (or missing) label in the first column instead of by position, so
# no real gene is dropped when the blank row is absent or is not first.
blank_label <- is.na(gene2[[1]]) | !nzchar(trimws(gene2[[1]]))
gene3 <- gene2[!blank_label, , drop = FALSE]

# Use the first column (gene labels) as row names; drop = FALSE keeps a
# data frame even when only one count column is present
gene4 <- data.frame(gene3[, -1, drop = FALSE], row.names = gene3[[1]])

# heatmap() needs a numeric matrix
gene5 <- as.matrix(gene4)


# 25-step colour ramp interpolated from the 8-colour "YlOrBr" palette
library(RColorBrewer)
coul <- colorRampPalette(brewer.pal(8, "YlOrBr"))(25)

# Create the heatmap and save it as a PNG image
png(file = "images/gene.png",
    width = 4000,
    height = 3250,
    res = 600)
# plot
heatmap(gene5, col = coul)

dev.off()

In [None]:
# For p_GENE

# Convert the count tibble to a plain data frame so row names can be set
gene2 <- as.data.frame(p_gene)

# Remove the row that tallies blank cells. The row is identified by its
# empty (or missing) label in the first column instead of by position, so
# no real gene is dropped when the blank row is absent or is not first.
blank_label <- is.na(gene2[[1]]) | !nzchar(trimws(gene2[[1]]))
gene3 <- gene2[!blank_label, , drop = FALSE]

# Use the first column (gene labels) as row names; drop = FALSE keeps a
# data frame even when only one count column is present
gene4 <- data.frame(gene3[, -1, drop = FALSE], row.names = gene3[[1]])

# heatmap() needs a numeric matrix
gene5 <- as.matrix(gene4)


# 25-step colour ramp interpolated from the 8-colour "Purples" palette
library(RColorBrewer)
coul <- colorRampPalette(brewer.pal(8, "Purples"))(25)


# Create the heatmap and save it as a PNG image
png(file = "images/p_gene.png",
    width = 4000,
    height = 3250,
    res = 600)
# plot
heatmap(gene5, col = coul)

dev.off()

In [None]:
# For v_GENE

# Convert the count tibble to a plain data frame so row names can be set
gene2 <- as.data.frame(v_gene)

# Remove the row that tallies blank cells. The row is identified by its
# empty (or missing) label in the first column instead of by position, so
# no real gene is dropped when the blank row is absent or is not first.
blank_label <- is.na(gene2[[1]]) | !nzchar(trimws(gene2[[1]]))
gene3 <- gene2[!blank_label, , drop = FALSE]

# Use the first column (gene labels) as row names; drop = FALSE keeps a
# data frame even when only one count column is present
gene4 <- data.frame(gene3[, -1, drop = FALSE], row.names = gene3[[1]])

# heatmap() needs a numeric matrix
gene5 <- as.matrix(gene4)


# 25-step colour ramp interpolated from the 8-colour "Blues" palette
library(RColorBrewer)
coul <- colorRampPalette(brewer.pal(8, "Blues"))(25)


# Create the heatmap and save it as a PNG image
png(file = "images/v_gene.png",
    width = 4000,
    height = 3250,
    res = 600)
# plot
heatmap(gene5, col = coul)

dev.off()

## Abundance of Pathway

In [None]:
# Read the pathway table from disk and preview its first rows
pathways <- read.csv(file = "pathway_for_R_ARG_project.csv")
head(x = pathways)

In [None]:
# Transpose the table: rows of the input become columns and vice versa
pathways_t <- as.data.frame(t(pathways))

# The first transposed row (the input's first column) supplies the column
# names. unlist() + as.character() yields the labels themselves even if the
# columns happen to be factors.
colnames(pathways_t) <- as.character(unlist(pathways_t[1, ]))
pathways_t <- pathways_t[-1, ]


# Columns 2 to 22 are the abundance columns; they are not numeric after the
# transpose and must be converted back
i <- 2:22

# Convert to numeric and apply log(x + 1) in a single pass.
# lapply() works column by column on the data frame itself, so it also
# behaves correctly when only one row is present (apply() would return a
# plain vector there). log1p(x) computes log(1 + x) accurately for small x.
pathways_t[i] <- lapply(
  pathways_t[i],
  function(x) log1p(as.numeric(as.character(x)))
)

head(pathways_t)

In [None]:
library(tidyr)

In [None]:
# Transform into long format: one row per (input row, pathway column) pair.
# pivot_longer() replaces the superseded gather() and matches the reshaping
# calls used elsewhere in this file.
long_pathways <- pivot_longer(
  pathways_t,
  cols = "Methicillin_resistance":"Carbapenem_resistance",
  names_to = "pathways",
  values_to = "coverage",
  # Store the pathway names as a factor whose levels follow the column order
  names_transform = list(
    pathways = function(x) factor(x, levels = unique(x))
  )
)

# Stack the rows pathway by pathway and return a plain data frame, which is
# the layout gather() produced (order() keeps ties in their original order)
long_pathways <- as.data.frame(
  long_pathways[order(long_pathways$pathways), ]
)
head(long_pathways)

In [None]:
library(ggplot2)

In [None]:
# Set the repr plot width, height and resolution options for the boxplot
options(
  repr.plot.width = 8,
  repr.plot.height = 3,
  repr.plot.res = 600
)

# Grouped boxplot: pathways on the x axis, coverage on the y axis,
# boxes filled by Group
pathways_plot <- ggplot(
  data = long_pathways,
  mapping = aes(x = pathways, y = coverage, fill = Group)
) +
  geom_boxplot(lwd = 0.3) +
  theme_bw() +
  labs(y = "Abundance in log scale") +
  theme(
    # No x-axis title; tick labels are small, bold and tilted
    axis.title.x = element_blank(),
    axis.text.x = element_text(
      angle = 50,
      vjust = 1,
      hjust = 1,
      size = 6,
      face = "bold"
    ),
    # Compact legend
    legend.key.size = unit(0.5, "cm"),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(0.5, "cm"),
    legend.title = element_text(size = 6),
    legend.text = element_text(size = 5)
  )

pathways_plot