# Distributions

In [None]:
# Inside each of the folders 03-Blast and 04-FilterBlast we ran:
# ls | grep -v ".tsv" | xargs -I {} sh -c "cat {}/* | cut -f 4 > lens.{}.tsv" & # Alignment length
# ls | grep -v ".tsv" | xargs -I {} sh -c "cat {}/* | cut -f 3 > pident.{}.tsv" & # Percent identity
# ls | grep -v ".tsv" | xargs -I {} sh -c "cat {}/* | cut -f 17 > qcovs.{}.tsv" & # Query coverage

In [2]:
# Library
library(tidyverse)

In [4]:
# Sample IDs as they appear in the input/output file names, in plotting order
s <- c(
    "GP_1", "GP_2", "GP_3",
    "A0_2", "B0_2", "C0_2",
    "A1_2", "B1_2", "C1_2",
    "A5_0", "B5_0", "C5_0",
    "A8_0", "B8_0", "C8_0"
)
# Fill colour per sample ID; each consecutive triplet of samples shares one colour.
# NOTE: the variable `c` coexists with base::c() -- calls such as c(...) still
# resolve to the function, but the name is easy to misread.
c <- setNames(
    rep(c("#F8766D", "#B79F00", "#00BF7D", "#00B0F6", "#E76BF3"), each = 3),
    s
)

## Length: Plot alignment lengths before and after filtering

In [4]:
histo_lens <- function(sample, color) {
    # Build overlaid histograms of alignment length for one sample, before
    # and after filtering.
    #
    # Args:
    #   sample: Sample ID as used in the input file names (e.g. "GP_1").
    #   color:  Fill colour used for both histograms.
    #
    # Returns:
    #   A ggplot object. Nothing is drawn or written to disk here.
    #
    # Reads 03-Blast/lens.<sample>.tsv (pre-filter) and
    # 04-FilterBlast/lens.post_filter.<sample>.tsv (post-filter), relative to
    # the working directory; the first column of each file (V1) is plotted.

    # File-name sample IDs -> labels shown in the plot title
    display_names <- c(
        GP_1 = "EN0.2A", GP_2 = "EN0.2B", GP_3 = "EN0.2C",
        A0_2 = "OP0.2A", B0_2 = "OP0.2B", C0_2 = "OP0.2C",
        A1_2 = "OP1.2A", B1_2 = "OP1.2B", C1_2 = "OP1.2C",
        A5_0 = "OP5.0A", B5_0 = "OP5.0B", C5_0 = "OP5.0C",
        A8_0 = "OP8.0A", B8_0 = "OP8.0B", C8_0 = "OP8.0C"
    )

    # Fail early with a readable message instead of an NA title lookup and an
    # opaque "cannot open the connection" error from read.table()
    sample <- as.character(sample)
    if (length(sample) != 1L || !(sample %in% names(display_names))) {
        stop(
            "Unknown sample '", paste(sample, collapse = ", "),
            "'; expected one of: ",
            paste(names(display_names), collapse = ", "),
            call. = FALSE
        )
    }
    name <- display_names[[sample]]

    # Read files
    pre <- read.table(paste0("03-Blast/lens.", sample, ".tsv"))
    pst <- read.table(paste0("04-FilterBlast/lens.post_filter.", sample, ".tsv"))

    # %+replace% swaps the listed theme elements wholesale (unset properties
    # are not merged from theme_classic()), so it is kept as-is.
    plot_theme <- theme_classic() %+replace% theme(
        axis.text = element_text(size = 16),
        axis.title = element_text(size = 18),
        title = element_text(size = 22, face = "bold")
    )

    # Histogram: pre-filter values outlined in light gray, post-filter values
    # drawn on top with a black outline
    ggplot() +
        geom_histogram(
            data = pre, aes(x = V1), bins = 100,
            fill = color, color = "lightgray"
        ) +
        geom_histogram(
            data = pst, aes(x = V1), bins = 100,
            fill = color, color = "black"
        ) +
        labs(
            x = "Alignment length", y = "Count",
            title = paste0("Length: ", name)
        ) +
        plot_theme
}

In [5]:
# Render and save the alignment-length histogram of every sample.
# `s` (sample IDs) and `c` (fill colour keyed by sample ID) are defined in the
# "Define samples and colors" cell; this replaces fifteen copy-pasted cells
# that differed only in the sample ID and its colour.
dir.create("07-Plots", showWarnings = FALSE, recursive = TRUE)
for (sample_id in s) {
    p <- histo_lens(sample_id, c[[sample_id]])
    # Save figure
    png(
        filename = paste0("07-Plots/lens.", sample_id, ".png"),
        width = 2300, height = 2000, res = 300
    )
    # print() is required: ggplot objects are not auto-printed inside a loop
    # (or under source()). `finally` closes the device even if drawing fails.
    tryCatch(print(p), finally = dev.off())
}

## Pident: Plot percent identity before and after filtering

In [20]:
histo_pident <- function(sample, color) {
    # Build overlaid histograms of percent identity for one sample, before
    # and after filtering.
    #
    # Args:
    #   sample: Sample ID as used in the input file names (e.g. "GP_1").
    #   color:  Fill colour used for both histograms.
    #
    # Returns:
    #   A ggplot object. Nothing is drawn or written to disk here.
    #
    # Reads 03-Blast/pident.<sample>.tsv (pre-filter) and
    # 04-FilterBlast/pident.post_filter.<sample>.tsv (post-filter), relative
    # to the working directory; the first column of each file (V1) is plotted.

    # File-name sample IDs -> labels shown in the plot title
    display_names <- c(
        GP_1 = "EN0.2A", GP_2 = "EN0.2B", GP_3 = "EN0.2C",
        A0_2 = "OP0.2A", B0_2 = "OP0.2B", C0_2 = "OP0.2C",
        A1_2 = "OP1.2A", B1_2 = "OP1.2B", C1_2 = "OP1.2C",
        A5_0 = "OP5.0A", B5_0 = "OP5.0B", C5_0 = "OP5.0C",
        A8_0 = "OP8.0A", B8_0 = "OP8.0B", C8_0 = "OP8.0C"
    )

    # Fail early with a readable message instead of an NA title lookup and an
    # opaque "cannot open the connection" error from read.table()
    sample <- as.character(sample)
    if (length(sample) != 1L || !(sample %in% names(display_names))) {
        stop(
            "Unknown sample '", paste(sample, collapse = ", "),
            "'; expected one of: ",
            paste(names(display_names), collapse = ", "),
            call. = FALSE
        )
    }
    name <- display_names[[sample]]

    # Read files
    pre <- read.table(paste0("03-Blast/pident.", sample, ".tsv"))
    pst <- read.table(paste0("04-FilterBlast/pident.post_filter.", sample, ".tsv"))

    # %+replace% swaps the listed theme elements wholesale (unset properties
    # are not merged from theme_classic()), so it is kept as-is.
    plot_theme <- theme_classic() %+replace% theme(
        axis.text = element_text(size = 16),
        axis.title = element_text(size = 18),
        title = element_text(size = 22, face = "bold")
    )

    # Histogram: pre-filter values outlined in light gray, post-filter values
    # drawn on top with a black outline
    ggplot() +
        geom_histogram(
            data = pre, aes(x = V1), bins = 100,
            fill = color, color = "lightgray"
        ) +
        geom_histogram(
            data = pst, aes(x = V1), bins = 100,
            fill = color, color = "black"
        ) +
        labs(
            x = "Perc. Identity", y = "Count",
            title = paste0("Identity: ", name)
        ) +
        plot_theme
}

In [21]:
# Render and save the percent-identity histogram of every sample.
# `s` (sample IDs) and `c` (fill colour keyed by sample ID) are defined in the
# "Define samples and colors" cell; this replaces fifteen copy-pasted cells
# that differed only in the sample ID and its colour.
dir.create("07-Plots", showWarnings = FALSE, recursive = TRUE)
for (sample_id in s) {
    p <- histo_pident(sample_id, c[[sample_id]])
    # Save figure
    png(
        filename = paste0("07-Plots/pident.", sample_id, ".png"),
        width = 2300, height = 2000, res = 300
    )
    # print() is required: ggplot objects are not auto-printed inside a loop
    # (or under source()). `finally` closes the device even if drawing fails.
    tryCatch(print(p), finally = dev.off())
}

## Qcovs: Plot query coverage before and after filtering

In [3]:
histo_qcovs <- function(sample, color) {
    # Build overlaid histograms of query coverage for one sample, before and
    # after filtering, on a fixed 0-100 x axis.
    #
    # Args:
    #   sample: Sample ID as used in the input file names (e.g. "GP_1").
    #   color:  Fill colour used for both histograms.
    #
    # Returns:
    #   A ggplot object. Nothing is drawn or written to disk here.
    #
    # Reads 03-Blast/qcovs.<sample>.tsv (pre-filter) and
    # 04-FilterBlast/qcovs.post_filter.<sample>.tsv (post-filter), relative
    # to the working directory; the first column of each file (V1) is plotted.

    # File-name sample IDs -> labels shown in the plot title
    display_names <- c(
        GP_1 = "EN0.2A", GP_2 = "EN0.2B", GP_3 = "EN0.2C",
        A0_2 = "OP0.2A", B0_2 = "OP0.2B", C0_2 = "OP0.2C",
        A1_2 = "OP1.2A", B1_2 = "OP1.2B", C1_2 = "OP1.2C",
        A5_0 = "OP5.0A", B5_0 = "OP5.0B", C5_0 = "OP5.0C",
        A8_0 = "OP8.0A", B8_0 = "OP8.0B", C8_0 = "OP8.0C"
    )

    # Fail early with a readable message instead of an NA title lookup and an
    # opaque "cannot open the connection" error from read.table()
    sample <- as.character(sample)
    if (length(sample) != 1L || !(sample %in% names(display_names))) {
        stop(
            "Unknown sample '", paste(sample, collapse = ", "),
            "'; expected one of: ",
            paste(names(display_names), collapse = ", "),
            call. = FALSE
        )
    }
    name <- display_names[[sample]]

    # Read files
    pre <- read.table(paste0("03-Blast/qcovs.", sample, ".tsv"))
    pst <- read.table(paste0("04-FilterBlast/qcovs.post_filter.", sample, ".tsv"))

    # %+replace% swaps the listed theme elements wholesale (unset properties
    # are not merged from theme_classic()), so it is kept as-is.
    plot_theme <- theme_classic() %+replace% theme(
        axis.text = element_text(size = 16),
        axis.title = element_text(size = 18),
        title = element_text(size = 22, face = "bold")
    )

    # Histogram: pre-filter values outlined in light gray, post-filter values
    # drawn on top with a black outline
    ggplot() +
        geom_histogram(
            data = pre, aes(x = V1), bins = 100,
            fill = color, color = "lightgray"
        ) +
        geom_histogram(
            data = pst, aes(x = V1), bins = 100,
            fill = color, color = "black"
        ) +
        labs(
            x = "Query coverage", y = "Count",
            title = paste0("Coverage: ", name)
        ) +
        # Same 0-100 limits as xlim(0, 100), so the bin layout is unchanged,
        # but without censoring: with bins = 100 the first and last bins are
        # centred on 0 and 100 and extend past the limits, so the default
        # out-of-bounds handling dropped those two bars from each layer
        # ("Removed 2 rows containing missing values (`geom_bar()`)").
        # NOTE(review): rows whose value lies outside 0-100 were previously
        # reported as "non-finite" by stat_bin(); they presumably still fall
        # outside every bin -- confirm whether such rows are expected.
        scale_x_continuous(limits = c(0, 100), oob = scales::oob_keep) +
        plot_theme
}

In [6]:
# Render and save the query-coverage histogram of every sample.
# `s` (sample IDs) and `c` (fill colour keyed by sample ID) are defined in the
# "Define samples and colors" cell; this replaces fifteen copy-pasted cells
# that differed only in the sample ID and its colour.
#
# Earlier per-sample runs of these cells emitted ggplot2 warnings for every
# sample: rows removed as non-finite by `stat_bin()` (from 8 rows for B0_2 up
# to 13818 for A8_0) and two bars removed per histogram layer by `geom_bar()`.
dir.create("07-Plots", showWarnings = FALSE, recursive = TRUE)
for (sample_id in s) {
    p <- histo_qcovs(sample_id, c[[sample_id]])
    # Save figure
    png(
        filename = paste0("07-Plots/qcovs.", sample_id, ".png"),
        width = 2300, height = 2000, res = 300
    )
    # print() is required: ggplot objects are not auto-printed inside a loop
    # (or under source()). `finally` closes the device even if drawing fails.
    tryCatch(print(p), finally = dev.off())
}
