In [None]:
library('dplyr')
library('readr')
library('tidyr')

Get the Trinity gene to transcript map

In [None]:
# Load the Trinity gene-to-transcript map for sample 1. The file is
# tab-separated with no header row, so columns get the default names V1, V2, ...
taxon_1_g2t <- read.table(
  file = "/nesi/nobackup/project-code/taxon/path/to/taxon_1_trinity/trinity_assembly/Trinity.fasta.gene_trans_map",
  header = FALSE,
  sep = "\t"
)
# Preview the first rows and report the table dimensions.
head(taxon_1_g2t)
dim(taxon_1_g2t)

Read in the Salmon output for this assembly

In [None]:
# Load the Salmon quantification table for sample 1 (tab-separated, first
# line holds the column names).
taxon_1_qua <- read.table(
  file = "/nesi/nobackup/project-code/taxon/path/to/taxon_1_trinity/salmon/taxon_1.out/quant.sf",
  header = TRUE,
  sep = "\t"
)
# Preview the first rows and report the table dimensions.
head(taxon_1_qua)
dim(taxon_1_qua)

Leave only transcript ID and TPM

In [None]:
# Reduce the Salmon table to the two columns used downstream:
# Name (transcript ID) and TPM.
taxon_1_clean <- select(taxon_1_qua, Name, TPM)
head(taxon_1_clean)
dim(taxon_1_clean)

Left join the cleaned expression matrix and g2t map

In [None]:
# Attach the map columns to every expression row by matching the Salmon
# transcript ID (Name) against column V2 of the gene-to-transcript map.
# Every row of taxon_1_clean is kept (left join).
taxon_1_mer <- taxon_1_clean %>%
  left_join(taxon_1_g2t, by = c(Name = "V2"))
head(taxon_1_mer)
dim(taxon_1_mer)

For each gene, keep only the isoform with the highest TPM value

In [None]:
# For each value of V1 (gene ID), keep the single row with the largest TPM.
# with_ties = FALSE guarantees exactly one row per group even when several
# isoforms share the maximum. The result is still grouped by V1.
# NOTE(review): rows whose V1 is NA (no match in the map) are treated as one
# group, so at most one of them survives -- confirm none are expected.
taxon_1_sorted <- slice_max(
  group_by(taxon_1_mer, V1),
  order_by = TPM,
  n = 1,
  with_ties = FALSE
)
head(taxon_1_sorted)
dim(taxon_1_sorted)

Leave only transcript and gene IDs

In [None]:
# Drop the grouping, then keep only the transcript ID (Name) and the gene ID
# (V1), in that column order.
taxon_1_map <- select(ungroup(taxon_1_sorted), Name, V1)
head(taxon_1_map)
dim(taxon_1_map)

Output the g2t map for best supported contigs

In [None]:
# Write the two-column map as bare tab-separated text: no header line, no row
# names, no quoting.
write.table(
  x = taxon_1_map,
  file = "/nesi/nobackup/project-code/taxon/path/to/taxon_1_trinity/trinity_assembly/taxon_1_bestsupported_contigs_map.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE
)

Leave only the best supported contigs

In [None]:
# Keep just the transcript ID column; the result is a one-column table.
taxon_1_con_list <- select(taxon_1_map, Name)
head(taxon_1_con_list)
dim(taxon_1_con_list)

Output a list of the best supported contigs

In [None]:
# Write one transcript ID per line: no header line, no row names, no quoting.
write.table(
  x = taxon_1_con_list,
  file = "/nesi/nobackup/project-code/taxon/path/to/taxon_1_trinity/trinity_assembly/taxon_1_bestsupported_contigs_list.txt",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

Repeat for the next two samples for this taxon

In [None]:
# Load the Trinity gene-to-transcript map for sample 2. The file is
# tab-separated with no header row, so columns get the default names V1, V2, ...
taxon_2_g2t <- read.table(
  file = "/nesi/nobackup/project-code/taxon/path/to/taxon_2_trinity/trinity_assembly/Trinity.fasta.gene_trans_map",
  header = FALSE,
  sep = "\t"
)
# Preview the first rows and report the table dimensions.
head(taxon_2_g2t)
dim(taxon_2_g2t)

In [None]:
# Load the Salmon quantification table for sample 2 (tab-separated, first
# line holds the column names).
taxon_2_qua <- read.table(
  file = "/nesi/nobackup/project-code/taxon/path/to/taxon_2_trinity/salmon/taxon_2.out/quant.sf",
  header = TRUE,
  sep = "\t"
)
# Preview the first rows and report the table dimensions.
head(taxon_2_qua)
dim(taxon_2_qua)

In [None]:
# Reduce the Salmon table to the two columns used downstream:
# Name (transcript ID) and TPM.
taxon_2_clean <- select(taxon_2_qua, Name, TPM)
head(taxon_2_clean)
dim(taxon_2_clean)

In [None]:
# Attach the map columns to every expression row by matching the Salmon
# transcript ID (Name) against column V2 of the gene-to-transcript map.
# Every row of taxon_2_clean is kept (left join).
taxon_2_mer <- taxon_2_clean %>%
  left_join(taxon_2_g2t, by = c(Name = "V2"))
head(taxon_2_mer)
dim(taxon_2_mer)

In [None]:
# For each value of V1 (gene ID), keep the single row with the largest TPM.
# with_ties = FALSE guarantees exactly one row per group even when several
# isoforms share the maximum. The result is still grouped by V1.
# NOTE(review): rows whose V1 is NA (no match in the map) are treated as one
# group, so at most one of them survives -- confirm none are expected.
taxon_2_sorted <- slice_max(
  group_by(taxon_2_mer, V1),
  order_by = TPM,
  n = 1,
  with_ties = FALSE
)
head(taxon_2_sorted)
dim(taxon_2_sorted)

In [None]:
# Drop the grouping, then keep only the transcript ID (Name) and the gene ID
# (V1), in that column order.
taxon_2_map <- select(ungroup(taxon_2_sorted), Name, V1)
head(taxon_2_map)
dim(taxon_2_map)

In [None]:
# Write the two-column map as bare tab-separated text: no header line, no row
# names, no quoting.
write.table(
  x = taxon_2_map,
  file = "/nesi/nobackup/project-code/taxon/path/to/taxon_2_trinity/trinity_assembly/taxon_2_bestsupported_contigs_map.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE
)

In [None]:
# Keep just the transcript ID column; the result is a one-column table.
taxon_2_con_list <- select(taxon_2_map, Name)
head(taxon_2_con_list)
dim(taxon_2_con_list)

In [None]:
# Write one transcript ID per line: no header line, no row names, no quoting.
write.table(
  x = taxon_2_con_list,
  file = "/nesi/nobackup/project-code/taxon/path/to/taxon_2_trinity/trinity_assembly/taxon_2_bestsupported_contigs_list.txt",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

In [None]:
# Load the Trinity gene-to-transcript map for sample 3. The file is
# tab-separated with no header row, so columns get the default names V1, V2, ...
taxon_3_g2t <- read.table(
  file = "/nesi/nobackup/project-code/taxon/path/to/taxon_3_trinity/trinity_assembly/Trinity.fasta.gene_trans_map",
  header = FALSE,
  sep = "\t"
)
# Preview the first rows and report the table dimensions.
head(taxon_3_g2t)
dim(taxon_3_g2t)

In [None]:
# Load the Salmon quantification table for sample 3 (tab-separated, first
# line holds the column names).
taxon_3_qua <- read.table(
  file = "/nesi/nobackup/project-code/taxon/path/to/taxon_3_trinity/salmon/taxon_3.out/quant.sf",
  header = TRUE,
  sep = "\t"
)
# Preview the first rows and report the table dimensions.
head(taxon_3_qua)
dim(taxon_3_qua)

In [None]:
# Reduce the Salmon table to the two columns used downstream:
# Name (transcript ID) and TPM.
taxon_3_clean <- select(taxon_3_qua, Name, TPM)
head(taxon_3_clean)
dim(taxon_3_clean)

In [None]:
# Attach the map columns to every expression row by matching the Salmon
# transcript ID (Name) against column V2 of the gene-to-transcript map.
# Every row of taxon_3_clean is kept (left join).
taxon_3_mer <- taxon_3_clean %>%
  left_join(taxon_3_g2t, by = c(Name = "V2"))
head(taxon_3_mer)
dim(taxon_3_mer)

In [None]:
# For each value of V1 (gene ID), keep the single row with the largest TPM.
# with_ties = FALSE guarantees exactly one row per group even when several
# isoforms share the maximum. The result is still grouped by V1.
# NOTE(review): rows whose V1 is NA (no match in the map) are treated as one
# group, so at most one of them survives -- confirm none are expected.
taxon_3_sorted <- slice_max(
  group_by(taxon_3_mer, V1),
  order_by = TPM,
  n = 1,
  with_ties = FALSE
)
head(taxon_3_sorted)
dim(taxon_3_sorted)

In [None]:
# Drop the grouping, then keep only the transcript ID (Name) and the gene ID
# (V1), in that column order.
taxon_3_map <- select(ungroup(taxon_3_sorted), Name, V1)
head(taxon_3_map)
dim(taxon_3_map)

In [None]:
# Write the two-column map as bare tab-separated text: no header line, no row
# names, no quoting.
write.table(
  x = taxon_3_map,
  file = "/nesi/nobackup/project-code/taxon/path/to/taxon_3_trinity/trinity_assembly/taxon_3_bestsupported_contigs_map.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE
)

In [None]:
# Keep just the transcript ID column; the result is a one-column table.
taxon_3_con_list <- select(taxon_3_map, Name)
head(taxon_3_con_list)
dim(taxon_3_con_list)

In [None]:
# Write one transcript ID per line: no header line, no row names, no quoting.
write.table(
  x = taxon_3_con_list,
  file = "/nesi/nobackup/project-code/taxon/path/to/taxon_3_trinity/trinity_assembly/taxon_3_bestsupported_contigs_list.txt",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)