In [4]:
library(DESeq2)
library(tidyverse)

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

✔ ggplot2 3.3.4     ✔ purrr   0.3.4
✔ tibble  3.1.2     ✔ dplyr   1.0.7
✔ tidyr   1.1.3     ✔ stringr 1.4.0
✔ readr   1.4.0     ✔ forcats 0.5.1

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::collapse()   masks IRanges::collapse()
✖ dplyr::combine()    masks Biobase::combine(), BiocGenerics::combine()
✖ dplyr::count()      masks matrixStats::count()
✖ dplyr::desc()       masks IRanges::desc()
✖ tidyr::expand()     masks S4Vectors::expand()
✖ dplyr::filter()

In [5]:
# Load the featureCounts count tables produced after STAR alignment.
# Each table is read with its header row, then columns 2-6 are dropped so that
# only the first column and the per-sample count columns remain.
# (Presumably columns 2-6 are the featureCounts annotation columns
# Chr/Start/End/Strand/Length -- confirm against the input files.)

# mouseexp4 counts
Ca_counts <- read.table(
  "/data/haley/RNAseq/mouseexp4/featurecounts/C_alb_mRNA_STAR_counts.txt",
  header = TRUE
) %>%
  select(-c(2:6))

# mouseexp3/2 and in vitro counts
Cacounts2 <- read.table(
  "/data/haley/RNAseq/mouseexp3/featurecounts2/C_alb_mRNA_STAR_counts.txt",
  header = TRUE
) %>%
  select(-c(2:6))

# Combine both count tables into one, matching rows on the shared Geneid column
# (merge() keeps only the Geneid values present in both tables).
Cacounts3 <- merge(x = Ca_counts, y = Cacounts2, by = 'Geneid')

# Print the merged column names; copy them from the output into the matching
# column of the metadata file used below.
names(Cacounts3)

In [6]:
# Load the sample metadata tables holding the experimental information.
# Edit the tables as necessary in Excel (but save them as .txt).

# Metadata for the mouse samples (path is relative to the working directory).
Ca_samples <- read.table("mouseexp4_Ca-metadata.txt", header = TRUE)
# Metadata for the in vitro samples.
Ca_samples2 <- read.table("/data/haley/RNAseq/mouseexp3/DESeq_AfterSTAR/mouseexp3_Ca-metadata.txt", header = TRUE)
# Keep only rows 22-27 to get rid of extraneous sample rows.
# NOTE(review): hard-coded row positions -- re-check if the metadata file changes.
Ca_samples2 <- Ca_samples2[22:27, ]

# Combine the rows of the mouse and in vitro samples.
Ca_samples3 <- rbind(Ca_samples, Ca_samples2)
# Drop rows 30-33 (the PolyA samples).
# NOTE(review): hard-coded row positions -- re-check if either metadata file changes.
Ca_samples4 <- Ca_samples3[-c(30:33), ]
rownames(Ca_samples4) <- NULL
Ca_samples4

# Fail early, with a readable message, if any metadata filename is NA or has no
# matching column in the merged counts table. Without this the column selection
# below would stop with an opaque "undefined columns selected" error.
missing_files <- setdiff(Ca_samples4$filename, colnames(Cacounts3))
if (length(missing_files) > 0) {
  stop(
    "Metadata filename(s) not found among the count table columns: ",
    paste(missing_files, collapse = ", "),
    call. = FALSE
  )
}

# Column order of the counts table: Geneid followed by the samples in the row
# order of the sample metadata.
col.order_Ca <- c("Geneid", Ca_samples4$filename)
# Selecting by name both reorders the columns to match the sample table and
# drops the count columns not listed there (e.g. the PolyA samples).
Ca_counts4 <- Cacounts3[, col.order_Ca]

# Enforce (rather than merely print) that the count columns line up one-to-one
# with the metadata rows before the columns are renamed; identical() also
# catches NA entries and length mismatches.
if (!identical(colnames(Ca_counts4)[-1], as.character(Ca_samples4$filename))) {
  stop(
    "Count table columns do not match Ca_samples4$filename; ",
    "check the metadata for duplicated or mistyped filenames.",
    call. = FALSE
  )
}

# Replace the file-path column names with the short sample names (these become
# the sample names in later analysis), then move Geneid into the row names.
colnames(Ca_counts4) <- c('Geneid', paste0(Ca_samples4$samplename))
rownames(Ca_counts4) <- Ca_counts4[, 1]
Ca_counts4[, 1] <- NULL


# write.csv(Ca_counts4, "Cacounts4.csv")
# write.csv(Ca_samples3, "Ca_samples3.csv")

samplename,sample,condition,species1,species2,environment,filename
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
mCa_13AM1,mCa_13AM1,mouse_Ca,Candida_albicans,,mouse,...mouseexp2.Ca_starout_Aallele.Sample01_13AM1_v2_S17_R1_001_fastp_output.fastq.gz_Aligned.sortedByCoord.out.bam
mCa_13AM2,mCa_13AM2,mouse_Ca,Candida_albicans,,mouse,...mouseexp2.Ca_starout_Aallele.Sample02_13AM2_v2_S18_R1_001_fastp_output.fastq.gz_Aligned.sortedByCoord.out.bam
mCa_13BM3,mCa_13BM3,mouse_Ca,Candida_albicans,,mouse,...mouseexp2.Ca_starout_Aallele.Sample03_13BM3_v2_S19_R1_001_fastp_output.fastq.gz_Aligned.sortedByCoord.out.bam
mCa_13CF2,mCa_13CF2,mouse_Ca,Candida_albicans,,mouse,...mouseexp2.Ca_starout_Aallele.Sample04_13CF2_v2_S20_R1_001_fastp_output.fastq.gz_Aligned.sortedByCoord.out.bam
mCa_13CF3,mCa_13CF3,mouse_Ca,Candida_albicans,,mouse,...mouseexp2.Ca_starout_Aallele.Sample05_13CF3_v2_S21_R1_001_fastp_output.fastq.gz_Aligned.sortedByCoord.out.bam
mCa_21AM1,mCa_21AM1,mouse_Ca,Candida_albicans,,mouse,...mouseexp2.Ca_starout_Aallele.Sample01_21AM1_S1_R1_001_fastp_output.fastq.gz_Aligned.sortedByCoord.out.bam
mCa_21AM2,mCa_21AM2,mouse_Ca,Candida_albicans,,mouse,...mouseexp2.Ca_starout_Aallele.Sample02_21AM2_S2_R1_001_fastp_output.fastq.gz_Aligned.sortedByCoord.out.bam
mCa_21AM3,mCa_21AM3,mouse_Ca,Candida_albicans,,mouse,...mouseexp2.Ca_starout_Aallele.Sample03_21AM3_S3_R1_001_fastp_output.fastq.gz_Aligned.sortedByCoord.out.bam
mCa_21BM4,mCa_21BM4,mouse_Ca,Candida_albicans,,mouse,...mouseexp2.Ca_starout_Aallele.Sample04_21BM4_S4_R1_001_fastp_output.fastq.gz_Aligned.sortedByCoord.out.bam
mCa_21CF1,mCa_21CF1,mouse_Ca,Candida_albicans,,mouse,...mouseexp2.Ca_starout_Aallele.Sample05_21CF1_S5_R1_001_fastp_output.fastq.gz_Aligned.sortedByCoord.out.bam


In [8]:
# Save the generated counts and sample tables to an .RData file so they can be
# loaded into other notebooks later. (The tables can also be used directly in
# further analysis without saving them.)
# Note: this reassigns Ca_counts and Ca_samples, replacing the tables that were
# read in under those names in the earlier cells.
Ca_samples <- Ca_samples4
Ca_counts <- Ca_counts4
save(
  list = c("Ca_counts", "Ca_samples"),
  file = "2022_03-31_RNAseqcountsandsamples.RData"
)