.Rhistory

filter(p.adjust < 0.1) %>%
ggplot(., aes(x = reorder(Description, GeneRatio), y = GeneRatio)) +
geom_point(aes(size = Count, color = p.adjust)) +
theme_minimal() +
labs(x = "MAP Description", y = "Gene Ratio", title = "Nursery vs. CCC") +
coord_flip() +
scale_colour_gradient(low="blue", high="red") +
theme(plot.title = element_text(size=15),  # Increase plot title text size
axis.title = element_text(size=13),  # Increase axis titles text size
axis.text = element_text(size=12),
legend.text = element_text(size=12))
# Write the most recently drawn plot to a PDF in the working directory.
ggsave(filename = "nurseryvsCCC_KEGGenrichment_p0.1.pdf")
# Dot plot of the KEGG enrichment table: rows without a Category are dropped,
# terms with p.adjust < 0.1 are kept, and each term is drawn at its GeneRatio
# with point size = Count and point colour = p.adjust (blue -> red).
filtered_KEGG_enrich_TvI %>%
  drop_na(Category) %>%
  filter(p.adjust < 0.1) %>%
  ggplot(aes(x = reorder(Description, GeneRatio), y = GeneRatio)) +
  geom_point(aes(size = Count, color = p.adjust)) +
  scale_colour_gradient(low = "blue", high = "red") +
  coord_flip() + # descriptions end up on the vertical axis
  labs(
    title = "Nursery vs. CCC",
    x = "MAP Description",
    y = "Gene Ratio"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 15), # larger plot title
    axis.title = element_text(size = 13), # larger axis titles
    axis.text = element_text(size = 12),
    legend.text = element_text(size = 12)
  )
# Terms with adjusted p below 0.1 (6 categories with different descriptions).
filter(filtered_KEGG_enrich_TvI, p.adjust < 0.1)

# Colour ramp running from blue to red ...
blue_to_red <- colorRampPalette(c("blue", "red"))
# ... sampled at two points, i.e. just the two end colours.
n_colors <- blue_to_red(2)

# Print the whole enrichment table.
filtered_KEGG_enrich_TvI

# Candidate pathways of interest: categorised terms with p.adjust < 0.05.
filter(drop_na(filtered_KEGG_enrich_TvI, Category), p.adjust < 0.05)
# KEGG map 04915: Estrogen signaling pathway.
map_id <- "04915"
# Render that map with pathview, colouring nodes by the values in
# nurseryvsCCC_DGEs (KEGG orthology identifiers, species = "ko").
pv_path <- pathview(
  gene.data = nurseryvsCCC_DGEs,
  pathway.id = map_id,
  species = "ko",
  out.suffix = "Estrogen signaling pathway",
  limit = list(
    gene = c(-1, 1), # upper and lower range of log2foldchange values
    cpd = c(-4, 4)
  ),
  discrete = list(
    gene = FALSE, # set this to true when working with discrete data
    cpd = FALSE
  ),
  low = n_colors[1],
  # n_colors is built with blue_to_red(2), so the former n_colors[4] was an
  # out-of-range index (NA). Name the midpoint colour explicitly instead.
  # NOTE(review): "white" assumes the NA midpoint used to be rendered as white
  # (col2rgb(NA) is white) -- confirm against the existing figures.
  mid = "white",
  high = n_colors[2],
  kegg.native = TRUE # TRUE: outputs a traditional KEGG pathway map, FALSE: outputs a different map style
)
# Inspect the object that was passed to pathview in the previous attempt.
nurseryvsCCC_DGEs
# Re-run pathview for the current map_id, this time with
# nurseryvsCCC_KEGG_DGEs as the gene data.
pv_path <- pathview(
  gene.data = nurseryvsCCC_KEGG_DGEs,
  pathway.id = map_id,
  species = "ko",
  out.suffix = "Estrogen signaling pathway",
  limit = list(
    gene = c(-1, 1), # upper and lower range of log2foldchange values
    cpd = c(-4, 4)
  ),
  discrete = list(
    gene = FALSE, # set this to true when working with discrete data
    cpd = FALSE
  ),
  low = n_colors[1],
  # n_colors is built with blue_to_red(2), so the former n_colors[4] was an
  # out-of-range index (NA). Name the midpoint colour explicitly instead.
  # NOTE(review): "white" assumes the NA midpoint used to be rendered as white
  # (col2rgb(NA) is white) -- confirm against the existing figures.
  mid = "white",
  high = n_colors[2],
  kegg.native = TRUE # TRUE: outputs a traditional KEGG pathway map, FALSE: outputs a different map style
)
# Categorised KEGG terms with p.adjust < 0.1, to pick the next pathway.
filtered_KEGG_enrich_TvI %>%
  drop_na(Category) %>%
  filter(p.adjust < 0.1)
# Point map_id at Melanogenesis before plotting; previously this call ran with
# the map_id left over from the Estrogen signaling run while labelling its
# output "Melanogenesis".
map_id <- "04916" # Melanogenesis
# run pathview on a specific path
pv_path <- pathview(
  gene.data = nurseryvsCCC_KEGG_DGEs,
  pathway.id = map_id,
  species = "ko",
  out.suffix = "Melanogenesis",
  limit = list(
    gene = c(-1, 1), # upper and lower range of log2foldchange values
    cpd = c(-4, 4)
  ),
  discrete = list(
    gene = FALSE, # set this to true when working with discrete data
    cpd = FALSE
  ),
  low = n_colors[1],
  # n_colors is built with blue_to_red(2), so the former n_colors[4] was an
  # out-of-range index (NA). Name the midpoint colour explicitly instead.
  # NOTE(review): "white" assumes the NA midpoint used to be rendered as white
  # (col2rgb(NA) is white) -- confirm against the existing figures.
  mid = "white",
  high = n_colors[2],
  kegg.native = TRUE # TRUE: outputs a traditional KEGG pathway map, FALSE: outputs a different map style
)
# KEGG map 04916: Melanogenesis. The ID is given as the bare five digits
# (not "map04916"), matching every other pathview call in this session that
# uses species = "ko".
map_id <- "04916"
# run pathview on a specific path
pv_path <- pathview(
  gene.data = nurseryvsCCC_KEGG_DGEs,
  pathway.id = map_id,
  species = "ko",
  out.suffix = "Melanogenesis",
  limit = list(
    gene = c(-1, 1), # upper and lower range of log2foldchange values
    cpd = c(-4, 4)
  ),
  discrete = list(
    gene = FALSE, # set this to true when working with discrete data
    cpd = FALSE
  ),
  low = n_colors[1],
  # n_colors is built with blue_to_red(2), so the former n_colors[4] was an
  # out-of-range index (NA). Name the midpoint colour explicitly instead.
  # NOTE(review): "white" assumes the NA midpoint used to be rendered as white
  # (col2rgb(NA) is white) -- confirm against the existing figures.
  mid = "white",
  high = n_colors[2],
  kegg.native = TRUE # TRUE: outputs a traditional KEGG pathway map, FALSE: outputs a different map style
)
# KEGG map 04916: Melanogenesis.
map_id <- "04916"
# Render that map with pathview, colouring nodes by nurseryvsCCC_KEGG_DGEs.
pv_path <- pathview(
  gene.data = nurseryvsCCC_KEGG_DGEs,
  pathway.id = map_id,
  species = "ko",
  out.suffix = "Melanogenesis",
  limit = list(
    gene = c(-1, 1), # upper and lower range of log2foldchange values
    cpd = c(-4, 4)
  ),
  discrete = list(
    gene = FALSE, # set this to true when working with discrete data
    cpd = FALSE
  ),
  low = n_colors[1],
  # n_colors is built with blue_to_red(2), so the former n_colors[4] was an
  # out-of-range index (NA). Name the midpoint colour explicitly instead.
  # NOTE(review): "white" assumes the NA midpoint used to be rendered as white
  # (col2rgb(NA) is white) -- confirm against the existing figures.
  mid = "white",
  high = n_colors[2],
  kegg.native = TRUE # TRUE: outputs a traditional KEGG pathway map, FALSE: outputs a different map style
)
# KEGG map 04928 (labelled below as "Parathyroid hormone synthesis, secretion
# and action").
map_id <- "04928"
# Render that map with pathview, colouring nodes by nurseryvsCCC_KEGG_DGEs.
pv_path <- pathview(
  gene.data = nurseryvsCCC_KEGG_DGEs,
  pathway.id = map_id,
  species = "ko",
  out.suffix = "Parathyroid hormone synthesis, secretion and action",
  limit = list(
    gene = c(-1, 1), # upper and lower range of log2foldchange values
    cpd = c(-4, 4)
  ),
  discrete = list(
    gene = FALSE, # set this to true when working with discrete data
    cpd = FALSE
  ),
  low = n_colors[1],
  # n_colors is built with blue_to_red(2), so the former n_colors[4] was an
  # out-of-range index (NA). Name the midpoint colour explicitly instead.
  # NOTE(review): "white" assumes the NA midpoint used to be rendered as white
  # (col2rgb(NA) is white) -- confirm against the existing figures.
  mid = "white",
  high = n_colors[2],
  kegg.native = TRUE # TRUE: outputs a traditional KEGG pathway map, FALSE: outputs a different map style
)
# Widen the cut-off to p.adjust < 0.2 to look for further pathways.
filtered_KEGG_enrich_TvI %>%
  filter(p.adjust < 0.2)
# KEGG map 04657: IL-17 signaling pathway (immune system).
map_id <- "04657"
# Render that map with pathview, colouring nodes by nurseryvsCCC_KEGG_DGEs.
pv_path <- pathview(
  gene.data = nurseryvsCCC_KEGG_DGEs,
  pathway.id = map_id,
  species = "ko",
  out.suffix = "IL-17 signaling pathway",
  limit = list(
    gene = c(-1, 1), # upper and lower range of log2foldchange values
    cpd = c(-4, 4)
  ),
  discrete = list(
    gene = FALSE, # set this to true when working with discrete data
    cpd = FALSE
  ),
  low = n_colors[1],
  # n_colors is built with blue_to_red(2), so the former n_colors[4] was an
  # out-of-range index (NA). Name the midpoint colour explicitly instead.
  # NOTE(review): "white" assumes the NA midpoint used to be rendered as white
  # (col2rgb(NA) is white) -- confirm against the existing figures.
  mid = "white",
  high = n_colors[2],
  kegg.native = TRUE # TRUE: outputs a traditional KEGG pathway map, FALSE: outputs a different map style
)
# KEGG map04626: Plant-pathogen interaction (environmental adaptation).
map_id <- "04626"
# Render that map with pathview, colouring nodes by nurseryvsCCC_KEGG_DGEs.
pv_path <- pathview(
  gene.data = nurseryvsCCC_KEGG_DGEs,
  pathway.id = map_id,
  species = "ko",
  out.suffix = "Plant-pathogen interaction",
  limit = list(
    gene = c(-1, 1), # upper and lower range of log2foldchange values
    cpd = c(-4, 4)
  ),
  discrete = list(
    gene = FALSE, # set this to true when working with discrete data
    cpd = FALSE
  ),
  low = n_colors[1],
  # n_colors is built with blue_to_red(2), so the former n_colors[4] was an
  # out-of-range index (NA). Name the midpoint colour explicitly instead.
  # NOTE(review): "white" assumes the NA midpoint used to be rendered as white
  # (col2rgb(NA) is white) -- confirm against the existing figures.
  mid = "white",
  high = n_colors[2],
  kegg.native = TRUE # TRUE: outputs a traditional KEGG pathway map, FALSE: outputs a different map style
)
# KEGG map04668: TNF signaling pathway
# (Environmental Information Processing; Signal transduction).
map_id <- "04668"
# Render that map with pathview, colouring nodes by nurseryvsCCC_KEGG_DGEs.
pv_path <- pathview(
  gene.data = nurseryvsCCC_KEGG_DGEs,
  pathway.id = map_id,
  species = "ko",
  out.suffix = "TNF signaling pathway",
  limit = list(
    gene = c(-1, 1), # upper and lower range of log2foldchange values
    cpd = c(-4, 4)
  ),
  discrete = list(
    gene = FALSE, # set this to true when working with discrete data
    cpd = FALSE
  ),
  low = n_colors[1],
  # n_colors is built with blue_to_red(2), so the former n_colors[4] was an
  # out-of-range index (NA). Name the midpoint colour explicitly instead.
  # NOTE(review): "white" assumes the NA midpoint used to be rendered as white
  # (col2rgb(NA) is white) -- confirm against the existing figures.
  mid = "white",
  high = n_colors[2],
  kegg.native = TRUE # TRUE: outputs a traditional KEGG pathway map, FALSE: outputs a different map style
)
# KEGG map04612: Antigen processing and presentation (immune system).
map_id <- "04612"
# Render that map with pathview, colouring nodes by nurseryvsCCC_KEGG_DGEs.
pv_path <- pathview(
  gene.data = nurseryvsCCC_KEGG_DGEs,
  pathway.id = map_id,
  species = "ko",
  out.suffix = "Antigen processing and presentation",
  limit = list(
    gene = c(-1, 1), # upper and lower range of log2foldchange values
    cpd = c(-4, 4)
  ),
  discrete = list(
    gene = FALSE, # set this to true when working with discrete data
    cpd = FALSE
  ),
  low = n_colors[1],
  # n_colors is built with blue_to_red(2), so the former n_colors[4] was an
  # out-of-range index (NA). Name the midpoint colour explicitly instead.
  # NOTE(review): "white" assumes the NA midpoint used to be rendered as white
  # (col2rgb(NA) is white) -- confirm against the existing figures.
  mid = "white",
  high = n_colors[2],
  kegg.native = TRUE # TRUE: outputs a traditional KEGG pathway map, FALSE: outputs a different map style
)
# GO-MWU setup. Move into the GO-MWU folder first and only then source the
# helper functions: the relative path "gomwu.functions.R" is resolved against
# the working directory, so sourcing it before setwd() was premature (and the
# call was repeated right after setwd() anyway).
# NOTE(review): absolute, user-specific path -- adjust on other machines.
setwd("~/Library/CloudStorage/OneDrive-UniversityofMiami/GitHub/Ch4_AcerCCC/GO-MWU")
source("gomwu.functions.R")

# Inputs shared by the gomwuStats()/gomwuPlot() calls that follow.
goDatabase <- "go.obo" # download from http://www.geneontology.org/GO.downloads.ontology.shtml
goAnnotations <- "Acropora_iso2go.tab"
input <- "nursery_vs_CCC_fc.csv"
goDivision <- "MF" # either MF, or BP, or CC
# Run the GO-MWU statistics for the division currently held in goDivision.
gomwuStats(
  input, goDatabase, goAnnotations, goDivision,
  # A GO category needs at least this many genes to be considered.
  smallest = 5,
  # A GO category is ignored when it holds more than this fraction of all genes.
  largest = 0.1,
  # Threshold for merging similar (gene-sharing) terms; see the README.
  clusterCutHeight = 0.25,
  # Perl executable; use its full path when it is not on the system PATH.
  perlPath = "perl"
  # Options left switched off:
  #   Alternative="g"  - the MWU test is two-tailed by default; "g" or "l"
  #                      tests for "greater" or "less" instead.
  #   Module=TRUE,Alternative="g"  - for a SIGNED WGCNA module (values: 0 for
  #                      not-in-module genes, kME for in-module genes); the
  #                      matching gomwuPlot call then uses absValue=0.001.
  #   Module=TRUE  - for an UNSIGNED WGCNA module.
)
# Switch to the cellular-component division (one of MF, BP, CC).
goDivision <- "CC"
# Run the GO-MWU statistics for that division.
gomwuStats(
  input, goDatabase, goAnnotations, goDivision,
  # A GO category needs at least this many genes to be considered.
  smallest = 5,
  # A GO category is ignored when it holds more than this fraction of all genes.
  largest = 0.1,
  # Threshold for merging similar (gene-sharing) terms; see the README.
  clusterCutHeight = 0.25,
  # Perl executable; use its full path when it is not on the system PATH.
  perlPath = "perl"
  # Options left switched off:
  #   Alternative="g"  - the MWU test is two-tailed by default; "g" or "l"
  #                      tests for "greater" or "less" instead.
  #   Module=TRUE,Alternative="g"  - for a SIGNED WGCNA module (values: 0 for
  #                      not-in-module genes, kME for in-module genes); the
  #                      matching gomwuPlot call then uses absValue=0.001.
  #   Module=TRUE  - for an UNSIGNED WGCNA module.
)
# Plot the GO-MWU results for the current goDivision and keep what
# gomwuPlot() returns. The colours argument is left at its default here; the
# trailing comma that used to follow treeHeight (a dangling empty argument)
# has been removed.
results <- gomwuPlot(
  input, goAnnotations, goDivision,
  # Genes with the measure value exceeding absValue are counted as "good genes".
  # Use -log(0.05,10) for log p-value (lpv) datasets and 1 for fold change (fc)
  # datasets; use 0.001 for Fisher's exact test (standard GO enrichment) or a
  # WGCNA module (all non-zero genes = "good genes").
  absValue = 1,
  # FDR threshold for plotting (level1=1 plots all GO categories containing
  # genes exceeding absValue). Alternatives left unused: 1, 0.1.
  level1 = 0.05,
  # FDR cutoff to print in regular (not italic) font. Alternatives: 0.1, 0.05.
  level2 = 0.01,
  # FDR cutoff to print in large bold font. Alternatives: 0.05, 0.01.
  level3 = 0.001,
  # Decrease to fit more on one page, or increase (after rescaling the plot so
  # the tree fits the text) for a better "word cloud" effect.
  txtsize = 1.2,
  # Height of the hierarchical clustering tree.
  treeHeight = 0.5
  # colors=c("dodgerblue2","firebrick1","skyblue2","lightcoral") # these are the default colors; un-remark and change if needed
)
# Plot the GO-MWU results for the current goDivision with the colours spelled
# out, then export what gomwuPlot() returned.
results <- gomwuPlot(
  input, goAnnotations, goDivision,
  # Default colours, listed explicitly; change if needed.
  colors = c("dodgerblue2", "firebrick1", "skyblue2", "lightcoral"),
  # Height of the hierarchical clustering tree.
  treeHeight = 0.5,
  # Decrease to fit more on one page, or increase (after rescaling the plot so
  # the tree fits the text) for a better "word cloud" effect.
  txtsize = 1.2,
  # Genes with the measure value exceeding absValue are counted as "good genes".
  # Use -log(0.05,10) for log p-value (lpv) datasets and 1 for fold change (fc)
  # datasets; use 0.001 for Fisher's exact test (standard GO enrichment) or a
  # WGCNA module (all non-zero genes = "good genes").
  absValue = 1,
  # FDR threshold for plotting (level1=1 plots all GO categories containing
  # genes exceeding absValue). Alternatives left unused: 1, 0.1.
  level1 = 0.05,
  # FDR cutoff to print in regular (not italic) font. Alternatives: 0.1, 0.05.
  level2 = 0.01,
  # FDR cutoff to print in large bold font. Alternatives: 0.05, 0.01.
  level3 = 0.001
)
# Save the returned object as CSV in the working directory.
write.csv(x = results, file = "CC_fc_05.csv")
library(tidyverse)
# Read the MF table written by GO-MWU; sep = "" splits on any whitespace.
MWU_result_MF <- read.csv(file = "MWU_MF_nursery_vs_CCC_fc.csv", sep = "")
MWU_result_MF
# Number of terms with adjusted p < 0.05 (117 terms, matching the run above).
summarise(filter(MWU_result_MF, p.adj < 0.05), count = n())
# Switch to the molecular-function division (one of MF, BP, CC).
goDivision <- "MF"
# Run the GO-MWU statistics for that division.
gomwuStats(
  input, goDatabase, goAnnotations, goDivision,
  # A GO category needs at least this many genes to be considered.
  smallest = 5,
  # A GO category is ignored when it holds more than this fraction of all genes.
  largest = 0.1,
  # Threshold for merging similar (gene-sharing) terms; see the README.
  clusterCutHeight = 0.25,
  # Perl executable; use its full path when it is not on the system PATH.
  perlPath = "perl"
  # Options left switched off:
  #   Alternative="g"  - the MWU test is two-tailed by default; "g" or "l"
  #                      tests for "greater" or "less" instead.
  #   Module=TRUE,Alternative="g"  - for a SIGNED WGCNA module (values: 0 for
  #                      not-in-module genes, kME for in-module genes); the
  #                      matching gomwuPlot call then uses absValue=0.001.
  #   Module=TRUE  - for an UNSIGNED WGCNA module.
)
# Plot the GO-MWU results for the current goDivision at the stricter FDR
# levels (0.05 / 0.01 / 0.001), then export what gomwuPlot() returned.
results <- gomwuPlot(
  input, goAnnotations, goDivision,
  # Default colours, listed explicitly; change if needed.
  colors = c("dodgerblue2", "firebrick1", "skyblue2", "lightcoral"),
  # Height of the hierarchical clustering tree.
  treeHeight = 0.5,
  # Decrease to fit more on one page, or increase (after rescaling the plot so
  # the tree fits the text) for a better "word cloud" effect.
  txtsize = 1.2,
  # Genes with the measure value exceeding absValue are counted as "good genes".
  # Use -log(0.05,10) for log p-value (lpv) datasets and 1 for fold change (fc)
  # datasets; use 0.001 for Fisher's exact test (standard GO enrichment) or a
  # WGCNA module (all non-zero genes = "good genes").
  absValue = 1,
  # FDR threshold for plotting (level1=1 plots all GO categories containing
  # genes exceeding absValue). Alternatives left unused: 1, 0.1.
  level1 = 0.05,
  # FDR cutoff to print in regular (not italic) font. Alternatives: 0.1, 0.05.
  level2 = 0.01,
  # FDR cutoff to print in large bold font. Alternatives: 0.05, 0.01.
  level3 = 0.001
)
# Save the returned object as CSV in the working directory.
write.csv(x = results, file = "MF_fc_05.csv")
# Plot the GO-MWU results again for the current goDivision, this time at the
# more permissive FDR levels (0.1 / 0.05 / 0.01).
results <- gomwuPlot(
  input, goAnnotations, goDivision,
  # Default colours, listed explicitly; change if needed.
  colors = c("dodgerblue2", "firebrick1", "skyblue2", "lightcoral"),
  # Height of the hierarchical clustering tree.
  treeHeight = 0.5,
  # Decrease to fit more on one page, or increase (after rescaling the plot so
  # the tree fits the text) for a better "word cloud" effect.
  txtsize = 1.2,
  # Genes with the measure value exceeding absValue are counted as "good genes".
  # Use -log(0.05,10) for log p-value (lpv) datasets and 1 for fold change (fc)
  # datasets; use 0.001 for Fisher's exact test (standard GO enrichment) or a
  # WGCNA module (all non-zero genes = "good genes").
  absValue = 1,
  # FDR threshold for plotting (level1=1 plots all GO categories containing
  # genes exceeding absValue). Alternatives left unused: 1, 0.05.
  level1 = 0.1,
  # FDR cutoff to print in regular (not italic) font. Alternatives: 0.1, 0.01.
  level2 = 0.05,
  # FDR cutoff to print in large bold font. Alternatives: 0.05, 0.001.
  level3 = 0.01
)
# Number of MF terms with adjusted p < 0.1 (recorded result: 2).
summarise(filter(MWU_result_MF, p.adj < 0.1), count = n())
# Switch to the biological-process division (one of MF, BP, CC).
goDivision <- "BP"
# Run the GO-MWU statistics for that division.
gomwuStats(
  input, goDatabase, goAnnotations, goDivision,
  # A GO category needs at least this many genes to be considered.
  smallest = 5,
  # A GO category is ignored when it holds more than this fraction of all genes.
  largest = 0.1,
  # Threshold for merging similar (gene-sharing) terms; see the README.
  clusterCutHeight = 0.25,
  # Perl executable; use its full path when it is not on the system PATH.
  perlPath = "perl"
  # Options left switched off:
  #   Alternative="g"  - the MWU test is two-tailed by default; "g" or "l"
  #                      tests for "greater" or "less" instead.
  #   Module=TRUE,Alternative="g"  - for a SIGNED WGCNA module (values: 0 for
  #                      not-in-module genes, kME for in-module genes); the
  #                      matching gomwuPlot call then uses absValue=0.001.
  #   Module=TRUE  - for an UNSIGNED WGCNA module.
)
# Return to the project root so the relative paths used below resolve there.
# NOTE(review): absolute, user-specific path -- adjust on other machines.
setwd("~/Library/CloudStorage/OneDrive-UniversityofMiami/GitHub/Ch4_AcerCCC")
# Packages for the WGCNA part of the session. Load order is left as recorded
# because later packages can mask names from earlier ones.
library(tidyverse)
library(WGCNA)
library(flashClust)
library(ape)
# Session-wide option: do not convert strings to factors.
options(stringsAsFactors=FALSE)
# Presumably enables multi-threading in WGCNA (going by the function name).
allowWGCNAThreads()
# Bring in the objects saved by the DESeq2 script; load() returns the names
# of everything it restored.
lnames <- load("RData_files/data4wgcna.RData")

# Transposed copy of vsd.wg.
datt <- t(vsd.wg)
ncol(datt) # 15526
nrow(datt) # 44

# Look at the design table and compare its row names with the columns of
# vsd.wg.
head(design)
str(design)
colnames(vsd.wg)
all.equal(colnames(vsd.wg), rownames(design)) # FALSE

# Overwrite the design row names with the vsd.wg column names and re-check.
rownames(design) <- colnames(vsd.wg)
head(design)
all.equal(colnames(vsd.wg), rownames(design)) # TRUE
rownames(datt)
# Location as 0/1 indicator columns (0 = FALSE, 1 = TRUE).
CCC <- as.numeric(design$Location == "CCC")
Nursery <- as.numeric(design$Location == "nursery")

# Genotype as 0/1 indicator columns (0 = FALSE, 1 = TRUE).
design$Genotype
Cheetos_B <- as.numeric(design$Genotype == "Cheetos_B")
MiamiBeach_C <- as.numeric(design$Genotype == "MiamiBeach_C")
SunnyIsles_E <- as.numeric(design$Genotype == "SunnyIsles_E")

# Assemble the indicators into one trait table carrying the design row names.
traits <- data.frame(
  cbind(CCC, Nursery, Cheetos_B, MiamiBeach_C, SunnyIsles_E)
)
rownames(traits) <- rownames(design)
traits
# Exploratory previews of ways to parse the sample names held in the row
# names of traits. Nothing in this block is assigned back.

# Leading run of characters before the first underscore.
traits %>%
  rownames_to_column() %>%
  mutate(rowname = str_extract(rowname, "^[^_]*"))

# Row names exposed as a plain column.
traits %>%
  rownames_to_column()

# Species/ID split attempt. It used to end in `-> traits`, which replaced the
# 0/1 trait table with this experiment (the pattern keeps only the first
# non-underscore character, so there is nothing to split on "."); it is now
# preview-only.
traits %>%
  rownames_to_column() %>%
  mutate(rowname = str_extract(rowname, "^_*[^_]")) %>%
  separate(rowname, into = c("Species", "ID"), sep = "\\.") %>%
  mutate(Species = "Acropora cervicornis") %>%
  mutate(ID = as.double(ID))

# Same extraction on its own.
traits %>%
  rownames_to_column() %>%
  mutate(rowname = str_extract(rowname, "^_*[^_]"))
# Rebuild the 0/1 trait table from the indicator vectors and restore the
# sample names as row names.
traits <- data.frame(
  cbind(CCC, Nursery, Cheetos_B, MiamiBeach_C, SunnyIsles_E)
)
rownames(traits) <- rownames(design)

# Preview-only attempts at extracting pieces of the sample names; each line
# prints its result and leaves traits untouched.
mutate(rownames_to_column(traits), rowname = str_extract(rowname, "^_*[^_]"))
mutate(rownames_to_column(traits), rowname = str_extract(rowname, "[^_]"))
mutate(rownames_to_column(traits), rowname = str_extract(rowname, "^_*[_]"))
mutate(rownames_to_column(traits), rowname = str_extract(rowname, "^_*[_*]"))

# Row names exposed as a plain column, for reference.
rownames_to_column(traits)
# More preview-only pattern attempts on the sample names; each line prints
# its result and leaves traits untouched.

# Two underscore-separated fields followed by a trailing underscore.
mutate(rownames_to_column(traits), rowname = str_extract(rowname, "[^_]+_[^_]+_$"))
rownames_to_column(traits)
mutate(rownames_to_column(traits), rowname = str_extract(rowname, "[^_]+_[^_]+_$"))

# First field plus the first character of the second field.
mutate(rownames_to_column(traits), rowname = str_extract(rowname, "[^_]+_[^_]"))
mutate(rownames_to_column(traits), rowname = str_extract(rowname, "[^_]+_[^_*]"))

# Last two, then last three, underscore-separated fields.
mutate(rownames_to_column(traits), rowname = str_extract(rowname, "([^_]+_[^_]+)$"))
mutate(rownames_to_column(traits), rowname = str_extract(rowname, "([^_]+_[^_]+_[^_]+)$"))

# Last three fields, split at the first underscore into Location and
# Genotype (extra = "merge" keeps the remaining pieces together in Genotype).
separate(
  mutate(
    rownames_to_column(traits),
    rowname = str_extract(rowname, "([^_]+_[^_]+_[^_]+)$")
  ),
  rowname,
  into = c("Location", "Genotype"),
  sep = "_",
  extra = "merge"
)
# Keep the version that worked: take the last three underscore-separated
# fields of each sample name and split them at the first underscore into
# Location and Genotype, then store the result back in traits.
traits <- traits %>%
  rownames_to_column() %>%
  mutate(rowname = str_extract(rowname, "([^_]+_[^_]+_[^_]+)$")) %>%
  separate(
    rowname,
    into = c("Location", "Genotype"),
    sep = "_",
    extra = "merge"
  )
# Import the AcDC report-card export and look at it.
AcDC_traits <- read_csv("AcDC_FullReportCards (1)/dataExport.csv")
AcDC_traits

# Preview: rename the genotype column to match the trait table.
rename(AcDC_traits, Genotype = genotype)

# Preview: recode a single genotype label.
AcDC_traits %>%
  rename(Genotype = genotype) %>%
  mutate(Genotype = case_when(Genotype == "Cheetos-B" ~ "Cheetos_B"))

rename(AcDC_traits, Genotype = genotype)
# Open the table in the data viewer (interactive only).
View(AcDC_traits)

# Preview: recode all three genotype labels to the names used in traits.
AcDC_traits %>%
  rename(Genotype = genotype) %>%
  mutate(
    Genotype = case_when(
      Genotype == "Cheetos-B" ~ "Cheetos_B",
      Genotype == "MB-C" ~ "MiamiBeach_C",
      Genotype == "Sunny Isles-E" ~ "SunnyIsles_E"
    )
  )

# Same recode, stored back. case_when() has no fallback branch here, so any
# label other than these three becomes NA.
AcDC_traits <- AcDC_traits %>%
  rename(Genotype = genotype) %>%
  mutate(
    Genotype = case_when(
      Genotype == "Cheetos-B" ~ "Cheetos_B",
      Genotype == "MB-C" ~ "MiamiBeach_C",
      Genotype == "Sunny Isles-E" ~ "SunnyIsles_E"
    )
  )
AcDC_traits
# Preview: keep only the genotype, the trait name and its mean.
select(AcDC_traits, "Genotype", "trait", "mean")

# Preview: spread the traits into one column each (one row per genotype).
AcDC_traits %>%
  select("Genotype", "trait", "mean") %>%
  pivot_wider(names_from = "trait", values_from = "mean")

# Same reshaping, stored back.
AcDC_traits <- AcDC_traits %>%
  select("Genotype", "trait", "mean") %>%
  pivot_wider(names_from = "trait", values_from = "mean")
AcDC_traits
# Combine the 0/1 trait table with the AcDC physiology traits by Genotype.
# The recorded attempts are kept in order; the last assignment is the one
# that defines traits_withphysio.

# Preview the join, then store and inspect it.
full_join(traits, AcDC_traits, by = c("Genotype"))
traits_withphysio <- full_join(traits, AcDC_traits, by = c("Genotype"))
View(traits_withphysio)

# Attempt: drop every row containing an NA.
traits_withphysio <- full_join(traits, AcDC_traits, by = c("Genotype")) %>%
  drop_na()

# Back to the plain join.
traits_withphysio <- full_join(traits, AcDC_traits, by = c("Genotype"))

# Attempt: remove one column where not all genotypes have info.
traits_withphysio <- full_join(traits, AcDC_traits, by = c("Genotype")) %>%
  select(!c("6-month linear growth"))

# A stray, unfinished `AcDC_traits$` used to sit here; it made the next
# statement parse as `AcDC_traits$traits_withphysio <- ...`. It is removed so
# the assignment targets traits_withphysio as intended.
traits_withphysio <- full_join(traits, AcDC_traits, by = c("Genotype")) %>%
  select(!c("6-month linear growth", "bulk density", "specific growth rate")) # remove columns where not all genotypes have info

# Preview of the final result.
full_join(traits, AcDC_traits, by = c("Genotype")) %>%
  select(!c("6-month linear growth", "bulk density", "specific growth rate"))

# Final version: remove columns where not all genotypes have info.
traits_withphysio <- full_join(traits, AcDC_traits, by = c("Genotype")) %>%
  select(!c("6-month linear growth", "bulk density", "specific growth rate"))
# Print the "cooks" assay stored in dds. The first recorded attempt had an
# unbalanced `[[` (a parse error) and the print was repeated; one well-formed
# call is kept.
assays(dds)[["cooks"]]

# Size factors currently stored on dds.
dds$sizeFactor

# Estimate size factors. The recorded call on `data` is dropped: no object of
# that name is created anywhere in the visible session, and the call was
# immediately retried with dds.
estimateSizeFactors(dds)
SF.data <- estimateSizeFactors(dds)
print(sizeFactors(SF.data))