Skip to content

jinghuazhao/Omics-analysis

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

Omics analysis of complex traits

Seeding comprehensive analysis in their named directories (e.g., BMI), the repository links to technical issues documented in physalia, Mixed-Models, software-notes and other sister repositories (SUMSTATS, FM-pipeline, PW-pipeline, hess-pipeline, TWAS-pipeline, EWAS-fusion) for fine-mapping, pathway analysis, TWAS, Mendelian randomisation, predictive analytics and other topics as highlighted in the wiki page.

Earlier or broader aspects have been reflected in the following repositories: Haplotype-Analysis, misc, R.

The figure below was generated with eQTL.R.

Resources

--- Glossary of Genetics ---

NHGRI Genetics glossary

--- Annotation ---

The Ensembl public MySQL Servers

The following script retrieves gene information from Ensembl and lists the available attributes (columns), as well as the filters that match "gene".

library(biomaRt)
# Browse the available BioMart services and the data sets of two of them.
listMarts()
mart <- useMart(biomart = "ENSEMBL_MART_FUNCGEN")
listDatasets(mart = mart)
mart <- useMart(biomart = "ensembl")
listDatasets(mart = mart)

# Connect to the human gene data set served by the GRCh37 host.
ensembl <- useMart(
  biomart = "ensembl",
  dataset = "hsapiens_gene_ensembl",
  host = "grch37.ensembl.org",
  path = "/biomart/martservice"
)

# Catalogue the attributes (columns) and the filters this data set exposes.
attr <- listAttributes(mart = ensembl)
filter <- listFilters(mart = ensembl)

# Retrieve the selected columns; no filters are applied to the query.
attr_select <- c(
  "ensembl_gene_id", "chromosome_name", "start_position", "end_position",
  "description", "hgnc_symbol", "transcription_start_site"
)
gene <- getBM(attributes = attr_select, mart = ensembl)

# Show the filters matching "gene".
searchFilters(mart = ensembl, pattern = "gene")

See also https://sites.google.com/site/jpopgen/wgsa for precompiled annotation. Alternatively,

# GENCODE v19
# Location of the gzipped GTF annotation file (release_19 in the URL path).
url <- "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_human/release_19/gencode.v19.chr_patch_hapl_scaff.annotation.gtf.gz"
# Import directly from the URL; the `::` prefix means rtracklayer does not
# have to be attached with library() first.
gtf <- rtracklayer::import(url)
# Convert the imported object into a data frame.
gencode <- as.data.frame(gtf)

--- Linkage disequilibrium ---

LDlink: https://ldlink.nci.nih.gov/?tab=home.

NyuWa Chinese Population Variant Database (NCVD): http://bigdata.ibp.ac.cn/NyuWa_variants/

--- EFO ---

https://www.ebi.ac.uk/efo/

Example code,

library(ontologyIndex)

#' Select ontology terms whose names match any of a set of patterns
#'
#' @param ontology A list-like object with parallel `id` and `name` elements
#'   (for example an ontology read by ontologyIndex).
#' @param patterns Character vector of regular expressions matched
#'   (case-sensitively) against `ontology$name`. Defaults to the two terms
#'   originally hard-coded here: "inflammatory" and "immune".
#' @return A list with elements `id` and `name` holding the matching terms.
#'   Matches are ordered pattern by pattern (all hits of the first pattern,
#'   then hits of the next pattern not already selected, and so on).
id <- function(ontology, patterns = c("inflammatory", "immune")) {
  # Positions in ontology$name matched by each pattern, one vector per pattern.
  hits <- lapply(patterns, function(p) grep(pattern = p, x = ontology$name))
  # Combine without duplicates; integer(0) seed keeps an empty `patterns` valid.
  inf <- Reduce(union, hits, integer(0))
  list(id = ontology$id[inf], name = ontology$name[inf])
}
# GO
# Load the `go` data set and pick out its inflammatory/immune terms.
data(go)
goidname <- id(go)
# EFO
# Read the EFO ontology from a local OBO file, keeping every tag.
file <- "efo.obo"
get_relation_names(file)
efo <- get_ontology(file, extract_tags = "everything")
length(efo) # 89
length(efo$id) # 27962
efoidname <- id(efo)
# Descendants of two EFO terms; the second set is tabulated with its names.
diseases <- get_descendants(efo, "EFO:0000408")
efo_0000540 <- get_descendants(efo, "EFO:0000540")
efo_0000540name <- efo$name[efo_0000540]
isd <- data.frame(efo_0000540, efo_0000540name)
# save() does not create missing directories, so make sure work/ exists first.
dir.create("work", showWarnings = FALSE)
save(efo, diseases, isd, efoidname, goidname, file = "work/efo.rda")
write.table(isd, file = "efo_0000540.csv", col.names = FALSE, row.names = FALSE, sep = ",")
# Attach the plotting package before opening the device so that a failed
# library() call cannot leave an open, empty PDF device behind.
library(ontologyPlot)
pdf("efo_0000540.pdf", height = 15, width = 15)
onto_plot(efo, efo_0000540)
dev.off()

--- FUMA GWAS ---

https://fuma.ctglab.nl/ (https://github.com/Kyoko-wtnb/FUMA-webapp/)

--- GRCh38 reference genome ---

https://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/GRCh38_reference_genome/

--- GTEx and eQTLGen ---

--- MetaMapLite ---

https://metamap.nlm.nih.gov/MetaMapLite.shtml

--- MR-Base/OpenGWAS ---

--- OmicsPred ---

https://www.omicspred.org/

--- PredictDB data repository ---

http://predictdb.org/

--- Proteomic researches ---

--- RegulomeDB ---

http://regulomedb.org/

--- Roadmap ---

http://www.roadmapepigenomics.org/

--- snakemake workflow catalogue ---

https://snakemake.github.io/snakemake-workflow-catalog/

--- TWAS ---

https://github.com/hakyimlab/MetaXcan

http://gusevlab.org/projects/fusion/

--- eQTL Catalog ---

https://www.ebi.ac.uk/eqtl/

--- GWAS Catalog ---

https://www.ebi.ac.uk/gwas/

--- PGS Catalog ---

https://www.pgscatalog.org/

--- PheWAS Catalog ---

https://phewascatalog.org/

--- rentrez ---

The relevant URLs are as follows,

with example code,

library(rentrez)
# List the Entrez databases, then what PubMed links to and how it can be searched.
entrez_dbs()
entrez_db_links(db = "pubmed")
pubmed_fields <- entrez_db_searchable(db = "pubmed")

# set_entrez_key("")
# Show the API key currently held in the environment (empty string if unset).
Sys.getenv("ENTREZ_KEY")

# Run the PubMed search with use_history = TRUE so that the result carries a
# web_history object for the later fetch/summary calls.
term <- "pQTLs OR (protein AND quantitative AND trait AND loci) AND human [MH] AND (plasma OR Serum)"
r <- entrez_search(db = "pubmed", term = term, use_history = TRUE)
class(r)
names(r)
r$web_history

# Remove output files left by a previous run, since the download loop appends.
unlink(paste("pubmed", c("fetch", "summary"), sep = "."))

# Summary fields to extract for each record.
fields <- c(
  "uid", "pubdate", "sortfirstauthor", "title", "source", "volume", "pages"
)
# Download all search hits in batches, appending the full records to
# "pubmed.fetch" and the selected summary fields to "pubmed.summary".
batch_size <- 50
n_records <- r$count
# retstart is a 0-based offset into the result set, so the first batch must
# start at 0 (starting at 1 silently skips the first record). An empty result
# set yields no iterations instead of a seq() error.
starts <- if (n_records > 0) seq(0, n_records - 1, by = batch_size) else integer(0)
for (i in starts) {
  f <- entrez_fetch(db = "pubmed", web_history = r$web_history, rettype = "text", retmax = batch_size, retstart = i)
  write.table(f, col.names = FALSE, row.names = FALSE, file = "pubmed.fetch", append = TRUE)
  s <- entrez_summary(db = "pubmed", web_history = r$web_history, rettype = "text", retmax = batch_size, retstart = i)
  e <- extract_from_esummary(s, fields)
  # Transposed so that each record is written as one tab-separated row.
  write.table(t(e), col.names = FALSE, row.names = FALSE, file = "pubmed.summary", append = TRUE, sep = "\t")
  # Report progress after the batch completes, capped at the true total.
  cat(min(i + batch_size, n_records), "records downloaded\r")
}
# Post an OMIM identifier to the history server, then follow its links
# from OMIM to ClinVar and from ClinVar to SNP using the stored histories.
id <- 600807
upload <- entrez_post(db = "omim", id = id)
asthma_variants <- entrez_link(
  dbfrom = "omim",
  db = "clinvar",
  cmd = "neighbor_history",
  web_history = upload
)
asthma_variants
snp_links <- entrez_link(
  dbfrom = "clinvar",
  db = "snp",
  web_history = asthma_variants$web_histories$omim_clinvar,
  cmd = "neighbor_history"
)
# NOTE(review): `id` is the value posted to OMIM above but is looked up here
# as a PubMed id — confirm this is intended.
all_links <- entrez_link(dbfrom = "pubmed", id = id, db = "all")

--- Sequence Ontology ---

http://www.sequenceontology.org/

--- TWAS-hub ---

http://twas-hub.org/

--- Biobanks ---

--- Other links ---