Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit a2e337c
Showing
12 changed files
with
330 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
Package: VDJgermlines | ||
Type: Package | ||
Title: Variable, Diversity and Joining Sequences from Various Species | ||
Version: 0.1 | ||
Date: 2018-12-10 | ||
Author: Alexander Yermanos | ||
Maintainer: Alexander Yermanos <ayermanos@gmail.com> | ||
Depends: R(>= 3.1.0), ape, stringdist | ||
Description: Contains variable, diversity, and joining sequences and accompanying functions that enable both the extraction of and comparison between immune V-D-J genomic segments from a variety of species. Sources include IMGT from MP Lefranc (2009) <doi:10.1093/nar/gkn838> and Vgenerepertoire from publication DN Olivieri (2014) <doi:10.1007/s00251-014-0784-3>. | ||
License: GPL-2 | ||
LazyData: true | ||
Encoding: UTF-8 | ||
RoxygenNote: 5.0.1 | ||
Suggests: knitr, rmarkdown | ||
NeedsCompilation: no | ||
Packaged: 2018-12-10 13:36:52 UTC; ayermano | ||
Repository: CRAN | ||
Date/Publication: 2018-12-18 23:30:32 UTC |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
3de6e76fb535ffbc8bd3eb83147fee7e *DESCRIPTION | ||
fc1ab6c98ec46096979562086dd86463 *NAMESPACE | ||
f590b64adca8a16e9d271a0344464000 *R/VDJheatmap.R | ||
857bbb093a2b514268995eb6f4caf618 *R/VDJphylo.R | ||
e56b1581cd36a48408acbda8ab951a4c *R/data.R | ||
9e26b666758020e371dd5ea3f38f0039 *R/extractSequencesR.R | ||
dd8347a7e555108a37aa23d0f177e70c *data/VDJgermlines.RData | ||
6a8d1f93ba94f9efb40c95437d6dad90 *man/VDJgermlines.Rd | ||
5ba446771b251d3a082b997f09d43fb9 *man/VDJheatmap.Rd | ||
e8fd14ec685cc8b8d966dad065e75168 *man/VDJphylo.Rd | ||
b62fbe3663b64a5b22d48b944af8cd9f *man/extractSequencesR.Rd |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(VDJheatmap) | ||
export(VDJphylo) | ||
export(extractSequencesR) | ||
import(ape) | ||
import(stringdist) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
#' Calculates the pairwise distances between VDJ germlines of interest and returns them as a matrix that can be used for further analysis or viewed using heatmap/pheatmap/ComplexHeatmap.
#'
#' @inheritParams extractSequencesR
#' @import stringdist
#' @param inference.method Specifies the distance metric. The value is passed as the method argument of stringdist::stringdistmatrix(), e.g. "lv", "jw", "hamming", "qgram", "cosine", "jaccard", "lcs", or "soundex". Defaults to "lv" when not supplied.
#' @return Returns a square matrix of pairwise distances between the selected germline sequences. Rows and columns are labelled with the germline names and the diagonal is set to NA. Can be viewed by heatmap() in base r, or pheatmap::pheatmap().
#' @export
#' @examples
#' VDJheatmap(species = "dog",chain = "IGHD",source = "IMGT")
VDJheatmap <- function(species,
                       chain,
                       source,
                       inference.method){
  if (missing(inference.method)) inference.method <- "lv"

  germlines <- VDJgermlines::VDJgermlines

  # Logical row mask for one filter: every row when the wildcard keyword is
  # among the requested values, otherwise only rows whose value was requested.
  # Using %in% keeps the test valid when several values are supplied.
  keep_rows <- function(values, wanted, wildcard) {
    if (wildcard %in% wanted) {
      rep(TRUE, length(values))
    } else {
      values %in% wanted
    }
  }

  selected <- keep_rows(germlines$species, species, "all_species") &
    keep_rows(germlines$chain, chain, "all_chains") &
    keep_rows(germlines$source, source, "all_sources")
  internal_vdj_df <- germlines[selected, ]

  ## Distance calculation
  # as.character() guards against the columns being stored as factors.
  sequences <- as.character(internal_vdj_df$sequence)
  germline_names <- as.character(internal_vdj_df$names)

  temp_leven_matrix <- stringdist::stringdistmatrix(sequences, sequences,
                                                    method = inference.method)
  # Self-comparisons are blanked out so they do not dominate a heatmap scale.
  diag(temp_leven_matrix) <- NA
  rownames(temp_leven_matrix) <- germline_names
  colnames(temp_leven_matrix) <- germline_names
  temp_leven_matrix
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
#' Infers basic phylogenetic trees of VDJ germlines of interest with basic distance methods.
#'
#' @inheritParams extractSequencesR
#' @import ape stringdist
#' @param inference.method Specifies the distance used to infer the phylogenetic tree. The value is passed as the
#' method argument of stringdist::stringdistmatrix(). Defaults to "lv" (Levenshtein distance) when not supplied.
#' @return Returns an unrooted tree inferred using the neighbor joining algorithm, with tips labelled by germline name. Can be viewed by plot(output_tree) after loading ape package.
#' @export
#' @examples
#' VDJphylo(species = "dog",chain = "IGHD",source = "IMGT",write.fasta = FALSE)
VDJphylo <- function(species,
                     chain,
                     source,
                     write.fasta,
                     fasta.name,
                     directory.string,
                     inference.method){
  # Optional arguments fall back to defaults instead of erroring when omitted.
  if (missing(write.fasta)) write.fasta <- FALSE
  if (missing(directory.string)) directory.string <- getwd()
  if (missing(fasta.name)) fasta.name <- "VDJgermlines"
  if (missing(inference.method)) inference.method <- "lv"

  germlines <- VDJgermlines::VDJgermlines

  # Logical row mask for one filter: every row when the wildcard keyword is
  # among the requested values, otherwise only rows whose value was requested.
  # Using %in% keeps the test valid when several values are supplied.
  keep_rows <- function(values, wanted, wildcard) {
    if (wildcard %in% wanted) {
      rep(TRUE, length(values))
    } else {
      values %in% wanted
    }
  }

  selected <- keep_rows(germlines$species, species, "all_species") &
    keep_rows(germlines$chain, chain, "all_chains") &
    keep_rows(germlines$source, source, "all_sources")
  internal_vdj_df <- germlines[selected, ]

  # as.character() guards against the columns being stored as factors.
  sequences <- as.character(internal_vdj_df$sequence)
  germline_names <- as.character(internal_vdj_df$names)

  if (isTRUE(write.fasta)) {
    # One FASTA record per germline: ">name" header followed by its sequence.
    # sprintf() yields zero records (an empty file) for an empty selection.
    fasta_records <- sprintf(">%s\n%s", germline_names, sequences)
    writeLines(fasta_records,
               con = file.path(directory.string, paste0(fasta.name, ".fasta")))
  }

  ## Tree calculation
  if (length(sequences) < 3) {
    stop("VDJphylo needs at least three matching germline sequences to build ",
         "a neighbor joining tree; the selection returned ",
         length(sequences), ".", call. = FALSE)
  }
  temp_leven_matrix <- stringdist::stringdistmatrix(sequences,
                                                    method = inference.method)
  temp_leven_tree <- ape::nj(temp_leven_matrix)
  # Tips come back in the order of the input sequences, so the germline names
  # can be assigned positionally.
  temp_leven_tree$tip.label <- germline_names
  temp_leven_tree
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#' VDJgermlines | ||
#' | ||
#' The germline information for a variety of species is contained in the VDJgermlines dataframe. | ||
#' | ||
#' @format A data frame with 7285 rows and 8 variables: | ||
#' \describe{ | ||
#' \item{sequence}{The sequence directly from either IMGT or Vgenerepertoire} | ||
#' \item{names}{The corresponding name of the sequence including V gene number, species} | ||
#' \item{species}{The casual species name - e.g. dog, mouse, rat} | ||
#' \item{chain}{The corresponding chain names, including IGHV, IGLV, TRAV, etc} | ||
#' \item{accession}{The accession number of the given gene} | ||
#' \item{species_name_official}{The more scientific species name, e.g. Mus musculus} | ||
#' \item{source}{The database that annotated the sequence} | ||
#' \item{strain}{The accompanying strain if available (e.g. B6 vs BALB/c)} | ||
#' } | ||
#' @source IMGT and Vgenerepertoire. IMGT, the international ImMunoGeneTics information system (founder and director: Marie-Paule Lefranc, Montpellier, France): Lefranc, M.-P. et al., Nucleic Acids Research, 27, 209-212 (1999) Cover; Ruiz, M. et al., Nucleic Acids Research, 28, 219-221 (2000); Lefranc, M.-P., Nucleic Acids Research, 29, 207-209 (2001); Lefranc, M.-P., Nucleic Acids Res., 31, 307-310 (2003); Lefranc, M.-P. et al., In Silico Biol., 5, 0006 (2004) [Epub], 5:45-60 (2005); Lefranc, M.-P. et al., Nucleic Acids Res., 33, D593-D597 (2005) Full text; Lefranc, M.-P. et al., Nucleic Acids Research 2009 37(Database issue): D1006-D1012; doi:10.1093/nar/gkn838 Full text; Nucleic Acids Res. 2015 Jan 28;43(Database issue):D413-422. doi: 10.1093/nar/gku1056. Epub 2014 Nov 5. Full text. Vgenerepertoire: Olivieri DN, von Haeften B, Sánchez-Espinel C, Faro J, Gambón-Deza F. Genomic V exons from whole genome shotgun data in reptiles. Immunogenetics. 2014 Aug;66(7-8):479-92. doi: 10.1007/s00251-014-0784-3. Epub 2014 Jun 4. | ||
"VDJgermlines" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
#' Extracts the germline genes of interest and stores them as a dataframe
#'
#' @param species Specifies desired species; one or more names may be given. Current species options can be found by running unique(VDJgermlines$species) after loading in data(VDJgermlines). "all_species" will select all available species. Species include ("alpaca","bovine","crab_eating_macaque","dog",
#' "human","mouse","pig","platypus","rabbit","raintrout","rat","rhesus monkey","salmon",
#' "sheep","zebrafish","atlanticcod","catfish","teleostei","goat","dolphin","mamonkey",
#' "camel","runny")
#' @param chain Specifies the desired chain; one or more names may be given. "all_chains" will select all chains. The various chains can be listed by running unique(VDJgermlines$chain). Antibody loci start with IG, TCR starts with TR. Next comes either heavy, light, alpha, beta etc. Examples include IGHD IGHJ IGHV IGIJ IGIV IGKJ IGKV IGLJ IGLV TRAJ TRAV TRBD TRBJ TRBV TRDD TRDJ TRDV TRGJ TRGV
#' @param source Specifies from which source the desired chains should be taken. "all_sources" will select all sources. Current sources can be found by running unique(VDJgermlines$source) after loading in data(VDJgermlines). IMGT and vgenerepertoire are currently present.
#' @param write.fasta A boolean (TRUE / FALSE) value that specifies whether the output sequences should also be written as a fasta file. Treated as FALSE if left blank.
#' @param fasta.name A string specifying the name of the fasta file. If this is left blank this will be defaulted to "VDJgermlines.fasta". Manually including extension is not needed.
#' @param directory.string A string stating the directory that the fasta file should be saved to. If left blank the
#' fasta file will be stored in the current working directory.
#' @return Returns a dataframe containing the species and chains of interest from the desired source. The columns of the dataframe include sequence, names, species, chain, accession, official species name, source, and strain. Furthermore, if write.fasta is set to TRUE then a fasta file with one record per returned row (germline name as header, followed by its sequence) is additionally written.
#' @export
#' @examples
#' extractSequencesR(species = "dog",chain = "IGHD",source = "IMGT",write.fasta = FALSE)
extractSequencesR <- function(species,
                              chain,
                              source,
                              write.fasta,
                              fasta.name,
                              directory.string
                              ){
  # Optional arguments fall back to defaults instead of erroring when omitted.
  if (missing(write.fasta)) write.fasta <- FALSE
  if (missing(directory.string)) directory.string <- getwd()
  if (missing(fasta.name)) fasta.name <- "VDJgermlines"

  germlines <- VDJgermlines::VDJgermlines

  # Logical row mask for one filter: every row when the wildcard keyword is
  # among the requested values, otherwise only rows whose value was requested.
  # Using %in% keeps the test valid when several values are supplied.
  keep_rows <- function(values, wanted, wildcard) {
    if (wildcard %in% wanted) {
      rep(TRUE, length(values))
    } else {
      values %in% wanted
    }
  }

  selected <- keep_rows(germlines$species, species, "all_species") &
    keep_rows(germlines$chain, chain, "all_chains") &
    keep_rows(germlines$source, source, "all_sources")
  internal_vdj_df <- germlines[selected, ]

  if (isTRUE(write.fasta)) {
    # One FASTA record per germline: ">name" header followed by its sequence.
    # as.character() guards against factor columns; sprintf() yields zero
    # records (an empty file) for an empty selection.
    fasta_records <- sprintf(">%s\n%s",
                             as.character(internal_vdj_df$names),
                             as.character(internal_vdj_df$sequence))
    writeLines(fasta_records,
               con = file.path(directory.string, paste0(fasta.name, ".fasta")))
  }

  internal_vdj_df
}
Binary file not shown.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.