Permalink
Browse files

first commit

  • Loading branch information...
0 parents commit 44d2672f0b1751b9595b139b47c4ddcc2daac7fd Ian Holmes committed Mar 27, 2009
Showing 1,075 changed files with 305,058 additions and 0 deletions.
40 INSTALL
@@ -0,0 +1,40 @@
+INSTALLATION
+============
+
+Type the following...
+
+ make all
+
+...to make all targets.
+
+Alternatively, the following will make individual packages:
+
+ make xrate
+ make handel
+ make stemloc
+ make evoldoer
+
+Executables go in the following directories:
+
+ dart/bin
+ dart/perl
+
+Some programs expect you to set the environment variable DARTDIR
+to the root path of the dart distribution, but omitting this
+is rarely fatal.
+
+To compile, you'll need the following tools:
+
+ -- GNU gcc (version 4.0 or higher)
+ -- GNU make (probably any recent version, but 3.80 is safe)
+ -- GNU ar (found in the binutils package; version 2.14 is safe)
+ -- GNU test/rm/mv/ln (found in the coreutils package)
+
+You may be able to get away without having GNU ar, but probably not
+without GNU make and definitely not without GNU gcc. Really, DART
+should be considered a GNU package.
+
+There's nothing as sophisticated as autoconf here yet, so if your
+C++ compiler isn't on your path as 'gcc', or your setup is nonstandard
+in some other way and you need to edit the Makefiles, the relevant
+definitions are all in 'dart/src/make.defs'.
340 LICENSE

Large diffs are not rendered by default.

Oops, something went wrong.
135 Makefile
@@ -0,0 +1,135 @@
+# All Targets
+# Default goal: builds every package and utility named in the prerequisite
+# list, then prints a confirmation message.
+# Declared phony so that a file or directory called 'all' cannot mask it.
+# NOTE(review): several prerequisites run sub-makes in the same source
+# directory (e.g. xrate, xgram, xfold, xprot and simgram all use src/ecfg),
+# so a parallel top-level build (-j) may race; confirm before relying on it.
+.PHONY: all
+all: stemloc xrate xgram xfold xprot simgram handel evoldeeds psw empath kimono weighbor utils
+ @echo All targets built
+
+# files & paths
+# TESTS, TARGETS, DIRNAME and DEPS are initialised empty here and kept
+# recursive (=) so that later '+=' appends behave exactly as before.
+# NOTE(review): presumably consumed by make.defs; confirm.
+TESTS =
+TARGETS =
+DIRNAME =
+DEPS =
+# Root of the distribution and its source tree. Simply-expanded (:=) so that
+# 'pwd' is run once at parse time rather than on every expansion.
+CCDIR := $(shell pwd)
+SRCDIR := $(CCDIR)/src
+
+# Makefile magic: get a list of subdirectories of the src directory
+# NB the following pattern also picks up files in the src directory,
+# so we later need to test for directories using 'test -d'
+# Entries whose path ends in 'CVS' are filtered out of the list.
+SUBDIRS = $(filter-out %CVS,$(wildcard $(SRCDIR)/*))
+
+# Build mode selection.
+# Lets GNU make accept 'make release all', 'make profile all', 'make noopt all'
+# or 'make debug all': the mode word is looked for among the command-line goals.
+# Start from the default and let each later check override the earlier ones,
+# which gives the precedence debug > profile > noopt > release.
+RELEASE = release
+ifneq (,$(findstring noopt,$(MAKECMDGOALS)))
+RELEASE = noopt
+endif
+ifneq (,$(findstring profile,$(MAKECMDGOALS)))
+RELEASE = profile
+endif
+ifneq (,$(findstring debug,$(MAKECMDGOALS)))
+RELEASE = debug
+endif
+
+# Dummy pseudotargets for 'make debug ...', 'make release ...', etc.
+# These goals exist only so that the mode word given on the command line is a
+# valid target; the mode itself is chosen by inspecting $(MAKECMDGOALS).
+# 'noopt' is listed because the mode selection also recognises it; without a
+# rule here 'make noopt ...' has no top-level target to satisfy.
+# NOTE(review): if make.defs already supplies a 'noopt' rule, drop it here.
+# Declared phony so a file or directory with one of these names cannot mask them.
+.PHONY: debug profile release noopt
+debug profile release noopt:
+ echo Top-level makefile: compiling in '$@' mode
+
+# Include defs
+# Shared definitions are read from src/make.defs. A plain 'include' (not
+# '-include') is used, so a missing make.defs stops the build.
+include $(SRCDIR)/make.defs
+
+# TARGETS
+
+# Clean up
+# NB $(SUBDIRS) also picks up plain files in the src directory, so each entry
+# is checked with 'test -d' before recursing into it.
+# The check is written as 'if ...; then ...; fi' rather than 'test -d ... &&'
+# so that a trailing non-directory entry does not leave the loop (and hence
+# the whole target) with a failing exit status.
+# 'cd ... &&' ensures the sub-make never runs in the wrong directory.
+# As before, a failure in an earlier subdirectory does not stop the loop.
+# One rule serves both goals; $@ is the goal handed to each sub-make.
+.PHONY: clean cleanlib
+clean cleanlib:
+ for SUBDIR in $(SUBDIRS); do if test -d "$$SUBDIR"; then (cd "$$SUBDIR" && $(MAKE) $(RELEASE) $@); fi; done
+
+# Handel : MCMC statistical alignment package
+# Includes tkfalign, tkfemit, tkfdistance
+# Runs the sub-makes in src/handel and src/tkf in the selected build mode.
+# 'cd ... &&' is used so that a failed cd aborts the line; with ';' the
+# sub-make would run in the top-level directory and re-enter this same goal.
+.PHONY: Handel handel
+Handel handel:
+ cd $(SRCDIR)/handel && $(MAKE) -k $(RELEASE) dep handel
+ cd $(SRCDIR)/tkf && $(MAKE) -k $(RELEASE) dep tkfhandel
+
+# xrate : fast phylo-grammar training & annotation using EM
+# Runs the sub-make in src/ecfg in the selected build mode.
+# 'cd ... &&' is used so that a failed cd aborts the line; with ';' the
+# sub-make would run in the top-level directory and re-enter this same goal.
+.PHONY: XRATE Xrate xrate
+XRATE Xrate xrate:
+ cd $(SRCDIR)/ecfg && $(MAKE) -k $(RELEASE) dep xrate
+
+# Tests (xrate, SCFGs)
+# Builds everything first, then runs the 'test' goal in src/ecfg and src/scfg.
+# Declared phony so a file or directory named 'test' cannot mask the goal.
+# 'cd ... &&' is used so that a failed cd aborts the line; with ';' the
+# sub-make would run in the top-level directory and re-enter this same goal.
+.PHONY: test
+test:
+ $(MAKE) $(RELEASE) all
+ cd $(SRCDIR)/ecfg && $(MAKE) $(RELEASE) test
+ cd $(SRCDIR)/scfg && $(MAKE) $(RELEASE) test
+
+# stemloc: multiple alignment of RNA sequences
+# All stemloc goals are phony: they only drive the sub-make in src/stemloc.
+# Each recipe uses 'cd ... &&' so that a failed cd aborts the line; with ';'
+# the sub-make would run in the top-level directory instead.
+.PHONY: StemLoc Stemloc stemloc pseudovec-bifurc-stemloc explicit-bifurc-stemloc dense-stemloc
+
+# the default (pseudo-stemloc) is commented out while I fix the bifurcation iterators -- IH, 12/13/06
+# StemLoc Stemloc stemloc: pseudovec-bifurc-stemloc
+StemLoc Stemloc stemloc: explicit-bifurc-stemloc
+
+# pseudo-stemloc
+# Use bifurcation "pseudovector" iterators (slimmest build)
+# These are experimental and are known not to work. Recommended that you avoid them.
+pseudovec-bifurc-stemloc:
+ cd $(SRCDIR)/stemloc && $(MAKE) -k $(RELEASE) pseudovec_bifurc dep stemloc
+
+# explicit-bifurc-stemloc
+# Explicitly enumerate bifurcations in fold envelope (faster, but uses O(L^3) space)
+explicit-bifurc-stemloc:
+ cd $(SRCDIR)/stemloc && $(MAKE) -k $(RELEASE) explicit_bifurc dep stemloc
+
+# dense-stemloc
+# Use dense(faster,fatter) rather than sparse(slower,slimmer) DP matrices for Pair SCFGs
+# Also uses explicit enumeration of bifurcations (see above)
+dense-stemloc:
+ cd $(SRCDIR)/stemloc && $(MAKE) -k $(RELEASE) alloc_dense explicit_bifurc dep stemloc
+
+
+# These goals are phony: they only drive sub-makes or group other goals.
+# Each recipe uses 'cd ... &&' so that a failed cd aborts the line; with ';'
+# the sub-make would run in the top-level directory and re-enter the same goal.
+.PHONY: evoldoer evoldeeds indiegram
+
+# evoldoer: pairwise statistical alignment of RNA sequences
+evoldoer:
+ cd $(SRCDIR)/evoldoer && $(MAKE) -k $(RELEASE) dep evoldoer
+
+# evoldeeds: evoldoer, indiegram, stemloc
+evoldeeds: evoldoer indiegram stemloc
+
+# Indiegram: three-way statistical alignment of RNA sequences
+indiegram:
+ cd $(SRCDIR)/indiegram && $(MAKE) -k $(RELEASE) workaround dep indiegram
+
+
+# xgram/xfold/xprot: variants of xrate
+# simgram: generate simulated sample alignments from xrate grammars
+# One rule serves all four goals; $@ names the program built in src/ecfg.
+# 'cd ... &&' is used so that a failed cd aborts the line; with ';' the
+# sub-make would run in the top-level directory and re-enter this same goal.
+.PHONY: xgram xfold xprot simgram
+xgram xfold xprot simgram:
+ cd $(SRCDIR)/ecfg && $(MAKE) -k $(RELEASE) dep $@
+
+# Cis-regulatory motif-finding programs
+# Both goals are phony and only drive a sub-make in their source directory.
+# 'cd ... &&' is used so that a failed cd aborts the line; with ';' the
+# sub-make would run in the top-level directory and re-enter the same goal.
+.PHONY: Kimono kimono Empath empath
+
+# kimono: microarray clustering and motif-finding
+Kimono kimono:
+ cd $(SRCDIR)/kimono && $(MAKE) -k $(RELEASE) dep kimono kmeans
+
+# empath: motif-finding
+Empath empath:
+ cd $(SRCDIR)/empath && $(MAKE) -k $(RELEASE) dep empath
+
+# Probabilistic Smith-Waterman (PSW) implementations
+# 'psw' groups the two variants; each variant builds its align and train
+# programs via the sub-make in src/psw.
+# 'cd ... &&' is used so that a failed cd aborts the line; with ';' the
+# sub-make would run in the top-level directory instead.
+.PHONY: psw ppsw dpsw
+psw: ppsw dpsw
+
+ppsw:
+ cd $(SRCDIR)/psw && $(MAKE) -k $(RELEASE) dep ppswalign ppswtrain
+
+dpsw:
+ cd $(SRCDIR)/psw && $(MAKE) -k $(RELEASE) dep dpswalign dpswtrain
+
+# Utility programs
+# Builds the helper programs that live in src/seq, src/stemloc and src/tree,
+# then prints a confirmation message.
+# 'cd ... &&' is used so that a failed cd aborts the line; with ';' the
+# sub-make would run in the top-level directory instead.
+.PHONY: utils
+utils:
+ cd $(SRCDIR)/seq && $(MAKE) -k $(RELEASE) dep wordcount cmpalign cmpfold
+ cd $(SRCDIR)/stemloc && $(MAKE) -k $(RELEASE) dep gc2gr-ss
+ cd $(SRCDIR)/tree && $(MAKE) -k $(RELEASE) dep bsupp drawpstree eztree reroot getinsertions
+ @echo All utilities built
+
+# Bill Bruno's weighbor (distributed with DART for historical reasons)
+# Unlike the other packages, no build-mode goal or 'dep' is passed down.
+# 'cd ... &&' is used so that a failed cd aborts the line; with ';' the
+# sub-make would run in the top-level directory and re-enter this same goal.
+.PHONY: Weighbor weighbor
+Weighbor weighbor:
+ cd $(SRCDIR)/Weighbor && $(MAKE) -k weighbor
140 README
@@ -0,0 +1,140 @@
+DART: DNA/Amino/RNA Tests
+===============================================================================
+
+The primary reference point for DART is the following URL:
+
+http://biowiki.org/dart
+
+===============================================================================
+
+INSTALLATION
+============
+
+Type "make all".
+
+See INSTALL file for more info.
+
+===============================================================================
+
+NOTES
+=====
+
+DART is a collection of application programs for doing
+probabilistic bioinformatics using evolutionary models
+and stochastic grammars.
+
+As well as the primary packages (Handel, Stemloc, XRate etc),
+DART contains various utilities related to the primary datatypes:
+DNA, RNA and protein sequence.
+
+The package was mostly written by Ian Holmes <ihh@berkeley.edu>.
+For a full list of contributors see http://biowiki.org/dart
+
+Some free packages from external sources are included in the distribution:
+ dart/src/newmat [Newmat, by RB Davies]
+ dart/src/randlib [randlib, from UTexas Biomath dept]
+ dart/src/Weighbor [Weighbor, by Bill Bruno & Nick Socci]
+ dart/src/util/Regexp.* [regexp library by Henry Spencer & others]
+
+DART is released under the GNU General Public License (GPL).
+
+===============================================================================
+
+Supported packages
+==================
+
+xrate: A package for training phylo-grammars using EM,
+ and applying them to annotate alignments.
+
+Handel: phylogenetic multiple alignment software based on
+ the TKF91 evolutionary indel/substitution model.
+
+Stemloc: comparative RNA structure-finder using accelerated
+ pairwise stochastic context-free grammars
+
+evoldoer: pairwise RNA alignment using an evolutionary model
+ (formerly "tkfstalign")
+
+
+Also included are various bioinformatics tools, tests and algorithms.
+
+
+Longer (but not by much) package descriptions
+=============================================
+
+** xrate
+
+This is a package for training & annotation using phylogrammars.
+See the following URL for documentation:
+
+http://biowiki.org/XrateSoftware
+
+
+** Handel
+
+This is a package for doing multiple sequence alignment under an
+evolutionary model.
+
+Since it uses a probabilistic (MCMC) approach, as well as "greedy"
+heuristics (progressive alignment etc), the program can generate
+suboptimal alignments as well as looking for the best alignment.
+
+The probabilistic framework also means that the program can be
+"trained" directly from data, avoiding the need to supply
+"gap penalties", "substitution matrices" etc.
+
+Currently the evolutionary model used is the TKF model.
+This implies global alignment, homogeneous selection & linear gap costs.
+A more realistic model (affine gaps, local alignment, heterogeneous)
+is under development.
+
+The main programs in this package are:
+
+
+tkfemit -- generates alignments from the TKF model, given a tree
+tkfdistance -- computes a PHYLIP distance matrix from unaligned sequences
+tkfalign -- builds & samples alignments, given a tree
+tkfidem -- EM estimation of TKF91 indel rates
+phylocomposer -- phylogenetic transducer composition & alignment
+phylodirector.pl -- animations of phylo-transducers
+weighbor -- Bill Bruno's program to build a tree from the tkfdistance matrix
+
+
+
+** Stemloc
+
+This is a program for finding conserved motifs in RNA sequences,
+using pairwise stochastic context-free grammars.
+
+The algorithm simultaneously aligns and folds multiple RNA sequences,
+and may be viewed as a constrained (and so accelerated) version of
+the dynamic programming method of Sankoff et al.
+
+Again, due to the probabilistic nature of the approach,
+the program can be "trained" directly from data, without the
+need for expert knowledge to set the parameters.
+
+The main program in this package is "stemloc" itself.
+See the following URL for a tutorial:
+
+http://biowiki.org/StemlocTutorial
+
+
+** evoldoer
+
+Like stemloc, this is an RNA alignment program, but it only does
+pairwise alignments, not multiple alignments. It is however
+fully based on an evolutionary model for RNA structure,
+called the TKF Structure Tree (TKFST), based on the TKF91 model.
+TKFST models indels of whole substructures as well as point
+(base) substitutions & indels and covariant (basepair)
+substitutions & indels.
+
+
+
+REFERENCES
+==========
+
+For references see the following URL:
+
+http://biowiki.org/PaperArchive
@@ -0,0 +1,2 @@
+This directory contains executables, including tests.
+This file exists solely so that CVS doesn't balk at an empty directory.
@@ -0,0 +1,73 @@
+;; Substitution chain over the terminal NUC, augmented with a hidden class
+;; (row CLASS) that takes the labels 1 and 2. Each state is therefore a
+;; (nucleotide class) pair, giving eight states in total.
+(chain
+ ;; NOTE(review): 'rev' presumably selects a reversible update policy -- confirm
+ (update-policy rev)
+ (terminal (NUC))
+ (hidden-class
+ (row CLASS)
+ (label (1 2)))
+
+ ;; initial probability distribution
+ ;; one entry per (nucleotide class) state; the eight values sum to ~1
+ (initial (state (a 1)) (prob 0.0238705))
+ (initial (state (c 1)) (prob 0.136706))
+ (initial (state (g 1)) (prob 0.0136832))
+ (initial (state (u 1)) (prob 0.204652))
+ (initial (state (a 2)) (prob 0.305866))
+ (initial (state (c 2)) (prob 0.0413438))
+ (initial (state (g 2)) (prob 0.170151))
+ (initial (state (u 2)) (prob 0.103727))
+
+ ;; mutation rates
+ ;; every listed transition changes exactly one component of the state:
+ ;; either the nucleotide (class unchanged) or the class (nucleotide unchanged)
+ (mutate (from (a 1)) (to (c 1)) (rate 0.157809))
+ (mutate (from (a 1)) (to (g 1)) (rate 0.310445))
+ (mutate (from (a 1)) (to (u 1)) (rate 0.542682))
+ (mutate (from (a 1)) (to (a 2)) (rate 0.0196586))
+ (mutate (from (c 1)) (to (a 1)) (rate 0.0275553))
+ (mutate (from (c 1)) (to (g 1)) (rate 0.0153761))
+ (mutate (from (c 1)) (to (u 1)) (rate 0.107003))
+ (mutate (from (c 1)) (to (c 2)) (rate 0.000126849))
+ (mutate (from (g 1)) (to (a 1)) (rate 0.541573))
+ (mutate (from (g 1)) (to (c 1)) (rate 0.153619))
+ (mutate (from (g 1)) (to (u 1)) (rate 0.803682))
+ (mutate (from (g 1)) (to (g 2)) (rate 0.0200437))
+ (mutate (from (u 1)) (to (a 1)) (rate 0.063298))
+ (mutate (from (u 1)) (to (c 1)) (rate 0.0714773))
+ (mutate (from (u 1)) (to (g 1)) (rate 0.0537349))
+ (mutate (from (u 1)) (to (u 2)) (rate 0.0036554))
+ (mutate (from (a 2)) (to (a 1)) (rate 0.0015342))
+ (mutate (from (a 2)) (to (c 2)) (rate 0.0411113))
+ (mutate (from (a 2)) (to (g 2)) (rate 0.0812237))
+ (mutate (from (a 2)) (to (u 2)) (rate 0.165302))
+ (mutate (from (c 2)) (to (c 1)) (rate 0.000419434))
+ (mutate (from (c 2)) (to (a 2)) (rate 0.304146))
+ (mutate (from (c 2)) (to (g 2)) (rate 0.141506))
+ (mutate (from (c 2)) (to (u 2)) (rate 0.969275))
+ (mutate (from (g 2)) (to (g 1)) (rate 0.00161188))
+ (mutate (from (g 2)) (to (a 2)) (rate 0.146009))
+ (mutate (from (g 2)) (to (c 2)) (rate 0.0343834))
+ (mutate (from (g 2)) (to (u 2)) (rate 0.0920692))
+ (mutate (from (u 2)) (to (u 1)) (rate 0.00721206))
+ (mutate (from (u 2)) (to (a 2)) (rate 0.487438))
+ (mutate (from (u 2)) (to (c 2)) (rate 0.386336))
+ (mutate (from (u 2)) (to (g 2)) (rate 0.151028))
+ ) ;; end chain NUC
+
+
+;; IUPAC RNA alphabet
+;; Four primary tokens (a c g u) plus degenerate symbols, each declared with
+;; an 'extend' clause listing the primary tokens it stands for.
+(alphabet
+ (name RNA)
+ (token (a c g u))
+ ;; NOTE(review): presumably positional against the token list
+ ;; (a<->u, c<->g) -- confirm
+ (complement (u g c a))
+ ;; n and x both cover all four tokens
+ (extend (to n) (from a) (from c) (from g) (from u))
+ (extend (to x) (from a) (from c) (from g) (from u))
+ ;; t is accepted as a synonym for u
+ (extend (to t) (from u))
+ ;; two-token ambiguity codes
+ (extend (to r) (from a) (from g))
+ (extend (to y) (from c) (from u))
+ (extend (to m) (from a) (from c))
+ (extend (to k) (from g) (from u))
+ (extend (to s) (from c) (from g))
+ (extend (to w) (from a) (from u))
+ ;; three-token ambiguity codes
+ (extend (to h) (from a) (from c) (from u))
+ (extend (to b) (from c) (from g) (from u))
+ (extend (to v) (from a) (from c) (from g))
+ (extend (to d) (from a) (from g) (from u))
+ (wildcard *)
+) ;; end alphabet RNA
Oops, something went wrong.

0 comments on commit 44d2672

Please sign in to comment.