-
Notifications
You must be signed in to change notification settings - Fork 12
Function reference
Basic overview of MOODS functions.
Helper functions for manipulating position weight matrices and sequences.
MOODS.tools.flat_bg(alphabet_size)
MOODS.tools.bg_from_sequence_dna(seq, pseudocount)
MOODS.tools.snp_variants(seq)
MOODS.tools.reverse_complement(matrix)
MOODS.tools.log_odds(matrix, background, pseudocount[, log_base])
MOODS.tools.threshold_from_p(matrix, background, p)
MOODS.tools.threshold_from_p_with_precision(matrix, background, p, precision) # in 1.9.4
MOODS.tools.max_score(matrix)
MOODS.tools.min_score(matrix)
MOODS.tools.min_delta(matrix)
Analogues of the matrix functions for high-order PWMs. These work the same as the standard matrix functions, but additionally take the size of the input alphabet as a parameter (4 for the DNA alphabet).
MOODS.tools.reverse_complement(matrix, alphabet_size)
MOODS.tools.threshold_from_p(matrix, background, p, alphabet_size)
MOODS.tools.threshold_from_p_with_precision(matrix, background, p, precision, alphabet_size) # in 1.9.4
MOODS.tools.max_score(matrix, alphabet_size)
MOODS.tools.min_score(matrix, alphabet_size)
MOODS.tools.log_odds(matrix, low_order_terms, background, pseudocount, alphabet_size)
MOODS.tools.log_odds(matrix, low_order_terms, background, pseudocount, alphabet_size[, log_base])
Basic parsers for reading `.pfm` and `.adm` files. See file formats. The parsers either return the matrix as it is, or convert it to log-odds scores.
MOODS.parsers.pfm(filename)
MOODS.parsers.pfm_to_log_odds(filename, background, pseudocount[, log_base])
MOODS.parsers.adm_to_log_odds(filename, background, pseudocount, alphabet_size[, log_base])
MOODS.parsers.adm_1o_terms(filename, alphabet_size=4)
MOODS.parsers.adm_0o_terms(filename, alphabet_size=4)
Actual scanning. The basic scanning functions internally create a `Scanner` object and use it to scan the input sequence.
# for DNA alphabets
MOODS.scan.scan_dna(seq, matrices, background, thresholds, window_size = 7 )
# non-DNA alphabets; window_size = 7 is probably the best
MOODS.scan.scan(seq, matrices, background, thresholds, window_size, alphabet_size)
# tries to return approximately `target` best hits; the optional parameters tune the search algorithm
MOODS.scan.scan_best_hits_dna(seq, matrices, target, iterations = 10, MULT = 2, LIMIT_MULT = 10, window_size = 7)
For more complicated use cases, one can explicitly create a `Scanner` object and use it to scan multiple sequences in one go. See the `ex-scanner.py` example script provided with the MOODS package.
Constructors for `Scanner`. Note that a `Scanner` needs to be initialised with the `set_motifs` method.
# DNA alphabet; window_size = 7 is usually good
MOODS.scan.Scanner(window_size)
# arbitrary alphabet
# the second parameter should be a list of strings specifying which letters
# correspond to each position of the alphabet, e.g. for the DNA alphabet, the
# second parameter should be ["aA", "cC", "gG", "tT"]
MOODS.scan.Scanner(window_size, alphabet)
`Scanner` object functions.
Scanner.set_motifs(matrices, background, thresholds)
Scanner.scan(seq)
Scanner.variant_matches(seq, variants, max_depth = 0) # for SNPs and other sequence variants
The functions are implemented in C++ and can also be called directly from C++. The headers can be found in `moods_tools.h`, `moods_parsers.h` and `moods_scan.h`.
The function declarations can be found below for reference. Functionality is identical to the Python versions.
// Helper functions for manipulating position weight matrices and sequences
// (C++ counterparts of the Python MOODS.tools functions).
// In the signatures below, `ps` is the pseudocount and `bg` the background
// distribution.

// background functions
std::vector<double> flat_bg(const unsigned int alphabet_size);
std::vector<double> bg_from_sequence_dna(const std::string &seq, const double ps);
std::vector<MOODS::variant> snp_variants(const std::string &seq);
// matrix transformations
score_matrix reverse_complement(const score_matrix &mat);
// log_odds comes in two overloads: the second one additionally takes the
// base of the logarithm (`log_base`)
score_matrix log_odds(const score_matrix &mat, const std::vector<double> &bg, const double ps);
score_matrix log_odds(const score_matrix &mat, const std::vector<double> &bg, const double ps, const double log_base);
// threshold from p
// NOTE(review): `p` is presumably the target p-value -- confirm against moods_tools.h
double threshold_from_p(const score_matrix &pssm, const std::vector<double> &bg, const double &p);
double threshold_from_p_with_precision(const score_matrix &pssm, const std::vector<double> &bg, const double &p, const double precision);
// min / max
double max_score(const score_matrix &mat);
double min_score(const score_matrix &mat);
double min_delta(const score_matrix &mat);
// high-order versions
// Same as the standard matrix functions above, but for high-order PWMs;
// the extra parameter `a` is the size of the input alphabet (4 for DNA).
double max_score(const score_matrix &mat, const size_t a);
double min_score(const score_matrix &mat, const size_t a);
double threshold_from_p(const score_matrix &pssm, const std::vector<double> &bg, const double &p, const size_t a);
double threshold_from_p_with_precision(const score_matrix &pssm, const std::vector<double> &bg, const double &p, const double precision, const size_t a);
score_matrix reverse_complement(const std::vector<std::vector<double>> &mat, size_t a);
// high-order log_odds additionally takes the low-order terms of the model;
// as above, the second overload also takes the base of the logarithm
score_matrix log_odds(const score_matrix &mat, const std::vector<std::vector<double> >& low_order_terms,
const std::vector<double> &bg, const double ps, const size_t a);
score_matrix log_odds(const score_matrix &mat, const std::vector<std::vector<double> >& low_order_terms,
const std::vector<double> &bg, const double ps, const size_t a, const double log_base);
// parsers
// Basic parsers for reading .pfm and .adm files (C++ counterparts of the
// Python MOODS.parsers functions). They either return the matrix as it is,
// or converted to log-odds scores.
// `a` is the alphabet size and defaults to 4.
// NOTE(review): the meaning of the default log_base = -1 is not shown here;
// presumably it selects the library's default logarithm base -- confirm.
score_matrix pfm(const std::string& filename);
score_matrix pfm_to_log_odds(const std::string& filename, const std::vector<double> &bg, const double pseudocount, const double log_base = -1);
score_matrix adm_1o_terms(const std::string& filename, const size_t a = 4);
score_matrix adm_0o_terms(const std::string& filename, const size_t a = 4);
score_matrix adm_to_log_odds(const std::string& filename, const std::vector<double> &bg, const double pseudocount, const size_t a = 4, const double log_base = -1);
// scanning
// The basic scanning functions internally create a Scanner object and use it
// to scan the input sequence.

// for DNA alphabets; window_size defaults to 7
std::vector<std::vector<MOODS::match> > scan_dna(const std::string& seq,
const std::vector<score_matrix>& matrices,
const std::vector<double>& bg,
const std::vector<double> thresholds,
unsigned int window_size = 7 );
// non-DNA alphabets; window_size = 7 is probably the best
// NOTE(review): `alphabet` presumably has the same form as in the Scanner
// constructor (one string of accepted letters per alphabet position,
// e.g. {"aA", "cC", "gG", "tT"} for DNA) -- confirm.
std::vector<std::vector<MOODS::match> > scan(const std::string& seq,
const std::vector<score_matrix>& matrices,
const std::vector<double>& bg,
const std::vector<double> thresholds,
unsigned int window_size,
const std::vector<std::string>& alphabet);
// tries to return about `target` best hits; the defaulted parameters tune
// the search algorithm
std::vector< std::vector<MOODS::match> > scan_best_hits_dna(const std::string& seq,
const std::vector<score_matrix>& matrices,
size_t target,
int iterations = 10,
unsigned int MULT = 2,
size_t LIMIT_MULT = 10,
size_t window_size = 7);
// Scanner object for the more complicated use cases: it can be created
// explicitly and used to scan multiple sequences in one go.
// A Scanner needs to be initialised with set_motifs() after construction.
class Scanner {
public:
    // DNA alphabet; window_size = 7 is usually good.
    Scanner(unsigned int window_size);

    // Arbitrary alphabet. `alphabet` is a list of strings specifying which
    // letters correspond to each position of the alphabet, e.g. for the DNA
    // alphabet it should be {"aA", "cC", "gG", "tT"}.
    Scanner(unsigned int window_size, const std::vector<std::string>& alphabet);

    // void set_motifs(const std::vector<MOODS::scan::Motif>& motifs);

    // Initialises the scanner with the matrices to search for, the
    // background distribution and the thresholds.
    // NOTE(review): presumably one threshold per matrix -- confirm.
    void set_motifs(const std::vector<score_matrix>& matrices,
                    const std::vector<double>& bg,
                    const std::vector<double> thresholds);

    // Scans the sequence `s` and returns the matches.
    // NOTE(review): the outer vector is presumably indexed by matrix -- confirm.
    std::vector<std::vector<MOODS::match> > scan(const std::string& s);

    // Matching for SNPs and other sequence variants.
    std::vector<std::vector<MOODS::match_with_variant> > variant_matches(const std::string& seq, const std::vector<MOODS::variant>& variants, int max_depth = 0);
};