Skip to content
Janne H. Korhonen edited this page Jul 31, 2018 · 3 revisions

Basic overview of MOODS functions.

Python

MOODS.tools

Helper functions for manipulating position weight matrices and sequences.

Sequence functions

MOODS.tools.flat_bg(alphabet_size)
MOODS.tools.bg_from_sequence_dna(seq, pseudocount)
MOODS.tools.snp_variants(seq)

Matrix functions

MOODS.tools.reverse_complement(matrix)
MOODS.tools.log_odds(matrix, background, pseudocount[, log_base])

MOODS.tools.threshold_from_p(matrix, background, p)
MOODS.tools.threshold_from_p_with_precision(matrix, background, p, precision) # in 1.9.4

MOODS.tools.max_score(matrix)
MOODS.tools.min_score(matrix)
MOODS.tools.min_delta(matrix)

Higher-order matrix functions

Analogues of the matrix functions for higher-order PWMs. They behave the same as the standard matrix functions, but take the size of the input alphabet as an additional alphabet_size parameter (4 for the DNA alphabet).

MOODS.tools.reverse_complement(matrix, alphabet_size)
MOODS.tools.threshold_from_p(matrix, background, p, alphabet_size)
MOODS.tools.threshold_from_p_with_precision(matrix, background, p, precision, alphabet_size) # in 1.9.4

MOODS.tools.max_score(matrix, alphabet_size)
MOODS.tools.min_score(matrix, alphabet_size)

MOODS.tools.log_odds(matrix, low_order_terms, background, pseudocount, alphabet_size)
MOODS.tools.log_odds(matrix, low_order_terms, background, pseudocount, alphabet_size[, log_base])

MOODS.parsers

Basic parsers for reading .pfm and .adm files. See file formats. The parsers return the matrix either as it is or converted to log-odds scores.

Standard PWMs

MOODS.parsers.pfm(filename)
MOODS.parsers.pfm_to_log_odds(filename, background, pseudocount[, log_base])

First-order PWMs

MOODS.parsers.adm_to_log_odds(filename, background, pseudocount, alphabet_size[, log_base])
MOODS.parsers.adm_1o_terms(filename, alphabet_size=4)
MOODS.parsers.adm_0o_terms(filename, alphabet_size=4)

MOODS.scan

Actual scanning. The basic scanning functions internally create a Scanner object and use it to scan the input sequence.

# for DNA alphabets
MOODS.scan.scan_dna(seq, matrices, background, thresholds, window_size = 7 )

# non-DNA alphabets; window_size = 7 is probably the best
MOODS.scan.scan(seq, matrices, background, thresholds, window_size, alphabet_size) 

# tries to return approximately `target` best hits; the optional parameters tune the search algorithm
MOODS.scan.scan_best_hits_dna(seq, matrices, target, iterations = 10, MULT = 2, LIMIT_MULT = 10, window_size = 7) 

For more complicated use cases, one can explicitly create a Scanner object and use it to scan multiple sequences in one go. See the ex-scanner.py example script provided with the MOODS package.

Constructors for Scanner. Note that Scanner needs to be initialised with the set_motifs method.

# DNA alphabet; window_size = 7 is usually good
MOODS.scan.Scanner(window_size)

# arbitrary alphabet
# the second parameter should be a list of strings specifying which letters
# correspond to each position of the alphabet, e.g. for the DNA alphabet, the
# second parameter should be ["aA", "cC", "gG", "tT"]
MOODS.scan.Scanner(window_size, alphabet)

Scanner object functions.

Scanner.set_motifs(matrices, background, thresholds)
Scanner.scan(seq)

Scanner.variant_matches(seq, variants, max_depth = 0) # for SNPs and other sequence variants

C++

The C++ implementations of the functions can also be called directly from C++. The headers can be found in moods_tools.h, moods_parsers.h and moods_scan.h.

The function declarations can be found below for reference. Functionality is identical to the Python versions.

moods_tools.h

    // background functions
    std::vector<double> flat_bg(const unsigned int alphabet_size);
    std::vector<double> bg_from_sequence_dna(const std::string &seq, const double ps);
    
    std::vector<MOODS::variant> snp_variants(const std::string &seq);
    
    // matrix transformations
    score_matrix reverse_complement(const score_matrix &mat);
    score_matrix log_odds(const score_matrix &mat, const std::vector<double> &bg, const double ps);
    score_matrix log_odds(const score_matrix &mat, const std::vector<double> &bg, const double ps, const double log_base);
    
    // threshold from p
    double threshold_from_p(const score_matrix &pssm, const std::vector<double> &bg, const double &p);
    double threshold_from_p_with_precision(const score_matrix &pssm, const std::vector<double> &bg, const double &p, const double precision);
    
    // min / max
    double max_score(const score_matrix &mat);
    double min_score(const score_matrix &mat);
    double min_delta(const score_matrix &mat);

    // high-order versions
    double max_score(const score_matrix &mat, const size_t a);
    double min_score(const score_matrix &mat, const size_t a);
    double threshold_from_p(const score_matrix &pssm, const std::vector<double> &bg, const double &p, const size_t a);
    double threshold_from_p_with_precision(const score_matrix &pssm, const std::vector<double> &bg, const double &p, const double precision, const size_t a);
    score_matrix reverse_complement(const std::vector<std::vector<double>> &mat, size_t a);
    score_matrix log_odds(const score_matrix &mat, const std::vector<std::vector<double> >& low_order_terms,
                          const std::vector<double> &bg, const double ps, const size_t a);
    score_matrix log_odds(const score_matrix &mat, const std::vector<std::vector<double> >& low_order_terms,
                          const std::vector<double> &bg, const double ps, const size_t a, const double log_base);

moods_parsers.h

    score_matrix pfm(const std::string& filename);
    score_matrix pfm_to_log_odds(const std::string& filename, const std::vector<double> &bg, const double pseudocount, const double log_base = -1);
    
    score_matrix adm_1o_terms(const std::string& filename, const size_t a = 4);
    score_matrix adm_0o_terms(const std::string& filename, const size_t a = 4);
    
    score_matrix adm_to_log_odds(const std::string& filename, const std::vector<double> &bg, const double pseudocount, const size_t a = 4, const double log_base = -1);

moods_scan.h

    std::vector<std::vector<MOODS::match> > scan_dna(const std::string& seq,
                                                const std::vector<score_matrix>& matrices,
                                                const std::vector<double>& bg,
                                                const std::vector<double> thresholds,
                                                unsigned int window_size = 7 );

    std::vector<std::vector<MOODS::match> > scan(const std::string& seq,
                                            const std::vector<score_matrix>& matrices,
                                            const std::vector<double>& bg,
                                            const std::vector<double> thresholds,
                                            unsigned int window_size,
                                            const std::vector<std::string>& alphabet);

    std::vector< std::vector<MOODS::match> > scan_best_hits_dna(const std::string& seq,
                                                          const std::vector<score_matrix>& matrices,
                                                          size_t target,
                                                          int iterations = 10,
                                                          unsigned int MULT = 2,
                                                          size_t LIMIT_MULT = 10,
                                                          size_t window_size = 7);

Scanner

    class Scanner {
    public:
        Scanner(unsigned int window_size);
        Scanner(unsigned int window_size, const std::vector<std::string>& alphabet);

        // void set_motifs(const std::vector<MOODS::scan::Motif>& motifs);
        void set_motifs(const std::vector<score_matrix>& matrices,
                        const std::vector<double>& bg,
                        const std::vector<double> thresholds);
        std::vector<std::vector<MOODS::match> > scan(const std::string& s);
        std::vector<std::vector<MOODS::match_with_variant> > variant_matches(const std::string& seq, const std::vector<MOODS::variant>& variants, int max_depth = 0);
    };