Skip to content

Commit

Permalink
Update documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
cangermueller committed May 16, 2017
1 parent b96e484 commit 8be6c39
Show file tree
Hide file tree
Showing 14 changed files with 121 additions and 38 deletions.
2 changes: 1 addition & 1 deletion deepcpg/data/annotations.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Functionality for reading and matching annotations."""
"""Functions for reading and matching annotations."""

from __future__ import division
from __future__ import print_function
Expand Down
2 changes: 1 addition & 1 deletion deepcpg/data/dna.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Provides functions for representing DNA sequences."""
"""Functions for representing DNA sequences."""

from __future__ import division
from __future__ import print_function
Expand Down
58 changes: 58 additions & 0 deletions deepcpg/data/fasta.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Functions for reading FASTA files."""

from __future__ import division
from __future__ import print_function

Expand All @@ -11,13 +13,26 @@


class FastaSeq(object):
"""FASTA sequence."""

def __init__(self, head, seq):
self.head = head
self.seq = seq


def parse_lines(lines):
"""Parse FASTA sequences from list of strings.
Parameters
----------
lines: list
List of lines from FASTA file.
Returns
-------
list
List of :class:`FastaSeq` objects.
"""
seqs = []
seq = None
start = None
Expand All @@ -38,6 +53,21 @@ def parse_lines(lines):


def read_file(filename, gzip=None):
"""Read FASTA file and return sequences.
Parameters
----------
filename: str
File name.
gzip: bool
If `True`, file is gzip compressed. If `None`, suffix is used to
determine if file is compressed.
Returns
-------
list
List of :class:`FastaSeq` objects.
"""
if gzip is None:
gzip = filename.endswith('.gz')
if gzip:
Expand All @@ -49,6 +79,20 @@ def read_file(filename, gzip=None):


def select_file_by_chromo(filenames, chromo):
"""Select file of chromosome `chromo`.
Parameters
----------
filenames: list
List of file names or directory with FASTA files.
chromo: str
Chromosome that is selected.
Returns
-------
str
Filename in `filenames` that contains chromosome `chromo`.
"""
filenames = to_list(filenames)
if len(filenames) == 1 and os.path.isdir(filenames[0]):
filenames = glob(os.path.join(filenames[0],
Expand All @@ -60,6 +104,20 @@ def select_file_by_chromo(filenames, chromo):


def read_chromo(filenames, chromo):
"""Read DNA sequence of chromosome `chromo`.
Parameters
----------
filenames: list
List of FASTA files.
chromo: str
Chromosome that is read.
Returns
-------
str
DNA sequence of chromosome `chromo`.
"""
filename = select_file_by_chromo(filenames, chromo)
if not filename:
raise ValueError('DNA file for chromosome "%s" not found!' % chromo)
Expand Down
73 changes: 50 additions & 23 deletions deepcpg/data/feature_extractor.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Feature extraction."""

from __future__ import division
from __future__ import print_function

Expand All @@ -6,26 +8,32 @@


class KnnCpgFeatureExtractor(object):
"""Extracts k CpG sites next to target sites. Excludes CpG sites at the
"""Extract k CpG sites next to target sites. Exclude CpG sites at the
same position.
"""

def __init__(self, k=1):
self.k = k

def extract(self, x, y, ys):
"""Extracts state and distance of k CpG sites next to target sites.
"""Extract state and distance of k CpG sites next to target sites.
Target site is excluded.
Parameters
----------
x: numpy array with target positions sorted in ascending order
y: numpy array with source positions sorted in ascending order
ys: numpy array with source CpG states
x: :class:`numpy.ndarray`
:class:`numpy.ndarray` with target positions sorted in ascending
order.
y: :class:`numpy.ndarray`
:class:`numpy.ndarray` with source positions sorted in ascending
order.
ys: :class:`numpy.ndarray`
:class:`numpy.ndarray` with source CpG states.
Returns
-------
Tuple (cpg, dist) with numpy arrays of dimension (len(x), 2k):
tuple
Tuple (cpg, dist) with numpy arrays of dimension (len(x), 2k):
cpg: CpG states to the left (0:k) and right (k:2k)
dist: Distances to the left (0:k) and right (k:2k)
"""
Expand Down Expand Up @@ -78,12 +86,14 @@ def extract(self, x, y, ys):
return (knn_cpg, knn_dist)

def __larger_equal(self, x, y):
"""Returns for each x[i] index j, s.t. y[j] >= x[i].
"""Return for each x[i] index j, s.t. y[j] >= x[i].
Parameters
----------
x : numpy array of with positions sorted in ascending order
y : numpy array of with positions sorted in ascending order
x: :class:`numpy.ndarray`
:class:`numpy.ndarray` with positions sorted in ascending order.
y: :class:`numpy.ndarray`
:class:`numpy.ndarray` with positions sorted in ascending order.
"""

n = len(x)
Expand All @@ -103,21 +113,24 @@ def __larger_equal(self, x, y):


class IntervalFeatureExtractor(object):
"""Checks if positions are in a list of intervals (start, end)."""
"""Check if positions are in a list of intervals (start, end)."""

@staticmethod
def join_intervals(s, e):
"""Transforms a list of possible overlapping intervals into
"""Transform a list of possibly overlapping intervals into
non-overlapping intervals.
Parameters
----------
s : list with start of interval sorted in ascending order
e : list with end of interval
s: list
List with start of interval sorted in ascending order.
e: list
List with end of interval.
Returns
-------
Tuple (s, e) of non-overlapping intervals
tuple
Tuple (s, e) of non-overlapping intervals.
"""

rs = []
Expand All @@ -141,18 +154,22 @@ def join_intervals(s, e):

@staticmethod
def index_intervals(x, ys, ye):
"""Returns for positions x[i] index j, s.t. ys[j] <= x[i] <= ye[j] or -1.
"""Return for positions x[i] index j, s.t. ys[j] <= x[i] <= ye[j] or -1.
Intervals must be non-overlapping!
Parameters
----------
x : list of positions
ys: list with start of interval sorted in ascending order
ye: list with end of interval
x : list
List of positions.
ys: list
List with start of interval sorted in ascending order.
ye: list
List with end of interval.
Returns
-------
numpy array of same length than x with index or -1
:class:`numpy.ndarray`
:class:`numpy.ndarray` of the same length as x with index or -1.
"""

n = len(ys)
Expand All @@ -176,22 +193,32 @@ def extract(self, x, ys, ye):
class KmersFeatureExtractor(object):

def __init__(self, kmer_len, nb_char=4):
"""Extract kmer frequencies from integer sequences.
Parameters
----------
kmer_len: int
Kmer length.
nb_char: int
Number of characters in alphabet.
"""
self.kmer_len = kmer_len
self.nb_char = nb_char
self.nb_kmer = self.nb_char**self.kmer_len

def __call__(self, seqs):
"""Extracts kmer frequencies from integer sequences.
"""Extract kmer frequencies from integer sequences.
Parameters
----------
s: numpy array of size M x N of M integer sequences of length N.
seqs: :class:`numpy.ndarray`
:class:`numpy.ndarray` of size MxN, with M sequences of length N.
Returns
-------
freq: numpy array of size M x C of kmer frequencies.
:class:`numpy.ndarray`
:class:`numpy.ndarray` of size MxC with kmer frequencies.
"""

nb_seq, seq_len = seqs.shape
kmer_freq = np.zeros((nb_seq, self.nb_kmer), dtype=np.int32)
vec = np.array([self.nb_char**i for i in range(self.kmer_len)],
Expand Down
2 changes: 1 addition & 1 deletion deepcpg/data/hdf.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Provides functionality to access HDF5 files."""
"""Functions for accessing HDF5 files."""

from __future__ import division
from __future__ import print_function
Expand Down
2 changes: 1 addition & 1 deletion deepcpg/data/stats.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Computes statistic for binary CpG matrix.
"""Functions for computing statistics for binary CpG matrix.
CpG matrix x assumed to have shape
* [sites, cells] for per CpG statistics
Expand Down
2 changes: 1 addition & 1 deletion deepcpg/data/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Provides general-purpose functionality."""
"""General-purpose I/O functions."""

from __future__ import division
from __future__ import print_function
Expand Down
2 changes: 1 addition & 1 deletion deepcpg/evaluation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Functions to evaluate predictions performances."""
"""Functions for evaluating prediction performance."""

from __future__ import division
from __future__ import print_function
Expand Down
2 changes: 1 addition & 1 deletion deepcpg/metrics.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Functions to compute performance metrics during training using Keras.
"""Functions for computing performance metrics during training using Keras.
Similar to :mod:`evaluation`, but uses Keras tensors instead of numpy arrays
as input.
Expand Down
4 changes: 2 additions & 2 deletions deepcpg/models/cpg.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""CpG models.
Provides models trained with observed neighboring methylation states of multiple
cells.
Provides models trained with observed neighboring methylation states of
multiple cells.
"""

from __future__ import division
Expand Down
2 changes: 1 addition & 1 deletion deepcpg/models/joint.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Joint models.
Provides models two join features of DNA and CpG model.
Provides models for joining features from DNA and CpG models.
"""
from __future__ import division
from __future__ import print_function
Expand Down
4 changes: 1 addition & 3 deletions deepcpg/models/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
"""Model utilities.
Provides functionality for building, training, and loading models.
"""Functions for building, training, and loading models.
"""

from __future__ import division
Expand Down
2 changes: 1 addition & 1 deletion deepcpg/motifs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Functionality for analyzing motifs."""
"""Motif analysis."""

from __future__ import division
from __future__ import print_function
Expand Down
2 changes: 1 addition & 1 deletion deepcpg/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Provides general-purpose functionality."""
"""General-purpose functions."""

from __future__ import division
from __future__ import print_function
Expand Down

0 comments on commit 8be6c39

Please sign in to comment.