Skip to content

Commit

Permalink
Allowing for the use of first-order and detailed TFFMs.
Browse files Browse the repository at this point in the history
  • Loading branch information
amathelier committed Sep 12, 2016
1 parent 2de9a55 commit c414815
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 12 deletions.
22 changes: 13 additions & 9 deletions DNAshapedTFBS.py
Expand Up @@ -9,7 +9,6 @@
# Local environment
# TODO: Test if TFFM is installed instead of using local env.
sys.path.append('{0}/../TFFM/'.format(PATH))
from hit_module import HIT
from sklearn.externals import joblib
from argparsing import *
from the_constants import BWTOOL, DNASHAPEINTER
Expand Down Expand Up @@ -42,16 +41,21 @@ def find_pssm_hits(pssm, seq_file):
return hits


def find_tffm_hits(xml, seq_file):
def find_tffm_hits(xml, seq_file, tffm_kind):
""" Predict hits in sequences using a TFFM. """
#import sys
#sys.path.append("/raid6/amathelier/TFFM+DNAshape/bin/TFFM/")
import tffm_module
from constants import TFFM_KIND # TFFM-framework
from hit_module import HIT
tffm = tffm_module.tffm_from_xml(xml, TFFM_KIND.FIRST_ORDER)
#tffm = tffm_module.tffm_from_xml(xml, TFFM_KIND.DETAILED)
return [hit for hit in tffm.scan_sequences(seq_file, only_best=True) if hit]
if tffm_kind == 'first_order':
tffm_kind = TFFM_KIND.FIRST_ORDER
elif tffm_kind == 'detailed':
tffm_kind = TFFM_KIND.DETAILED
else:
sys.exit('The type of TFFM should be "first_order" or "detailed".')
tffm = tffm_module.tffm_from_xml(xml, tffm_kind)
return [hit for hit in
tffm.scan_sequences(seq_file, only_best=True) if hit]


def construct_classifier_input(foreground, background):
Expand Down Expand Up @@ -130,7 +134,7 @@ def apply_classifier(hits, argu, bool4bits=False):

def tffm_apply_classifier(argu):
""" Apply the TFFM + DNA shape classifier. """
hits = find_tffm_hits(argu.tffm_file, argu.in_fasta)
hits = find_tffm_hits(argu.tffm_file, argu.in_fasta, argu.tffm_kind)
if hits:
apply_classifier(hits, argu)
else:
Expand Down Expand Up @@ -183,8 +187,8 @@ def train_classifier(fg_hits, bg_hits, argu, bool4bits=False):

def tffm_train_classifier(argu):
""" Train a TFFM + DNA shape classifier. """
fg_hits = find_tffm_hits(argu.tffm_file, argu.fg_fasta)
bg_hits = find_tffm_hits(argu.tffm_file, argu.bg_fasta)
fg_hits = find_tffm_hits(argu.tffm_file, argu.fg_fasta, argu.tffm_kind)
bg_hits = find_tffm_hits(argu.tffm_file, argu.bg_fasta, argu.tffm_kind)
train_classifier(fg_hits, bg_hits, argu)


Expand Down
15 changes: 12 additions & 3 deletions argparsing.py
Expand Up @@ -5,13 +5,17 @@
from DNAshapedTFBS import pssm_apply_classifier
from DNAshapedTFBS import binary_apply_classifier


def tffm_train_arg_parsing(subparsers):
""" Train the TFFM + DNA shape classifier. """
help_str = "Train the TFFM + DNA shape classifier."
parser_t = subparsers.add_parser('trainTFFM', help=help_str)
parser_t.add_argument('-T', '--tffmfile', required=True, dest='tffm_file',
action='store', type=str, help='TFFM XML file.')
action='store', type=str, help='TFFM XML file.')
parser_t.add_argument('-t', '--tffm_kind', required=False,
dest='tffm_kind', action='store', type=str,
choices=['first_order', 'detailed'],
default='first_order',
help='TFFM kind ("first_order" or "detailed").')
help_str = 'Input fasta file containing the foreground sequences.'
parser_t.add_argument('-i', '--fg_fasta', required=True, type=str,
dest='fg_fasta', action='store', help=help_str)
Expand Down Expand Up @@ -55,7 +59,12 @@ def tffm_apply_arg_parsing(subparsers):
help_str = 'Apply the TFFM + DNA shape classifier.'
parser_a = subparsers.add_parser('applyTFFM', help=help_str)
parser_a.add_argument('-T', '--tffmfile', required=True, dest='tffm_file',
action='store', type=str, help='TFFM XML file.')
action='store', type=str, help='TFFM XML file.')
parser_a.add_argument('-t', '--tffm_kind', required=False,
dest='tffm_kind', action='store', type=str,
choices=['first_order', 'detailed'],
default='first_order',
help='TFFM kind ("first_order" or "detailed").')
help_str = 'Input fasta file containing the sequences.'
parser_a.add_argument('-i', '--input_fasta', required=True, type=str,
dest='in_fasta', action='store', help=help_str)
Expand Down

0 comments on commit c414815

Please sign in to comment.