Permalink
Browse files

Added capture-c probe creation script

  • Loading branch information...
1 parent 7e6837f commit 15b0ffda4dec316fe377cb28b47d1f5e4312cea1 @buske committed Nov 8, 2014
Showing with 99 additions and 0 deletions.
  1. +99 −0 src/capture_c/fa2oligo.py
View
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+
+"""
+Given a single-sequence fasta, generate candidate oligonuclotide sequences
+around restriction sites in a fasta output.
+"""
+
+__author__ = 'Orion Buske (buske@cs.toronto.edu)'
+
+import os
+import sys
+import logging
+
+def read_fasta(filename):
+ parts = []
+ with open(filename) as ifp:
+ header = ifp.readline()
+ assert header.startswith('>')
+ seq_name = header.lstrip('>').strip()
+ logging.info('Designing probes for sequence: {}'.format(seq_name))
+ for line in ifp:
+ line = line.rstrip('\n')
+ assert not line.startswith('>')
+ parts.append(line)
+
+ return ''.join(parts)
+
+def generate_probes(fragment, probe_length, max_probe_distance):
+ probes = set()
+ if len(fragment) < probe_length:
+ return probes
+
+ # Number of steps to tile from each end
+ n_tiles = min(len(fragment), max_probe_distance) - probe_length
+ logging.debug('Tiling {} probes from each end'.format(n_tiles))
+
+ for offset in range(n_tiles):
+ oligo = fragment[offset:offset + probe_length]
+ probes.add(oligo)
+
+ # If smaller than max_probe_distance, probes already span full fragment
+ if len(fragment) > max_probe_distance:
+ # Otherwise, need to capture some probes from other end
+ for offset in range(n_tiles):
+ start = len(fragment) - offset - probe_length
+ end = len(fragment) - offset
+ oligo = fragment[start:end]
+ probes.add(oligo)
+
+ return probes
+
+def print_probes(probes):
+ for i, probe in enumerate(probes):
+ print('>probe_{}\n{}'.format(i, probe))
+
+def script(fasta_filename, restriction_site, probe_length, max_probe_distance):
+ seq = read_fasta(fasta_filename)
+ logging.info('Sequence is {}bp'.format(len(seq)))
+
+ fragments = seq.split(restriction_site)
+ logging.info('{} fragments expected after restriction with {}'.format(len(fragments), restriction_site))
+
+ probes = set()
+ for fragment in fragments:
+ logging.debug('Fragment of length: {}'.format(len(fragment)))
+ frag_probes = generate_probes(fragment, probe_length, max_probe_distance)
+ logging.debug('Designed {} probes'.format(len(frag_probes)))
+ probes.update(frag_probes)
+
+ logging.info('Generated {} unique {}bp probe sequences'.format(len(probes), probe_length))
+ print_probes(probes)
+
+def parse_args(args):
+ from argparse import ArgumentParser
+ description = __doc__.strip()
+
+ parser = ArgumentParser(description=description)
+ parser.add_argument('fasta_filename', metavar='FASTA',
+ help='Single-sequence fasta to design probes for')
+ parser.add_argument('--restriction-site', default='GATC',
+ help='Restriction site sequence (default for DpnII)')
+ parser.add_argument('--probe-length', default=120,
+ help='The length of each probe oligo')
+ parser.add_argument('--max-probe-distance', default=140,
+ help='Maximum distance from far end of probe to'
+ ' nearest restriction site')
+ parser.add_argument('--log', default='INFO',
+ help='Logging level (e.g. DEBUG, INFO, WARNING)')
+
+ return parser.parse_args(args)
+
+def main(args=sys.argv[1:]):
+ args = vars(parse_args(args))
+ logging.basicConfig(level=args.pop('log'))
+
+ script(**args)
+
+if __name__ == '__main__':
+ sys.exit(main())

0 comments on commit 15b0ffd

Please sign in to comment.