Skip to content

Commit

Permalink
Merge branch 'master' into mlin-dx-launcher
Browse files Browse the repository at this point in the history
  • Loading branch information
tomkinsc committed Jul 3, 2018
2 parents 725e118 + 2029e36 commit fe2c7df
Show file tree
Hide file tree
Showing 72 changed files with 8,127 additions and 253 deletions.
71 changes: 46 additions & 25 deletions illumina.py
Expand Up @@ -19,8 +19,11 @@
from collections import defaultdict
import concurrent.futures

import arrow

import util.cmd
import util.file
import util.misc
import tools.picard
from util.illumina_indices import IlluminaIndexReference

Expand Down Expand Up @@ -48,6 +51,11 @@ def parser_illumina_demux(parser=argparse.ArgumentParser()):
help='''Override SampleSheet. Input tab or CSV file w/header and four named columns:
barcode_name, library_name, barcode_sequence_1, barcode_sequence_2.
Default is to look for a SampleSheet.csv in the inDir.''')
parser.add_argument('--runInfo',
default=None,
dest="runinfo",
help='''Override RunInfo. Input xml file.
Default is to look for a RunInfo.xml file in the inDir.''')
parser.add_argument('--flowcell', help='Override flowcell ID (default: read from RunInfo.xml).', default=None)
parser.add_argument('--read_structure',
help='Override read structure (default: read from RunInfo.xml).',
Expand Down Expand Up @@ -91,18 +99,23 @@ def main_illumina_demux(args):
# prepare
illumina = IlluminaDirectory(args.inDir)
illumina.load()

if args.runinfo:
runinfo = RunInfo(args.runinfo)
else:
runinfo = illumina.get_RunInfo()
if args.flowcell:
flowcell = args.flowcell
else:
flowcell = illumina.get_RunInfo().get_flowcell()
flowcell = runinfo.get_flowcell()
if args.run_start_date:
run_date = args.run_start_date
else:
run_date = illumina.get_RunInfo().get_rundate_american()
run_date = runinfo.get_rundate_american()
if args.read_structure:
read_structure = args.read_structure
else:
read_structure = illumina.get_RunInfo().get_read_structure()
read_structure = runinfo.get_read_structure()
if args.sampleSheet:
samples = SampleSheet(args.sampleSheet, only_lane=args.lane)
else:
Expand All @@ -124,7 +137,7 @@ def main_illumina_demux(args):
tools.picard.CheckIlluminaDirectoryTool().execute(
illumina.get_BCLdir(),
args.lane,
illumina.get_RunInfo().get_read_structure(),
runinfo.get_read_structure(),
link_locs=link_locs
)
except subprocess.CalledProcessError as e:
Expand All @@ -145,7 +158,7 @@ def main_illumina_demux(args):
tools.picard.CheckIlluminaDirectoryTool().execute(
illumina.get_BCLdir(),
args.lane,
illumina.get_RunInfo().get_read_structure(),
runinfo.get_read_structure(),
link_locs=link_locs
)

Expand Down Expand Up @@ -195,8 +208,8 @@ def main_illumina_demux(args):
picardOpts['read_structure'] = read_structure
if args.threads:
picardOpts['num_processors'] = args.threads
if not picardOpts.get('sequencing_center') and illumina.get_RunInfo():
picardOpts['sequencing_center'] = illumina.get_RunInfo().get_machine()
if not picardOpts.get('sequencing_center') and runinfo:
picardOpts['sequencing_center'] = runinfo.get_machine()

if multiplexed_samples:
tools.picard.IlluminaBasecallsToSamTool().execute(
Expand Down Expand Up @@ -534,27 +547,35 @@ def get_flowcell(self):
log.warn("The provided flowcell ID is longer than 15 characters. Is that correct?")
return fc

def get_rundate_american(self):
@util.misc.memoize
def _get_rundate_obj(self):
    """
    Parse the text of the <Date> node in the RunInfo.xml file
    and return an arrow date object.

    The node is read from the first child of ``self.root``. Each known
    Illumina date format is tried in order and the first successful parse
    is returned.

    NOTE(review): the result is wrapped by ``util.misc.memoize``;
    presumably this caches the parsed date -- confirm against util.misc.

    Raises:
        arrow.parser.ParserError: if the date string matches none of the
            known formats.
    """
    rundate = self.root[0].find('Date').text
    # possible formats found in RunInfo.xml:
    #   "170712" (YYMMDD)
    #   "20170712" (YYYYMMDD)
    #   "6/27/2018 4:59:20 PM" (M/D/YYYY h:mm:ss A)
    datestring_formats = [
        "YYMMDD",
        "YYYYMMDD",
        "M/D/YYYY h:mm:ss A"
    ]
    # The all-digit formats describe strings of one exact width, so a
    # width mismatch can never be a valid parse for them.
    # NOTE(review): some arrow releases appear to match a format against
    # a prefix of the string (so "20170712" could be attempted as YYMMDD);
    # the width guard avoids depending on that -- confirm for the pinned
    # arrow version.
    fixed_widths = {"YYMMDD": 6, "YYYYMMDD": 8}
    rundate_width = len(rundate.strip())
    for datestring_format in datestring_formats:
        expected_width = fixed_widths.get(datestring_format)
        if expected_width is not None and rundate_width != expected_width:
            continue
        try:
            return arrow.get(rundate, datestring_format)
        except (arrow.parser.ParserError, ValueError):
            # ValueError covers out-of-range fields (e.g. month 13) so
            # they are reported by the descriptive error below instead
            # of escaping from the first format attempted.
            continue
    raise arrow.parser.ParserError(
        "The date string seen in RunInfo.xml ('%s') did not match known Illumina formats: %s" % (rundate, datestring_formats)
    )

def get_rundate_american(self):
    """
    Return the run date as a month-first ``MM/DD/YYYY`` string.

    The date object comes from ``self._get_rundate_obj()``; any error
    raised there propagates to the caller.
    """
    run_date = self._get_rundate_obj()
    return str(run_date.format("MM/DD/YYYY"))

def get_rundate_iso(self):
    """
    Return the run date as a ``YYYY-MM-DD`` string.

    Parsing is delegated to ``self._get_rundate_obj()`` so this method
    accepts the same date formats as ``get_rundate_american``; any error
    raised by that helper propagates to the caller.
    """
    run_date = self._get_rundate_obj()
    return str(run_date.format("YYYY-MM-DD"))

def get_machine(self):
    """
    Return the text of the <Instrument> node found under the first
    child of ``self.root``.
    """
    instrument_node = self.root[0].find('Instrument')
    return instrument_node.text
Expand Down

0 comments on commit fe2c7df

Please sign in to comment.