Permalink
Browse files

Merge pull request #73 from lonelyjoeparker/master

index function and file I/O exception handling
  • Loading branch information...
arq5x committed Mar 9, 2016
2 parents e0cc0ab + c53e7c4 commit 58dc3d51715d03510690c304f52863cd41844349
Showing with 88 additions and 2 deletions.
  1. +9 −1 poretools/Fast5File.py
  2. +66 −0 poretools/index.py
  3. +13 −1 poretools/poretools_main.py
View
@@ -211,7 +211,7 @@ def open(self):
self.hdf5file = h5py.File(self.filename, 'r')
return True
except Exception, e:
logger.warning("Cannot open file: %s. Perhaps it is corrupt? Moving on.\n" % self.filename)
logger.warning("Exception:Cannot open file: %s. Perhaps it is corrupt? Moving on.\n" % self.filename)
return False
def guess_version(self):
@@ -222,12 +222,20 @@ def guess_version(self):
self.hdf5file["/Analyses/Basecall_2D_%03d/BaseCalled_template" % (self.group)]
return 'classic'
except KeyError:
logger.warning("KeyError:Cannot open file: %s. Perhaps it is corrupt? Moving on.\n" % self.filename)
pass
except AttributeError:
logger.warning("AttributeError:Cannot open file: %s. Perhaps it is corrupt? Moving on.\n" % self.filename)
pass
try:
self.hdf5file["/Analyses/Basecall_1D_%03d/BaseCalled_template" % (self.group)]
return 'metrichor1.16'
except KeyError:
logger.warning("KeyError:Cannot open file: %s. Perhaps it is corrupt? Moving on.\n" % self.filename)
pass
except AttributeError:
logger.warning("AttributeError:Cannot open file: %s. Perhaps it is corrupt? Moving on.\n" % self.filename)
pass
return 'prebasecalled'
View
@@ -0,0 +1,66 @@
import Fast5File
import datetime
############
#
# index
#
# A tool to extract
# all info needed to
# identify a pile of
# unsorted reads from
# multiple MinION
# sequencing
# experiments.
#
############
def run(parser, args):
    """Print a tab-separated index of the FAST5 files named in ``args.files``.

    A header line is written to stdout first, followed by one row for each
    object yielded by ``Fast5File.Fast5FileSet(args.files)``. Each row holds
    the file name, the lengths of the template / complement / 2D reads, the
    ASIC and heatsink readings, the channel number, the experiment and read
    start times (raw value plus formatted date and time), the duration and
    the guessed FAST5 version.

    Arguments:
    parser -- not used by this subtool; accepted because the dispatcher
              calls every subtool as ``run(parser, args)``.
    args   -- parsed command-line arguments; only ``args.files`` is read.

    Read lengths whose read is absent are printed as ``None``. When a timing
    lookup raises ``KeyError`` every timing column falls back to a
    placeholder ("Not found" / "NA").
    """
    # The formatted timestamp deliberately contains a tab so that it fills
    # two columns (date and time) of the header below.
    time_format = "%Y-%b-%d (%a)\t%H:%M:%S"

    # Single-argument print(...) behaves identically under the Python 2
    # print statement and the Python 3 print function.
    print("source_filename\ttemplate_fwd_length\tcomplement_rev_length\t2d_length\tasic_id\tasic_temp\theatsink_temp\tchannel\texp_start_time\texp_start_time_string_date\texp_start_time_string_time\tstart_time\tstart_time_string_date\tstart_time_string_time\tduration\tfast5_version")

    for fast5 in Fast5File.Fast5FileSet(args.files):
        try:
            # run and flowcell parameters
            asic_temp = fast5.get_asic_temp()
            asic_id = fast5.get_asic_id()
            heatsink_temp = fast5.get_heatsink_temp()
            channel_number = fast5.get_channel_number()

            # try and get timing info
            try:
                start_time = fast5.get_start_time()
                start_time_string = datetime.datetime.fromtimestamp(float(start_time)).strftime(time_format)
                exp_start_time = fast5.get_exp_start_time()
                exp_start_time_string = datetime.datetime.fromtimestamp(float(exp_start_time)).strftime(time_format)
                duration = fast5.get_duration()
            except KeyError:
                # One missing timing field invalidates the whole timing group.
                start_time = "Not found"
                start_time_string = "NA\tNA"
                exp_start_time = "Not found"
                exp_start_time_string = "NA\tNA"
                duration = "Not found"

            # sequence file info
            fast5_version = fast5.guess_version()

            # read info
            # NOTE(review): assumes get_fastqs('all') orders reads as
            # template, complement, 2D -- confirm against Fast5File.
            fastq_reads = fast5.get_fastqs('all')
            length_template = None
            length_complement = None
            length_2d = None
            if len(fastq_reads) > 0:
                length_template = len(fastq_reads[0].seq)
            # Each read is gated on its own index so a file holding only
            # template + complement still reports the complement length.
            if len(fastq_reads) > 1:
                length_complement = len(fastq_reads[1].seq)
            if len(fastq_reads) > 2:
                length_2d = len(fastq_reads[2].seq)

            print("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (
                fast5.filename,
                length_template,
                length_complement,
                length_2d,
                asic_id,
                asic_temp,
                heatsink_temp,
                channel_number,
                exp_start_time,
                exp_start_time_string,
                start_time,
                start_time_string,
                duration,
                fast5_version))
        finally:
            # Release the file handle even if one of the accessors raised.
            fast5.close()
@@ -44,6 +44,8 @@ def run_subtool(parser, args):
import winner as submodule
elif args.command == 'yield_plot':
import yield_plot as submodule
elif args.command == 'index':
import index as submodule
# run the chosen submodule.
submodule.run(parser, args)
@@ -289,7 +291,7 @@ def main():
parser_nucdist.set_defaults(func=run_subtool)
#########
# nucdist
# metadata
#########
parser_metadata = subparsers.add_parser('metadata',
help='Return run metadata such as ASIC ID and temperature from a set of FAST5 files')
@@ -298,6 +300,16 @@ def main():
parser_metadata.set_defaults(func=run_subtool)
#########
# index
#########
parser_index = subparsers.add_parser('index',
help='Tabulate all file location info and metadata such as ASIC ID and temperature from a set of FAST5 files')
parser_index.add_argument('files', metavar='FILES', nargs='+',
help='The input FAST5 files.')
parser_index.set_defaults(func=run_subtool)
##########
# qualdist
##########

0 comments on commit 58dc3d5

Please sign in to comment.