Skip to content

Commit

Permalink
Merge pull request #73 from lonelyjoeparker/master
Browse files Browse the repository at this point in the history
index function and file I/O exception handling
  • Loading branch information
arq5x committed Mar 9, 2016
2 parents e0cc0ab + c53e7c4 commit 58dc3d5
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 2 deletions.
10 changes: 9 additions & 1 deletion poretools/Fast5File.py
Expand Up @@ -211,7 +211,7 @@ def open(self):
self.hdf5file = h5py.File(self.filename, 'r') self.hdf5file = h5py.File(self.filename, 'r')
return True return True
except Exception, e: except Exception, e:
logger.warning("Cannot open file: %s. Perhaps it is corrupt? Moving on.\n" % self.filename) logger.warning("Exception:Cannot open file: %s. Perhaps it is corrupt? Moving on.\n" % self.filename)
return False return False


def guess_version(self): def guess_version(self):
Expand All @@ -222,12 +222,20 @@ def guess_version(self):
self.hdf5file["/Analyses/Basecall_2D_%03d/BaseCalled_template" % (self.group)] self.hdf5file["/Analyses/Basecall_2D_%03d/BaseCalled_template" % (self.group)]
return 'classic' return 'classic'
except KeyError: except KeyError:
logger.warning("KeyError:Cannot open file: %s. Perhaps it is corrupt? Moving on.\n" % self.filename)
pass
except AttributeError:
logger.warning("AttributeError:Cannot open file: %s. Perhaps it is corrupt? Moving on.\n" % self.filename)
pass pass


try: try:
self.hdf5file["/Analyses/Basecall_1D_%03d/BaseCalled_template" % (self.group)] self.hdf5file["/Analyses/Basecall_1D_%03d/BaseCalled_template" % (self.group)]
return 'metrichor1.16' return 'metrichor1.16'
except KeyError: except KeyError:
logger.warning("KeyError:Cannot open file: %s. Perhaps it is corrupt? Moving on.\n" % self.filename)
pass
except AttributeError:
logger.warning("AttributeError:Cannot open file: %s. Perhaps it is corrupt? Moving on.\n" % self.filename)
pass pass


return 'prebasecalled' return 'prebasecalled'
Expand Down
66 changes: 66 additions & 0 deletions poretools/index.py
@@ -0,0 +1,66 @@
import Fast5File
import datetime

############
#
# index
#
# A tool to extract
# all info needed to
# identify a pile of
# unsorted reads from
# multiple MinION
# sequencing
# experiments.
#
############

def run(parser, args):
    """Print a tab-separated index of the FAST5 files named in ``args.files``.

    A header line is written first, followed by one row per file yielded by
    ``Fast5File.Fast5FileSet(args.files)``. Each row holds the file name, the
    lengths of up to three FASTQ records, the ASIC id and temperature, the
    heatsink temperature, the channel number, the read and experiment start
    times (raw value plus formatted date and time columns), the duration and
    the guessed FAST5 version.

    parser -- unused here; kept because run_subtool() calls
              ``submodule.run(parser, args)`` for every sub-command.
    args   -- parsed command-line arguments; only ``args.files`` is read.

    Nothing is returned; all output goes to standard output.
    """
    # The embedded tab makes one formatted timestamp fill two columns
    # (date and time), matching the *_string_date / *_string_time headers.
    time_format = "%Y-%b-%d (%a)\t%H:%M:%S"

    # Single-argument print(...) emits the same text whether print is the
    # Python 2 statement or the Python 3 function.
    print("source_filename\ttemplate_fwd_length\tcomplement_rev_length\t2d_length\tasic_id\tasic_temp\theatsink_temp\tchannel\texp_start_time\texp_start_time_string_date\texp_start_time_string_time\tstart_time\tstart_time_string_date\tstart_time_string_time\tduration\tfast5_version")

    for fast5 in Fast5File.Fast5FileSet(args.files):
        # try/finally guarantees the file is closed even when one of the
        # getters or the formatting below raises part-way through a row.
        try:
            # run and flowcell parameters
            asic_temp = fast5.get_asic_temp()
            asic_id = fast5.get_asic_id()
            heatsink_temp = fast5.get_heatsink_temp()
            channel_number = fast5.get_channel_number()

            # try and get timing info
            # NOTE(review): only KeyError is handled; if a getter can return
            # None, float() would raise TypeError instead -- confirm against
            # Fast5File before widening this.
            try:
                start_time = fast5.get_start_time()
                start_time_string = datetime.datetime.fromtimestamp(
                    float(start_time)).strftime(time_format)
                exp_start_time = fast5.get_exp_start_time()
                exp_start_time_string = datetime.datetime.fromtimestamp(
                    float(exp_start_time)).strftime(time_format)
                duration = fast5.get_duration()
            except KeyError:
                # "NA\tNA" keeps the column count equal to the header's.
                start_time = "Not found"
                start_time_string = "NA\tNA"
                exp_start_time = "Not found"
                exp_start_time_string = "NA\tNA"
                duration = "Not found"

            # sequence file info
            fast5_version = fast5.guess_version()

            # read info
            fastq_reads = fast5.get_fastqs('all')
            length_template = None
            length_complement = None
            length_2d = None
            if len(fastq_reads) > 0:
                length_template = len(fastq_reads[0].seq)
            # NOTE(review): with exactly two records the second one is not
            # reported; presumably the order is template, complement, 2D --
            # verify against get_fastqs().
            if len(fastq_reads) > 2:
                length_complement = len(fastq_reads[1].seq)
                length_2d = len(fastq_reads[2].seq)

            print("%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" % (
                fast5.filename,
                length_template,
                length_complement,
                length_2d,
                asic_id,
                asic_temp,
                heatsink_temp,
                channel_number,
                exp_start_time,
                exp_start_time_string,
                start_time,
                start_time_string,
                duration,
                fast5_version))
        finally:
            fast5.close()
14 changes: 13 additions & 1 deletion poretools/poretools_main.py
Expand Up @@ -44,6 +44,8 @@ def run_subtool(parser, args):
import winner as submodule import winner as submodule
elif args.command == 'yield_plot': elif args.command == 'yield_plot':
import yield_plot as submodule import yield_plot as submodule
elif args.command == 'index':
import index as submodule


# run the chosen submodule. # run the chosen submodule.
submodule.run(parser, args) submodule.run(parser, args)
Expand Down Expand Up @@ -289,7 +291,7 @@ def main():
parser_nucdist.set_defaults(func=run_subtool) parser_nucdist.set_defaults(func=run_subtool)


######### #########
# nucdist # metadata
######### #########
parser_metadata = subparsers.add_parser('metadata', parser_metadata = subparsers.add_parser('metadata',
help='Return run metadata such as ASIC ID and temperature from a set of FAST5 files') help='Return run metadata such as ASIC ID and temperature from a set of FAST5 files')
Expand All @@ -298,6 +300,16 @@ def main():
parser_metadata.set_defaults(func=run_subtool) parser_metadata.set_defaults(func=run_subtool)




#########
# index
#########
parser_index = subparsers.add_parser('index',
help='Tabulate all file location info and metadata such as ASIC ID and temperature from a set of FAST5 files')
parser_index.add_argument('files', metavar='FILES', nargs='+',
help='The input FAST5 files.')
parser_index.set_defaults(func=run_subtool)


########## ##########
# qualdist # qualdist
########## ##########
Expand Down

0 comments on commit 58dc3d5

Please sign in to comment.