Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bcftbx/IlluminaData: extend data items exposed by 'IlluminaRunInfo' class #172

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
18 changes: 16 additions & 2 deletions bcftbx/IlluminaData.py
Expand Up @@ -187,7 +187,11 @@ class IlluminaRunInfo(object):
Extracts basic information from a RunInfo.xml file:

run_id : the run id e.g.'130805_PJ600412T_0012_ABCDEZXDYY'
run_number : the run number e.g. '12'
run_number : the run number e.g. '0012'
instrument : the instrument name e.g. 'PJ600412T'
date : the run date e.g. '130805'
flowcell : the flowcell id e.g. 'ABCDEZXDYY'
lane_count : the flowcell lane count e.g. 8
bases_mask : bases mask string derived from the read information
e.g. 'y101,I6,y101'
reads : a list of Python dictionaries (one per read)
Expand All @@ -212,13 +216,23 @@ def __init__(self,runinfo_xml):
self.runinfo_xml = runinfo_xml
self.run_id = None
self.run_number = None
self.instrument = None
self.flowcell = None
self.date = None
self.lane_count = None
self.reads = []
# Process contents
#
doc = xml.dom.minidom.parse(self.runinfo_xml)
run_tag = doc.getElementsByTagName('Run')[0]
self.run_id = run_tag.getAttribute('Id')
self.run_number = run_tag.getAttribute('Number')
self.instrument = \
doc.getElementsByTagName('Instrument')[0].firstChild.nodeValue
self.flowcell = \
doc.getElementsByTagName('Flowcell')[0].firstChild.nodeValue
self.date = doc.getElementsByTagName('Date')[0].firstChild.nodeValue
flowcell_layout_tag = doc.getElementsByTagName('FlowcellLayout')[0]
self.lane_count = flowcell_layout_tag.getAttribute('LaneCount')
read_tags = doc.getElementsByTagName('Read')
for read_tag in read_tags:
self.reads.append({'number': read_tag.getAttribute('Number'),
Expand Down
50 changes: 50 additions & 0 deletions bcftbx/test/test_IlluminaData.py
Expand Up @@ -4,6 +4,7 @@
from bcftbx.IlluminaData import *
from bcftbx.mock import MockIlluminaRun
from bcftbx.mock import MockIlluminaData
from bcftbx.mock import RunInfoXml
from bcftbx.TabFile import TabDataLine
import bcftbx.utils
import unittest
Expand Down Expand Up @@ -158,6 +159,55 @@ def test_illuminarun_nextseq_missing_directory(self):
self.assertEqual(run.lanes,[])
self.assertEqual(run.cycles,None)

class TestIlluminaRunInfo(unittest.TestCase):
    """
    Tests for the IlluminaRunInfo class

    Each test gets a fresh temporary directory (``self.tmpdir``) to
    write a RunInfo.xml file into; the directory and its contents are
    removed again when the test finishes.
    """
    def setUp(self):
        # Create a temporary working directory
        self.tmpdir = tempfile.mkdtemp()

    def tearDown(self):
        # Remove the test directory together with any files written
        # into it. os.rmdir() only removes *empty* directories, so it
        # would fail (silently, if the error is swallowed) once a test
        # has created RunInfo.xml, leaking the directory on every run.
        # Local import: the module-level import list is not guaranteed
        # to include shutil.
        import shutil
        # ignore_errors keeps the clean-up best-effort, so a failure
        # to remove the directory never masks the test result
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_illuminaruninfo(self):
        """
        IlluminaRunInfo: check data is extracted
        """
        # Write a mock RunInfo.xml file for an 8-lane paired-end,
        # dual-index run
        run_info_xml = os.path.join(self.tmpdir,"RunInfo.xml")
        with open(run_info_xml,'wt') as fp:
            fp.write(RunInfoXml.create(
                run_name="151125_NB500968_0003_000000000-ABCDE1XX",
                bases_mask="y101,I8,I8,y101",
                nlanes=8,
                tilecount=16))
        run_info = IlluminaRunInfo(run_info_xml)
        # Run-level attributes (all values are returned as strings)
        self.assertEqual(run_info.run_id,
                         "151125_NB500968_0003_000000000-ABCDE1XX")
        self.assertEqual(run_info.date,'151125')
        self.assertEqual(run_info.instrument,'NB500968')
        self.assertEqual(run_info.run_number,'0003')
        self.assertEqual(run_info.flowcell,'000000000-ABCDE1XX')
        self.assertEqual(run_info.lane_count,'8')
        self.assertEqual(run_info.bases_mask,"y101,I8,I8,y101")
        # Per-read information: one dictionary for each read
        self.assertEqual(len(run_info.reads),4)
        self.assertEqual(run_info.reads[0]['number'],'1')
        self.assertEqual(run_info.reads[0]['num_cycles'],'101')
        self.assertEqual(run_info.reads[0]['is_indexed_read'],'N')
        self.assertEqual(run_info.reads[1]['number'],'2')
        self.assertEqual(run_info.reads[1]['num_cycles'],'8')
        self.assertEqual(run_info.reads[1]['is_indexed_read'],'Y')
        self.assertEqual(run_info.reads[2]['number'],'3')
        self.assertEqual(run_info.reads[2]['num_cycles'],'8')
        self.assertEqual(run_info.reads[2]['is_indexed_read'],'Y')
        self.assertEqual(run_info.reads[3]['number'],'4')
        self.assertEqual(run_info.reads[3]['num_cycles'],'101')
        self.assertEqual(run_info.reads[3]['is_indexed_read'],'N')

class BaseTestIlluminaData(unittest.TestCase):
"""
Base class for testing IlluminaData, IlluminaProject and IlluminaSample
Expand Down