-
Notifications
You must be signed in to change notification settings - Fork 1.7k
/
__init__.py
61 lines (55 loc) · 1.96 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Copyright 2001 by Katharine Lindner. All rights reserved.
# Copyright 2006 by PeterC. All rights reserved.
# Copyright 2007 by Michiel de Hoon. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Parser for files from NCBI's Gene Expression Omnibus (GEO).
http://www.ncbi.nlm.nih.gov/geo/
"""
import Record
def _read_key_value(line):
    """Split a GEO marker line into a (key, value) pair (PRIVATE).

    The first character of ``line`` (the '^', '!' or '#' marker) is
    discarded.  The remainder is split on the first '=' only, so any
    further '=' characters stay in the value.  Both parts are stripped of
    surrounding whitespace.  If the line contains no '=', the whole
    remainder is the key and the value is the empty string.
    """
    # partition() always returns three parts; with no '=' present the
    # value part is simply '' - the same result the no-'=' case needs.
    key, _, value = line[1:].partition("=")
    return key.strip(), value.strip()
def parse(handle):
    """Iterate over the records found in a GEO file handle.

    ``handle`` is any iterable of text lines.  Empty lines are ignored;
    every other line is dispatched on its first character:

     - '^' starts a new record; its key and value become the record's
       ``entity_type`` and ``entity_id``.
     - '!' is an entity attribute stored in ``entity_attributes``.  A key
       that occurs more than once has its values collected in a list.
       The table begin/end marker lines are skipped.
     - '#' is a column definition stored in ``col_defs``.
     - anything else is a table row, split on tabs and appended to
       ``table_rows``.

    This is a generator yielding ``Record.Record`` objects.  Nothing is
    yielded if the input contains no '^' line.  The input is expected to
    begin with a '^' line, since the other line types are applied to the
    record currently being built.

    Raises ValueError if a column is defined twice within one record.
    """
    record = None
    for line in handle:
        line = line.strip('\n').strip('\r')
        if not line:
            continue  # Ignore empty lines
        c = line[0]
        if c == '^':
            # A new entity begins; hand over the one collected so far.
            if record:
                yield record
            record = Record.Record()
            record.entity_type, record.entity_id = _read_key_value(line)
        elif c == '!':
            if line in ('!Sample_table_begin',
                        '!Sample_table_end',
                        '!Platform_table_begin',
                        '!Platform_table_end'):
                continue
            key, value = _read_key_value(line)
            attributes = record.entity_attributes
            if key in attributes:
                existing = attributes[key]
                if isinstance(existing, list):
                    existing.append(value)
                else:
                    # Second occurrence: promote the single value to a list.
                    attributes[key] = [existing, value]
            else:
                attributes[key] = value
        elif c == '#':
            key, value = _read_key_value(line)
            # Raise explicitly rather than assert, so the check is still
            # performed when Python runs with -O.
            if key in record.col_defs:
                raise ValueError("Duplicate column definition %r" % key)
            record.col_defs[key] = value
        else:
            row = line.split("\t")
            record.table_rows.append(row)
    # Only emit the final record if one was ever started; otherwise empty
    # input would yield None.
    if record is not None:
        yield record