-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathGetGeneIDs.py
42 lines (35 loc) · 1.08 KB
/
GetGeneIDs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/env python
#Brian Hanratty
#bhanratt@asu.edu
#Accepts a gene symbol as a command-line argument and writes grepline.txt, a file to be read by logcollector.sh
import sys
#Pipe-delimited dictionary file; each line is "geneid|genesymbol"
GENE_ID_FILE = 'AllGeneIDs.txt'
#File that receives the generated grep command line
GREPLINE_FILE = 'grepline.txt'


def parse_gene_table(lines):
    """Build a mapping of upper-cased gene symbol -> set of gene IDs.

    Args:
        lines: iterable of text lines shaped like ``geneid|genesymbol``.
            Only the first two ``|``-separated fields are used and the
            surrounding whitespace of each field is stripped.

    Returns:
        dict mapping each symbol (upper-cased) to the set of IDs listed
        for it.

    Lines without a ``|`` delimiter, or with an empty ID or symbol, are
    skipped instead of raising IndexError or producing an empty grep
    pattern (which would match every line).
    """
    table = {}
    for line in lines:
        fields = line.split('|')
        if len(fields) < 2:
            continue
        gene_id = fields[0].strip()
        # The requested gene is upper-cased by main(), so the keys must be
        # upper-cased too or mixed-case symbols could never be matched.
        symbol = fields[1].strip().upper()
        if not gene_id or not symbol:
            continue
        table.setdefault(symbol, set()).add(gene_id)
    return table


def build_grep_command(gene_ids):
    """Return the grep command line that selects the given gene IDs.

    Args:
        gene_ids: iterable of gene ID strings.

    Returns:
        A string of the form
        ``grep -e ID1 -e ID2 $i.blr > $i.blr.tp53``.
        IDs are sorted so the output is the same on every run.
    """
    patterns = ' -e '.join(sorted(gene_ids))
    # NOTE(review): the output suffix is hard-coded to ".tp53" whatever gene
    # was requested; kept as-is because logcollector.sh may rely on it -
    # confirm before changing.
    return 'grep -e ' + patterns + ' $i.blr > $i.blr.tp53'


def main(argv):
    """Look up the gene named in argv[1] and write the grepline file.

    Args:
        argv: command-line argument list; argv[1] is the gene symbol
            (matched case-insensitively).

    Returns:
        0 on success; 1 if no gene was given, the dictionary file is
        missing, or the gene is not in the dictionary.
    """
    try:
        gene = argv[1].upper()
    except IndexError:
        print("You need to specify a gene. e.g. \"python GetGeneIDs.py tp53\"")
        return 1
    print("Using gene: " + gene)
    #Import Gene Dictionary
    try:
        # "with" closes the dictionary file on every exit path.
        with open(GENE_ID_FILE) as gene_file:
            data_table = parse_gene_table(gene_file)
    #Missing dictionary handling
    except FileNotFoundError:
        print("Missing dependency: AllGeneIDs.txt")
        return 1
    #Do the thing
    if gene not in data_table:
        print(gene + " not found in dictionary. Maybe a typo?")
        return 1
    with open(GREPLINE_FILE, 'w') as grep_file:
        grep_file.write(build_grep_command(data_table[gene]))
    print('Generated grepline file for ' + gene)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))