-
Notifications
You must be signed in to change notification settings - Fork 0
/
make_input_4_gaf.py
executable file
·86 lines (70 loc) · 2.69 KB
/
make_input_4_gaf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#A script to produce a correspondence file between the ENSEMBL codes of my transcriptome and their respective GO terms
#Because I have both MUS and ORYC ENSEMBL annotations, this script will work with both
# AUTHOR: Mafalda S. Ferreira, 2014 @ Porto
# ---------------------------------------------------------------------------------------------------------------------------
# USAGE: python make_input_4_gaf.py corr_btw_ory_ensembl&go corr_btw_mus_ensembl&go annot_ory2ensembl annot_mus2ensembl output
# ---------------------------------------------------------------------------------------------------------------------------
# FILES:
# corr_btw_ory_ensembl&go = file obtained from biomart that has the correspondence between ALL ORY ensembl gene codes and GO terms
# corr_btw_mus_ensembl&go = file obtained from biomart that has the correspondence between ALL MUS ensembl gene codes and GO terms
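# annot_ory2ensembl = a list with the ORY ENSEMBL codes annotated in my transcriptome. ATTENTION: Only one column with ENSEMBL gene codes
# annot_mus2ensembl = a list with the MUS ENSEMBL codes annotated in my transcriptome. ATTENTION: Only one column with ENSEMBL gene codes
# output = path of the file to write: one line per annotated ENSEMBL code found in the matching reference file, followed by its GO terms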
# ----------------------------------------------------------------------------------------------------------------------------
import sys
# Read the five positional command-line arguments, in the order given by the
# USAGE line in the file header. Stop with a usage message instead of an
# IndexError traceback when arguments are missing; extra arguments are
# ignored, as before.
if len(sys.argv) < 6:
    sys.exit('USAGE: python make_input_4_gaf.py corr_btw_ory_ensembl&go '
             'corr_btw_mus_ensembl&go annot_ory2ensembl annot_mus2ensembl '
             'output')

ref_ory = sys.argv[1]      # ORY reference: ENSEMBL gene code <TAB> GO term
ref_mus = sys.argv[2]      # MUS reference: ENSEMBL gene code <TAB> GO term
infile_ory = sys.argv[3]   # one-column list of annotated ORY ENSEMBL codes
infile_mus = sys.argv[4]   # one-column list of annotated MUS ENSEMBL codes
output = sys.argv[5]       # path of the file to write

# A single parenthesised string prints identically under Python 2 and 3.
print('Hello! Check if your files are well formated before usage.')
def makeorydict(ory):
    """Build a lookup from the first column of a tab-separated file to its
    second-column values.

    Each line of the file at path ``ory`` is split on tabs. The first field
    is the key and the second field is the value; any further fields are
    ignored. All values seen for the same key are concatenated in file
    order, each followed by a comma, so the resulting string always ends
    with a trailing comma (e.g. ``'GO:1,GO:2,'``).

    Lines with fewer than two tab-separated fields (for example blank
    lines) are skipped. No header detection is done: a header line with
    two or more columns is stored like any other line.

    Returns a dict mapping key string -> comma-terminated value string.
    Raises IOError/OSError if the file cannot be opened.
    """
    orydict = {}
    # The context manager closes the file even if parsing raises.
    with open(ory, 'r') as oryref:
        for line in oryref:
            # Drop the line terminator, including the '\r' of CRLF files,
            # so it cannot end up inside the stored value.
            fields = line.rstrip('\r\n').split('\t')
            if len(fields) < 2:
                # Blank or tab-less line: there is no second column to read.
                continue
            key = fields[0]
            value = fields[1] + ','
            orydict[key] = orydict.get(key, '') + value
    return orydict
def makemusdict(mus):
    """Build a lookup from the first column of a tab-separated file to its
    second-column values.

    Each line of the file at path ``mus`` is split on tabs. The first field
    is the key and the second field is the value; any further fields are
    ignored. All values seen for the same key are concatenated in file
    order, each followed by a comma, so the resulting string always ends
    with a trailing comma (e.g. ``'GO:1,GO:2,'``).

    Lines with fewer than two tab-separated fields (for example blank
    lines) are skipped. No header detection is done: a header line with
    two or more columns is stored like any other line.

    Returns a dict mapping key string -> comma-terminated value string.
    Raises IOError/OSError if the file cannot be opened.
    """
    musdict = {}
    # The context manager closes the file even if parsing raises.
    with open(mus, 'r') as musref:
        for line in musref:
            # Drop the line terminator, including the '\r' of CRLF files,
            # so it cannot end up inside the stored value.
            fields = line.rstrip('\r\n').split('\t')
            if len(fields) < 2:
                # Blank or tab-less line: there is no second column to read.
                continue
            key = fields[0]
            value = fields[1] + ','
            musdict[key] = musdict.get(key, '') + value
    return musdict
# Load the two reference tables: ENSEMBL gene code -> comma-terminated string
# of GO terms, as returned by makeorydict / makemusdict.
dictory = makeorydict(ref_ory)
dictmus = makemusdict(ref_mus)

# Open both annotation lists before the output file (same order as before, so
# a missing input still fails before the output is created or truncated).
# The with-statement closes all three files even if an error occurs.
with open(infile_mus, 'r') as annotmus, \
        open(infile_ory, 'r') as annotory, \
        open(output, 'w') as outfile:
    # ORY codes are written first, then MUS codes, each looked up in its own
    # reference table.
    for annot, go_by_gene in ((annotory, dictory), (annotmus, dictmus)):
        for line in annot:
            # Each line of the annotation list is a single ENSEMBL code.
            gene = line.strip()
            # Codes missing from the reference table are silently dropped.
            if gene in go_by_gene:
                outfile.write(gene + ' ' + go_by_gene[gene] + '\n')