-
Notifications
You must be signed in to change notification settings - Fork 1
/
hippiematch.py
64 lines (56 loc) · 2.01 KB
/
hippiematch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/python2
import sys
import csv
import os
def main(hippiefile, pairfile, out):
    """Match protein pairs against the output of the HIPPIE script.

    Every pair listed in ``pairfile`` that also occurs in ``hippiefile`` is
    written to ``out`` together with its confidence value. Pairs are
    compared as unordered sets, so the order of the two identifiers within
    a line does not matter.

    input:
        hippiefile - output of HIPPIE script (tab-separated; the two
                     identifiers are read from the 2nd and 4th columns and
                     the confidence value from the 5th column)
        pairfile   - list of protein pairs (tab-separated, one pair per line)
    output:
        out        - all the pairs it could find in the HIPPIE file with
                     their confidence values (tab-separated)

    Prints a one-line summary of how many pairs were matched and returns
    None.
    """
    # Reading in the HIPPIE file:
    # dictionary using frozensets as keys with the confidence scores as values
    hippieids = {}
    with open(hippiefile) as handle:
        for row in csv.reader(handle, delimiter="\t"):
            if not row:
                # tolerate blank lines (e.g. a trailing empty line)
                continue
            hippieids[frozenset([row[1], row[3]])] = row[4]

    # Reading in the file of protein pairs:
    # only membership is needed, so a set of frozensets is enough
    posids = set()
    with open(pairfile) as handle:
        for row in csv.reader(handle, delimiter="\t"):
            if row:
                posids.add(frozenset(row))

    # Write new file, counting the rows as they are written. Counting here
    # (instead of shelling out to ``wc -l``) avoids reading a file that has
    # not been flushed yet and works for any output file name.
    count = 0
    with open(out, "w") as handle:
        writer = csv.writer(handle, delimiter="\t")
        for pair, score in hippieids.items():
            if pair not in posids:
                # ignore missing pairs
                continue
            if len(pair) != 2:
                # ignore self-interactions (both identifiers are the same,
                # so the frozenset collapses to a single element)
                continue
            first, second = pair
            writer.writerow([first, second, score])
            count += 1

    # Report how well it went:
    print("%i of %i pairs matched." % (count, len(posids)))
    return None
if __name__ == "__main__":
    # Command-line entry point: expects exactly three positional arguments
    # (hippiefile, pairfile, outputfile). An explicit help request or a
    # wrong number of arguments prints the usage text instead of running.
    args = sys.argv[1:]
    wants_help = bool(args) and args[0] in ("-h", "--help")
    if wants_help or len(args) != 3:
        print("Usage: python2 hippiematch.py hippiefile pairfile outputfile")
        print("Where: ")
        print(" hippiefile is the out of the HIPPIE script")
        print(" pairfile is the file of protein pairs to match")
        print(" outputfile is the name of the file to output to")
        if not wants_help:
            # wrong invocation: signal failure to the calling shell
            sys.exit(1)
    else:
        main(*args)