forked from mnussbaum/Twitter-Predictor
-
Notifications
You must be signed in to change notification settings - Fork 0
/
join_by_line.py
64 lines (59 loc) · 1.52 KB
/
join_by_line.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python
# Takes two files, f1 and f2. For each line index i, prints f1[i] \t f2[i]
# Stops when the shorter file runs out of lines.
# Use -h flag to ignore first line headers.
# Use -s flag to unscale SVM output back to real frequencies.
import sys
import getopt
import math
def main():
try:
opts, args = getopt.getopt(sys.argv[1:], "f:g:o:hs")
except getopt.GetoptError:
print "Invalid options. Valid options are -f FILE1" + \
"-g FILE2 -o OUTPATH -h -s"
sys.exit(1)
in1 = None
in2 = None
outpath = None
head = False
scale = False
for o, a in opts:
if o == '-f':
in1 = a
if o == '-g':
in2 = a
if o == '-o':
outpath = a
if o == '-h':
head = True
if o == '-s':
scale = True
f1 = open(in1, 'r')
f2 = open(in2, 'r')
lines1 = f1.readlines()
lines2 = f2.readlines()
if head == True:
lines1 = lines1[1:]
lines2 = lines2[1:]
f1.close()
f2.close()
outfile = open(outpath, 'w')
L = min(len(lines1), len(lines2))
for i in range(L):
one = lines1[i].rstrip('\n')
two = lines2[i]
if scale == True:
two = str(unscale(eval(two))) + '\n'
output = one + '\t' + two
outfile.write(output)
def unscale(y):
if y <= -1:
return 0
if y >= 1:
return 1000
else:
x = (y + 1) * 0.5
return -1.0 / math.log(x)
if __name__ == "__main__":
main()