-
Notifications
You must be signed in to change notification settings - Fork 1
/
count_ROH.py
executable file
·102 lines (52 loc) · 1.76 KB
/
count_ROH.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import sys
CURRENT_SAMPLE = ""
PREVIOUS_ROH_STATE = "A_not_roh"
PREVIOUS_CHR = 0
PREVIOUS_WINDOW_START = -2
PREVIOUS_WINDOW_END = -2
ROH_COUNTER = 0
CURRENT_SAMPLE = "N\A"
SMUDGE = int(sys.argv[2])
with open(sys.argv[1]) as FILE:
for LINE in FILE:
SPLINE = LINE.rstrip("\n").split(" ")
CURRENT_ROH_STATE = SPLINE[10]
CURRENT_CHR = int(SPLINE[0])
CURRENT_WINDOW_START = int(SPLINE[1])
CURRENT_WINDOW_END = int(SPLINE[2])
if SPLINE[6] != CURRENT_SAMPLE:
if CURRENT_SAMPLE != "N\A":
if PREVIOUS_ROH_STATE == "B_roh":
ROH_COUNTER += 1
print CURRENT_SAMPLE + " " + str(ROH_COUNTER)
#if we are at a new sample, update and move on
CURRENT_SAMPLE = SPLINE[6]
PREVIOUS_ROH_STATE = CURRENT_ROH_STATE
PREVIOUS_CHR = CURRENT_CHR
PREVIVOUS_WINDOW_END = CURRENT_WINDOW_END
PREVIOUS_WINDOW_START = CURRENT_WINDOW_START
ROH_COUNTER = 0
continue
if (CURRENT_ROH_STATE == "B_roh"):
#check in case we are not in a new ROH due to the consequetive break
if CURRENT_CHR != PREVIOUS_CHR:
ROH_COUNTER += 1
if (CURRENT_CHR == PREVIOUS_CHR) and (CURRENT_WINDOW_START != PREVIVOUS_WINDOW_END+1) :
#taking into account the SMUDGE factor of 1Mb
if (CURRENT_WINDOW_START > (PREVIVOUS_WINDOW_END + SMUDGE + 1)):
ROH_COUNTER += 1
#if we are not in a ROH
else:
#check in case we are no longer in a ROH
if PREVIOUS_ROH_STATE == "B_roh":
ROH_COUNTER += 1
PREVIOUS_ROH_STATE = CURRENT_ROH_STATE
PREVIOUS_CHR = CURRENT_CHR
PREVIVOUS_WINDOW_END = CURRENT_WINDOW_END
PREVIOUS_WINDOW_START = CURRENT_WINDOW_START
CURRENT_SAMPLE = SPLINE[6]
#catch any final ROH
if PREVIOUS_ROH_STATE == "B_roh":
ROH_COUNTER += 1
#print the last sample
print CURRENT_SAMPLE + " " + str(ROH_COUNTER)