### extract_sequence_using_name_query.jl
using ProgressMeter
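### NOTE: write ":" instead of "|" in the sequence_name_query input, because every "|" in a
### header line is replaced by ":" before the header is matched against the query.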
### Argument parsing needs refactoring LOL
fasta_input = ARGS[1]
sequence_name_query_vec = try
ARGS[2:(length(ARGS)-3)]
catch
try
ARGS[2:(length(ARGS)-2)]
catch
try
ARGS[2:(length(ARGS)-1)]
catch
ARGS[2:(length(ARGS)-0)]
end
end
end
fasta_output = try
ARGS[length(ARGS)-2]
catch
""
end
new_sequence_name = try
ARGS[length(ARGS)-1]
catch
""
end
add_gene_coordinates = try
parse(Bool, ARGS[length(ARGS)-0])
catch
false
end
if sequence_name_query_vec isa Vector
if length(sequence_name_query_vec) > 1
sequence_name_query = join(sequence_name_query_vec, " ")
else
sequence_name_query = sequence_name_query_vec[1]
end
end
if fasta_output == ""
fasta_output = string(join(split(fasta_input, ".")[1:(end-1)], "."), "-", sequence_name_query, ".out")
end
### Add escape characters in front of "."
if match(Regex("\\."), sequence_name_query) != nothing
sequence_name_query = replace(sequence_name_query, "."=> "\\.")
end
### Remove return character "\r"
if match(Regex("\\r"), sequence_name_query) != nothing
sequence_name_query = replace(sequence_name_query, "\r"=> "")
end
file_input = open(fasta_input, "r")
seekend(file_input); n = position(file_input)
seekstart(file_input)
pb = Progress(n)
while !eof(file_input)
line = readline(file_input)
if line[1] == '>'
while match(Regex(sequence_name_query), replace(line, "|"=>":")) != nothing
file_output = open(fasta_output, "a")
vec_line = split(line, " ")
if (new_sequence_name != "")
line = string(">", new_sequence_name)
end
if add_gene_coordinates
coordinates = try
vec_line[(match.(Regex("interval="), vec_line) .!= nothing) .| (match.(Regex("location="), vec_line) .!= nothing)][1]
catch
""
end
line = string(line, "(", coordinates, ")")
end
write(file_output, string(line, '\n'))
line = readline(file_input)
bool_test = line[1] != '>'
while bool_test
write(file_output, line)
line = readline(file_input)
bool_test = try
line[1] != '>'
catch
false
end
update!(pb, position(file_input))
end
write(file_output, '\n')
close(file_output)
end
end
update!(pb, position(file_input))
end
close(file_input)