forked from biopython/biopython
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Search.py
153 lines (129 loc) · 5.45 KB
/
Search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# BLASTN 2.0a19MP-WashU [05-Feb-1998] [Build decunix3.2 01:53:29 05-Feb-1998]
# BLASTP 2.0.4 [Feb-24-1998]
class Algorithm:
def __init__(self, name, version, description = ""):
self.name = name # 'blastx', 'blastn', etc.
self.version = version # '2.1.2' or '2.0a19MP-WashU'
self.description = description # '[05-Feb-1998] [Build dec ...1998]'
# Query= YAL001C YAL001C, Chr I from 147596 to 147665, and 147756 to 151168,
# reverse complement
# (3483 letters)
class Query:
def __init__(self, name, accession, description, length):
self.name = name # 'YAL001C'
self.accession = accession # or None if missing
self.description = description # 'YAL001C, Chr I from 147596 to ... '
self.length = length # 3483
# Database: ArabidopsisN
# 66,211 sequences; 69,074,155 total letters.
class Database:
def __init__(self, name, letters, entries):
self.name = name # ArabidopsisN
self.letters = letters # 69074155
self.entries = entries # 66211
class TableInfo:
def __init__(self, full_description, info):
self.__dict__.update(info)
self.full_description = full_description
class Search:
def __init__(self, algorithm, query, database, table, hits,
parameters, statistics):
self.algorithm = algorithm
self.query = query
self.database = database
self.table = table
self.hits = hits
self.parameters = parameters
self.statistics = statistics
class Hit:
def __init__(self, name, description, accession, length,
algorithm, hsps = None):
self.name = name
self.description = description
self.accession = accession
self.length = length
self.algorithm = algorithm
if hsps is None:
hsps = []
self.hsps = hsps
def __len__(self):
return self.length
# >GB_PL:ATF18F4 AL021637 Arabidopsis thaliana DNA chromosome 4, BAC clone
# F18F4 (ESSAII project). 2/98
# Length = 93,646
#
# Minus Strand HSPs:
#
# Score = 226 (33.9 bits), Expect = 0.80, P = 0.55
# Identities = 98/142 (69%), Positives = 98/142 (69%), Strand = Minus / Plus
# [...lines deleted...]
# Query: 2486 ATATCAAGCAATTTGATAAGATCTAG 2461
# A AT A C ATT GA AAGATC AG
# Sbjct: 85387 AGATTTACCTATT-GAGAAGATCAAG 85411
# computed from the strings
class _SeqLength:
def __init__(self, length, identical, positives, gaps):
self.length = length
self.identical = identical
self.positives = positives
self.gaps = gaps
def __len__(self):
return self.length
def __getattr__(self, name):
if name == "frac_identical":
return float(self.identical) / self.length
elif name == "frac_positives":
return float(self.positives) / self.length
raise AttributeError(name)
class HomologySeq(_SeqLength):
def __init__(self, seq, identical, positives, gaps):
_SeqLength.__init__(self, len(seq), identical, positives, gaps)
self.seq = seq
class HSPSeq(_SeqLength):
def __init__(self, name, seq, location, identical, positives, gaps):
_SeqLength.__init__(self, len(seq), identical, positives, gaps)
self.name = name
self.seq = seq
self.location = location
class HSP(_SeqLength):
def __init__(self,
query_seq, # ATATCAAGCAATTTGATAAGATCTAG
homology_seq, # A AT A C ATT GA AAGATC AG
subject_seq, # AGATTTACCTATT-GAGAAGATCAAG
query_location, # (2486, 2461, negative strand)
subject_location, # (85387, 85411)
query_name, # Query (or None)
subject_name, # Sbjct (or None)
algorithm, # an Algorithm
info, # contains Key/value pairs
homology_gaps = None, # Is this needed?
):
assert len(query_seq) == len(homology_seq) == len(subject_seq), \
(query_seq, homology_seq, subject_seq)
self.algorithm = algorithm
query_gaps = query_seq.count("-")
subject_gaps = subject_seq.count("-")
if homology_gaps is None:
homology_gaps = query_gaps + subject_gaps
self.info = info
identical = info["identical"]
# bioperl calls this 'conserved'
positives = info.get("positives", identical)
_SeqLength.__init__(self, len(query_seq), identical,
positives, homology_gaps)
self.query = HSPSeq(name = query_name,
seq = query_seq,
location = query_location,
identical = identical,
positives = positives,
gaps = query_gaps)
self.subject = HSPSeq(name = subject_name,
seq = subject_seq,
location = subject_location,
identical = identical,
positives = positives,
gaps = subject_gaps)
self.homology = HomologySeq(seq = homology_seq,
identical = identical,
positives = positives,
gaps = homology_gaps)