Skip to content

Commit 4b16fe4

Browse files
committed
fix: primitive-obsession - use open intervals, drop __future__
1 parent 12e441a commit 4b16fe4

File tree

1 file changed

+16
-18
lines changed

1 file changed

+16
-18
lines changed

content/primitive_obsession.md

+16-18
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ def read_in_gene(read, genome):
4141
for chrom in genome:
4242
if chrom[0] == read[0]:
4343
for gene in chrom[2]:
44-
starts_in = gene[1][1] <= read[1] < gene[1][2]
45-
ends_in = gene[1][1] < read[2] <= gene[1][2]
44+
starts_in = gene[1][1] <= read[1] <= gene[1][2]
45+
ends_in = gene[1][1] <= read[2] <= gene[1][2]
4646
same_sign = gene[1][3] == read[3]
4747
if starts_in or ends_in:
4848
return True
@@ -51,7 +51,7 @@ def read_in_gene(read, genome):
5151

5252
> The code in this post was tested under python 3.10.4
5353
54-
We define a genome as a complex nested structure of built-in types (list, tuple, str, int). Then we write a function which will check whether an input read falls on a gene from a given genome.
54+
We define a genome as a complex nested structure of built-in types (list, tuple, str, int). Then we write a function which will check whether an input read falls on a gene from a given genome. All intervals are 0-based and closed (both the start and the end positions are inclusive).
5555

5656
Even with intuitive variable names (`gene` and not `g`), it takes a lot of mental effort to understand the nesting and to translate the numeric indices (e.g. `gene[1][1]`) into their meaning 🪆. This can make the code harder to understand and maintain.
5757

@@ -129,7 +129,7 @@ from typing import Literal, NamedTuple, Optional
129129

130130

131131
class Interval(NamedTuple):
132-
"""An optionally stranded genomic interval."""
132+
"""An optionally stranded, 0-based, open genomic interval."""
133133

134134
chrom: str
135135
start: int
@@ -166,8 +166,8 @@ def read_in_gene(read: Interval, genome: list[Chrom]) -> bool:
166166
for chrom in genome:
167167
if chrom.name == read.chrom:
168168
for gene in chrom.genes:
169-
starts_in = gene.coord.start <= read.start < gene.coord.end
170-
ends_in = gene.coord.start < read.end <= gene.coord.end
169+
starts_in = gene.coord.start <= read.start <= gene.coord.end
170+
ends_in = gene.coord.start <= read.end <= gene.coord.end
171171
same_sign = gene.coord.sign == read.sign
172172
if (starts_in or ends_in) and same_sign:
173173
return True
@@ -181,13 +181,12 @@ Dataclasses offer the flexibility of python classes with minimal administrative
181181
First, we can define an interval dataclass. The simplest definition we can write looks almost identical to the previous `NamedTuple` subclass:
182182

183183
```python
184-
from __future__ import annotations
185184
from dataclasses import dataclass, field
186185
from typing import Literal, Optional
187186

188187
@dataclass
189188
class Interval:
190-
"""An optionally stranded genomic interval."""
189+
"""An optionally stranded, 0-based, open genomic interval."""
191190

192191
chrom: str
193192
start: int
@@ -211,7 +210,7 @@ For example, below, we use `order=True` to say that instances of `Interval` can
211210
```python
212211
@dataclass(order=True)
213212
class Interval:
214-
"""A stranded genomic interval."""
213+
"""An optionally stranded, 0-based, open genomic interval."""
215214

216215
chrom: str = field(compare=True)
217216
start: int = field(compare=True)
@@ -227,7 +226,7 @@ It makes sense to move this logic into the `Interval` definition, because we wil
227226
```python
228227
@dataclass(order=True)
229228
class Interval:
230-
"""A stranded genomic interval."""
229+
"""An optionally stranded, 0-based, open genomic interval."""
231230

232231
chrom: str = field(compare=True)
233232
start: int = field(compare=True)
@@ -236,8 +235,8 @@ class Interval:
236235

237236
def __contains__(self, other: Interval) -> bool:
238237
"""Checks if another interval overlaps with self."""
239-
starts_in = self.start <= other.start < self.end
240-
ends_in = self.start < other.end <= self.end
238+
starts_in = self.start <= other.start <= self.end
239+
ends_in = self.start <= other.end <= self.end
241240
same_sign = self.sign == other.sign
242241
return (starts_in or ends_in) and same_sign
243242
```
@@ -317,8 +316,8 @@ def read_in_gene(read, genome):
317316
for chrom in genome:
318317
if chrom[0] == read[0]:
319318
for gene in chrom[2]:
320-
starts_in = gene[1][1] <= read[1] < gene[1][2]
321-
ends_in = gene[1][1] < read[2] <= gene[1][2]
319+
starts_in = gene[1][1] <= read[1] <= gene[1][2]
320+
ends_in = gene[1][1] <= read[2] <= gene[1][2]
322321
same_sign = gene[1][3] == read[3]
323322
if starts_in or ends_in:
324323
return True
@@ -345,7 +344,6 @@ In the example below, we define `Interval` as a subclass of `BaseModel` to acces
345344
Here is the full example rewritten with pydantic:
346345

347346
```python
348-
from __future__ import annotations
349347
from typing import Any, Literal, Optional
350348
from pydantic import BaseModel, conint, root_validator, NonNegativeInt
351349

@@ -354,7 +352,7 @@ pos = conint(ge=0)
354352

355353

356354
class Interval(BaseModel):
357-
"""A stranded genomic interval."""
355+
"""An optionally stranded, 0-based, open genomic interval."""
358356

359357
chrom: str
360358
start: pos
@@ -363,8 +361,8 @@ class Interval(BaseModel):
363361

364362
def __contains__(self, other: Interval) -> bool:
365363
"""Checks if another interval overlaps with self."""
366-
starts_in = self.start <= other.start < self.end
367-
ends_in = self.start < other.end <= self.end
364+
starts_in = self.start <= other.start <= self.end
365+
ends_in = self.start <= other.end <= self.end
368366
same_sign = self.sign == other.sign
369367
return starts_in or ends_in and same_sign
370368

0 commit comments

Comments
 (0)