In [1]:
class GenomicException(Exception):
    """Common base class for the genomic-range errors defined in this file."""


class InconsistentChromosomesError(GenomicException):
    """Raised when two genomic ranges being compared have different chromosomes.

    The two offending chromosome values are kept on the instance as
    ``one_chrom`` and ``other_chrom``.
    """

    def __init__(self, one_chrom, other_chrom):
        message = (f"Inconsistent chromosomes of "
                   f"comparing genomic ranges: "
                   f"{one_chrom} and {other_chrom}.")
        super().__init__(message)
        self.one_chrom = one_chrom
        self.other_chrom = other_chrom


class InconsistentGenomesError(GenomicException):
    """Raised when two genomic ranges being compared have different genomes.

    The two offending genome values are kept on the instance as
    ``one_genome`` and ``other_genome``.
    """

    def __init__(self, one_genome, other_genome):
        message = (f"Inconsistent genomes of "
                   f"comparing genomic ranges: "
                   f"{one_genome} and {other_genome}.")
        super().__init__(message)
        self.one_genome = one_genome
        self.other_genome = other_genome

class EmptyGenomicRangesListError(GenomicException):
    """Raised when an operation is attempted on an empty ranges list.

    The offending list is kept on the instance as ``range_list``.
    """

    def __init__(self, range_list_instance):
        message = f"Empty GenomicRangesList instance: {range_list_instance}"
        super().__init__(message)
        self.range_list = range_list_instance
        

In [2]:
class GenomicRange:
    """An interval on a chromosome, with a strand and an optional genome.

    ``chrom``, ``start``, ``end``, ``strand``, ``genome`` and
    ``sequence_file_loc`` are stored as given.  ``synteny`` and ``neighbours``
    default to new, empty ``GenomicRangesList`` objects created with this
    range's ``genome``.  Extra keyword arguments are accepted and ignored.
    """

    def __init__(self, chrom, start, end, strand,
                 genome=None, sequence_file_loc=None,
                 synteny=None, neighbours=None, **kwargs):
        self.chrom = chrom
        self.start = start
        self.end = end
        self.strand = strand
        self.genome = genome
        self.sequence_file_loc = sequence_file_loc
        self.synteny = GenomicRangesList([], self.genome) if synteny is None else synteny
        self.neighbours = GenomicRangesList([], self.genome) if neighbours is None else neighbours

    def __repr__(self):
        return f"GenomicRange({self.chrom}, {self.start}, {self.end}, {self.strand})"

    def __str__(self):
        return f"{self.chrom}\t{self.start}\t{self.end}\t{self.strand}"

    def _check_comparable(self, other):
        """Raise unless ``other`` shares this range's chromosome and genome.

        The chromosome is checked first, so a pair differing in both raises
        ``InconsistentChromosomesError``.
        """
        if self.chrom != other.chrom:
            raise InconsistentChromosomesError(self.chrom, other.chrom)
        if self.genome != other.genome:
            raise InconsistentGenomesError(self.genome, other.genome)

    def distance(self, other):
        """Return the signed gap between this range and ``other``.

        Returns:
            ``other.start - self.end`` (positive) when this range ends before
            ``other`` starts, ``other.end - self.start`` (negative) when
            ``other`` ends before this range starts, and ``0`` otherwise.

        Raises:
            InconsistentChromosomesError: the chromosomes differ.
            InconsistentGenomesError: the genomes differ.
        """
        self._check_comparable(other)
        if self.end < other.start:
            return other.start - self.end
        if other.end < self.start:
            return other.end - self.start
        return 0

    def merge(self, other):
        """Return a new range spanning both this range and ``other``.

        The result runs from the smaller start to the larger end, keeps this
        range's chromosome and genome, and has strand ``"."``.

        Raises:
            InconsistentChromosomesError: the chromosomes differ.
            InconsistentGenomesError: the genomes differ.
        """
        self._check_comparable(other)
        start = min(self.start, other.start)
        end = max(self.end, other.end)
        return GenomicRange(chrom=self.chrom,
                            start=start,
                            end=end,
                            strand=".",
                            genome=self.genome)

    def align(self):
        """Placeholder; not implemented yet."""
        pass

    def extract_fasta(self, outfilename):
        """Placeholder; not implemented yet."""
        pass

In [None]:
class FastaSeqFile:
    """Handle to a sequence file that is opened for reading on construction.

    The underlying file object stays open until ``close()`` is called.  The
    instance can also be used as a context manager, which closes the file on
    exit::

        with FastaSeqFile(path) as seq_file:
            ...

    ``chromsizes``, ``chrom``, ``point`` and ``chrom_coords`` start as
    ``None``.
    """

    def __init__(self, location):
        self.location = location
        self._file_obj = open(self.location, 'r')
        self.chromsizes = None
        self.chrom = None
        self.point = None
        self.chrom_coords = None

    def close(self):
        """Close the underlying file object; safe to call more than once."""
        self._file_obj.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Always release the handle; exceptions from the body are not suppressed.
        self.close()

    def get_chromsizes(self):
        """Placeholder; not implemented yet."""
        pass

    def get_fasta_by_coord(self, chrom, start, end, strand):
        """Placeholder; not implemented yet."""
        pass

In [3]:
from sortedcontainers import SortedKeyList

In [4]:
class GenomicRangesList(SortedKeyList):
    """Sorted collection of genomic ranges, ordered by (chrom, start, end).

    ``genome`` is stored on the list and passed on to lists derived from it.
    """

    def __init__(self, collection=None, genome=None):
        # ``None`` rather than a shared mutable ``[]`` default; the base class
        # treats a ``None`` iterable as "start empty".
        super().__init__(iterable=collection, key=lambda x: (x.chrom, x.start, x.end))
        self.genome = genome

    def merge(self, distance=0):
        """Return a new list in which nearby ranges are merged together.

        Ranges are visited in sorted order.  A range is merged into the
        previous (possibly already merged) one when both are on the same
        chromosome and the absolute gap between them is at most ``distance``.

        Returns:
            A new ``GenomicRangesList`` with this list's ``genome``.

        Raises:
            EmptyGenomicRangesListError: this list is empty.
            InconsistentGenomesError: two ranges on the same chromosome have
                different genomes.
        """
        if len(self) == 0:
            raise EmptyGenomicRangesListError(self)
        ranges = iter(self)
        current = next(ranges)
        merged = []
        for grange in ranges:
            # Sorted lists do not support item assignment, so the running
            # merged range is kept in a local and stored once it is final.
            if (current.chrom == grange.chrom
                    and abs(current.distance(grange)) <= distance):
                current = current.merge(grange)
            else:
                merged.append(current)
                current = grange
        merged.append(current)
        return GenomicRangesList(merged, self.genome)

    def get_neighbours(self, other, distance=0):
        """Record, for every range in this list, its neighbours from ``other``.

        A range of ``other`` is a neighbour when it is on the same chromosome
        and the absolute gap to the range of this list is at most
        ``distance``.  Neighbours are added to each range's ``neighbours``
        collection; nothing is returned.

        ``other`` is expected to be ordered by (chrom, start, end) as well,
        e.g. another ``GenomicRangesList``.

        Raises:
            EmptyGenomicRangesListError: this list or ``other`` is empty.
            InconsistentGenomesError: two compared ranges on the same
                chromosome have different genomes.
        """
        if len(self) == 0:
            raise EmptyGenomicRangesListError(self)
        if len(other) == 0:
            raise EmptyGenomicRangesListError(other)
        other_len = len(other)
        # Index of the first range in ``other`` whose chromosome is not
        # smaller than the chromosome currently being processed.  It only
        # moves forward when the chromosome changes, so every range of this
        # list rescans its whole chromosome block.
        first_candidate = 0
        for grange in self:
            while (first_candidate < other_len
                   and other[first_candidate].chrom < grange.chrom):
                first_candidate += 1
            for idx in range(first_candidate, other_len):
                candidate = other[idx]
                if candidate.chrom != grange.chrom:
                    break
                gap = grange.distance(candidate)
                if gap < -distance:
                    # Candidate lies too far before this range; a later one
                    # may still be close enough.
                    continue
                if abs(gap) > distance:
                    # Candidate starts too far after this range; so do all
                    # following ones because of the sort order.
                    break
                grange.neighbours.add(candidate)

In [5]:
# Sample (chrom, start, end, strand) records; note they are not in sorted order.
sample_range_data = [
    [1, 10, 100, "+"],
    [1, 50, 200, "-"],
    [2, 200, 250, "+"],
    [2, 10, 100, "-"],
]

In [6]:
# Build one GenomicRange per sample record.
sample_ranges = [
    GenomicRange(chrom, start, end, strand)
    for chrom, start, end, strand in sample_range_data
]

In [7]:
# Collect the sample ranges into a GenomicRangesList (no genome given).
rc = GenomicRangesList(sample_ranges)

In [8]:
# Insert one more range through the `add` method inherited from SortedKeyList.
rc.add(GenomicRange(2, 25, 200, '+'))

In [11]:
# Print the list's representation.
print(rc)

RangeList([GenomicRange(1, 10, 100, +), GenomicRange(1, 50, 200, -), GenomicRange(2, 10, 100, -), GenomicRange(2, 25, 200, +), GenomicRange(2, 200, 250, +)], key=<function RangeList.__init__.<locals>.<lambda> at 0x7f81a02bfae8>)
