#### NOTES:
- The idea is to make two classes (or just modules): a sliding-window class (which can be used anywhere) and one for all the calculations (simgen).
- It seems we could put all the slices into a collection and count all the ticks we need inside one piece of code, but that would be a mess.

In [1]:
from Bio.Align import MultipleSeqAlignment
from Bio.SeqRecord import SeqRecord

## slicing multiple sequence alignment
http://biopython.org/DIST/docs/tutorial/Tutorial.html#sec89

In [2]:
from Bio import AlignIO

In [3]:
# Read the alignment from a FASTA file located in the working directory.
align = AlignIO.read("./hbv_C_Bj_Ba.fasta", "fasta")

In [4]:
# Show the repr of the loaded alignment object.
align

<<class 'Bio.Align.MultipleSeqAlignment'> instance (3 records of length 3215) at 1c728a648e0>

In [5]:
# Print the text summary of the alignment (rows, columns, truncated sequences).
print(align)

Alignment with 3 rows and 3215 columns
TTCCACAGCATTCCACCAAGCTCTGCAGGATCCCAGAGTAAGGG...GAA AB048704.1_genotype_C_
CTCCACCACGTTCCACCAAACTCTTCAAGATCCCAGAGTCAGGG...GAA AB033555.1_Ba
CTCCACCACTTTCCACCAAACTCTTCAAGATCCCAGAGTCAGGG...GAA AB010291.1_Bj


You can also select a range of columns. For example, to pick out those same three rows we extracted earlier, but take just their first six columns:

 print(alignment[3:6, :6 ])
 
Alignment with 3 rows and 6 columns

AEGDDP COATB_BPM13/24-72

AEGDDP COATB_BPZJ2/1-49

AEGDDP Q9T0Q9_BPFD/1-49

### how to slice alignment into pieces using only biopython


In [6]:
# Keep every row, but only columns 1..4 (the end index 5 is exclusive).
print(align[:, 1:5])

Alignment with 3 rows and 4 columns
TCCA AB048704.1_genotype_C_
TCCA AB033555.1_Ba
TCCA AB010291.1_Bj


### length of the alignment

In [7]:
# Number of columns in the alignment.
align.get_alignment_length()

3215

### Note: when the window end index is larger than the alignment length, slicing returns the columns up to the end of the alignment without raising an "index out of range" error (this matches ordinary Python slice behaviour). See the cells below.

# function for the rolling window

In [17]:
def roll_window_along_alignment(in_file, window_len, window_step, region=False):
    """Cut a FASTA alignment into column windows sliding over its whole length.

    Parameters
    ----------
    in_file
        Passed unchanged to ``AlignIO.read(in_file, "fasta")``.
    window_len : int
        Number of columns requested per window; must be positive.
    window_step : int
        Number of columns the window start advances each time; must be
        positive.
    region
        Accepted for backward compatibility only. This function has never
        used it and always walks the whole alignment.

    Returns
    -------
    dict
        Maps ``(window_start, window_end)`` to ``align[:, window_start:window_end]``
        in the order the windows were produced. ``window_end`` is always
        ``window_start + window_len`` and is NOT clamped to the alignment
        length, so the key of a trailing window may point past the last
        column.

    Raises
    ------
    ValueError
        If ``window_len`` or ``window_step`` is not positive.
    """
    # Validate before touching the file: a zero/negative step would otherwise
    # never advance past the end of the alignment (endless loop), and a
    # non-positive length would produce wrap-around or empty slices.
    if window_len <= 0:
        raise ValueError(f"window_len must be positive, got {window_len!r}")
    if window_step <= 0:
        raise ValueError(f"window_step must be positive, got {window_step!r}")

    align = AlignIO.read(in_file, "fasta")
    alignment_length = align.get_alignment_length()

    sliced_alignment = {}
    for window_start in range(0, alignment_length, window_step):
        window_end = window_start + window_len
        sliced_alignment[(window_start, window_end)] = align[:, window_start:window_end]

    return sliced_alignment

In [22]:
def roll_window_along_alignment_region(in_file, window_len, window_step, region=False):
    """Cut a region of a FASTA alignment into sliding column windows.

    Parameters
    ----------
    in_file
        Passed unchanged to ``AlignIO.read(in_file, "fasta")``.
    window_len : int
        Number of columns requested per window; must be positive.
    window_step : int
        Number of columns the window start advances each time; must be
        positive.
    region : sequence, optional
        ``region[0]`` is the first window start and windows are produced
        while the start is below ``region[1]``. When left at the default
        (``False``) or given as ``None``, the whole alignment is used, i.e.
        ``(0, alignment length)``.

    Returns
    -------
    dict
        Maps ``(window_start, window_end)`` to ``align[:, window_start:window_end]``
        in the order the windows were produced. ``window_end`` is always
        ``window_start + window_len``; it is not clamped to ``region[1]`` or
        to the alignment length.

    Raises
    ------
    ValueError
        If ``window_len`` or ``window_step`` is not positive.
    """
    # Validate before touching the file: a zero/negative step would otherwise
    # never reach the end of the region (endless loop).
    if window_len <= 0:
        raise ValueError(f"window_len must be positive, got {window_len!r}")
    if window_step <= 0:
        raise ValueError(f"window_step must be positive, got {window_step!r}")

    align = AlignIO.read(in_file, "fasta")

    # The default region=False used to crash on region[0]; treat "no region"
    # as "the whole alignment" instead.
    if region is None or region is False:
        region_start, region_end = 0, align.get_alignment_length()
    else:
        region_start, region_end = region[0], region[1]

    sliced_alignment = {}
    for window_start in range(region_start, region_end, window_step):
        window_end = window_start + window_len
        sliced_alignment[(window_start, window_end)] = align[:, window_start:window_end]

    return sliced_alignment

In [32]:
from Bio import AlignIO

class RollingWindowOnAlignment():
    """Sliding-window slicer over a multiple sequence alignment read from FASTA.

    The alignment is read once in ``__init__`` and stored as ``self.align``;
    both public methods slice that stored alignment by columns.
    """

    def __init__(self, in_file):
        """Read the alignment with ``AlignIO.read(in_file, "fasta")``."""
        self.align = AlignIO.read(in_file, "fasta")

    def _slice_windows(self, first_start, stop, window_len, window_step):
        """Return ``{(start, end): self.align[:, start:end]}`` for every window.

        Window starts run from ``first_start`` in increments of
        ``window_step`` while they stay below ``stop``; ``end`` is always
        ``start + window_len`` and is not clamped to ``stop``.

        Raises ``ValueError`` when ``window_len`` or ``window_step`` is not
        positive (a non-positive step would never reach ``stop``).
        """
        if window_len <= 0:
            raise ValueError(f"window_len must be positive, got {window_len!r}")
        if window_step <= 0:
            raise ValueError(f"window_step must be positive, got {window_step!r}")

        sliced_alignment = {}
        for window_start in range(first_start, stop, window_step):
            window_end = window_start + window_len
            sliced_alignment[(window_start, window_end)] = self.align[:, window_start:window_end]
        return sliced_alignment

    def roll_window_along_alignment(self, window_len, window_step):
        """Slice the whole stored alignment into sliding column windows.

        Parameters
        ----------
        window_len : int
            Number of columns requested per window; must be positive.
        window_step : int
            Number of columns the window start advances; must be positive.

        Returns
        -------
        dict
            ``(window_start, window_end)`` -> column slice of ``self.align``.
            The key of a trailing window may point past the last column
            because ``window_end`` is not clamped.

        Raises
        ------
        ValueError
            If ``window_len`` or ``window_step`` is not positive.
        """
        # Use the instance's own alignment length; a module-level `align`
        # variable may not exist or may refer to a different alignment.
        return self._slice_windows(
            0, self.align.get_alignment_length(), window_len, window_step
        )

    def roll_window_along_alignment_region(self, window_len, window_step, region):
        """Slice part of the stored alignment into sliding column windows.

        Parameters
        ----------
        window_len : int
            Number of columns requested per window; must be positive.
        window_step : int
            Number of columns the window start advances; must be positive.
        region : sequence
            ``region[0]`` is the first window start; windows are produced
            while the start is below ``region[1]``.

        Returns
        -------
        dict
            ``(window_start, window_end)`` -> column slice of ``self.align``.
            ``window_end`` is not clamped to ``region[1]``.

        Raises
        ------
        ValueError
            If ``window_len`` or ``window_step`` is not positive.
        """
        return self._slice_windows(region[0], region[1], window_len, window_step)

    
    

In [33]:
# Build the window slicer; the constructor reads the FASTA alignment from this path.
align_win = RollingWindowOnAlignment("./hbv_C_Bj_Ba.fasta")

In [35]:
# 100-column windows advancing 50 columns at a time over the whole alignment.
sliced_alignment_whole = align_win.roll_window_along_alignment(window_len=100, window_step=50)

In [36]:
# Same window settings, but window starts run from column 1000 and stay below 2000.
sliced_aligment_region = align_win.roll_window_along_alignment_region(window_len=100, window_step=50, region=[1000, 2000])

In [37]:
# Display the {(start, end): sub-alignment} mapping for the region.
sliced_aligment_region

{(1000,
  1100): <<class 'Bio.Align.MultipleSeqAlignment'> instance (3 records of length 100) at 1c73304b1f0>,
 (1050,
  1150): <<class 'Bio.Align.MultipleSeqAlignment'> instance (3 records of length 100) at 1c73304a710>,
 (1100,
  1200): <<class 'Bio.Align.MultipleSeqAlignment'> instance (3 records of length 100) at 1c73311b610>,
 (1150,
  1250): <<class 'Bio.Align.MultipleSeqAlignment'> instance (3 records of length 100) at 1c7331340d0>,
 (1200,
  1300): <<class 'Bio.Align.MultipleSeqAlignment'> instance (3 records of length 100) at 1c7331341f0>,
 (1250,
  1350): <<class 'Bio.Align.MultipleSeqAlignment'> instance (3 records of length 100) at 1c7331343a0>,
 (1300,
  1400): <<class 'Bio.Align.MultipleSeqAlignment'> instance (3 records of length 100) at 1c733134550>,
 (1350,
  1450): <<class 'Bio.Align.MultipleSeqAlignment'> instance (3 records of length 100) at 1c733134700>,
 (1400,
  1500): <<class 'Bio.Align.MultipleSeqAlignment'> instance (3 records of length 100) at 1c7331348b0>,
 