# Analysis #5: Isolation of oskar sequences according to Jeske 2015 Dimerization status
- Author: Savandara BESSE and Leo BLONDEL
- Creation: 07-18-2017
- Last modification: 12-10-2019

### Required inputs
- Oskar alignment: `../Data/01_Oskar_identification/oskar_tracker_results/oskar_filtered.aligned.fasta`

### Description
Splits the Oskar alignment into two sets of sequences, selected by family, according to the conformation of LOTUS (monomer vs. dimer).

### Generated outputs
Available in `../Data/02_Oskar_analyses/2.5/FASTA/`
- OSKAR_Dimeric_alignment.fasta
- OSKAR_Monomeric_alignment.fasta


In [19]:
import os
import numpy as np
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Alphabet import generic_protein

In [20]:
# Read the filtered Oskar alignment and keep every record in memory.
handle = SeqIO.parse(
    '../Data/01_Oskar_identification/oskar_tracker_results/oskar_filtered.aligned.fasta',
    'fasta',
)
seqs = list(handle)

# The family name is taken as the third field from the end of each
# '|'-separated record description; np.unique returns the distinct names, sorted.
family_per_record = [record.description.split('|')[-3] for record in seqs]
families = np.unique(family_per_record)

In [21]:
# Display the distinct family names found in the alignment.
families

array(['Adelidae', 'Aeolothripidae', 'Ampulicidae', 'Aphelinidae',
       'Asilidae', 'Aulacidae', 'Baetidae', 'Bethylidae', 'Blaberidae',
       'Blattidae', 'Bombyliidae', 'Boreidae', 'Bothrideridae',
       'Braconidae', 'Calliphoridae', 'Capniidae', 'Carabidae',
       'Cecidomyiidae', 'Ceraphronidae', 'Ceratopogonidae', 'Chalcididae',
       'Chaoboridae', 'Chironomidae', 'Chloropidae', 'Chrysididae',
       'Chrysomelidae', 'Chrysopidae', 'Coccinellidae', 'Cucujidae',
       'Culicidae', 'Curculionidae', 'Cylindrotomidae', 'Cynipidae',
       'Diapriidae', 'Diprionidae', 'Dolichopodidae', 'Drosophilidae',
       'Ectobiidae', 'Encyrtidae', 'Eupelmidae', 'Eurytomidae',
       'Evaniidae', 'Figitidae', 'Formicidae', 'Gasteruptiidae',
       'Glossinidae', 'Gryllidae', 'Heteropterygidae', 'Hydroptilidae',
       'Ichneumonidae', 'Keroplatidae', 'Lepidopsocidae', 'Leuctridae',
       'Limnephilidae', 'Liposcelididae', 'Lonchopteridae',
       'Megalodontesidae', 'Megaspilidae', 'Mega

## Step 1: Collect sequences from specific families

In [22]:
# Family name -> LOTUS conformation label used to split the alignment.
# Families absent from this table are ignored when sequences are collected.
mapping = {
    **dict.fromkeys(('Gryllidae', 'Formicidae', 'Culicidae'), "monomeric"),
    **dict.fromkeys(('Pteromalidae', 'Drosophilidae', 'Tephritidae'), "dimeric"),
}

In [23]:
# One (initially empty) list of records per conformation label; each label
# gets its own independent list.
sequences = {state: [] for state in ('monomeric', 'dimeric')}

In [24]:
# Sort each record into the monomeric or dimeric bucket according to its family.
for seq in seqs:
    # Family name: third field from the end of the '|'-separated description.
    fam = seq.description.split('|')[-3]
    state = mapping.get(fam)
    if state is None:
        # Family has no entry in the mapping: leave the record out.
        continue
    sequences[state].append(seq)

In [25]:
# Number of records collected under the 'monomeric' label.
len(sequences['monomeric'])

66

In [26]:
# Number of records collected under the 'dimeric' label.
len(sequences['dimeric'])

86

## Step 2: Save Sequences

In [18]:
# Write each group of records to its own FASTA file.
dimeric_path = '../Data/02_Oskar_analyses/2.5/FASTA/OSKAR_Dimeric_alignment.fasta'
monomeric_path = '../Data/02_Oskar_analyses/2.5/FASTA/OSKAR_Monomeric_alignment.fasta'

# SeqIO.write does not create missing directories, so make sure the output
# folder exists first (both files share the same folder).
os.makedirs(os.path.dirname(dimeric_path), exist_ok=True)

SeqIO.write(sequences['dimeric'], dimeric_path, 'fasta')
# Kept as the last expression so the notebook still displays the number of
# monomeric records written.
SeqIO.write(sequences['monomeric'], monomeric_path, 'fasta')

66