## Task 1

### Data

In [1]:
# Location of the FASTA input file for Task 1.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
infile = "C:/Users/Admin/Downloads/prot.fasta"

### Functions

In [2]:
def my_own_fasta_parser(infile):
    """Parse a FASTA file into a dict mapping record header to sequence.

    A line starting with '>' opens a new record; its text after the '>'
    (without the line terminator) becomes the key. All following non-blank
    lines up to the next header are concatenated into that record's sequence.
    Characters inside a line (including spaces) are kept as they are.

    Args:
        infile: Path of the FASTA file to read.

    Returns:
        dict: header -> sequence string, in file order. A header that is not
        followed by any sequence line produces no entry. If a header occurs
        more than once, the last record with that header wins.

    Raises:
        ValueError: If sequence data appears before the first header line.
    """
    chunks = {}
    seq_id = None
    new_record = False

    with open(infile, 'r') as file:
        for line in file:
            # Strip only the line terminator: slicing with [:-1] would cut a
            # real character off a final line that has no trailing newline.
            line = line.rstrip('\n')
            if line.startswith('>'):
                seq_id = line[1:]
                new_record = True
            elif not line:
                # Blank lines carry no sequence data and must not reset a record.
                continue
            elif seq_id is None:
                raise ValueError(
                    'sequence data found before the first FASTA header'
                )
            elif new_record:
                # First sequence line of this record: start (or restart) it.
                chunks[seq_id] = [line]
                new_record = False
            else:
                # Continuation line of a multi-line record.
                chunks[seq_id].append(line)

    return {header: ''.join(parts) for header, parts in chunks.items()}

### Output

In [3]:
# Parse the file; the returned dict is displayed as the cell output.
my_own_fasta_parser(infile)

{'seq0': 'FQTWEEFSRAAEKLYLADPMKVRVVLKYRHVDGNLCIKVTDDLVCLVYRTDQAQDVKKIEKF',
 'seq1': 'KYRTWEEFTRAAEKLYQADPMKVRVVLKYRHCDGNLCIKVTDDVVCLLYRTDQAQDVKKIEKFHSQLMRLME LKVTDNKECLKFKTDQAQEAKKMEKLNNIFFTLM',
 'seq2': 'EEYQTWEEFARAAEKLYLTDPMKVRVVLKYRHCDGNLCMKVTDDAVCLQYKTDQAQDVKKVEKLHGK',
 'seq3': 'MYQVWEEFSRAVEKLYLTDPMKVRVVLKYRHCDGNLCIKVTDNSVCLQYKTDQAQDVK',
 'seq4': 'EEFSRAVEKLYLTDPMKVRVVLKYRHCDGNLCIKVTDNSVVSYEMRLFGVQKDNFALEHSLL',
 'seq5': 'SWEEFAKAAEVLYLEDPMKCRMCTKYRHVDHKLVVKLTDNHTVLKYVTDMAQDVKKIEKLTTLLMR',
 'seq6': 'FTNWEEFAKAAERLHSANPEKCRFVTKYNHTKGELVLKLTDDVVCLQYSTNQLQDVKKLEKLSSTLLRSI',
 'seq7': 'SWEEFVERSVQLFRGDPNATRYVMKYRHCEGKLVLKVTDDRECLKFKTDQAQDAKKMEKLNNIFF',
 'seq8': 'SWDEFVDRSVQLFRADPESTRYVMKYRHCDGKLVLKVTDNKECLKFKTDQAQEAKKMEKLNNIFFTLM',
 'seq9': 'KNWEDFEIAAENMYMANPQNCRYTMKYVHSKGHILLKMSDNVKCVQYRAENMPDLKK',
 'seq10': 'FDSWDEFVSKSVELFRNHPDTTRYVVKYRHCEGKLVLKVTDNHECLKFKTDQAQDAKKMEK'}

## Task 2

### Data

In [4]:
# Same FASTA path as `infile` in Task 1, bound to a separate name for this task.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
input_file = "C:/Users/Admin/Downloads/prot.fasta"

### Functions

In [5]:
def my_own_residue_abundance(input_file, residue, threshold=0.2):
    """List the IDs of sequences in which `residue` is more frequent than `threshold`.

    The file is read with `my_own_fasta_parser`. For each sequence the
    frequency is `sequence.count(residue) / len(sequence)`.

    Args:
        input_file: Path of the FASTA file to analyse.
        residue: Substring to count in each sequence (e.g. a one-letter code).
        threshold: Frequency that must be strictly exceeded. Defaults to 0.2.

    Returns:
        list: Sequence IDs, in parser order, whose frequency is > threshold.
    """
    sequences = my_own_fasta_parser(input_file)

    return [
        seq_id
        for seq_id, sequence in sequences.items()
        # An empty sequence has no defined frequency; skipping it avoids a
        # ZeroDivisionError instead of aborting the whole scan.
        if sequence and sequence.count(residue) / len(sequence) > threshold
    ]

### Output

In [6]:
# IDs of sequences in which 'K' makes up more than 12% of the characters.
my_own_residue_abundance(input_file, 'K', threshold=0.12)

['seq1', 'seq2', 'seq5', 'seq7', 'seq8', 'seq9', 'seq10']

## Task 4

### Imports

In [7]:
import Bio
from Bio import pairwise2
from Bio.SubsMat import MatrixInfo as matlist



### Functions

In [8]:
def balign(first_seq, second_seq):
    """Globally align two sequences and return the first alignment as text.

    The alignment is computed by `pairwise2.align.globalds` using the
    BLOSUM62 matrix from `matlist`, with -10 and -0.5 passed as the two gap
    penalty arguments.

    Args:
        first_seq: First sequence to align.
        second_seq: Second sequence to align.

    Returns:
        str: The first alignment in the result list, rendered by
        `pairwise2.format_alignment`.
    """
    alignments = pairwise2.align.globalds(
        first_seq, second_seq, matlist.blosum62, -10, -0.5
    )

    # Only the first alignment in the list is reported; split it into the
    # five fields that format_alignment expects.
    seq_a, seq_b, aln_score, aln_start, aln_stop = alignments[0]
    formatted = pairwise2.format_alignment(
        seq_a, seq_b, aln_score, aln_start, aln_stop
    )
    return formatted

### Output

In [9]:
# print() is needed here so the multi-line alignment text renders line by line.
# NOTE(review): both inputs look like nucleotide sequences, while balign scores
# with BLOSUM62 -- confirm this is intended.
print(balign('AACACACAAACGGTGTGCACCAAACACACGGGTGTACACACAAAAAC', 'CAAAGGATTGACACTAAAGACCAACCCACTACCCTATACACAAGAAGTAT'))

AACACACAAACGGTGTGCAC------CAAACACACGGGTGTACACACAAAA---AC
      ||||.|.|...|||      |.|||.|||.....||.||||||.|   |.
------CAAAGGATTGACACTAAAGACCAACCCACTACCCTATACACAAGAAGTAT
  Score=99



## Task 5

### Imports

In [10]:
from Bio.Seq import Seq
from Bio.Seq import MutableSeq

### Functions

In [11]:
def prots(dna):
    """Translate `dna` in nine reading frames and return the results, longest first.

    Three orientations of the input are used -- the sequence as given, the
    reversed sequence, and the reverse complement -- each translated from
    offsets 0, 1 and 2. Translation stops at the first stop codon
    (`to_stop=True`).

    The orientations are built as immutable `Seq` objects instead of mutating
    one `MutableSeq` in place, so the result no longer depends on cumulative
    state and does not need `Seq.tomutable()` / `MutableSeq.toseq()`.

    Args:
        dna: Nucleotide sequence as a string.

    Returns:
        list: Nine `Seq` translations sorted by decreasing length; equally
        long translations keep the frame order listed in `frames` below.
    """
    forward = Seq(dna)
    backward = forward[::-1]
    revcomp = forward.reverse_complement()
    # NOTE(review): translating the plain reversed strand is kept for
    # backward compatibility -- confirm it is wanted for this task.

    # (orientation, offset) pairs in the order the translations were
    # historically produced; the order only matters for ties in the sort.
    frames = [
        (backward, 0), (forward, 1), (backward, 2),
        (revcomp, 0), (backward, 1), (revcomp, 2),
        (forward, 0), (revcomp, 1), (forward, 2),
    ]

    proteins = [
        strand[offset:].translate(to_stop=True) for strand, offset in frames
    ]
    # list.sort is stable, also with reverse=True, so ties keep frame order.
    proteins.sort(key=len, reverse=True)
    return proteins

### Output

In [12]:
# Translate a short example sequence; the returned list is the cell output.
prots('AGTACTAGAGCATTCTATGGAG')



[Seq('EVSYEIM'),
 Seq('VLEHSME'),
 Seq('STRAFYG'),
 Seq('SIECSST'),
 Seq('GILRDH'),
 Seq('RYLTRS'),
 Seq('LHRML'),
 Seq('P'),
 Seq('Y')]

## Task 6

### Data

In [13]:
# Example input for the reverse-complement function defined below.
seq = 'ACTG'

### Functions

In [14]:
def rev_compl_one_line(seq):
    """Return the reverse complement of `seq`; symbols other than A, C, G, T raise KeyError."""
    return ''.join({'A': 'T', 'C': 'G', 'T': 'A', 'G': 'C'}[base] for base in seq[::-1])

### Output

In [15]:
# Reverse complement of the example sequence; 'ACTG' gives 'CAGT'.
rev_compl_one_line(seq)

'CAGT'