In [81]:
%load_ext pycodestyle_magic
%flake8_on

The pycodestyle_magic extension is already loaded. To reload it, use:
  %reload_ext pycodestyle_magic


In [82]:
from itertools import permutations
from Bio import SeqIO

In [84]:
# 1. Generate all variants of sequence length from 1 to n

def generate_seq(n):
    """Yield all possible combination of sequence length from 1 to n"""
    for num_letters in range(1, n+1):
        yield from("".join(i) for i in permutations("AGTC", num_letters))


data = generate_seq(2)
print(next(data))
print(next(data))
list(data)

A
G


['T',
 'C',
 'AG',
 'AT',
 'AC',
 'GA',
 'GT',
 'GC',
 'TA',
 'TG',
 'TC',
 'CA',
 'CG',
 'CT']

In [86]:
# 2. Read and translate fasta

def translate_fasta(file, table="Standard"):
    """Read fasta file and yield fasta header and translated seq. By default
    we use standard table, for alternative use number of tables from here:
    https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi"""
    with open(file, "r") as handle:
        for record in SeqIO.parse(handle, "fasta"):
            yield [record.description, str(record.seq.translate(table=table))]


data = translate_fasta('random.fasta')
print(next(data))
print(next(data))
list(data)

['random sequence 1', 'PLFLRHSRGLS*RLAFPRVRLDSNSA*SSS*LMGTARRQLALRPA*EAKL*SCHLERQTP']
['random sequence 2', '*PPPAGRLVRNNASKSEHQKRSDASRGVRIGYTSLQ*IEAGAMVTIAH*H*TTDCLQVCQ']


[['random sequence 3',
  'QPSRV*DGLVPFPRAVGPARLTWTLAVRLYDPILVVFDQSQNATSYYGDT*VRLTYLLQL'],
 ['random sequence 4',
  'HELDPRIWRRVAIP*VSSPAVTS*NPHAPRNLVSGVRAA*IDY*FTVGL*ACVLYFLNVW'],
 ['random sequence 5',
  'GQRAFAF*PVRVTARLSNCVVAGLQRHIPSKKLYWSK*RILKDNER*LRRALKIRHTSSS'],
 ['random sequence 6',
  'YKPRGALVVRL*LMRTVRPIPTPLA*PGRADVLIRKDTPFELDVATL*PPRRRYLRILFL'],
 ['random sequence 7',
  'RLRHPSHYAEGLKLNSIE*SVVRRLNCATPRQAK*QRHAFDRVTCTQTRATKVIDALDNI'],
 ['random sequence 8',
  'VAVLLVTRMRTDAKSGSDRIDRSRSDARVPSYKSL*VSHGSRSLCRLAPSSRTRAQPVVV'],
 ['random sequence 9',
  'LNGDQIHTCML*CDRIALSRLQTNLRRFSFVTLIPSHFTSRVDDCYWLP*SIPADASSLL'],
 ['random sequence 10',
  'RSRDSRLLPLRLNADPSRPHTEVAFSPNRVQNSCRFGECK*RTDKGIHNRQGRYTQCTRY']]