In [1]:
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO
import itertools

# Define the original protein sequence
# (one-letter residue codes; this is the parent sequence that is passed to
# generate_mutants to derive every single-substitution variant)
original_sequence = "CAKDAVDTGGYTGYTYGCDYGVVDAW"

# List of standard amino acids
# (the 20 one-letter codes in alphabetical order; each one is tried as a
# replacement at every position of the parent sequence)
amino_acids = 'ACDEFGHIKLMNPQRSTVWY'

# Function to generate all possible single amino acid mutants
def generate_mutants(sequence, alphabet=None):
    """Enumerate every single-residue substitution of ``sequence``.

    Parameters
    ----------
    sequence : str
        Parent sequence, one character per residue.
    alphabet : iterable of str, optional
        Residue codes to substitute in. When omitted, the module-level
        ``amino_acids`` string is used, which matches the previous behaviour
        of this function.

    Returns
    -------
    list of tuple
        ``(position, original_residue, new_residue, mutant_sequence)`` tuples.
        ``position`` is 1-based. Entries are ordered by position first and by
        the order of ``alphabet`` second. A residue is never replaced by
        itself, so an empty ``sequence`` yields an empty list.
    """
    if alphabet is None:
        # Looked up at call time so that later changes to the global are honoured
        alphabet = amino_acids
    # Materialise once so that a one-shot iterator can be reused for every position
    alphabet = tuple(alphabet)

    return [
        (index + 1, residue, substitute,
         sequence[:index] + substitute + sequence[index + 1:])
        for index, residue in enumerate(sequence)
        for substitute in alphabet
        if substitute != residue
    ]

# Build the complete single-substitution library for the parent sequence
mutants = generate_mutants(original_sequence)

# Wrap each variant in a SeqRecord. The id combines a 1-based running number
# with the <original residue><position><new residue> label.
fasta_records = [
    SeqRecord(
        Seq(variant),
        id=f"mutant_{serial}_{wild_type}{site}{substitution}",
        description=f"Position: {site}, {wild_type} -> {substitution}",
    )
    for serial, (site, wild_type, substitution, variant) in enumerate(mutants, start=1)
]

# Write every record to the FASTA file in a single pass
with open('mutants.fasta', 'w') as fasta_out:
    SeqIO.write(fasta_records, fasta_out, "fasta")

print(f"Generated {len(mutants)} mutants and saved to mutants.fasta")

Generated 494 mutants and saved to mutants.fasta


In [2]:
# Show the contents of the current working directory. The bare `ls` is
# resolved by IPython, not by plain Python.
# NOTE(review): the saved output of this cell lists a user home directory;
# consider clearing it before sharing the notebook.
ls

 Volume in drive C is OSDisk
 Volume Serial Number is F808-222D

 Directory of C:\Users\mittalb

24/06/2024  15:54    <DIR>          .
24/06/2024  15:54    <DIR>          ..
02/04/2024  15:47    <DIR>          .anaconda
07/11/2023  09:37    <DIR>          .bubblegum
24/06/2024  15:54    <DIR>          .conda
03/07/2023  12:56                25 .condarc
03/07/2023  12:55    <DIR>          .continuum
24/06/2024  15:54    <DIR>          .ipynb_checkpoints
16/05/2023  14:27    <DIR>          .ipython
02/04/2024  15:50    <DIR>          .jupyter
16/11/2023  14:57    <DIR>          .matplotlib
26/04/2023  12:55    <DIR>          .ms-ad
01/11/2023  15:27    <DIR>          .openjfx
17/11/2023  13:19    <DIR>          .reactome
16/11/2023  14:57    <DIR>          .shiv
28/04/2023  13:54    <DIR>          .ssh
14/05/2024  10:16    <DIR>          .swt
27/07/2023  15:58    <DIR>          .UGENE_files
26/04/2023  12:25    <DIR>          3D Objects
19/06/2024  15:47            15,276 aaseq-ml.ipynb


In [4]:
from Bio import SeqIO

In [5]:
# Path of the FASTA file to read back; same relative file name that the
# mutant-generation cell writes to.
fasta_file = "mutants.fasta"

In [9]:
# Read the FASTA file back and echo each record: id, sequence and
# description, followed by a blank separator line.
with open(fasta_file, "r") as fasta_in:
    for entry in SeqIO.parse(fasta_in, "fasta"):
        print(
            f"ID: {entry.id}",
            f"Sequence: {entry.seq}",
            f"Description: {entry.description}\n",
            sep="\n",
        )

ID: mutant_1_C1A
Sequence: AAKDAVDTGGYTGYTYGCDYGVVDAW
Description: mutant_1_C1A Position: 1, C -> A

ID: mutant_2_C1D
Sequence: DAKDAVDTGGYTGYTYGCDYGVVDAW
Description: mutant_2_C1D Position: 1, C -> D

ID: mutant_3_C1E
Sequence: EAKDAVDTGGYTGYTYGCDYGVVDAW
Description: mutant_3_C1E Position: 1, C -> E

ID: mutant_4_C1F
Sequence: FAKDAVDTGGYTGYTYGCDYGVVDAW
Description: mutant_4_C1F Position: 1, C -> F

ID: mutant_5_C1G
Sequence: GAKDAVDTGGYTGYTYGCDYGVVDAW
Description: mutant_5_C1G Position: 1, C -> G

ID: mutant_6_C1H
Sequence: HAKDAVDTGGYTGYTYGCDYGVVDAW
Description: mutant_6_C1H Position: 1, C -> H

ID: mutant_7_C1I
Sequence: IAKDAVDTGGYTGYTYGCDYGVVDAW
Description: mutant_7_C1I Position: 1, C -> I

ID: mutant_8_C1K
Sequence: KAKDAVDTGGYTGYTYGCDYGVVDAW
Description: mutant_8_C1K Position: 1, C -> K

ID: mutant_9_C1L
Sequence: LAKDAVDTGGYTGYTYGCDYGVVDAW
Description: mutant_9_C1L Position: 1, C -> L

ID: mutant_10_C1M
Sequence: MAKDAVDTGGYTGYTYGCDYGVVDAW
Description: mutant_10_C1M Position: