In [6]:
# Path to the FASTA-style text file holding the protein sequence to mutate.
input_protein_file = "ADA2.txt"

# Point mutations to apply, each written as
# <original residue><1-based position><new residue>.
mutations_of_interest = [
    "G47A",
    "Y453C",
]

In [7]:
import os
import re

In [8]:
def clean_text_file(file_path):
    """Return the sequence stored in a FASTA-style text file as a single string.

    Lines that begin with '>' (header lines) are skipped. Every other line has
    its leading and trailing whitespace stripped, and the pieces are
    concatenated in file order with nothing between them.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The concatenated, whitespace-trimmed content of all non-header lines.
    """
    with open(file_path, 'r') as handle:
        all_lines = handle.readlines()

    sequence_parts = []
    for raw_line in all_lines:
        if raw_line.startswith('>'):
            # Header line: not part of the sequence.
            continue
        sequence_parts.append(raw_line.strip())
    return ''.join(sequence_parts)

def split_on_transition(s):
    """Split a string into its consecutive runs of ASCII letters and of digits.

    Characters that are neither letters nor digits act as separators and are
    dropped. For example, "G47A" becomes ["G", "47", "A"].

    Args:
        s: The string to tokenize.

    Returns:
        A list of the letter runs and digit runs, in order of appearance.
    """
    token_matches = re.finditer(r'[A-Za-z]+|\d+', s)
    return [token.group(0) for token in token_matches]

def replace_letter_at_index(s, index, new_letter, original_letter):
    """Return a copy of `s` with the character at `index` swapped for `new_letter`.

    The character currently at `index` must equal `original_letter`; this
    guards against applying a substitution at the wrong position.

    Args:
        s: The source string.
        index: Zero-based position of the character to replace.
        new_letter: Text inserted in place of the removed character.
        original_letter: The character expected at `index` before replacement.

    Returns:
        A new string with the substitution applied.

    Raises:
        ValueError: If `index` is negative or past the end of `s`, or if the
            character at `index` differs from `original_letter`.
    """
    out_of_bounds_message = "Index is out of bounds"
    if index < 0:
        raise ValueError(out_of_bounds_message)
    if index >= len(s):
        raise ValueError(out_of_bounds_message)

    current_letter = s[index]
    if current_letter != original_letter:
        # The message reports the position using 1-based counting.
        raise ValueError(f"Original letter at position {index+1} is not '{original_letter}'")

    before = s[:index]
    after = s[index + 1:]
    return before + new_letter + after

def make_mutant_fasta_header(file_path):
    """Build the header line for the mutant file from the input file's header.

    The header is the last line in the file that starts with '>'. Its leading
    '>' is replaced by '>MUTANT_' and surrounding whitespace is stripped.

    Args:
        file_path: Path of the FASTA-style text file to read.

    Returns:
        The rewritten header line, or an empty string if the file contains no
        line starting with '>'.
    """
    with open(file_path, 'r') as handle:
        all_lines = handle.readlines()

    # Scan from the end so that, when several header lines exist, the last
    # one in the file is the one that is used.
    for candidate in reversed(all_lines):
        if candidate.startswith('>'):
            return candidate.replace('>', '>MUTANT_', 1).strip()
    return ''

def write_strings_to_file(file_path, first_string, second_string, line_width=50):
    """Write a header line followed by a string wrapped to fixed-width lines.

    The file at `file_path` is created or overwritten. `first_string` is
    written as the first line; `second_string` is then written in consecutive
    chunks of `line_width` characters, one chunk per line (the final chunk may
    be shorter). Every line, including the last, ends with a newline.

    Args:
        file_path: Destination path.
        first_string: Text for the first line (e.g. a FASTA header).
        second_string: Text to wrap onto the following lines (e.g. a sequence).
        line_width: Maximum number of characters per wrapped line. Defaults to
            50, the width this function used before it became configurable.

    Raises:
        ValueError: If `line_width` is less than 1.
    """
    # Validate before opening: opening in 'w' mode truncates the file, so a
    # bad width must be rejected before any existing content is destroyed.
    if line_width < 1:
        raise ValueError("line_width must be a positive integer")

    with open(file_path, 'w') as file:
        # Write the first string as the first line
        file.write(first_string + '\n')

        # Break the second string into lines of `line_width` characters
        for start in range(0, len(second_string), line_width):
            file.write(second_string[start:start + line_width] + '\n')

In [9]:
## import protein text
protein = clean_text_file(input_protein_file)
# Strings are immutable, so the mutant can start from the same value instead
# of reading and parsing the input file a second time.
mutant_protein = protein

## parse desired mutations
mutation_list = []
for mutation in mutations_of_interest:
    result = split_on_transition(mutation)
    # Each mutation must tokenize to exactly [original residue, position, new residue];
    # fail early with a clear message rather than an IndexError / int() error later.
    if (
        len(result) != 3
        or not result[0].isalpha()
        or not result[1].isdigit()
        or not result[2].isalpha()
    ):
        raise ValueError(
            f"Cannot parse mutation '{mutation}': expected the form "
            "<original residue><position><new residue>, e.g. 'G47A'"
        )
    mutation_list.append(result)

## create mutations
for mutation in mutation_list:
    original_residue = mutation[0]
    position = int(mutation[1])
    new_residue = mutation[2]

    mutant_protein = replace_letter_at_index(mutant_protein, position-1, new_residue, original_residue) ## -1 b/c switching from normal counting to python zero-indexing

## make output file
# Strip only the final extension. Splitting on the first "." would mangle
# paths such as "../data/ADA2.txt" or names such as "ADA2.v2.txt".
output_protein_file = os.path.splitext(input_protein_file)[0] + "_mutant.txt"

new_fasta_header = make_mutant_fasta_header(input_protein_file)

write_strings_to_file(output_protein_file, new_fasta_header, mutant_protein)

In [10]:
## Next steps: start looking into protein stability analysis