In [1]:
from Bio import SeqIO
from Bio.SeqIO.FastaIO import SimpleFastaParser
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

In [3]:
fileName = 'HspB-5_RNA_aligned_trimmed.fasta'

# Parse the alignment once, keeping every (title, sequence) pair the parser yields.
with open('./../data/HspB-5/' + fileName) as fasta_file:
    fasta_entries = list(SimpleFastaParser(fasta_file))

# The first whitespace-delimited word of each title line is used as the record ID.
ids = [header.split(None, 1)[0] for header, _ in fasta_entries]
sequences = [residues for _, residues in fasta_entries]

In [4]:
def replaceStopCodonsWithDashes(sequenceString):
    """Return ``sequenceString`` with every in-frame stop codon replaced by ``---``.

    The string is read as consecutive, non-overlapping triplets starting at
    index 0. A triplet equal to a DNA or RNA stop codon (TAG/TAA/TGA or
    UAG/UAA/UGA) is replaced by ``---``. Matching is case-insensitive, so
    lower-case and mixed-case triplets such as ``Tag`` are masked as well.

    Every other triplet, including a trailing fragment of one or two
    characters, is copied unchanged, so the result always has the same
    length as the input. An empty input returns an empty string.
    """
    stopCodons = frozenset(('UAG', 'UAA', 'UGA', 'TAG', 'TAA', 'TGA'))

    codons = []
    for codonStartChar in range(0, len(sequenceString), 3):
        codon = sequenceString[codonStartChar:codonStartChar + 3]
        # Compare upper-cased so lower- and mixed-case stop codons are caught too.
        if codon.upper() in stopCodons:
            codon = '---'
        codons.append(codon)

    # Join once at the end instead of growing a string inside the loop.
    return ''.join(codons)

In [5]:
# Mask the stop codons of every parsed sequence, preserving the input order.
newSequences = [replaceStopCodonsWithDashes(rawSequence) for rawSequence in sequences]

In [6]:
# Pair each ID with the stop-masked sequence stored at the same list position.
records = [
    SeqRecord(Seq(newSequences[position]), id=recordId)
    for position, recordId in enumerate(ids)
]
SeqIO.write(records, "./" + fileName + "_strippedStops.fasta", "fasta")

150

In [7]:
def TrimToMakeDivByThree(listOfSequences):
    """Return a new list with each sequence cut to a length divisible by three.

    Takes an iterable of dna/rna sequences (strings). The one or two
    trailing characters that do not complete a triplet are dropped from
    each sequence. A sequence whose length is already a multiple of three
    is returned unchanged, and a sequence shorter than three characters
    becomes an empty string. The input list is not modified.
    """
    trimmedSequences = []
    for seq in listOfSequences:
        # Slice to an explicit end index. The negative form
        # seq[:-(len(seq) % 3)] turns into seq[:0] when the remainder is 0,
        # which would wipe out every sequence that is already in frame.
        wholeCodonLength = len(seq) - len(seq) % 3
        trimmedSequences.append(seq[:wholeCodonLength])
    return trimmedSequences

In [8]:
# Pass the stop-codon-masked sequences through TrimToMakeDivByThree; the result
# feeds the "_strippedStops_trimmed" FASTA export in the next cell.
trimmedSequences = TrimToMakeDivByThree(newSequences)

In [9]:
# Pair each ID with the trimmed sequence stored at the same list position.
records = [
    SeqRecord(Seq(trimmedSequences[position]), id=recordId)
    for position, recordId in enumerate(ids)
]
SeqIO.write(records, "./" + fileName + "_strippedStops_trimmed.fasta", "fasta")

150