In [8]:
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.SeqUtils import six_frame_translations, gc_fraction
from Bio.Data import CodonTable

# Path of the FASTA file holding the transcripts to score
fasta_file = "lncRNA.fa"

# Standard genetic code (NCBI translation table 1); supplies the stop codons
standard_table = CodonTable.unambiguous_dna_by_id[1]

# ORFs of this many nucleotides or fewer are ignored
MIN_ORF_LENGTH = 50
# ORFs whose GC fraction lies outside this closed interval are ignored
GC_RANGE = (0.3, 0.7)
# Only the canonical start codon opens an ORF
START_CODON = "ATG"


def _find_orfs(nucleotides, stop_codons, start_codon=START_CODON):
    """Yield every complete ORF in the six reading frames of *nucleotides*.

    An ORF is the nucleotide string running from a start codon up to and
    including the first in-frame stop codon. ORFs that run off the end of
    the sequence without reaching a stop codon are not reported.

    Args:
        nucleotides: DNA or RNA sequence (``str`` or ``Seq``); case is
            ignored and ``U`` is treated as ``T``.
        stop_codons: Container of upper-case DNA stop codons.
        start_codon: Upper-case DNA codon that opens an ORF.

    Yields:
        Upper-case DNA strings, one per ORF. Reverse-strand ORFs are
        reported as read on the reverse strand.
    """
    # six_frame_translations() only returns a formatted report string, so
    # the ORFs are located here by walking the codons of each frame.
    forward = str(nucleotides).upper().replace("U", "T")
    reverse = str(Seq(forward).reverse_complement())
    for strand in (forward, reverse):
        for frame in range(3):
            orf_start = None
            # Stop at len - 2 so that every slice is a full codon
            for pos in range(frame, len(strand) - 2, 3):
                codon = strand[pos:pos + 3]
                if orf_start is None:
                    if codon == start_codon:
                        orf_start = pos
                elif codon in stop_codons:
                    yield strand[orf_start:pos + 3]
                    orf_start = None


def _coding_potential(nucleotides, table):
    """Return the protein coding potential of a nucleotide sequence.

    The score is the ORF coverage: the length of the longest accepted ORF
    divided by the length of the whole sequence, so it lies in [0, 1].
    An ORF is accepted when it is longer than ``MIN_ORF_LENGTH``
    nucleotides and its GC fraction lies within ``GC_RANGE``.

    Args:
        nucleotides: DNA or RNA sequence (``str`` or ``Seq``).
        table: Codon table whose ``stop_codons`` terminate an ORF.

    Returns:
        The best ORF coverage, or 0 when the sequence is empty or no ORF
        passes the filters.
    """
    total_length = len(nucleotides)
    if total_length == 0:
        return 0

    gc_min, gc_max = GC_RANGE
    stop_codons = frozenset(table.stop_codons)
    coverages = [
        len(orf) / total_length
        for orf in _find_orfs(nucleotides, stop_codons)
        if len(orf) > MIN_ORF_LENGTH and gc_min <= gc_fraction(orf) <= gc_max
    ]
    return max(coverages, default=0)


# Score each sequence in the file
for seq_record in SeqIO.parse(fasta_file, "fasta"):
    protein_coding_potential = _coding_potential(seq_record.seq, standard_table)

    # SeqRecord.features is meant for SeqFeature objects; a per-record
    # value such as this score is kept in the annotations dictionary.
    seq_record.annotations["protein_coding_potential"] = protein_coding_potential

    # Print the sequence ID and the protein coding potential
    print(seq_record.id, protein_coding_potential)


FileNotFoundError: [Errno 2] No such file or directory: 'sequences.fasta'