In [2]:
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.SeqFeature import SeqFeature, FeatureLocation

In [4]:
# Extract the reference sequence of every gene of interest from the genome file.
all_genes = ["ORF1ab", "RdRp", "Spike", "S1", "S2", "Protein4a", "Protein4b", "E", "M", "N"]

# '' is a placeholder meaning "no CDS found for this gene yet".
refseq_by_gene = {g:'' for g in all_genes}

genome_ref_file = 'reference_229e_genome.gb'

# The context manager closes the GenBank file as soon as parsing is finished
# (the bare open() used before left the handle open for the kernel's lifetime).
with open(genome_ref_file, "r") as genome_handle:
    for record in SeqIO.parse(genome_handle, "genbank"):
        # id/name of the genome record, reused when building per-gene records.
        info_for_new_refs = {'id':record.id, 'name': record.name}

        # Single pass over the features: look the locus_tag up in the dict
        # instead of rescanning the whole feature list once per gene.
        for feature in record.features:
            if feature.type != 'CDS':
                continue
            locus_tags = feature.qualifiers.get('locus_tag')
            # Same match rule as `qualifiers['locus_tag'] == [gene]`:
            # a list holding exactly one tag, and that tag is a requested gene.
            if not (isinstance(locus_tags, list) and len(locus_tags) == 1):
                continue
            gene = locus_tags[0]
            if gene in refseq_by_gene:
                refseq_by_gene[gene] = feature.location.extract(record.seq)

# Fail loudly here rather than letting an empty placeholder flow into later cells.
missing_genes = [g for g in all_genes if len(refseq_by_gene[g]) == 0]
if missing_genes:
    raise ValueError(
        f"No CDS with a matching locus_tag found in {genome_ref_file} "
        f"for: {', '.join(missing_genes)}"
    )

In [22]:
# Write one single-gene GenBank reference file per gene in `all_genes`.
for gene in all_genes:
    gene_seq = refseq_by_gene[gene]
    gene_length = len(gene_seq)

    # Both annotations span the full extracted sequence (0-based, end-exclusive).
    record_features = [
        # Source annotation
        SeqFeature(FeatureLocation(start=0, end=gene_length), type='source'),
        # Gene annotation, tagged with the gene name
        SeqFeature(
            FeatureLocation(start=0, end=gene_length),
            type='CDS',
            qualifiers={'locus_tag':gene},
        ),
    ]

    # Every per-gene record carries the id and name of the genome record.
    gene_record = SeqRecord(
        gene_seq,
        id=info_for_new_refs['id'],
        name=info_for_new_refs['name'],
        description=f'Human coronavirus 229E, {gene} protein',
        annotations={"molecule_type": "RNA"},
        features=record_features,
    )

    # Save as GenBank file, named after the gene
    output_path = f'reference_229e_{gene}.gb'
    SeqIO.write(gene_record, output_path, "genbank")