In [16]:
import os
from Bio import SeqIO
from pathlib import Path

In [18]:
"""
Purpose: mine the rRNA from GenBank sequences
and output it to a FASTA file with the accession number, locus tag,
the start, the end, and the direction of the sequence, with
optional database identifiers and optional products or notes.

The current version looks for the .gb GenBank files in the folder
that is two levels under the main folder
and loops through each GenBank file to look for rRNA features.
"""

def SDRNA_RNA_Mining(folder, subfolder, extensions=(".gb", ".gbk", ".gbff"), verbose=True):
    """Write every rRNA feature found in the GenBank files of ``folder/subfolder`` to a FASTA file.

    Each rRNA feature becomes one FASTA record whose header has the form
    ``>accession|locus_tag|start-end|direction|db_xref|product-or-note| ``
    followed by the feature's sequence on the next line. Features on the
    reverse strand are written reverse-complemented.

    The output file is created inside ``folder/subfolder`` and is named
    ``"<folder> <subfolder> output.fasta"``. Because both arguments are used
    verbatim in that file name, they should be plain directory names without
    path separators.

    Parameters
    ----------
    folder : str
        Top-level directory, relative to the current working directory.
    subfolder : str
        Directory inside ``folder`` that holds the GenBank files.
    extensions : str or iterable of str, optional
        File suffixes (case-insensitive, including the dot) that identify
        GenBank files. Only regular files with one of these suffixes are parsed.
    verbose : bool, optional
        When true (the default), print every rRNA feature as it is found.

    Returns
    -------
    None

    Raises
    ------
    FileNotFoundError
        If ``folder/subfolder`` does not exist.
    """
    pathname = Path(folder) / subfolder
    opname = folder + " " + subfolder + " output.fasta"
    output_file = pathname / opname

    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    # Match on the file suffix rather than on a "gb" substring so that
    # directories and unrelated files (e.g. "gb_notes.txt") are never parsed.
    # Sorting makes the record order in the output reproducible.
    gb_files = sorted(
        entry
        for entry in pathname.iterdir()
        if entry.is_file() and entry.suffix.lower() in suffixes
    )

    # The context manager closes the output file even if parsing fails midway.
    with open(output_file, "w") as fasta:
        for input_file in gb_files:
            for rec in SeqIO.parse(input_file, "gb"):
                # Fall back to the record id when no accession list is annotated.
                accessions = rec.annotations.get("accessions") or [rec.id]
                acc = accessions[0]
                seq = rec.seq

                for feature in rec.features:
                    if "rRNA" not in feature.type:
                        continue
                    # A feature without a location cannot be sliced out of the record.
                    if feature.location is None:
                        continue

                    if verbose:
                        print(feature)

                    # int() yields the plain integer coordinate; it stands in for the
                    # legacy ``.position`` attribute.
                    # NOTE(review): confirm ``.position`` availability for the installed
                    # Biopython version if reverting.
                    seqstart = int(feature.location.start)
                    seqend = int(feature.location.end)

                    # Default to the forward slice so that a feature with an undefined
                    # strand still gets its own sequence instead of a stale or
                    # unassigned one.
                    rRNAseq = seq[seqstart:seqend]
                    strand = feature.location.strand
                    if strand == 1:
                        strdir = "forward"
                    elif strand == -1:
                        strdir = "revcomp"
                        rRNAseq = rRNAseq.reverse_complement()
                    else:
                        strdir = ""

                    qualifiers = feature.qualifiers
                    # Every qualifier is optional; a missing one becomes an empty field.
                    locus_tag = qualifiers.get("locus_tag", [""])[0]
                    addinfo1 = qualifiers.get("db_xref", [""])[0]
                    # Prefer the product description and fall back to the note.
                    addinfo2 = qualifiers.get("product", qualifiers.get("note", [""]))[0]

                    fasta.write(
                        ">{0}|{1}|{2}-{3}|{4}|{5}|{6}| \n{7}\n".format(
                            acc, locus_tag, seqstart, seqend, strdir,
                            addinfo1, addinfo2, rRNAseq,
                        )
                    )
    
# Mine the rRNA features from the GenBank files in "SC 7000071044/NCBI SC S288C".
SDRNA_RNA_Mining(folder="SC 7000071044", subfolder="NCBI SC S288C")

type: rRNA
location: [6545:8194](+)
qualifiers:
    Key: GO_component, Value: ['GO:0005763 - mitochondrial small ribosomal subunit [Evidence IDA] [PMID 6262728]']
    Key: GO_function, Value: ['GO:0003735 - structural constituent of ribosome [Evidence IC] [PMID 6262728]']
    Key: GO_process, Value: ['GO:0032543 - mitochondrial translation [Evidence IC] [PMID 6262728]']
    Key: db_xref, Value: ['SGD:S000007287']
    Key: gene, Value: ['15S_RRNA']
    Key: locus_tag, Value: ['Q0020']
    Key: note, Value: ['Ribosomal RNA of the small mitochondrial ribosomal subunit; MSU1 allele suppresses ochre stop mutations in mitochondrial protein-coding genes']
    Key: product, Value: ['15S ribosomal RNA']

type: rRNA
location: [58008:62447](+)
qualifiers:
    Key: GO_component, Value: ['GO:0005762 - mitochondrial large ribosomal subunit [Evidence IDA] [PMID 6759872]']
    Key: GO_function, Value: ['GO:0003735 - structural constituent of ribosome [Evidence IMP,ISS] [PMID 6759872]']
    Key: GO_pro

In [None]:
"""
Use the Tkinter user interface to select the files.
On hold: waiting for IT's approval for ActiveTcl.
"""

"""
import Tkinter as tk
import tkFileDialog

root = Tkinter.Tk()
filez = tkFileDialog.askopenfilenames(parent=root,title='Choose a file')
print root.tk.splitlist(filez)

data_folder = Path("Project_3_Seq_Test/KP 7000772268/")
file_to_open = data_folder / "13005730928.gb"
print(file_to_open.read_text())

"""