# Try to map JGI locus tags (Ga0398403_#) to NCBI locus tags (EQU24_RS#####) for _M. buryatense_

In [1]:
from Bio import SeqIO
import pandas as pd

## Load NCBI genbank file and parse out features
NCBI genbank file - I used one we downloaded to waffle in April 2022
it looks like this is an updated link to the file? 
https://www.ncbi.nlm.nih.gov/datasets/genome/GCF_005931095.1/ 

or here if you click "Download" and choose "Sequence and annotation (GBFF)"
https://www.ncbi.nlm.nih.gov/datasets/taxonomy/95641/

it's hard to trace files when websites get updated year to year >.<

In [2]:
gbFile_5G = 'data/genomes/5GB1c_sequence_20220411.gb'

In [3]:
# feature tuple indices
LEFT_IDX = 0
RIGHT_IDX = 1
STRAND_IDX = 2
LOCUS_IDX = 3
GENE_IDX = 4
TYPE_IDX = 5
PROD_IDX = 6

def get_feature_tuples_from_genbank(gb_file):
    '''
    Given a genbank file, parse out all of it's features into a 6-tuple 
    of (start_coord, end_coord,locus_tag,gene_symbol,type,product).
    '''
    # Use BioPython genbank parser
    seq_record = SeqIO.parse(gb_file, "genbank").__next__()
    
    feat_list = []
    # Loop over the genome file, get the CDS features on each of the strands
    for feature in seq_record.features:
        if 'locus_tag' in feature.qualifiers:
            # get  locus tag 
            lt = feature.qualifiers['locus_tag'][0]
            # get the gene symbol if available, otherwise leave blank
            g = "" if 'gene' not in feature.qualifiers else feature.qualifiers['gene'][0]
            prod = "" if 'product' not in feature.qualifiers else feature.qualifiers['product'][0]
            
            feat_list.append((feature.location.start.position,
                             feature.location.end.position,
                             feature.strand,
                             lt,
                             g,
                             feature.type,
                             prod))
            
    return feat_list

def get_gbfeats2exclude(filename):
    '''
    Open the config file specifying which kinds of genbank features
    to exclude when extracting pos and neg feats (these are typically
    features that a user may find are just redundant entries. For example,
    in the M. buryatense genbank file, there are duplicate entries labeled
    as "CDS" and "gene" that have the same coordinate/locus tag. I decided
    to go with "CDS" and exclude the "gene" version of each feature. 
    
    A user can amend this configuration in config/gbfeats2exclude.txt"
    '''
    with open(filename,'r') as f:
        lines = [x.strip() for x in f.readlines()]
    return lines


In [4]:
# extract all the genbank features
feats = get_feature_tuples_from_genbank(gbFile_5G)

# filter out some duplicates due to genbank formatting
gbfeats2exclude = get_gbfeats2exclude('config/gbfeats2exclude.txt')
feats_filt = [x for x in feats if (x[TYPE_IDX] not in gbfeats2exclude)]

# print example CDS entries extracted into a list
feats_filt[:5]

[(0,
  1317,
  1,
  'EQU24_RS00005',
  'dnaA',
  'CDS',
  'chromosomal replication initiator protein DnaA'),
 (1502,
  2603,
  1,
  'EQU24_RS00010',
  'dnaN',
  'CDS',
  'DNA polymerase III subunit beta'),
 (3060,
  4140,
  1,
  'EQU24_RS00015',
  'recF',
  'CDS',
  'DNA replication/repair protein RecF'),
 (4185,
  6600,
  1,
  'EQU24_RS00020',
  'gyrB',
  'CDS',
  'DNA topoisomerase (ATP-hydrolyzing) subunit B'),
 (7350, 7734, 1, 'EQU24_RS00035', '', 'CDS', 'hypothetical protein')]

In [5]:
# use the start and stop coord as a unique key to find the ncbi locus tag
start_stop_ncbi = dict([((x[LEFT_IDX],x[RIGHT_IDX]),x) for x in feats_filt])
start_stop_ncbi

{(0, 1317): (0,
  1317,
  1,
  'EQU24_RS00005',
  'dnaA',
  'CDS',
  'chromosomal replication initiator protein DnaA'),
 (1502, 2603): (1502,
  2603,
  1,
  'EQU24_RS00010',
  'dnaN',
  'CDS',
  'DNA polymerase III subunit beta'),
 (3060, 4140): (3060,
  4140,
  1,
  'EQU24_RS00015',
  'recF',
  'CDS',
  'DNA replication/repair protein RecF'),
 (4185, 6600): (4185,
  6600,
  1,
  'EQU24_RS00020',
  'gyrB',
  'CDS',
  'DNA topoisomerase (ATP-hydrolyzing) subunit B'),
 (7350, 7734): (7350,
  7734,
  1,
  'EQU24_RS00035',
  '',
  'CDS',
  'hypothetical protein'),
 (7818, 9075): (7818,
  9075,
  1,
  'EQU24_RS00040',
  '',
  'CDS',
  'TolC family protein'),
 (9071, 10241): (9071,
  10241,
  1,
  'EQU24_RS00045',
  '',
  'CDS',
  'efflux RND transporter periplasmic adaptor subunit'),
 (10240, 13306): (10240,
  13306,
  1,
  'EQU24_RS00050',
  '',
  'CDS',
  'CusA/CzcA family heavy metal efflux RND transporter'),
 (13298, 13604): (13298,
  13604,
  1,
  'EQU24_RS00055',
  '',
  'CDS',
  'DUF

## Load in table of JGI genes sent from Mary

In [6]:
jgi_table = pd.read_csv('data/jgi_5GB1C_genes.txt',sep='\t')

# looks like JGI has the start coord shifted by 1, so subtract one from all start coords
jgi_table['start_shift'] = jgi_table['start'] - 1

# switch to use 1/-1 for +/- strand
jgi_table['strand_alt'] = jgi_table['strand'].apply(lambda x: 1 if x=='+' else -1)

jgi_table.head()

Unnamed: 0,genome,type,start,stop,strand,gene_id,locus_tag,product,Unnamed: 8,Unnamed: 9,Unnamed: 10,start_shift,strand_alt
0,Ga0398403_01,CDS,82,1317,+,2839902986,Ga0398403_1,chromosomal replication initiator protein,,,,81,1
1,Ga0398403_01,CDS,1503,2603,+,2839902987,Ga0398403_2,DNA polymerase-3 subunit beta,,,,1502,1
2,Ga0398403_01,CDS,3061,4140,+,2839902988,Ga0398403_3,DNA replication and repair protein RecF,,,,3060,1
3,Ga0398403_01,CDS,4186,6600,+,2839902989,Ga0398403_4,DNA gyrase subunit B,,,,4185,1
4,Ga0398403_01,CDS,7351,7734,+,2839902990,Ga0398403_5,hypothetical protein,,,,7350,1


In [7]:
# get just the tuple elements we need for processing matches
jgi_feat_tuples = jgi_table[['start_shift','stop','strand_alt','locus_tag','gene_id','type','product']]
jgi_feat_tuples.head()

Unnamed: 0,start_shift,stop,strand_alt,locus_tag,gene_id,type,product
0,81,1317,1,Ga0398403_1,2839902986,CDS,chromosomal replication initiator protein
1,1502,2603,1,Ga0398403_2,2839902987,CDS,DNA polymerase-3 subunit beta
2,3060,4140,1,Ga0398403_3,2839902988,CDS,DNA replication and repair protein RecF
3,4185,6600,1,Ga0398403_4,2839902989,CDS,DNA gyrase subunit B
4,7350,7734,1,Ga0398403_5,2839902990,CDS,hypothetical protein


In [8]:
# make a dict of the locus tag to the start-stop coords
jgi_locus2coords = dict([((x[LOCUS_IDX],(x[LEFT_IDX],x[RIGHT_IDX]))) for x in jgi_feat_tuples.values])
jgi_locus2coords

{'Ga0398403_1': (81, 1317),
 'Ga0398403_2': (1502, 2603),
 'Ga0398403_3': (3060, 4140),
 'Ga0398403_4': (4185, 6600),
 'Ga0398403_5': (7350, 7734),
 'Ga0398403_6': (7818, 9075),
 'Ga0398403_7': (9071, 10241),
 'Ga0398403_8': (10237, 13306),
 'Ga0398403_9': (13298, 13604),
 'Ga0398403_10': (13842, 16413),
 'Ga0398403_11': (16806, 19176),
 'Ga0398403_12': (19263, 19479),
 'Ga0398403_13': (19487, 19799),
 'Ga0398403_14': (19905, 20091),
 'Ga0398403_15': (20389, 21376),
 'Ga0398403_16': (21633, 22416),
 'Ga0398403_17': (22412, 23246),
 'Ga0398403_18': (23251, 23782),
 'Ga0398403_19': (23841, 24324),
 'Ga0398403_20': (24340, 24994),
 'Ga0398403_21': (25137, 25554),
 'Ga0398403_22': (25560, 26349),
 'Ga0398403_23': (26266, 26707),
 'Ga0398403_24': (27224, 29585),
 'Ga0398403_25': (29858, 31106),
 'Ga0398403_26': (31932, 32040),
 'Ga0398403_27': (32310, 32679),
 'Ga0398403_28': (32679, 33453),
 'Ga0398403_29': (33495, 34086),
 'Ga0398403_30': (34142, 34445),
 'Ga0398403_31': (34455, 35997),
 

In [9]:
# make a dict of the start-stop coords to the rest of the info
start_stop_jgi = dict([((x[LEFT_IDX],x[RIGHT_IDX]),tuple(x)) for x in jgi_feat_tuples.values])
start_stop_jgi

{(81, 1317): (81,
  1317,
  1,
  'Ga0398403_1',
  2839902986,
  'CDS',
  'chromosomal replication initiator protein'),
 (1502, 2603): (1502,
  2603,
  1,
  'Ga0398403_2',
  2839902987,
  'CDS',
  'DNA polymerase-3 subunit beta'),
 (3060, 4140): (3060,
  4140,
  1,
  'Ga0398403_3',
  2839902988,
  'CDS',
  'DNA replication and repair protein RecF'),
 (4185, 6600): (4185,
  6600,
  1,
  'Ga0398403_4',
  2839902989,
  'CDS',
  'DNA gyrase subunit B'),
 (7350, 7734): (7350,
  7734,
  1,
  'Ga0398403_5',
  2839902990,
  'CDS',
  'hypothetical protein'),
 (7818, 9075): (7818,
  9075,
  1,
  'Ga0398403_6',
  2839902991,
  'CDS',
  'outer membrane protein TolC'),
 (9071, 10241): (9071,
  10241,
  1,
  'Ga0398403_7',
  2839902992,
  'CDS',
  'cobalt-zinc-cadmium efflux system membrane fusion protein'),
 (10237, 13306): (10237,
  13306,
  1,
  'Ga0398403_8',
  2839902993,
  'CDS',
  'cobalt-zinc-cadmium resistance protein CzcA'),
 (13298, 13604): (13298,
  13604,
  1,
  'Ga0398403_9',
  28399029

In [10]:
#stop check some example coords
coords = (97499, 98117)#(89828, 90221)#(9071, 10241)

print(start_stop_ncbi[coords])
print(start_stop_jgi[coords])

(97499, 98117, -1, 'EQU24_RS00470', '', 'CDS', 'GDSL-type esterase/lipase family protein')
(97499, 98117, -1, 'Ga0398403_92', 2839903077, 'CDS', 'lysophospholipase L1-like esterase')


## Load in the table of unique 5G genes (not in 20Z)

In [11]:
# load the Unique to 5G gene file
uniq = pd.read_csv('data/unique_5GB1C_vs_20Z.txt',sep='\t')
uniq.head()

Unnamed: 0,Result,Gene ID,Locus Tag,Gene Name,Length,Unnamed: 5
0,1,2839902999,Ga0398403_14,hypothetical protein,61,
1,2,2839903008,Ga0398403_23,hypothetical protein,146,
2,3,2839903011,Ga0398403_26,hypothetical protein,35,
3,4,2839903022,Ga0398403_37,hypothetical protein,55,
4,5,2839903023,Ga0398403_38,transposase,392,


In [12]:
uniq_loci = uniq['Locus Tag'].values
uniq_loci

array(['Ga0398403_14', 'Ga0398403_23', 'Ga0398403_26', 'Ga0398403_37',
       'Ga0398403_38', 'Ga0398403_39', 'Ga0398403_40', 'Ga0398403_41',
       'Ga0398403_43', 'Ga0398403_44', 'Ga0398403_47', 'Ga0398403_58',
       'Ga0398403_64', 'Ga0398403_150', 'Ga0398403_160', 'Ga0398403_161',
       'Ga0398403_172', 'Ga0398403_181', 'Ga0398403_193', 'Ga0398403_200',
       'Ga0398403_201', 'Ga0398403_208', 'Ga0398403_215', 'Ga0398403_217',
       'Ga0398403_218', 'Ga0398403_219', 'Ga0398403_225', 'Ga0398403_226',
       'Ga0398403_230', 'Ga0398403_232', 'Ga0398403_233', 'Ga0398403_234',
       'Ga0398403_235', 'Ga0398403_240', 'Ga0398403_241', 'Ga0398403_242',
       'Ga0398403_243', 'Ga0398403_244', 'Ga0398403_245', 'Ga0398403_248',
       'Ga0398403_249', 'Ga0398403_250', 'Ga0398403_251', 'Ga0398403_252',
       'Ga0398403_253', 'Ga0398403_254', 'Ga0398403_255', 'Ga0398403_256',
       'Ga0398403_257', 'Ga0398403_258', 'Ga0398403_259', 'Ga0398403_261',
       'Ga0398403_262', 'Ga0398403_263

## Compare the unique JGI coordinates and NCBI coordinates to find exact matches
Collect info about JGI genes that don't have a match

In [27]:
# set up dataframe organization
aligned_cols = ['jgi_start','jgi_stop','jgi_strand','jgi_locus_tag','jgi_gene_id','jgi_type','jgi_product',
                'ncbi_start','ncbi_stop','ncbi_strand','ncbi_locus_tag','ncbi_gene_id','ncbi_type','ncbi_product']
aligned_loci = []

# collect JGI genes without exact coord matches
manual_lookups = 0
manual_loci = []

# loop through uniq loci and collect table info
for loc in uniq_loci:
    # get the jgi coords
    jgi_coords = jgi_locus2coords[loc]
    jgi_row = list(start_stop_jgi[jgi_coords])
    
    # if these coordinates exactly match something in NCBI, retrieve info
    if jgi_coords in start_stop_ncbi:
        ncbi_coords = jgi_coords
        ncbi_row = list(start_stop_ncbi[ncbi_coords])

    # if no match, it will have to be looked up manually
    else:
        #print(f"JGI loc {loc} ({jgi_coords}) did not have match in NCBI - manual")
        manual_loci.append(loc)
        manual_lookups += 1
        ncbi_row = ['MANUAL','MANUAL','MANUAL','MANUAL','MANUAL','MANUAL','MANUAL']
        
    row = jgi_row + ncbi_row
    aligned_loci.append(row)
        
print()
print("Manual lookups:", manual_lookups)



Manual lookups: 218


In [24]:
df = pd.DataFrame(aligned_loci, columns=aligned_cols)
df

Unnamed: 0,jgi_start,jgi_stop,jgi_strand,jgi_locus_tag,jgi_gene_id,jgi_type,jgi_product,ncbi_start,ncbi_stop,ncbi_strand,ncbi_locus_tag,ncbi_gene_id,ncbi_type,ncbi_product
0,19905,20091,-1,Ga0398403_14,2839902999,CDS,hypothetical protein,MANUAL,MANUAL,MANUAL,MANUAL,MANUAL,MANUAL,MANUAL
1,26266,26707,-1,Ga0398403_23,2839903008,CDS,hypothetical protein,MANUAL,MANUAL,MANUAL,MANUAL,MANUAL,MANUAL,MANUAL
2,31932,32040,1,Ga0398403_26,2839903011,CDS,hypothetical protein,MANUAL,MANUAL,MANUAL,MANUAL,MANUAL,MANUAL,MANUAL
3,40300,40468,1,Ga0398403_37,2839903022,CDS,hypothetical protein,MANUAL,MANUAL,MANUAL,MANUAL,MANUAL,MANUAL,MANUAL
4,40588,41767,1,Ga0398403_38,2839903023,CDS,transposase,40588,41767,1,EQU24_RS00180,,CDS,ISAzo13 family transposase
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
801,4931344,4931812,1,Ga0398403_4469,2839907447,CDS,predicted HicB family RNase H-like nuclease,4931344,4931812,1,EQU24_RS21865,,CDS,type II toxin-antitoxin system HicB family ant...
802,4931798,4932107,1,Ga0398403_4470,2839907448,CDS,hypothetical protein,4931798,4932107,1,EQU24_RS21870,,CDS,hypothetical protein
803,4937246,4939529,-1,Ga0398403_4473,2839907451,CDS,hypothetical protein,4937246,4939529,-1,EQU24_RS21885,,CDS,2OG-Fe(II) oxygenase
804,4966593,4966863,1,Ga0398403_4495,2839907473,CDS,hypothetical protein,MANUAL,MANUAL,MANUAL,MANUAL,MANUAL,MANUAL,MANUAL


In [25]:
# output file of mappings
df.to_csv('jgi_ncbi_aln_by_start_stop_coord.tsv',sep='\t',index=False)

## What to do about manual lookups?
Try to find NCBI genes that have some coordinate overlap? Maybe this will help find more matches with slightly shifted genome coordinates?

Otherwise, you'd probably have to BLAST to find matches between the genomes.

In [30]:
def get_feats_from_genbank(gb_file):
    '''
    Given a genbank file, parse out all of it's features into raw SeqIO objects.
    '''
    # Use BioPython genbank parser
    seq_record = SeqIO.parse(gb_file, "genbank").__next__()
    
    feat_list = []
    gbfeats2exclude = get_gbfeats2exclude('config/gbfeats2exclude.txt')
    
    # Loop over the genome file, get the CDS features on each of the strands
    for feature in seq_record.features:
        if 'locus_tag' in feature.qualifiers:
            if feature.type not in gbfeats2exclude:
                feat_list.append(feature)
    return feat_list

In [31]:
feats = get_feats_from_genbank(gbFile_5G)
feats[:5]

[SeqFeature(FeatureLocation(ExactPosition(0), ExactPosition(1317), strand=1), type='CDS'),
 SeqFeature(FeatureLocation(ExactPosition(1502), ExactPosition(2603), strand=1), type='CDS'),
 SeqFeature(FeatureLocation(ExactPosition(3060), ExactPosition(4140), strand=1), type='CDS'),
 SeqFeature(FeatureLocation(ExactPosition(4185), ExactPosition(6600), strand=1), type='CDS'),
 SeqFeature(FeatureLocation(ExactPosition(7350), ExactPosition(7734), strand=1), type='CDS')]

In [47]:
# Get nearby feature hints for missing JGI loci
for locus in manual_loci:
    print("---------------------")
    jgi_coords = jgi_locus2coords[locus]
    jgi_prod = start_stop_jgi[jgi_coords][PROD_IDX]
    print(f"Hints for JGI locus {locus} ({prod}) with coords {jgi_coords}")
    
    # find overlapping NCBI features
    for feat in feats:
        # If the JGI start or stop coord overlap with the NCBI feature, make a note
        if (jgi_coords[0] in feat) or (jgi_coords[1] in feat):
            # Collect NCBI feature details
            lt = feat.qualifiers['locus_tag'][0] # locus tag
            g = "" if 'gene' not in feat.qualifiers else feat.qualifiers['gene'][0]
            prod = "" if 'product' not in feat.qualifiers else feat.qualifiers['product'][0]
            start = feat.location.start.position
            stop = feat.location.end.position
            
            feat_tuple = (start,stop,feat.strand,lt,g,feat.type,prod)
            
            # print some summary info
            print(f"-->Overlapping NCBI feature {lt} with coords ({start},{stop})")
            print(f"-->{feat_tuple}")
            print("___Full Genbank entry___")
            print(feat)
    print("---------------------")
    print()

---------------------
Hints for JGI locus Ga0398403_14 (hypothetical protein) with coords (19905, 20091)
---------------------

---------------------
Hints for JGI locus Ga0398403_23 (hypothetical protein) with coords (26266, 26707)
-->Overlapping NCBI feature EQU24_RS00115 with coords (25557,26349)
-->(25557, 26349, 1, 'EQU24_RS00115', 'speD', 'CDS', 'adenosylmethionine decarboxylase')
___Full Genbank entry___
type: CDS
location: [25557:26349](+)
qualifiers:
    Key: EC_number, Value: ['4.1.1.50']
    Key: GO_function, Value: ['GO:0004014 - adenosylmethionine decarboxylase activity [Evidence IEA]']
    Key: GO_process, Value: ['GO:0008295 - spermidine biosynthetic process [Evidence IEA]']
    Key: codon_start, Value: ['1']
    Key: gene, Value: ['speD']
    Key: inference, Value: ['COORDINATES: similar to AA sequence:RefSeq:WP_005411315.1']
    Key: locus_tag, Value: ['EQU24_RS00115']
    Key: note, Value: ['Derived by automated computational analysis using gene prediction method: Pro

type: CDS
location: [968345:968924](-)
qualifiers:
    Key: GO_component, Value: ['GO:0015627 - type II protein secretion system complex [Evidence IEA]']
    Key: codon_start, Value: ['1']
    Key: inference, Value: ['COORDINATES: protein motif:HMM:NF023444.2']
    Key: locus_tag, Value: ['EQU24_RS04615']
    Key: note, Value: ['Derived by automated computational analysis using gene prediction method: Protein Homology.']
    Key: old_locus_tag, Value: ['EQU24_04615']
    Key: product, Value: ['GspH/FimT family pseudopilin']
    Key: protein_id, Value: ['WP_017840625.1']
    Key: transl_table, Value: ['11']
    Key: translation, Value: ['MAKPMPKYSKIHYGFTLIELMMTIAIGAIVLTLAVPSFNTVIRNDRLTTRTNELVASLNFARSEAIKRGIRVTACKSQNPNATPPSCTTSNSVNWSIGWIIFTDPNNNATFDSNTETLLRIQENPLTNITMTGSLNIANYISFVASGQSRLTNGNHQSGNIKVCDDRTGNIGVNIALNNAGRLLTQQEIACP']

---------------------

---------------------
Hints for JGI locus Ga0398403_939 (GspH/FimT family pseudopilin) with coords (971057, 971261)
--------------------

---------------------

---------------------
Hints for JGI locus Ga0398403_2156 (methyl-accepting chemotaxis protein) with coords (2373471, 2373741)
-->Overlapping NCBI feature EQU24_RS10675 with coords (2373471,2374449)
-->(2373471, 2374449, 1, 'EQU24_RS10675', '', 'CDS', 'transposase')
___Full Genbank entry___
type: CDS
location: [2373471:2374449](+)
qualifiers:
    Key: codon_start, Value: ['1']
    Key: inference, Value: ['COORDINATES: protein motif:HMM:NF024935.2']
    Key: locus_tag, Value: ['EQU24_RS10675']
    Key: note, Value: ['internal stop; Derived by automated computational analysis using gene prediction method: Protein Homology.']
    Key: old_locus_tag, Value: ['EQU24_10675']
    Key: product, Value: ['transposase']
    Key: pseudo, Value: ['']
    Key: transl_table, Value: ['11']

---------------------

---------------------
Hints for JGI locus Ga0398403_2162 (transposase) with coords (2376844, 2377063)
-->Overlapping NCBI feature EQU24_RS10690 with coords (2377018,2379

---------------------

---------------------
Hints for JGI locus Ga0398403_3115 (AI-2E family transporter) with coords (3428772, 3429036)
-->Overlapping NCBI feature EQU24_RS15340 with coords (3428312,3428828)
-->(3428312, 3428828, 1, 'EQU24_RS15340', '', 'CDS', 'adenine phosphoribosyltransferase')
___Full Genbank entry___
type: CDS
location: [3428312:3428828](+)
qualifiers:
    Key: EC_number, Value: ['2.4.2.7']
    Key: GO_function, Value: ['GO:0003999 - adenine phosphoribosyltransferase activity [Evidence IEA]']
    Key: GO_process, Value: ['GO:0009116 - nucleoside metabolic process [Evidence IEA]']
    Key: codon_start, Value: ['1']
    Key: inference, Value: ['COORDINATES: similar to AA sequence:RefSeq:WP_018054435.1']
    Key: locus_tag, Value: ['EQU24_RS15340']
    Key: note, Value: ['Derived by automated computational analysis using gene prediction method: Protein Homology.']
    Key: old_locus_tag, Value: ['EQU24_15345']
    Key: product, Value: ['adenine phosphoribosyltransfe

---------------------

---------------------
Hints for JGI locus Ga0398403_4125 (MMPL family transporter) with coords (4567173, 4567281)
---------------------

---------------------
Hints for JGI locus Ga0398403_4127 (MMPL family transporter) with coords (4568962, 4569070)
---------------------

---------------------
Hints for JGI locus Ga0398403_4168 (MMPL family transporter) with coords (4605232, 4605445)
-->Overlapping NCBI feature EQU24_RS20420 with coords (4604616,4605285)
-->(4604616, 4605285, 1, 'EQU24_RS20420', '', 'CDS', 'F0F1 ATP synthase subunit A')
___Full Genbank entry___
type: CDS
location: [4604616:4605285](+)
qualifiers:
    Key: EC_number, Value: ['7.1.2.2']
    Key: GO_component, Value: ['GO:0045263 - proton-transporting ATP synthase complex, coupling factor F(o) [Evidence IEA]']
    Key: GO_function, Value: ['GO:0015078 - proton transmembrane transporter activity [Evidence IEA]']
    Key: GO_process, Value: ['GO:0015986 - ATP synthesis coupled proton transport [Evide