### Here's how my database is set up:
I've got a table of genes, a table of enzymes, and a table of pathways. Depending on how much time I get later, I might also make some associative tables linking them.

### Gene Table
-- Planned schema for the gene table.
-- id is declared INTEGER (not INT) so that SQLite treats it as the rowid alias
-- and assigns it automatically when a row is inserted without an id.
CREATE TABLE genes (
    id          INTEGER PRIMARY KEY,
    name        TEXT,
    description TEXT,
    organism    TEXT,
    chromosome  TEXT,
    start       INT,
    "end"       INT,         -- quoted because END is an SQL keyword
    strand      VARCHAR(1),
    sequence    TEXT,
    translated  TEXT
);

**Start with name, description, organism, and nucleotide sequence. Additional fields might include chromosome, start and end position, strand, and translated sequence. For eukaryotes, the nucleotide sequence should be the spliced mRNA and the coordinates should span the entire locus.**

### Enzyme Table
-- Planned schema for the enzyme table.
-- id is declared INTEGER (not INT) so that SQLite treats it as the rowid alias
-- and assigns it automatically when a row is inserted without an id.
CREATE TABLE enzymes (
    id       INTEGER PRIMARY KEY,
    name     TEXT,
    function TEXT,
    EC       TEXT,  -- EC numbers are dotted identifiers (e.g. 5.4.2.2), not integers
    pathway  TEXT
);
**EC = Enzyme Commission number.**
**Start with name, function, and Enzyme Commission (EC) number. Multiple genes encode enzymes that perform the same function, so there ought to be fewer enzymes than genes.**

### Pathway Table
-- Planned schema for the pathway table.
-- id is declared INTEGER (not INT) so that SQLite treats it as the rowid alias
-- and assigns it automatically when a row is inserted without an id.
CREATE TABLE pathways (
    id          INTEGER PRIMARY KEY,
    name        TEXT,
    description TEXT
);

In [25]:
# Connect to the metabolism database and ready it to receive input.
import sqlite3
from Bio import Entrez
from Bio import SeqIO
# Contact e-mail attached to the Entrez (NCBI) requests made in later cells.
Entrez.email = 'hverdonk@berkeley.edu'
# Open the SQLite database file 'metabolism.db' (path is relative to the working directory).
conn = sqlite3.connect('metabolism.db')
# Cursor shared by the later cells to execute SQL; they persist changes with conn.commit().
c = conn.cursor()

In [48]:
# Creates the empty Gene Table.
# IF NOT EXISTS makes the cell safe to re-run against an existing metabolism.db;
# a plain CREATE TABLE raises sqlite3.OperationalError ("table genes already exists").
c.execute("""CREATE TABLE IF NOT EXISTS genes (
                 name TEXT,
                 description TEXT,
                 organism TEXT,
                 nt_sequence TEXT)""")
conn.commit()

In [49]:
# Creates the empty Enzyme Table.
# IF NOT EXISTS makes the cell safe to re-run against an existing metabolism.db;
# a plain CREATE TABLE raises sqlite3.OperationalError ("table enzymes already exists").
# EC is declared TEXT because Enzyme Commission numbers are dotted identifiers
# such as '5.4.2.2', which cannot be represented as an integer.
c.execute("""CREATE TABLE IF NOT EXISTS enzymes (
                 name TEXT,
                 function TEXT,
                 EC TEXT,
                 pathway TEXT)""")
conn.commit()

In [33]:
# Creates the empty Pathway Table.
# IF NOT EXISTS makes the cell safe to re-run against an existing metabolism.db;
# a plain CREATE TABLE raises sqlite3.OperationalError ("table pathways already exists").
c.execute("""CREATE TABLE IF NOT EXISTS pathways (
                 name TEXT,
                 description TEXT)""")
conn.commit()

In [34]:
# Fill the Pathway Table with one (name, description) row per pathway.
# The target columns are named explicitly so the insert keeps working if the
# table later gains more columns (e.g. the planned id key), and the values are
# bound as parameters instead of being embedded in the SQL text.
pathway_rows = [
    ('glycolysis',
     'converts glucose into pyruvate to generate ATP and NADH'),
    ('citric acid cycle',
     'the complete oxidation of glucose derivatives to carbon dioxide to produce ATP'),
    ('pentose phosphate pathway',
     'converts glucose into pentose to generate NADPH and ribose 5-phosphate'),
]
c.executemany("INSERT INTO pathways (name, description) VALUES (?, ?)",
              pathway_rows)
conn.commit()

In [35]:
# How to print a whole table: run the query, collect every row into a list
# of tuples, and print that list.
all_pathway_rows = c.execute("""SELECT * FROM pathways""").fetchall()
print(all_pathway_rows)

[('glycolysis', 'converts glucose into pyruvate to generate ATP and NADH'), ('citric acid cycle', 'the complete oxidation of glucose derivatives to carbon dioxide to produce ATP'), ('pentose phosphate pathway', 'converts glucose into pentose to generate NADPH and ribose 5-phosphate')]


In [72]:
# Find accession numbers for nucleotide records that match our search term.
# TODO: for every enzyme, add which pathway it's part of.
handle = Entrez.esearch(db='nucleotide',
                        term='FO203355.1',
                        sort='relevance',
                        idtype='acc')
try:
    accession_ids = Entrez.read(handle)['IdList']
finally:
    # Release the connection once the search result has been parsed.
    handle.close()

# Fetch and print the GenBank record for each accession number found above.
for accession in accession_ids:
    handle = Entrez.efetch(db='nucleotide', id=accession, rettype='gb', retmode='text')
    try:
        record = SeqIO.read(handle, 'gb')
    finally:
        handle.close()
    # Convert the record to text before adding the blank-line padding:
    # `record + '\n'` appends the newlines to the record's sequence instead of
    # to the printed text.
    print(str(record) + '\n' + '\n')

ID: FO203355.1
Name: FO203355
Description: Enterobacter aerogenes EA1509E complete genome
Database cross-references: BioProject:PRJNA61107, BioSample:SAMEA3138432
Number of features: 5375
/molecule_type=DNA
/topology=circular
/data_file_division=BCT
/date=27-FEB-2015
/accessions=['FO203355']
/sequence_version=1
/keywords=['']
/source=Klebsiella aerogenes EA1509E
/organism=Klebsiella aerogenes EA1509E
/taxonomy=['Bacteria', 'Proteobacteria', 'Gammaproteobacteria', 'Enterobacterales', 'Enterobacteriaceae', 'Klebsiella']
/references=[Reference(title="The rhizome of the multidrug-resistant Enterobacter aerogenes genome reveals how new 'killer bugs' are created because of a sympatric lifestyle", ...), Reference(title='Direct Submission', ...)]
Seq('CCCGGTTCTCTCCGGACCTAAAACTGTGTTAAACTCTTCGCGTTTTTCTGTAAG...G

', IUPACAmbiguousDNA())


### Enzymes:
**Choose 4 enzymes per pathway, and choose 3 genes for each enzyme (one for human, one for *Drosophila*, one for *E. coli*).**

(accession #, name, desc)

glycolysis: 
    K01085 agp     glucose-1-phosphatase            *E. coli*
    
    K01623 ALDO    fructose-bisphosphate-aldolase   *
    
    K01689 ENO,eno enolase
    
    K13951 ADH1_7  alcohol dehydrogenase 1/7
    
citric acid:
pathways:

In [74]:
# EC-number search terms collected so far for each pathway.
citric_acid_terms = ['EC 2.3.1.12', 'EC 1.2.4.1', 'EC 1.8.1.4', 'EC 4.1.1.49', 'EC 4.1.1.32']
glycolysis_terms = ['EC 5.4.2.2', 'EC 5.3.1.9']
pentose_phosphate_terms = []  # no terms chosen yet

# Find accession numbers for proteins that match our search term
# (need name, function, EC#).
handle = Entrez.esearch(db='protein',
                        term='EC 5.4.2.2',
                        sort='relevance',
                        idtype='acc')
try:
    accession_ids = Entrez.read(handle)['IdList']
finally:
    # Release the connection once the search result has been parsed.
    handle.close()

# Fetch the GenBank record for each accession number found above and print
# its description line followed by its list of accessions.
for accession in accession_ids:
    handle = Entrez.efetch(db='protein', id=accession, rettype='gb', retmode='text')
    try:
        temp = SeqIO.read(handle, 'gb')
    finally:
        handle.close()
    print(temp.description)
    print(temp.annotations['accessions'])

Phosphoglucomutase (EC 5.4.2.2) [Klebsiella aerogenes EA1509E]
['CCG31158']
phosphoglucomutase (ec 5.4.2.2) [Photorhabdus asymbiotica subsp. asymbiotica ATCC 43949]
['CAR67580']
Phosphoglucomutase (EC 5.4.2.2) [Enterococcus faecium]
['CUX98262']
Phosphoglucomutase (EC 5.4.2.2) [Streptococcus pneumoniae SPNA45]
['CCM08969']
phosphoglucomutase [Enterobacter roggenkampii EC_38VIM1]
['EPY97605']
phosphoglucomutase [Escherichia coli]
['PVH54613']
phosphoglucomutase [Escherichia coli]
['PVH53649']
phosphoglucomutase [Escherichia coli]
['PVH42237']
alpha-phosphoglucomutase / phosphomannomutase [Methanocella conradii HZ254]
['AFD00006']
phophoglucomutase-1 protein, putative [Cryptosporidium muris RN66]
['EEA08500']
pgm [Corynebacterium jeikeium K411]
['CAI36625']
Phosphoglucomutase [Bartonella tribocorum CIP 105476]
['CAK02027']
phosphoglucomutase [Neisseria meningitidis Z2491]
['CAM08223']
phosphoglucomutase [Ralstonia pickettii DTP0602]
['AGW91639']
phosphoglucomutase, alpha-D-glucose phosph