### Here's how my database is set up:
I've got a table of genes, a table of enzymes, and a table of pathways. Depending on how much time I get later, I might also make some associative tables linking them.

### Gene Table
-- Planned schema for the gene table.
-- id is INTEGER PRIMARY KEY so SQLite treats it as the rowid alias and assigns it
-- automatically; a column declared "INT PRIMARY KEY" does not get that behavior.
-- "end" is quoted because END is an SQL keyword.
CREATE TABLE genes (id INTEGER PRIMARY KEY, name TEXT, description TEXT, organism TEXT,
chromosome TEXT, start INT, "end" INT, strand VARCHAR(1), sequence TEXT, translated TEXT);

**Start with name, description, organism, and nucleotide sequence. Additional fields might include chromosome, start and end position, strand, and translated sequence. For eukaryotes, the nucleotide sequence should be the spliced mRNA and the coordinates should span the entire locus.**

### Enzyme Table
-- Planned schema for the enzyme table.
-- id is INTEGER PRIMARY KEY so SQLite uses it as the auto-assigned rowid alias.
-- EC is TEXT because Enzyme Commission numbers are dotted identifiers (e.g. 3.1.3.10), not integers.
CREATE TABLE enzymes (id INTEGER PRIMARY KEY, name TEXT, function TEXT,
EC TEXT, pathway TEXT);
**EC = Enzyme Commission number**
**name, function, and enzyme commission (EC) number. Multiple genes encode enzymes that perform the same function, so there ought to be fewer enzymes than genes.**

### Pathway Table
-- Planned schema for the pathway table.
-- id is INTEGER PRIMARY KEY so SQLite uses it as the auto-assigned rowid alias.
CREATE TABLE pathways (id INTEGER PRIMARY KEY, name TEXT, description TEXT);

In [25]:
# Notebook setup: open the metabolism database and configure the Entrez client.
import sqlite3

from Bio import Entrez, SeqIO

# Connection and cursor used by the table-creation and query cells below.
conn = sqlite3.connect('metabolism.db')
c = conn.cursor()

# Contact address sent along with Entrez requests.
Entrez.email = 'hverdonk@berkeley.edu'

In [48]:
# Creates the empty Gene Table (name, description, organism, nucleotide sequence).
# IF NOT EXISTS keeps this cell safe to re-run: a plain CREATE TABLE raises
# sqlite3.OperationalError once the table already exists in the database.
c.execute("""CREATE TABLE IF NOT EXISTS genes (name TEXT,
                                              description TEXT,
                                              organism TEXT,
                                              nt_sequence TEXT)""")
conn.commit()

In [49]:
# Creates the empty Enzyme Table (name, function, EC number, pathway).
# IF NOT EXISTS keeps this cell safe to re-run: a plain CREATE TABLE raises
# sqlite3.OperationalError once the table already exists in the database.
# EC is declared TEXT because Enzyme Commission numbers are dotted identifiers
# (e.g. 3.1.3.10) and cannot be represented as an integer.
c.execute("""CREATE TABLE IF NOT EXISTS enzymes (name TEXT,
                                                function TEXT,
                                                EC TEXT,
                                                pathway TEXT)""")
conn.commit()

In [33]:
# Creates the empty Pathway Table (name, description).
# IF NOT EXISTS keeps this cell safe to re-run: a plain CREATE TABLE raises
# sqlite3.OperationalError once the table already exists in the database.
c.execute("""CREATE TABLE IF NOT EXISTS pathways (name TEXT,
                                                 description TEXT)""")
conn.commit()

In [34]:
# Fill the Pathway Table with the three pathways covered by this notebook.
# The target columns are named explicitly so the insert does not depend on the
# table's column order or count, and the values are bound as parameters rather
# than written into the SQL text.
pathway_rows = [
    ('glycolysis',
     'converts glucose into pyruvate to generate ATP and NADH'),
    ('citric acid cycle',
     'the complete oxidation of glucose derivatives to carbon dioxide to produce ATP'),
    ('pentose phosphate pathway',
     'converts glucose into pentose to generate NADPH and ribose 5-phosphate'),
]
c.executemany("INSERT INTO pathways (name, description) VALUES (?, ?)", pathway_rows)
conn.commit()

In [35]:
# Dump every row of the pathways table as a list of tuples.
all_pathways = c.execute("""SELECT * FROM pathways""").fetchall()
print(all_pathways)

[('glycolysis', 'converts glucose into pyruvate to generate ATP and NADH'), ('citric acid cycle', 'the complete oxidation of glucose derivatives to carbon dioxide to produce ATP'), ('pentose phosphate pathway', 'converts glucose into pentose to generate NADPH and ribose 5-phosphate')]


In [47]:
# Finds accession numbers for nucleotide records that match our search term
# (for every enzyme, add which pathway it's part of).
handle = Entrez.esearch(db='nucleotide',
                        term='homo sapiens[ORGN] BRCA1',
                        sort='relevance',
                        idtype='acc')
try:
    accessions = Entrez.read(handle)['IdList']
finally:
    # Release the search connection before opening one per record.
    handle.close()

# Fetches and prints the GenBank record for each accession number found above.
for accession in accessions:
    fetch_handle = Entrez.efetch(db='nucleotide', id=accession, rettype='gb', retmode='text')
    try:
        record = SeqIO.read(fetch_handle, 'gb')
    finally:
        fetch_handle.close()
    # Convert to str before adding the blank-line separator: SeqRecord + str
    # appends the text to the sequence itself, which is how '\n\n' ended up
    # inside Seq(...) in the printed records.
    print(str(record) + '\n' + '\n')

ID: MF945608.1
Name: MF945608
Description: Homo sapiens isolate 44 BRCA1 (BRCA1) gene, partial cds
Number of features: 4
/molecule_type=DNA
/topology=linear
/data_file_division=PRI
/date=23-AUG-2018
/accessions=['MF945608']
/sequence_version=1
/keywords=['']
/source=Homo sapiens (human)
/organism=Homo sapiens
/taxonomy=['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Mammalia', 'Eutheria', 'Euarchontoglires', 'Primates', 'Haplorrhini', 'Catarrhini', 'Hominidae', 'Homo']
/references=[Reference(title='BRCA1 novel variation V1736D and in silico analysis of SNP Q356R in Sudanese patients with breast cancer', ...), Reference(title='Direct Submission', ...)]
/structured_comment=OrderedDict([('Assembly-Data', OrderedDict([('Assembly Method', 'BioEdit v. 7.0.9.0'), ('Sequencing Technology', 'Sanger dideoxy sequencing')]))])
Seq('CTCACTAAAGACAGAATGAATGTAGAAAAGGCTGAATTCTGTAATAAAAGCAAA...G

', IUPACAmbiguousDNA())
ID: MF945607.1
Name: MF945607
Description: Homo sapi

ID: FJ940752.1
Name: FJ940752
Description: Homo sapiens BRCA1 (BRCA1) gene, exon 18 and partial cds
Number of features: 8
/molecule_type=DNA
/topology=linear
/data_file_division=PRI
/date=24-JUL-2016
/accessions=['FJ940752']
/sequence_version=1
/keywords=['']
/source=Homo sapiens (human)
/organism=Homo sapiens
/taxonomy=['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Mammalia', 'Eutheria', 'Euarchontoglires', 'Primates', 'Haplorrhini', 'Catarrhini', 'Hominidae', 'Homo']
/references=[Reference(title='Novel mutation (c.5118-20 del AAT; p.1707 del isoleucine) in exon 18 of BRCA1 gene', ...), Reference(title='Direct Submission', ...)]
Seq('GGCTCTTTAGCTTCTTAGGACAGCACTTCCTGATTTTGTTTTCAACTTCTAATC...C

', IUPACAmbiguousDNA())
ID: KR711445.1
Name: KR711445
Description: Synthetic construct Homo sapiens clone CCSBHm_00023739 BRCA1 (BRCA1) mRNA, encodes complete protein
Number of features: 5
/molecule_type=DNA
/topology=linear
/data_file_division=SYN
/date=01-JUN-20

ID: Y08757.1
Name: Y08757
Description: H.sapiens BRCA1 gene, breast and ovarian cancer rearrangement
Number of features: 11
/molecule_type=DNA
/topology=linear
/data_file_division=PRI
/date=26-JUL-2016
/accessions=['Y08757']
/sequence_version=1
/keywords=['BRCA1 gene']
/source=Homo sapiens (human)
/organism=Homo sapiens
/taxonomy=['Eukaryota', 'Metazoa', 'Chordata', 'Craniata', 'Vertebrata', 'Euteleostomi', 'Mammalia', 'Eutheria', 'Euarchontoglires', 'Primates', 'Haplorrhini', 'Catarrhini', 'Hominidae', 'Homo']
/references=[Reference(title='Germ-line rearrangement of the BRCA1 gene in a breast and ovarian cancer family', ...), Reference(title='Direct Submission', ...)]
Seq('GCCGTGTCTGGCCCAGTATATATTTTTTAAGTTTTAAGTTTTGTGGTACGTAGT...A

', IUPACAmbiguousDNA())


### Enzymes:
**Choose 4 enzymes per pathway and 3 genes for each enzyme (one for human, one for *Drosophila*, one for *E. coli*).**

(accession #, name, desc)

glycolysis: 
    K01085 agp     glucose-1-phosphatase
    K01623 ALDO    fructose-bisphosphate-aldolase
    K01689 ENO,eno enolase
    K13951 ADH1_7  alcohol dehydrogenase 1/7
    
citric acid:
pathways:

In [60]:
# Finds accession numbers for protein records that match our search term
# (the enzyme table needs name, function, EC#).
# NOTE(review): the bare term 'agp' also matches unrelated proteins (the output
# includes an alpha-1-acid glycoprotein hit); consider restricting the search
# by organism or field.
handle = Entrez.esearch(db='protein',
                        term='agp',
                        sort='relevance',
                        idtype='acc')
try:
    accessions = Entrez.read(handle)['IdList']
finally:
    # Release the search connection before opening one per record.
    handle.close()

# Fetches the GenBank record for each accession found above and prints its description.
for accession in accessions:
    fetch_handle = Entrez.efetch(db='protein', id=accession, rettype='gb', retmode='text')
    try:
        temp = SeqIO.read(fetch_handle, 'gb')
    finally:
        fetch_handle.close()
    print(temp.description)

Agp, partial [Salmonella enterica subsp. enterica serovar Typhimurium]
Agp [Pantoea ananatis LMG 20103]
Agp [Pantoea ananatis LMG 20103]
agp [Escherichia coli O104:H4 str. C227-11]
acid glucose-1-phosphate phosphatase precursor [Enterobacter cloacae]
AGP [Solanum tuberosum]
Agp [Klebsiella pneumoniae]
Agp [Dickeya zeae EC1]
Agp [Klebsiella pneumoniae subsp. pneumoniae PittNDM01]
Agp [Pantoea agglomerans Eh318]
Agp [Pantoea agglomerans]
Agp [Klebsiella pneumoniae EGD-HP19-C]
Agp [Pseudomonas sp. TKP]
Agp [Pseudomonas sp. TKP]
Agp [Klebsiella pneumoniae CG43]
Agp [Klebsiella pneumoniae 303K]
Agp [Pantoea ananatis BRT175]
Agp [Pantoea agglomerans Tx10]
Agp [Pantoea dispersa EGD-AAK13]
alpha-1-acid glycoprotein precursor [Mus caroli]
