In [1]:
import pandas as pd
from Bio import Entrez, SeqIO
from mysql.connector import connection
from sshtunnel import SSHTunnelForwarder

# Set the contact e-mail attribute on the Bio.Entrez module.
# NOTE(review): neither Entrez nor SeqIO is used in the cells visible here — confirm this is still needed.
Entrez.email = "l.singh@intbio.org"

In [2]:
# Parse `db_curated_server_info.txt`, a plain "key=value" file, into `config`.
# Blank lines and lines starting with "#" are ignored.
config = {}

with open("db_curated_server_info.txt", "r") as file:
    for line_no, line in enumerate(file, start=1):
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        key, sep, value = line.partition("=")
        if not sep:
            # Report only the line number: the line itself may hold a secret.
            raise ValueError(
                f"db_curated_server_info.txt line {line_no}: expected key=value"
            )
        # Strip the key as well as the value so "key = value" is stored under "key".
        config[key.strip()] = value.strip()

# The key spellings ("srever_port", "db_adress") must match the config file as written.
server_name = config.get("server_name")
srever_port = int(config.get("srever_port"))
ssh_password = config.get("ssh_password")
ssh_username = config.get("ssh_username")
db_adress = config.get("db_adress")
db_port = int(config.get("db_port"))

In [3]:
# Open an SSH tunnel to the server and forward a local port to the database address.
ssh_endpoint = (server_name, srever_port)
db_endpoint = (db_adress, db_port)
tunnel = SSHTunnelForwarder(
    ssh_endpoint,
    ssh_username=ssh_username,
    ssh_password=ssh_password,
    remote_bind_address=db_endpoint,
)
tunnel.start()
# Show the local port of the tunnel; the MySQL connection below connects to it.
print(tunnel.local_bind_port)

35767


In [4]:
# Connect to MySQL through the local end of the SSH tunnel and open a cursor.
# NOTE(review): user/password/database look like placeholders — confirm how the
# real values are supplied; they should not be hard-coded in the notebook.
connection_settings = {
    "user": "db_user",
    "password": "db_password",
    "host": "localhost",
    "port": tunnel.local_bind_port,
    "database": "db_name",
}
conn = connection.MySQLConnection(**connection_settings)
cursor = conn.cursor()

In [5]:
# List the tables available in the connected database.
query = "SHOW TABLES;"
cursor.execute(query)
tables = cursor.fetchall()
tables

[('alternative_name',),
 ('histone',),
 ('histone_description',),
 ('histone_has_publication',),
 ('publication',),
 ('sequence',),
 ('sequence_has_publication',)]

In [18]:
# add_histone = (
#     "INSERT INTO histone "
#     "(id, level, taxonomic_span, taxonomic_span_id, description, parent) "
#     "VALUES (%(id)s, %(level)s, %(taxonomic_span)s, %(taxonomic_span_id)s, %(description)s, %(parent)s)"
# )
# add_histone_description = (
#     "INSERT INTO histone_description "
#     "(summary, taxonomy, genes, evolution, expression, knock_out, function, sequence, localization, deposition, structure, interactions, disease, caveats) "
#     "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
# )
# add_publication = (
#     "INSERT INTO publication "
#     "(id, title, doi, author, year) "
#     "VALUES (%(id)s, %(title)s, %(doi)s, %(author)s, %(year)s)"
# )
# add_sequence_has_publication = (
#     "INSERT INTO sequence_has_publication "
#     "(sequence_accession, publication_id) "
#     "VALUES (%s, %s)"
# )
# add_alternate_names = (
#     "INSERT INTO alternative_name "
#     "(name, taxonomy, gene, splice, histone) "
#     "VALUES (%(name)s, %(taxonomy)s, %(gene)s, %(splice)s, %(histone)s)"
# )
# Parameterised INSERT linking a histone to a publication; it is executed with a
# (histone_id, publication_id) tuple bound to the two %s placeholders.
add_histone_has_publication = "INSERT INTO histone_has_publication (histone_id, publication_id) VALUES (%s, %s)"

# Change name for some variants

H2B.O → H2B.O_(Ornithorhynchus_anatinus)

H2B.V → H2B.V_(Trypanosoma)

H2B.Z → H2B.Z_(Apicomplexa)

cenH3_(Eukarya) → cenH3

## Change name from H2B.O → H2B.O_(Ornithorhynchus_anatinus)

In [6]:
# Old and new histone ids for this rename; the cells of this section read both.
prev_name = "H2B.O"
new_name = "H2B.O_(Ornithorhynchus_anatinus)"

In [7]:
# Show the histone under its old id together with any linked publications.
query = (
    "SELECT * FROM histone h LEFT JOIN histone_has_publication hp "
    "ON h.id = hp.histone_id "
    f"WHERE h.id='{prev_name}'"
)
cursor.execute(query)
rows = cursor.fetchall()
column_names = [col[0] for col in cursor.description]
pd.DataFrame(rows, columns=column_names)

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,histone_id,publication_id
0,H2B.O,variant_group,Ornithorhynchus anatinus,9258,39,H2B,H2B.O,raman_novel_2022


### Save publications

In [8]:
# Remember the publication ids linked to the old histone id so they can be
# re-attached after the rename.
query = (
    "SELECT * FROM histone h LEFT JOIN histone_has_publication hp "
    "ON h.id = hp.histone_id "
    f"WHERE h.id='{prev_name}'"
)
cursor.execute(query)
linked = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])
# The LEFT JOIN returns one row with a NULL publication_id when the histone has no
# publications; drop it so no (histone, NULL) link is inserted later.
publications = linked["publication_id"].dropna().values
publications

array(['raman_novel_2022'], dtype=object)

### Delete relations

In [9]:
# Remove the publication links of the old id before the histone is renamed
# (presumably histone_has_publication.histone_id references histone.id — confirm against the schema).
query = (
    f"DELETE FROM histone_has_publication WHERE histone_id = '{prev_name}'"
)
print(query)
cursor.execute(query)

DELETE FROM histone_has_publication WHERE histone_id = 'H2B.O'


In [10]:
# Re-read the histone under its old id to inspect the publication columns after the delete.
query = (
    "SELECT * FROM histone h LEFT JOIN histone_has_publication hp "
    "ON h.id = hp.histone_id "
    f"WHERE h.id='{prev_name}'"
)
cursor.execute(query)
rows = cursor.fetchall()
column_names = [col[0] for col in cursor.description]
pd.DataFrame(rows, columns=column_names)

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,histone_id,publication_id
0,H2B.O,variant_group,Ornithorhynchus anatinus,9258,39,H2B,,


### Update name

In [11]:
# Rename the histone: set the `id` column from prev_name to new_name.
query = (
    f"UPDATE histone SET id='{new_name}' WHERE id = '{prev_name}'"
)
print(query)
cursor.execute(query)

UPDATE histone SET id='H2B.O_(Ornithorhynchus_anatinus)' WHERE id = 'H2B.O'


In [12]:
# Look the histone up under its new id to confirm the rename.
query = (
    "SELECT * FROM histone h LEFT JOIN histone_has_publication hp "
    "ON h.id = hp.histone_id "
    f"WHERE h.id='{new_name}'"
)
cursor.execute(query)
rows = cursor.fetchall()
column_names = [col[0] for col in cursor.description]
pd.DataFrame(rows, columns=column_names)

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,histone_id,publication_id
0,H2B.O_(Ornithorhynchus_anatinus),variant_group,Ornithorhynchus anatinus,9258,39,H2B,,


### Update description summary and restore relations

In [13]:
# Fetch the description summary of the renamed histone and rename its first mention.
query = (
    "SELECT * FROM histone h LEFT JOIN histone_description hd "
    "ON h.description = hd.id "
    f"WHERE h.id='{new_name}'"
)
cursor.execute(query)
described = pd.DataFrame(
    cursor.fetchall(), columns=[i[0] for i in cursor.description]
)
summary = described["summary"].values[0]
# Only the first occurrence is replaced. prev_name is a prefix of new_name, so
# re-running this cell on an already-updated summary would append the suffix a
# second time; skip the replace when the new name is already present.
if new_name not in summary:
    summary = summary.replace(prev_name, new_name, 1)
summary

'H2B.O_(Ornithorhynchus_anatinus)  is a class of H2B sequences identified exclusively in platypus genome that group together in phylogenetic analysis. H2B.O expression appears to be enriched in platypus’ germline tissues (testes or ovaries) albeit at low levels [raman_novel_2022].'

In [15]:
# Write the edited summary back. The text is bound as a parameter (like the INSERT
# template) so apostrophes or backslashes in it cannot break or alter the statement.
# 39 is the `description` id of this histone, as shown in the SELECT output above.
query = "UPDATE histone_description SET summary=%s WHERE id = %s"
params = (summary, 39)
print(query, params)
cursor.execute(query, params)

UPDATE histone_description SET summary='H2B.O_(Ornithorhynchus_anatinus)  is a class of H2B sequences identified exclusively in platypus genome that group together in phylogenetic analysis. H2B.O expression appears to be enriched in platypus’ germline tissues (testes or ovaries) albeit at low levels [raman_novel_2022].' WHERE id = 39


In [16]:
# Show the renamed histone joined with its description row to review the stored summary.
query = (
    "SELECT * FROM histone h LEFT JOIN histone_description hd "
    "ON h.description = hd.id "
    f"WHERE h.id='{new_name}'"
)
cursor.execute(query)
rows = cursor.fetchall()
column_names = [col[0] for col in cursor.description]
pd.DataFrame(rows, columns=column_names)

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,id.1,summary,taxonomy,genes,...,expression,knock_out,function,sequence,localization,deposition,structure,interactions,disease,caveats
0,H2B.O_(Ornithorhynchus_anatinus),variant_group,Ornithorhynchus anatinus,9258,39,H2B,39,H2B.O_(Ornithorhynchus_anatinus) is a class o...,Platypus [raman_novel_2022].,,...,H2B.O expression appears to be enriched in pla...,,,,,,,,,


In [19]:
# Re-attach the saved publications to the histone under its new id.
query = "SELECT id FROM publication"
cursor.execute(query)
exist_pubs = [row[0] for row in cursor.fetchall()]
for pid in publications:
    known = pid in exist_pubs
    if not known:
        # Only flags ids missing from `publication`; the insert below is still attempted.
        print(f"Strange {pid}")
    cursor.execute(add_histone_has_publication, (new_name, pid))

In [20]:
# Final check: the renamed histone together with its re-attached publications.
query = (
    "SELECT * FROM histone h LEFT JOIN histone_has_publication hp "
    "ON h.id = hp.histone_id "
    f"WHERE h.id='{new_name}'"
)
cursor.execute(query)
rows = cursor.fetchall()
column_names = [col[0] for col in cursor.description]
pd.DataFrame(rows, columns=column_names)

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,histone_id,publication_id
0,H2B.O_(Ornithorhynchus_anatinus),variant_group,Ornithorhynchus anatinus,9258,39,H2B,H2B.O_(Ornithorhynchus_anatinus),raman_novel_2022


In [21]:
# Commit the statements executed on `conn` in this section (delete, rename,
# summary update, re-inserted publication links) so they are saved in the database.
conn.commit()

## Change name from H2B.V → H2B.V_(Trypanosoma)

In [22]:
# Old and new histone ids for this rename; the cells of this section read both.
prev_name = "H2B.V"
new_name = "H2B.V_(Trypanosoma)"

In [23]:
# Show the histone under its old id together with any linked publications.
query = (
    "SELECT * FROM histone h LEFT JOIN histone_has_publication hp "
    "ON h.id = hp.histone_id "
    f"WHERE h.id='{prev_name}'"
)
cursor.execute(query)
rows = cursor.fetchall()
column_names = [col[0] for col in cursor.description]
pd.DataFrame(rows, columns=column_names)

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,histone_id,publication_id
0,H2B.V,variant_group,Trypanosoma,5690,41,H2B,H2B.V,lowell_histone_2005


### Save publications

In [24]:
# Remember the publication ids linked to the old histone id so they can be
# re-attached after the rename.
query = (
    "SELECT * FROM histone h LEFT JOIN histone_has_publication hp "
    "ON h.id = hp.histone_id "
    f"WHERE h.id='{prev_name}'"
)
cursor.execute(query)
linked = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])
# The LEFT JOIN returns one row with a NULL publication_id when the histone has no
# publications; drop it so no (histone, NULL) link is inserted later.
publications = linked["publication_id"].dropna().values
publications

array(['lowell_histone_2005'], dtype=object)

### Delete relations

In [25]:
# Remove the publication links of the old id before the histone is renamed
# (presumably histone_has_publication.histone_id references histone.id — confirm against the schema).
query = (
    f"DELETE FROM histone_has_publication WHERE histone_id = '{prev_name}'"
)
print(query)
cursor.execute(query)

DELETE FROM histone_has_publication WHERE histone_id = 'H2B.V'


In [26]:
# Re-read the histone under its old id to inspect the publication columns after the delete.
query = (
    "SELECT * FROM histone h LEFT JOIN histone_has_publication hp "
    "ON h.id = hp.histone_id "
    f"WHERE h.id='{prev_name}'"
)
cursor.execute(query)
rows = cursor.fetchall()
column_names = [col[0] for col in cursor.description]
pd.DataFrame(rows, columns=column_names)

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,histone_id,publication_id
0,H2B.V,variant_group,Trypanosoma,5690,41,H2B,,


### Update name

In [27]:
# Rename the histone: set the `id` column from prev_name to new_name.
query = (
    f"UPDATE histone SET id='{new_name}' WHERE id = '{prev_name}'"
)
print(query)
cursor.execute(query)

UPDATE histone SET id='H2B.V_(Trypanosoma)' WHERE id = 'H2B.V'


In [28]:
# Look the histone up under its new id to confirm the rename.
query = (
    "SELECT * FROM histone h LEFT JOIN histone_has_publication hp "
    "ON h.id = hp.histone_id "
    f"WHERE h.id='{new_name}'"
)
cursor.execute(query)
rows = cursor.fetchall()
column_names = [col[0] for col in cursor.description]
pd.DataFrame(rows, columns=column_names)

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,histone_id,publication_id
0,H2B.V_(Trypanosoma),variant_group,Trypanosoma,5690,41,H2B,,


### Update description summary and restore relations

In [29]:
# Fetch the description summary of the renamed histone and rename its first mention.
query = (
    "SELECT * FROM histone h LEFT JOIN histone_description hd "
    "ON h.description = hd.id "
    f"WHERE h.id='{new_name}'"
)
cursor.execute(query)
described = pd.DataFrame(
    cursor.fetchall(), columns=[i[0] for i in cursor.description]
)
summary = described["summary"].values[0]
# Only the first occurrence is replaced. prev_name is a prefix of new_name, so
# re-running this cell on an already-updated summary would append the suffix a
# second time; skip the replace when the new name is already present.
if new_name not in summary:
    summary = summary.replace(prev_name, new_name, 1)
summary

'H2B.V_(Trypanosoma) is a histone variant characterized so far in Trypanosoma brucei. It shares ~38% sequence identity with major H2B. H2B.V is essential for viability. H2A.Z and H2B.V colocalize throughout the cell cycle and exhibit nearly identical genomic distribution. Data strongly suggest that H2A.Z and H2B.V function together within a single nucleosome [lowell_histone_2005]. H2BV possibly regulates H3 K4 and K76 trimethylation in Trypanosoma brucei [mandava_trypanosome_2008].'

In [30]:
# Write the edited summary back. The text is bound as a parameter (like the INSERT
# template) so apostrophes or backslashes in it cannot break or alter the statement.
# 41 is the `description` id of this histone, as shown in the SELECT output above.
query = "UPDATE histone_description SET summary=%s WHERE id = %s"
params = (summary, 41)
print(query, params)
cursor.execute(query, params)

UPDATE histone_description SET summary='H2B.V_(Trypanosoma) is a histone variant characterized so far in Trypanosoma brucei. It shares ~38% sequence identity with major H2B. H2B.V is essential for viability. H2A.Z and H2B.V colocalize throughout the cell cycle and exhibit nearly identical genomic distribution. Data strongly suggest that H2A.Z and H2B.V function together within a single nucleosome [lowell_histone_2005]. H2BV possibly regulates H3 K4 and K76 trimethylation in Trypanosoma brucei [mandava_trypanosome_2008].' WHERE id = 41


In [31]:
# Show the renamed histone joined with its description row to review the stored summary.
query = (
    "SELECT * FROM histone h LEFT JOIN histone_description hd "
    "ON h.description = hd.id "
    f"WHERE h.id='{new_name}'"
)
cursor.execute(query)
rows = cursor.fetchall()
column_names = [col[0] for col in cursor.description]
pd.DataFrame(rows, columns=column_names)

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,id.1,summary,taxonomy,genes,...,expression,knock_out,function,sequence,localization,deposition,structure,interactions,disease,caveats
0,H2B.V_(Trypanosoma),variant_group,Trypanosoma,5690,41,H2B,41,H2B.V_(Trypanosoma) is a histone variant chara...,Trypanosoma,,...,,,,,,,,,,


In [32]:
# Re-attach the saved publications to the histone under its new id.
query = "SELECT id FROM publication"
cursor.execute(query)
exist_pubs = [row[0] for row in cursor.fetchall()]
for pid in publications:
    known = pid in exist_pubs
    if not known:
        # Only flags ids missing from `publication`; the insert below is still attempted.
        print(f"Strange {pid}")
    cursor.execute(add_histone_has_publication, (new_name, pid))

In [33]:
# Final check: the renamed histone together with its re-attached publications.
query = (
    "SELECT * FROM histone h LEFT JOIN histone_has_publication hp "
    "ON h.id = hp.histone_id "
    f"WHERE h.id='{new_name}'"
)
cursor.execute(query)
rows = cursor.fetchall()
column_names = [col[0] for col in cursor.description]
pd.DataFrame(rows, columns=column_names)

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,histone_id,publication_id
0,H2B.V_(Trypanosoma),variant_group,Trypanosoma,5690,41,H2B,H2B.V_(Trypanosoma),lowell_histone_2005


In [34]:
# Commit the statements executed on `conn` in this section (delete, rename,
# summary update, re-inserted publication links) so they are saved in the database.
conn.commit()

## Change name from H2B.Z → H2B.Z_(Apicomplexa)

In [35]:
# Old and new histone ids for this rename; the cells of this section read both.
prev_name = "H2B.Z"
new_name = "H2B.Z_(Apicomplexa)"

In [36]:
# Show the histone under its old id together with any linked publications.
query = (
    "SELECT * FROM histone h LEFT JOIN histone_has_publication hp "
    "ON h.id = hp.histone_id "
    f"WHERE h.id='{prev_name}'"
)
cursor.execute(query)
rows = cursor.fetchall()
column_names = [col[0] for col in cursor.description]
pd.DataFrame(rows, columns=column_names)

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,histone_id,publication_id
0,H2B.Z,variant_group,Apicomplexa,5794,43,H2B,H2B.Z,logie_apicomplexa_2020


### Save publications

In [37]:
# Remember the publication ids linked to the old histone id so they can be
# re-attached after the rename.
query = (
    "SELECT * FROM histone h LEFT JOIN histone_has_publication hp "
    "ON h.id = hp.histone_id "
    f"WHERE h.id='{prev_name}'"
)
cursor.execute(query)
linked = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])
# The LEFT JOIN returns one row with a NULL publication_id when the histone has no
# publications; drop it so no (histone, NULL) link is inserted later.
publications = linked["publication_id"].dropna().values
publications

array(['logie_apicomplexa_2020'], dtype=object)

### Delete relations

In [38]:
# Remove the publication links of the old id before the histone is renamed
# (presumably histone_has_publication.histone_id references histone.id — confirm against the schema).
query = (
    f"DELETE FROM histone_has_publication WHERE histone_id = '{prev_name}'"
)
print(query)
cursor.execute(query)

DELETE FROM histone_has_publication WHERE histone_id = 'H2B.Z'


In [39]:
# Re-read the histone under its old id to inspect the publication columns after the delete.
query = (
    "SELECT * FROM histone h LEFT JOIN histone_has_publication hp "
    "ON h.id = hp.histone_id "
    f"WHERE h.id='{prev_name}'"
)
cursor.execute(query)
rows = cursor.fetchall()
column_names = [col[0] for col in cursor.description]
pd.DataFrame(rows, columns=column_names)

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,histone_id,publication_id
0,H2B.Z,variant_group,Apicomplexa,5794,43,H2B,,


### Update name

In [40]:
# Rename the histone: set the `id` column from prev_name to new_name.
query = (
    f"UPDATE histone SET id='{new_name}' WHERE id = '{prev_name}'"
)
print(query)
cursor.execute(query)

UPDATE histone SET id='H2B.Z_(Apicomplexa)' WHERE id = 'H2B.Z'


In [41]:
# Look the histone up under its new id to confirm the rename.
query = (
    "SELECT * FROM histone h LEFT JOIN histone_has_publication hp "
    "ON h.id = hp.histone_id "
    f"WHERE h.id='{new_name}'"
)
cursor.execute(query)
rows = cursor.fetchall()
column_names = [col[0] for col in cursor.description]
pd.DataFrame(rows, columns=column_names)

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,histone_id,publication_id
0,H2B.Z_(Apicomplexa),variant_group,Apicomplexa,5794,43,H2B,,


### Update description summary and restore relations

In [42]:
# Fetch the description summary of the renamed histone and rename its first mention.
query = (
    "SELECT * FROM histone h LEFT JOIN histone_description hd "
    "ON h.description = hd.id "
    f"WHERE h.id='{new_name}'"
)
cursor.execute(query)
described = pd.DataFrame(
    cursor.fetchall(), columns=[i[0] for i in cursor.description]
)
summary = described["summary"].values[0]
# Only the first occurrence is replaced. prev_name is a prefix of new_name, so
# re-running this cell on an already-updated summary would append the suffix a
# second time; skip the replace when the new name is already present.
if new_name not in summary:
    summary = summary.replace(prev_name, new_name, 1)
summary

'H2B.Z_(Apicomplexa) is an apicomplexan specific variant. Different studies performed in Toxoplasma have shown a nucleosome composition in which H2A.Z, but not H2A.X, dimerizes with H2B.Z, while H2A.X dimerizes with canonical H2B (H2Ba in T. gondii), but never with H2B.Z. This fact is also seen in P. falciparum, although this parasite lacks H2A.X variant and has driven the hypothesis of a new double variant nucleosome exclusive of parasites with particular characteristics [logie_apicomplexa_2020].'

In [43]:
# Write the edited summary back. The text is bound as a parameter (like the INSERT
# template) so apostrophes or backslashes in it cannot break or alter the statement.
# 43 is the `description` id of this histone, as shown in the SELECT output above.
query = "UPDATE histone_description SET summary=%s WHERE id = %s"
params = (summary, 43)
print(query, params)
cursor.execute(query, params)

UPDATE histone_description SET summary='H2B.Z_(Apicomplexa) is an apicomplexan specific variant. Different studies performed in Toxoplasma have shown a nucleosome composition in which H2A.Z, but not H2A.X, dimerizes with H2B.Z, while H2A.X dimerizes with canonical H2B (H2Ba in T. gondii), but never with H2B.Z. This fact is also seen in P. falciparum, although this parasite lacks H2A.X variant and has driven the hypothesis of a new double variant nucleosome exclusive of parasites with particular characteristics [logie_apicomplexa_2020].' WHERE id = 43


In [44]:
# Show the renamed histone joined with its description row to review the stored summary.
query = (
    "SELECT * FROM histone h LEFT JOIN histone_description hd "
    "ON h.description = hd.id "
    f"WHERE h.id='{new_name}'"
)
cursor.execute(query)
rows = cursor.fetchall()
column_names = [col[0] for col in cursor.description]
pd.DataFrame(rows, columns=column_names)

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,id.1,summary,taxonomy,genes,...,expression,knock_out,function,sequence,localization,deposition,structure,interactions,disease,caveats
0,H2B.Z_(Apicomplexa),variant_group,Apicomplexa,5794,43,H2B,43,H2B.Z_(Apicomplexa) is an apicomplexan specifi...,,,...,H2B.Z expression was found to be relatively c...,,,,,,,,,Former name for this histone variant H2Bv from...


In [45]:
# Re-attach the saved publications to the histone under its new id.
query = "SELECT id FROM publication"
cursor.execute(query)
exist_pubs = [row[0] for row in cursor.fetchall()]
for pid in publications:
    known = pid in exist_pubs
    if not known:
        # Only flags ids missing from `publication`; the insert below is still attempted.
        print(f"Strange {pid}")
    cursor.execute(add_histone_has_publication, (new_name, pid))

In [46]:
# Final check: the renamed histone together with its re-attached publications.
query = (
    "SELECT * FROM histone h LEFT JOIN histone_has_publication hp "
    "ON h.id = hp.histone_id "
    f"WHERE h.id='{new_name}'"
)
cursor.execute(query)
rows = cursor.fetchall()
column_names = [col[0] for col in cursor.description]
pd.DataFrame(rows, columns=column_names)

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,histone_id,publication_id
0,H2B.Z_(Apicomplexa),variant_group,Apicomplexa,5794,43,H2B,H2B.Z_(Apicomplexa),logie_apicomplexa_2020


In [91]:
# Commit the statements executed on `conn` in this section (delete, rename,
# summary update, re-inserted publication links) so they are saved in the database.
conn.commit()

## Change name from cenH3_(Eukarya) → cenH3

In [49]:
# Old and new histone ids for this rename; the cells of this section read both.
prev_name = "cenH3_(Eukarya)"
new_name = "cenH3"

In [50]:
# Show the histone under its old id together with any linked publications.
query = (
    "SELECT * FROM histone h LEFT JOIN histone_has_publication hp "
    "ON h.id = hp.histone_id "
    f"WHERE h.id='{prev_name}'"
)
cursor.execute(query)
rows = cursor.fetchall()
column_names = [col[0] for col in cursor.description]
pd.DataFrame(rows, columns=column_names)

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,histone_id,publication_id
0,cenH3_(Eukarya),variant_group,,,46,H3,,


In [51]:
# List the histones whose `parent` is the old id; they must be re-linked after the rename.
query = (
    "SELECT * FROM histone "
    f"WHERE parent='{prev_name}'"
)
cursor.execute(query)
rows = cursor.fetchall()
column_names = [col[0] for col in cursor.description]
pd.DataFrame(rows, columns=column_names)

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent
0,cenH3_(Animals),variant,Homo sapiens,9606,94,cenH3_(Eukarya)
1,cenH3_(Fungi),variant,Homo sapiens,9606,95,cenH3_(Eukarya)
2,cenH3_(Plants),variant,Eukaryotes,2759,96,cenH3_(Eukarya)


### Save children

In [52]:
# Remember the ids of the child histones so their `parent` can be restored later.
query = (
    "SELECT * FROM histone "
    f"WHERE parent='{prev_name}'"
)
cursor.execute(query)
child_rows = pd.DataFrame(
    cursor.fetchall(), columns=[col[0] for col in cursor.description]
)
children = child_rows["id"].values
children

array(['cenH3_(Animals)', 'cenH3_(Fungi)', 'cenH3_(Plants)'], dtype=object)

### Delete relations

In [55]:
# Detach the children from the old id by clearing their `parent` column
# (presumably histone.parent references histone.id — confirm against the schema).
query = (
    f"UPDATE histone SET parent=null WHERE parent = '{prev_name}'"
)
print(query)
cursor.execute(query)

UPDATE histone SET parent=null WHERE parent = 'cenH3_(Eukarya)'


In [57]:
# Check which rows, if any, still have the old id as `parent`.
query = (
    "SELECT * FROM histone "
    f"WHERE parent='{prev_name}'"
)
cursor.execute(query)
rows = cursor.fetchall()
column_names = [col[0] for col in cursor.description]
pd.DataFrame(rows, columns=column_names)

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent


### Update name

In [58]:
# Rename the histone: set the `id` column from prev_name to new_name.
query = (
    f"UPDATE histone SET id='{new_name}' WHERE id = '{prev_name}'"
)
print(query)
cursor.execute(query)

UPDATE histone SET id='cenH3' WHERE id = 'cenH3_(Eukarya)'


In [59]:
# Look the histone up under its new id to confirm the rename.
query = (
    "SELECT * FROM histone h LEFT JOIN histone_has_publication hp "
    "ON h.id = hp.histone_id "
    f"WHERE h.id='{new_name}'"
)
cursor.execute(query)
rows = cursor.fetchall()
column_names = [col[0] for col in cursor.description]
pd.DataFrame(rows, columns=column_names)

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,histone_id,publication_id
0,cenH3,variant_group,,,46,H3,,


### Restore relations

In [60]:
# Load the whole histone table and display only the saved child rows.
query = (
    "SELECT * FROM histone "
)
cursor.execute(query)
rows = cursor.fetchall()
column_names = [col[0] for col in cursor.description]
df = pd.DataFrame(rows, columns=column_names)
is_saved_child = df["id"].isin(children)
df[is_saved_child]

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent
2,cenH3_(Animals),variant,Homo sapiens,9606,94,
3,cenH3_(Fungi),variant,Homo sapiens,9606,95,
6,cenH3_(Plants),variant,Eukaryotes,2759,96,


In [62]:
# Point every saved child at the renamed parent, one UPDATE statement per child.
for ch in children:
    query = (
        f"UPDATE histone SET parent='{new_name}' WHERE id = '{ch}'"
    )
    print(query)
    cursor.execute(query)

UPDATE histone SET parent='cenH3' WHERE id = 'cenH3_(Animals)'
UPDATE histone SET parent='cenH3' WHERE id = 'cenH3_(Fungi)'
UPDATE histone SET parent='cenH3' WHERE id = 'cenH3_(Plants)'


In [63]:
# Final check: list the histones whose `parent` is the new id.
query = (
    "SELECT * FROM histone "
    f"WHERE parent='{new_name}'"
)
cursor.execute(query)
rows = cursor.fetchall()
column_names = [col[0] for col in cursor.description]
pd.DataFrame(rows, columns=column_names)

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent
0,cenH3_(Animals),variant,Homo sapiens,9606,94,cenH3
1,cenH3_(Fungi),variant,Homo sapiens,9606,95,cenH3
2,cenH3_(Plants),variant,Eukaryotes,2759,96,cenH3


In [64]:
# Commit the statements executed on `conn` in this section (children detached,
# histone renamed, children re-linked) so they are saved in the database.
conn.commit()

# Close connections

In [65]:
# Release resources in order: cursor, then connection, then the SSH tunnel.
# The nested try/finally ensures a failure in an earlier step still closes the
# later ones, so the tunnel is not left running.
try:
    cursor.close()
finally:
    try:
        conn.close()
    finally:
        tunnel.stop()