# Export annotation data to an Aiven MySQL database

In [1]:
import os
from urllib.parse import quote

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine, text

In [2]:
# Connection settings: host and credentials come from the environment
# (a local .env file is loaded first); port and database name are fixed here.
load_dotenv()
db_host = os.getenv('DB_HOST')
db_user = os.getenv('DB_USER')
db_password = os.getenv('DB_PASS')
db_port = 10184
database = "defaultdb"

# Fail early with a clear message rather than building a URL that contains "None".
missing_vars = [
    var_name
    for var_name, var_value in (
        ('DB_HOST', db_host),
        ('DB_USER', db_user),
        ('DB_PASS', db_password),
    )
    if var_value is None
]
if missing_vars:
    raise RuntimeError(
        f"Missing required environment variables: {', '.join(missing_vars)}"
    )

# Percent-encode the credentials so characters such as '@', ':' or '/' in the
# user name or password cannot be misread as URL delimiters.
host_url = (
    f"mysql://{quote(db_user, safe='')}:{quote(db_password, safe='')}"
    f"@{db_host}:{db_port}/{database}"
)

engine = create_engine(host_url)

Now, read the CSV file into a pandas DataFrame.

In [3]:
# Load the annotation table; the first CSV column becomes the DataFrame index.
annot_df = pd.read_csv(
    'neuron_annotations.csv',
    index_col=0,
)
annot_df

Unnamed: 0,experiment,Repeats,condition,date,time,Idents
20181215_CLK856_LD_ZT14_AR07_ACAGGA,CLK856_LD,LD_2,LD,2018-12-15,ZT14,29:LPN
20181215_CLK856_LD_ZT14_AR07_CTTCTG,CLK856_LD,LD_2,LD,2018-12-15,ZT14,29:LPN
20181231_CLK856_LD_ZT14_AR02_GTACCA,CLK856_LD,LD_1,LD,2018-12-31,ZT14,29:LPN
20181231_CLK856_LD_ZT14_AR02_TCCTTC,CLK856_LD,LD_1,LD,2018-12-31,ZT14,29:LPN
20181231_CLK856_LD_ZT14_AR08_AGACAG,CLK856_LD,LD_1,LD,2018-12-31,ZT14,29:LPN
...,...,...,...,...,...,...
20190710_CLK856_DD_CT06_AR04_GTCTTC,CLK856_DD,DD_1,DD,2019-07-10,CT06,1:DN1p_CNMa
20190710_CLK856_DD_CT06_AR04_TCCTTC,CLK856_DD,DD_1,DD,2019-07-10,CT06,1:DN1p_CNMa
20190719_CLK856_DD_CT06_AR18_GTCTTC,CLK856_DD,DD_2,DD,2019-07-19,CT06,1:DN1p_CNMa
20190814_CLK856_DD_CT06_AR19_TCTGCA,CLK856_DD,DD_2,DD,2019-08-14,CT06,1:DN1p_CNMa


In [14]:
# Character count of a sample cell identifier.
# NOTE(review): presumably used to size the `single_cell` key column - confirm.
sample_cell_id = "20190001_CLK856_DD_CT14_AR01_CTTCTG"
len(sample_cell_id)

35

Next, create the `annotations` table, dropping any previous copy first.

In [18]:
# Remove any previous copy of the table so the notebook can be re-run from a
# fresh kernel. IF EXISTS keeps the very first run (when no table exists yet)
# from failing. engine.begin() commits the transaction when the block exits.
with engine.begin() as connection:
    connection.execute(text('DROP TABLE IF EXISTS annotations'))

In [19]:
# Schema for the annotations table, keyed by the single-cell identifier.
# Identifiers are quoted with backticks, which MySQL accepts under any sql_mode;
# double-quoted identifiers only parse when ANSI_QUOTES is enabled.
# Quoting is required because `condition` is a reserved word in MySQL.
ddl = """CREATE TABLE `annotations` (
        `single_cell` CHAR(36) PRIMARY KEY,
        `experiment` VARCHAR(255),
        `Repeats` VARCHAR(255),
        `condition` VARCHAR(255),
        `date` DATE,
        `time` VARCHAR(255),
        `Idents` VARCHAR(255)
        )"""

# engine.begin() commits the transaction when the block exits without error.
with engine.begin() as connection:
    connection.execute(text(ddl))

In [20]:
# Append the rows of annot_df to the existing `annotations` table; the
# DataFrame index is written out as the `single_cell` column.
rows_written = annot_df.to_sql(
    'annotations',
    engine,
    if_exists='append',
    index=True,
    index_label='single_cell',
)
rows_written

2615

In [21]:
# Read the table back from the database, indexed by `single_cell`, to inspect
# what was stored.
select_all_annotations = "SELECT * FROM annotations"
df_result = pd.read_sql(
    select_all_annotations,
    con=engine,
    index_col='single_cell',
)
df_result

Unnamed: 0_level_0,experiment,Repeats,condition,date,time,Idents
single_cell,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
20181215_CLK856_LD_ZT14_AR07_ACAGAC,CLK856_LD,LD_2,LD,2018-12-15,ZT14,1:DN1p_CNMa
20181215_CLK856_LD_ZT14_AR07_ACAGGA,CLK856_LD,LD_2,LD,2018-12-15,ZT14,29:LPN
20181215_CLK856_LD_ZT14_AR07_ACCAAC,CLK856_LD,LD_2,LD,2018-12-15,ZT14,15:DN1p_CNMa
20181215_CLK856_LD_ZT14_AR07_ACCAGA,CLK856_LD,LD_2,LD,2018-12-15,ZT14,8:LN_ITP
20181215_CLK856_LD_ZT14_AR07_ACCATG,CLK856_LD,LD_2,LD,2018-12-15,ZT14,3:DN1a
...,...,...,...,...,...,...
20190814_CLK856_DD_CT06_AR20_TCACCA,CLK856_DD,DD_2,DD,2019-08-14,CT06,7:DN1p
20190814_CLK856_DD_CT06_AR20_TCCTTC,CLK856_DD,DD_2,DD,2019-08-14,CT06,9:LNd_NPF
20190814_CLK856_DD_CT06_AR20_TGAGAC,CLK856_DD,DD_2,DD,2019-08-14,CT06,3:DN1a
20190814_CLK856_DD_CT06_AR20_TGAGGA,CLK856_DD,DD_2,DD,2019-08-14,CT06,14:DN3


In [22]:
# List the tables in the connected database with a raw SQL statement.
query = "SHOW TABLES"
tables_df = pd.read_sql(sql=query, con=engine)
tables_df

Unnamed: 0,Tables_in_defaultdb
0,annotations


In [23]:
# Show the column definitions of the `annotations` table.
# Plain string literal: the statement has no placeholders, so no f-string is needed.
query = "DESCRIBE annotations"
columns_df = pd.read_sql(query, con=engine)
columns_df

Unnamed: 0,Field,Type,Null,Key,Default,Extra
0,single_cell,char(36),NO,PRI,,
1,experiment,varchar(255),YES,,,
2,Repeats,varchar(255),YES,,,
3,condition,varchar(255),YES,,,
4,date,date,YES,,,
5,time,varchar(255),YES,,,
6,Idents,varchar(255),YES,,,


In [24]:
# Send an explicit COMMIT statement over a connection obtained from the engine.
# NOTE(review): this runs on its own connection, separate from the ones used by
# the to_sql / read_sql calls above, so it likely has no effect on the earlier
# insert - confirm whether this cell is still needed.
commit_statement = text('COMMIT')
with engine.connect() as connection:
    connection.execute(commit_statement)

In [59]:
# List the tables currently present in the connected database.
pd.read_sql(sql="SHOW TABLES", con=engine)

Unnamed: 0,Tables_in_defaultdb
0,GSM4768020_CT02_20190528_AR05
1,GSM4768021_CT02_20190528_AR06
2,GSM4768022_CT02_20190528_AR07
3,GSM4768023_CT02_20190528_AR08
4,GSM4768024_CT02_20190702_AR13
5,GSM4768025_CT02_20190702_AR14
6,GSM4768026_CT02_20190702_AR15
7,GSM4768027_CT02_20190702_AR16
8,GSM4768028_CT06_20190710_AR01
9,GSM4768029_CT06_20190710_AR02
