In [1]:
# import dependencies
import os, inspect
import sqlalchemy as sqlalc
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.ext.automap import automap_base

In [2]:
# get path to current and root directories
# Notebook cells have no backing source file, so resolving the current frame's
# "file" through inspect depends on how the kernel names cell code. The
# kernel's working directory is used instead.
# NOTE(review): assumes the kernel was started in this notebook's directory.
CURR_DIR = os.getcwd()
ROOT_DIR = os.path.dirname(CURR_DIR)

We start off by creating a SQLAlchemy engine that allows us to connect to and interface with the SQL database.

In [3]:
# Locate the SQLite file under the project root ...
fname = os.path.join(
    ROOT_DIR,
    "bbbioboard", "data", "ext",
    "bellybutton.sqlite",
)
# ... and open a SQLAlchemy engine on it through a sqlite:// URL.
db_url = f"sqlite:///{fname}"
engine = sqlalc.create_engine(db_url)

With the engine connected to the database, we can now list the available tables:

In [4]:
# explore table names
# Engine.table_names() is deprecated and removed in SQLAlchemy 2.0; the
# inspector provides the list of table names instead.
sqlalc.inspect(engine).get_table_names()

['sample_metadata', 'samples']

This shows that the current database has two tables, named 'sample_metadata' and 'samples'. With the table names collected, we can now dig into the tables' data and explore their columns. We do this by reflecting the database into an instance of automap_base():

In [5]:
# Create a fresh automap base, then reflect the connected database into it.
Base = automap_base()
Base.prepare(engine=engine, reflect=True)

This creates the mapped classes, named after the tables by default, which can be accessed as follows:

In [6]:
# Fetch the automap-generated classes from the reflected Base; each one is
# exposed under its table name.
mapped_classes = Base.classes
samplesTable = mapped_classes.samples
metadataTtable = mapped_classes.sample_metadata

Finally, we need to create a session and bind it to the database. This will allow us to query the database:

In [14]:
# Build the session factory first, then bind it to the engine ...
Session = sqlalc.orm.sessionmaker()
Session.configure(bind=engine)
# ... and open the session used for querying.
session = Session()

The tables are now set up and reflected. Let's explore them, looking at column names, data types, etc.:

In [15]:
# Look up the reflected "samples" Table in the shared metadata and show its
# full definition (columns and their types).
samples_schema = samplesTable.metadata.tables["samples"]
repr(samples_schema)

"Table('samples', MetaData(bind=None), Column('otu_id', INTEGER(), table=<samples>, primary_key=True, nullable=False), Column('otu_label', TEXT(), table=<samples>), Column('940', INTEGER(), table=<samples>), Column('941', INTEGER(), table=<samples>), Column('943', INTEGER(), table=<samples>), Column('944', INTEGER(), table=<samples>), Column('945', INTEGER(), table=<samples>), Column('946', INTEGER(), table=<samples>), Column('947', INTEGER(), table=<samples>), Column('948', INTEGER(), table=<samples>), Column('949', INTEGER(), table=<samples>), Column('950', INTEGER(), table=<samples>), Column('952', INTEGER(), table=<samples>), Column('953', INTEGER(), table=<samples>), Column('954', INTEGER(), table=<samples>), Column('955', INTEGER(), table=<samples>), Column('956', INTEGER(), table=<samples>), Column('958', INTEGER(), table=<samples>), Column('959', INTEGER(), table=<samples>), Column('960', INTEGER(), table=<samples>), Column('961', INTEGER(), table=<samples>), Column('962', INTE

It seems that there is something wrong with the column names of the samples table. Most column names consist only of digits, which will not work when querying directly from Python. Let's take a closer look at the column names:

In [9]:
# List only the column keys of the samples table.
samples_column_keys = samplesTable.__table__.columns.keys()
repr(samples_column_keys)

"['otu_id', 'otu_label', '940', '941', '943', '944', '945', '946', '947', '948', '949', '950', '952', '953', '954', '955', '956', '958', '959', '960', '961', '962', '963', '964', '966', '967', '968', '969', '970', '971', '972', '973', '974', '975', '978', '1233', '1234', '1235', '1236', '1237', '1238', '1242', '1243', '1246', '1253', '1254', '1258', '1259', '1260', '1264', '1265', '1273', '1275', '1276', '1277', '1278', '1279', '1280', '1281', '1282', '1283', '1284', '1285', '1286', '1287', '1288', '1289', '1290', '1291', '1292', '1293', '1294', '1295', '1296', '1297', '1298', '1308', '1309', '1310', '1374', '1415', '1439', '1441', '1443', '1486', '1487', '1489', '1490', '1491', '1494', '1495', '1497', '1499', '1500', '1501', '1502', '1503', '1504', '1505', '1506', '1507', '1508', '1510', '1511', '1512', '1513', '1514', '1515', '1516', '1517', '1518', '1519', '1521', '1524', '1526', '1527', '1530', '1531', '1532', '1533', '1534', '1535', '1536', '1537', '1539', '1540', '1541', '1542', 

As the listing shows, most column names of the samples table consist only of digits, so they cannot be used directly as Python attribute names when querying. To work around this, we give these columns a key prefixed with `bb_` while the tables are being reflected, and then reflect the database again:

In [10]:
# NOTE(review): disabled experiment, left commented out. It would rename the
# already-reflected columns in place (all but the first two) by prefixing
# their name and key with "bb_". Nothing in this cell is executed.
# def func(x):
#     x.name = "bb_{}".format(x.name)
#     x.key = "bb_{}".format(x.key)
    
# list(map(func, samplesTable.__table__.columns.values()[2:]))
# repr(samplesTable.__table__.columns.values())

In [11]:
# NOTE(review): this module-level `inspector` is not used by the listener below
# (the listener's own `inspector` parameter shadows it); kept in case other
# cells rely on it.
inspector = sqlalc.inspect(engine)


@sqlalc.event.listens_for(sqlalc.Table, "column_reflect")
def reflect_col(inspector, table, column_info):
    """Give digit-leading reflected columns an attribute-safe key.

    Registered for the "column_reflect" event on ``Table``, so it is called
    for each column while a table is being reflected.

    Args:
        inspector: Inspector passed in by the event; unused here.
        table: Table being reflected; unused here.
        column_info: Dict describing the reflected column. Its ``'name'``
            entry is read and, when needed, a ``'key'`` entry is set.

    Only columns whose name starts with a digit (e.g. ``'940'``) are re-keyed
    to ``'bb_<name>'``; such names cannot be used as Python attribute names.
    Columns that already have usable names (e.g. ``'otu_id'``) keep their
    default key.
    """
    name = column_info['name']
    if str(name)[:1].isdigit():
        column_info['key'] = "bb_{}".format(name)

In [12]:
# Start again from a new automap base and reflect the database once more.
Base = automap_base()
Base.prepare(engine=engine, reflect=True)

SyntaxError: invalid syntax (<ipython-input-13-777982a912f9>, line 1)