# ClickHouse with SQLAlchemy

Connect to a ClickHouse database using SQLAlchemy and the ClickHouse dialect/driver package `clickhouse-sqlalchemy`: https://pypi.org/project/clickhouse-sqlalchemy/

In [20]:
import numpy as np
import numpy
import inspect
import pandas as pd
import sqlalchemy as sa
from sqlalchemy.orm import sessionmaker
from sqlalchemy import func

In [21]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Create the SQLAlchemy engine

In [22]:
# SQLAlchemy URL in the form dialect+driver://user:password@host/database:
# ClickHouse over the native driver, user `default` with an empty password,
# host `localhost`, database `default`.
ch_url = 'clickhouse+native://default:@localhost/default'
# The engine is shared by every cell below (raw SQL, pandas, reflection, ORM).
engine = sa.create_engine(ch_url)

Create a reusable function that executes a SQL statement and prints each returned row

In [23]:
def execute_sql(engine, sql):
    """Run one SQL statement on ``engine`` and print every returned row.

    Args:
        engine: SQLAlchemy engine used to open the connection.
        sql: Statement to run. Either a plain SQL string or an already
            constructed SQLAlchemy executable (e.g. ``sa.text(...)``).
            Plain strings are wrapped in ``sa.text()``, so a ``:name`` token
            is treated as a bound parameter; escape a literal colon as
            ``\\:`` if needed.

    Returns:
        None. Each row is written to stdout on its own line.
    """
    # Passing a raw string to Connection.execute() is deprecated in
    # SQLAlchemy 1.4 and rejected in 2.0; text() works on both.
    statement = sa.text(sql) if isinstance(sql, str) else sql
    with engine.begin() as conn:
        for row in conn.execute(statement):
            print(row)

Get list of tables

In [24]:
# List the tables ClickHouse reports for the connected database.
execute_sql(engine=engine, sql='show tables')

('.inner_id.3489d528-1eb4-44a7-b489-d5281eb444a7',)
('flight',)
('flight_view',)
('visits',)


Get table metadata

Use pandas to read query results through the SQLAlchemy engine

In [25]:
# Query ClickHouse's system.tables for entries whose database matches 'default'
# and let pandas render the result as a DataFrame.
sql = "select * from system.tables where database like 'default'"
pd.read_sql(sql=sql, con=engine)

Unnamed: 0,database,name,uuid,engine,is_temporary,data_paths,metadata_path,metadata_modification_time,dependencies_database,dependencies_table,...,partition_key,sorting_key,primary_key,sampling_key,storage_policy,total_rows,total_bytes,lifetime_rows,lifetime_bytes,comment
0,default,.inner_id.3489d528-1eb4-44a7-b489-d5281eb444a7,e8556d44-1738-4a55-a855-6d441738ca55,AggregatingMergeTree,0,[/var/lib/clickhouse/store/e85/e8556d44-1738-4...,/var/lib/clickhouse/store/f4e/f4ec22fb-97dd-4d...,2021-08-16 01:49:58,[],[],...,,"Origin, Year, Month","Origin, Year, Month",,default,59102.0,515653.0,,,
1,default,flight,eb198509-c1bb-4c28-ab19-8509c1bbfc28,MergeTree,0,[/var/lib/clickhouse/store/eb1/eb198509-c1bb-4...,/var/lib/clickhouse/store/f4e/f4ec22fb-97dd-4d...,2021-08-16 01:49:58,[default],[flight_view],...,,Year,Year,,default,123534969.0,3001317000.0,,,
2,default,flight_view,3489d528-1eb4-44a7-b489-d5281eb444a7,MaterializedView,0,[/var/lib/clickhouse/store/e85/e8556d44-1738-4...,/var/lib/clickhouse/store/f4e/f4ec22fb-97dd-4d...,2021-08-16 01:49:58,[],[],...,,,,,,,,,,
3,default,visits,a1ff40f8-d9ad-475e-a1ff-40f8d9ad375e,MergeTree,0,[/var/lib/clickhouse/disks/s3/store/a1f/a1ff40...,/var/lib/clickhouse/store/f4e/f4ec22fb-97dd-4d...,2021-08-18 14:48:04,[],[],...,,id,id,,s3,4.0,371.0,,,


In [26]:
# Reflect only the `flight` table definition from the database.
# MetaData(bind=...) ("bound metadata") is deprecated in SQLAlchemy 1.4 and
# removed in 2.0, so the engine is handed to reflect() instead, which is
# accepted by both the 1.x and 2.0 APIs.
metadata = sa.MetaData()
metadata.reflect(bind=engine, only=['flight'])
flight_tbl = metadata.tables['flight']

# Print the reflected column names, one per line.
for column in flight_tbl.columns:
    print(column.name)

Year
Month
DayofMonth
DayOfWeek
DepTime
CRSDepTime
ArrTime
CRSArrTime
UniqueCarrier
FlightNum
TailNum
ActualElapsedTime
CRSElapsedTime
AirTime
ArrDelay
DepDelay
Origin
Dest
Distance
TaxiIn
TaxiOut
Cancelled
CancellationCode
Diverted
CarrierDelay
WeatherDelay
NASDelay
SecurityDelay
LateAircraftDelay


Use the SQLAlchemy ORM session to query the database

In [27]:
# Session factory bound to the engine, plus one session for the queries below.
Session = sessionmaker(bind=engine)
session = Session()

# Base query selecting every column of the reflected flight table.
qry = session.query(flight_tbl)

# Print at most ten flights whose Month is 2 and DayofMonth is 29.
feb_29_criteria = (flight_tbl.c.Month == 2, flight_tbl.c.DayofMonth == 29)
for row in qry.filter(*feb_29_criteria).limit(10):
    print(row)

(1988, 2, 29, 1, 957, 1000, 1054, 1104, 'PI', 894, None, 57, 64, None, -10, -3, 'DCA', 'SYR', 298, None, None, 0, None, 0, None, None, None, None, None)
(1988, 2, 29, 1, 704, 705, 746, 749, 'PI', 894, None, 42, 44, None, -3, -1, 'JAX', 'CHS', 193, None, None, 0, None, 0, None, None, None, None, None)
(1988, 2, 29, 1, 1121, 1125, 1204, 1200, 'PI', 894, None, 43, 35, None, 4, -4, 'SYR', 'BUF', 134, None, None, 0, None, 0, None, None, None, None, None)
(1988, 2, 29, 1, 1624, 1625, 1711, 1710, 'PI', 895, None, 47, 45, None, 1, -1, 'JFK', 'BDL', 106, None, None, 0, None, 0, None, None, None, None, None)
(1988, 2, 29, 1, 2200, 2147, 2230, 2227, 'PI', 896, None, 30, 40, None, 3, 13, 'BWI', 'CHO', 120, None, None, 0, None, 0, None, None, None, None, None)
(1988, 2, 29, 1, 1825, 1820, 1929, 1930, 'PI', 896, None, 64, 70, None, -1, 5, 'LGA', 'ROC', 254, None, None, 0, None, 0, None, None, None, None, None)
(1988, 2, 29, 1, 1954, 2000, 2057, 2104, 'PI', 896, None, 63, 64, None, -7, -6, 'ROC', 'BW