# Clickhouse utilities

Connect to clickhouse database

In [2]:
import numpy as np
import numpy
import inspect
import pandas as pd
import sqlalchemy as sa
from sqlalchemy.orm import sessionmaker
from sqlalchemy import func

import clickhouse_util as ch_util
from clickhouse_util import get_clickhouse_create_sql

In [3]:
%load_ext autoreload
%autoreload 2

Create SqlAlchemy engine

In [4]:
ch_url = 'clickhouse+native://default:@localhost/default'
engine = sa.create_engine(ch_url)

Get tables

In [8]:
sql = "show tables from default"
pd.read_sql(sql, engine)

Unnamed: 0,name
0,.inner_id.3489d528-1eb4-44a7-b489-d5281eb444a7
1,flight
2,flight_view
3,visits


In [12]:
sql = "select database, type, name, type, position from system.columns where database = 'default' and table = 'flight'"
pd.read_sql(sql, engine)

Unnamed: 0,database,type,name,type.1,position
0,default,Int16,Year,Int16,1
1,default,Int8,Month,Int8,2
2,default,Int16,DayofMonth,Int16,3
3,default,Int8,DayOfWeek,Int8,4
4,default,Nullable(Int16),DepTime,Nullable(Int16),5
5,default,Int16,CRSDepTime,Int16,6
6,default,Nullable(Int16),ArrTime,Nullable(Int16),7
7,default,Int16,CRSArrTime,Int16,8
8,default,String,UniqueCarrier,String,9
9,default,Int32,FlightNum,Int32,10


Use pandas to read from SqlAlchemy engine

In [14]:
sql = 'select * from flight limit 100000'
df = pd.read_sql(sql, engine)

Get create sql

In [15]:
print(get_clickhouse_create_sql(df, 'flight', ['Year']))

create table flight (
	Year Int64,
	Month Nullable(Int64),
	DayofMonth Nullable(Int64),
	DayOfWeek Nullable(Int64),
	DepTime Nullable(Float64),
	CRSDepTime Nullable(Int64),
	ArrTime Nullable(Float64),
	CRSArrTime Nullable(Int64),
	UniqueCarrier Nullable(String),
	FlightNum Nullable(Int64),
	TailNum Nullable(String),
	ActualElapsedTime Nullable(Float64),
	CRSElapsedTime Nullable(Float64),
	AirTime Nullable(Float64),
	ArrDelay Nullable(Float64),
	DepDelay Nullable(Float64),
	Origin Nullable(String),
	Dest Nullable(String),
	Distance Nullable(Int64),
	TaxiIn Nullable(Float64),
	TaxiOut Nullable(Float64),
	Cancelled Nullable(Int64),
	CancellationCode Nullable(String),
	Diverted Nullable(Int64),
	CarrierDelay Nullable(String),
	WeatherDelay Nullable(String),
	NASDelay Nullable(String),
	SecurityDelay Nullable(String),
	LateAircraftDelay Nullable(String)
)
Engine = MergeTree
Order by Year


In [18]:
metadata = sa.MetaData(bind=engine)
metadata.reflect(only=['flight'])
flight_tbl = metadata.tables['flight']

for column in flight_tbl.columns:
    print(column.name)

Year
Month
DayofMonth
DayOfWeek
DepTime
CRSDepTime
ArrTime
CRSArrTime
UniqueCarrier
FlightNum
TailNum
ActualElapsedTime
CRSElapsedTime
AirTime
ArrDelay
DepDelay
Origin
Dest
Distance
TaxiIn
TaxiOut
Cancelled
CancellationCode
Diverted
CarrierDelay
WeatherDelay
NASDelay
SecurityDelay
LateAircraftDelay


Use SqlAlchemy ORM to query the database

In [20]:
Session = sessionmaker(bind=engine)
session = Session()

qry = session.query(flight_tbl)

for row in session.query(flight_tbl).filter(
        flight_tbl.c.Month == 2, flight_tbl.c.DayofMonth == 29).limit(10):
    print(row)

(1988, 2, 29, 1, 957, 1000, 1054, 1104, 'PI', 894, None, 57, 64, None, -10, -3, 'DCA', 'SYR', 298, None, None, 0, None, 0, None, None, None, None, None)
(1988, 2, 29, 1, 704, 705, 746, 749, 'PI', 894, None, 42, 44, None, -3, -1, 'JAX', 'CHS', 193, None, None, 0, None, 0, None, None, None, None, None)
(1988, 2, 29, 1, 1121, 1125, 1204, 1200, 'PI', 894, None, 43, 35, None, 4, -4, 'SYR', 'BUF', 134, None, None, 0, None, 0, None, None, None, None, None)
(1988, 2, 29, 1, 1624, 1625, 1711, 1710, 'PI', 895, None, 47, 45, None, 1, -1, 'JFK', 'BDL', 106, None, None, 0, None, 0, None, None, None, None, None)
(1988, 2, 29, 1, 2200, 2147, 2230, 2227, 'PI', 896, None, 30, 40, None, 3, 13, 'BWI', 'CHO', 120, None, None, 0, None, 0, None, None, None, None, None)
(1988, 2, 29, 1, 1825, 1820, 1929, 1930, 'PI', 896, None, 64, 70, None, -1, 5, 'LGA', 'ROC', 254, None, None, 0, None, 0, None, None, None, None, None)
(1988, 2, 29, 1, 1954, 2000, 2057, 2104, 'PI', 896, None, 63, 64, None, -7, -6, 'ROC', 'BW