In [1]:
import os
from typing import Any

import backoff

# Retry settings built around the `backoff` package: exponential wait
# generator, any `Exception` treated as retryable, and a `max_value` of 8.
# NOTE(review): presumably unpacked into a backoff decorator elsewhere —
# confirm against callers. No `max_tries` / `max_time` key is set here.
BACKOFF_CONFIG: dict[str, Any] = dict(
    wait_gen=backoff.expo,
    exception=Exception,
    max_value=8,
)

class ClickhouseSettings:
    """Cluster-wide ClickHouse connection settings.

    ``USER`` and ``PASSWORD`` are taken from the ``CLICKHOUSE_USER`` and
    ``CLICKHOUSE_PASSWORD`` environment variables when this class body is
    executed. If a variable is not set, the previous literal value is used,
    so existing setups behave exactly as before.
    """

    # Comma-separated ``host:port`` entries, one per cluster node.
    NODES: str = 'clickhouse-node01:9000,clickhouse-node02:9000,clickhouse-node03:9000,clickhouse-node04:9000'
    # Credentials: environment override first, legacy default second, so real
    # secrets never have to be committed with the notebook.
    USER: str = os.environ.get('CLICKHOUSE_USER', 'user')
    PASSWORD: str = os.environ.get('CLICKHOUSE_PASSWORD', 'password')

class ClickhouseNode1:
    """Connection settings for the node at ``clickhouse-node01``.

    ``USER`` and ``PASSWORD`` are taken from the ``CLICKHOUSE_USER`` and
    ``CLICKHOUSE_PASSWORD`` environment variables when this class body is
    executed; unset variables fall back to the previous literal values.
    """

    HOST: str = 'clickhouse-node01'
    PORT: int = 9000
    # Environment override first, legacy default second, so real secrets
    # never have to be committed with the notebook.
    USER: str = os.environ.get('CLICKHOUSE_USER', 'user')
    PASSWORD: str = os.environ.get('CLICKHOUSE_PASSWORD', 'password')

class ClickhouseNode2:
    """Connection settings for the node at ``clickhouse-node02``.

    ``USER`` and ``PASSWORD`` are taken from the ``CLICKHOUSE_USER`` and
    ``CLICKHOUSE_PASSWORD`` environment variables when this class body is
    executed; unset variables fall back to the previous literal values.
    """

    HOST: str = 'clickhouse-node02'
    PORT: int = 9000
    # Environment override first, legacy default second, so real secrets
    # never have to be committed with the notebook.
    USER: str = os.environ.get('CLICKHOUSE_USER', 'user')
    PASSWORD: str = os.environ.get('CLICKHOUSE_PASSWORD', 'password')

class ClickhouseNode3:
    """Connection settings for the node at ``clickhouse-node03``.

    ``USER`` and ``PASSWORD`` are taken from the ``CLICKHOUSE_USER`` and
    ``CLICKHOUSE_PASSWORD`` environment variables when this class body is
    executed; unset variables fall back to the previous literal values.
    """

    HOST: str = 'clickhouse-node03'
    PORT: int = 9000
    # Environment override first, legacy default second, so real secrets
    # never have to be committed with the notebook.
    USER: str = os.environ.get('CLICKHOUSE_USER', 'user')
    PASSWORD: str = os.environ.get('CLICKHOUSE_PASSWORD', 'password')

class ClickhouseNode4:
    """Connection settings for the node at ``clickhouse-node04``.

    ``USER`` and ``PASSWORD`` are taken from the ``CLICKHOUSE_USER`` and
    ``CLICKHOUSE_PASSWORD`` environment variables when this class body is
    executed; unset variables fall back to the previous literal values.
    """

    HOST: str = 'clickhouse-node04'
    PORT: int = 9000
    # Environment override first, legacy default second, so real secrets
    # never have to be committed with the notebook.
    USER: str = os.environ.get('CLICKHOUSE_USER', 'user')
    PASSWORD: str = os.environ.get('CLICKHOUSE_PASSWORD', 'password')

# One settings instance per node, in node-number order, plus the
# cluster-wide settings object.
NODES = [
    node_cls()
    for node_cls in (ClickhouseNode1, ClickhouseNode2, ClickhouseNode3, ClickhouseNode4)
]
CLICKHOUSE_CONFIG = ClickhouseSettings()


In [2]:
# JDBC connection details; only the first entry of NODES is used here.
properties = dict(
    user=NODES[0].USER,
    password=NODES[0].PASSWORD,
    driver='com.github.housepower.jdbc.ClickHouseDriver',
)
clickhouse_url = 'jdbc:clickhouse://{}:{}'.format(NODES[0].HOST, NODES[0].PORT)

In [None]:
from pyspark.sql import SparkSession

# Build (or reuse) a Spark session against the standalone master, with the
# ClickHouse native JDBC jar registered through `spark.jars`.
appName = 'clickhouse_connector'
spark = (
    SparkSession.builder
    .appName(appName)
    .master('spark://spark-master:7077')
    .config('spark.jars', '/opt/clickhouse-native-jdbc-shaded-2.7.1.jar')
    .getOrCreate()
)

In [6]:
# Load the query result over JDBC into a DataFrame, using the connection
# details prepared in `properties` and `clickhouse_url`.
query = 'SELECT * from default.candles ORDER BY begin DESC'
d = (
    spark.read.format('jdbc')
    .options(
        driver=properties.get('driver'),
        url=clickhouse_url,
        user=properties.get('user'),
        password=properties.get('password'),
        query=query,
    )
    .load()
)

In [7]:
# Fetch 15 rows of the query result to the driver; the cell output below
# shows them as a list of Row objects.
d.take(15)

                                                                                

[Row(id='0c0f3d29-16c0-4131-bef6-cfaf2045bf4a', secid='MTSS', open=251.65, close=251.65, high=251.65, low=251.65, value=27681.5, volume=110.0, begin=datetime.datetime(2023, 12, 7, 20, 49), end=datetime.datetime(2023, 12, 7, 20, 49, 59), created_at=datetime.datetime(2023, 12, 8, 1, 49, 54)),
 Row(id='7ac7522a-e495-41be-bd64-1c743168f920', secid='FLOT', open=121.88, close=121.88, high=121.88, low=121.76, value=135277.19999999995, volume=1110.0, begin=datetime.datetime(2023, 12, 7, 20, 49), end=datetime.datetime(2023, 12, 7, 20, 49, 59), created_at=datetime.datetime(2023, 12, 8, 1, 49, 54)),
 Row(id='8b3ecc1a-48a9-4d0b-b1a7-1c4e83095440', secid='PLZL', open=10593.5, close=10602.0, high=10604.0, low=10593.5, value=5841553.5, volume=551.0, begin=datetime.datetime(2023, 12, 7, 20, 49), end=datetime.datetime(2023, 12, 7, 20, 49, 59), created_at=datetime.datetime(2023, 12, 8, 1, 49, 54)),
 Row(id='d7fb26bc-c696-4a88-b0a8-a4eaa6da6557', secid='RUAL', open=35.98, close=36.0, high=36.0, low=35.97