In [134]:
from __future__ import annotations

import dataclasses
import random
import re
import string
import subprocess
import time
from io import StringIO

import hypothesis.strategies as st
import pandas as pd
import pytest
from hypothesis import settings
from hypothesis.stateful import (
    RuleBasedStateMachine,
    rule,
    Bundle,
    initialize,
    run_state_machine_as_test,
    consumes,
)

types = [
    "Int8",
    "Int16",
    "Int32",
    "Int64",
    "Int128",
    "Int256",
    "UInt8",
    "UInt16",
    "UInt32",
    "UInt64",
    "UInt128",
    "UInt256",
    "Float32",
    "Float64"
    # 'Decimal(P, S)',
    # 'Decimal32(S)', 'Decimal64(S)', 'Decimal128(S)', 'Decimal256(S)',
    "Bool",
    "String",
    # 'FixedString(N),
    "UUID",
    "Date",
    "Date32",
    "DateTime",
    # 'DateTime64(precision)'
    "Enum8",
    "Enum16",
    # Array(T)
    # Tuple(T1, T2, ...)
    # Nullable(T)
    "IPv4",
    "IPv6",
    # LowCardinality(T)
    # 'Map(key, value)'
    # Nested(Name1 Type1, Name2 Type2, ...)
    # SimpleAggregateFunction(name, types_of_arguments...)
    # AggregateFunction(name, types_of_arguments...)
    # 'Point',
    # 'Ring',
    # 'Polygon'
    # 'MultiPolygon'
    # 'Interval',
    # 'Dynamic'
]
projections = []

columns = st.one_of()

table_name = st.text()
with_partition_by = random.random() < 0.6
with_sample_by = random.random() < 0.6
with_primary_key = random.random() < 0.6
order_by = st.one_of(st.just("tuple()"), st.just('1'))
# partition_by = st.one_of(columns)
# sample_by = st.one_of(columns)
primary_key = st.one_of(order_by)


# sample_by = st.one_of()


@st.composite
def st_column(draw):
    pass


@st.composite
def st_columns(draw):
    pass


# Define strategies for different ClickHouse types
int_types = st.sampled_from(
    ["Int8", "Int16", "Int32", "Int64", "UInt8", "UInt16", "UInt32", "UInt64"]
)
float_types = st.sampled_from(["Float32", "Float64"])
decimal_types = st.builds(
    lambda p, s: f"Decimal({p},{s})", st.integers(1, 76), st.integers(0, 76)
)
fixed_string_type = st.builds(
    lambda p: f"FixedString({p})", st.integers(1, 255)
)

date_types = st.sampled_from(["Date", "DateTime", "DateTime64(3)"])

# Composite strategy for column types
st_base_column_types = st.one_of(
    int_types,
    float_types,
    decimal_types,
    date_types,
    st.just("String"),
    fixed_string_type,
    st.just("UUID"),
    st.just("IPv4"),
    st.just("IPv6"),
)


array_types = st.recursive(
    st_base_column_types,
    lambda s: st.builds(lambda t: f"Array({t})", s),
    max_leaves=3
)


nullable = st.builds(
    lambda t: f"Nullable({t})",
    st_base_column_types,
)

# Strategy for column nameswhitespace = ' \t\n\r\v\f'
whitespace = ' \t\n\r\v\f'
ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
# punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
# no backtick `
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_{|}~"""
printable = digits + ascii_letters + punctuation + whitespace

column_name_strategy = st.text(
    alphabet=printable, min_size=1, max_size=1000
)

# Composite strategy for a single column
column_strategy = st.builds(
    lambda name, type: {"name": name, "type": type},
    column_name_strategy,
    st_base_column_types,
)

# Strategy for a list of unique columns
columns_strategy = st.lists(
    column_strategy, min_size=1, max_size=500, unique_by=lambda x: x["name"]
)

print(columns_strategy.example())


[{'name': 'JSE#f@3nuJEnd/{/eQ', 'type': 'Int64'}, {'name': '[\tyo(7/,<5RTRe\\j|', 'type': 'IPv6'}, {'name': '-=i6ks-iK', 'type': builds(lambda p: f"FixedString({p})", integers(min_value=1, max_value=255))}, {'name': 'd)=', 'type': 'Float64'}, {'name': 'C_c<S', 'type': 'Decimal(59,51)'}, {'name': 'gd,', 'type': 'IPv4'}, {'name': 'A\rR', 'type': 'Decimal(9,44)'}, {'name': 'wSo5>b?g 3', 'type': 'IPv6'}, {'name': 'p6A', 'type': 'UInt32'}, {'name': 'u\x0cSFo?mKL}', 'type': 'Float32'}, {'name': '&Z(G\rCo) ', 'type': 'IPv6'}, {'name': 'lO\x0c_{FGUp\r3JgZjU\n?nM', 'type': 'IPv6'}, {'name': "2U%}-0)MYM'O\t\n", 'type': 'String'}, {'name': 'j', 'type': 'IPv6'}, {'name': ".\x0b\\*Vd'-", 'type': 'Float32'}, {'name': '(~_=#<($MvDUzFVd*', 'type': builds(lambda p: f"FixedString({p})", integers(min_value=1, max_value=255))}, {'name': 'I=', 'type': 'IPv4'}, {'name': 'n', 'type': 'String'}, {'name': 'N.P', 'type': 'Float64'}, {'name': 'gX', 'type': 'Decimal(59,73)'}, {'name': '\x0ck3z8', 'type': 'Date'},