# Example Dataframe to Vertica

In [None]:
import pyarrow as pa
import sqlalchemy as sa
from pandas import DataFrame
from sqlalchemy.engine import create_engine

from dbhelper.util import DBUtil, VerticaCopy

## Functions to Convert Column Types and Build the CREATE TABLE DDL

In [None]:
def get_ddl(df: DataFrame, schema: str, table: str):
    """Build the ``CREATE TABLE IF NOT EXISTS`` statement for *df*.

    Args:
        df: DataFrame whose columns are turned into column definitions by
            ``type_pyarrow_to_vertica``.
        schema: Schema name, interpolated as-is into the statement.
        table: Table name, interpolated as-is into the statement.

    Returns:
        The DDL as a single string: a header line, one column definition
        per line separated by commas, and a closing ``);`` line.
    """
    column_defs: list[str] = type_pyarrow_to_vertica(df)
    header = f"CREATE TABLE IF NOT EXISTS {schema}.{table} (\n"
    body = ",\n".join(column_defs)
    # NOTE: schema and table are not quoted or escaped here.
    return f"{header}{body}\n);"


def type_pyarrow_to_vertica(df: DataFrame) -> list[str]:
    """Translate each column of *df* into a Vertica column definition.

    Every column dtype is expected to expose ``pyarrow_dtype`` (i.e. the
    frame is pyarrow-backed); a dtype without that attribute raises
    ``AttributeError``.

    Mapping, in the order the checks are applied:

    * string    -> ``CHAR(n)`` when the longest value is at most 10 UTF-8
                   octets (minimum width 1), otherwise ``VARCHAR(n + 1)``
    * null      -> ``VARCHAR(10)``
    * boolean   -> ``BOOLEAN``
    * integer   -> ``INT``
    * floating  -> ``FLOAT``
    * date      -> ``DATE``
    * timestamp -> ``TIMESTAMP``
    * anything else -> ``VARCHAR(n + 10)`` where ``n`` is the longest
      ``str(value)`` in UTF-8 octets

    Args:
        df: DataFrame to inspect.

    Returns:
        One ``'"<column>" <TYPE>'`` string per column, in column order.
        Double quotes inside a column name are doubled so the quoted
        identifier stays well-formed.
    """
    sql_cols: list[str] = []
    for col, d in df.dtypes.items():
        # Escape embedded double quotes; otherwise the identifier would be
        # terminated early and the generated DDL would be malformed.
        name = str(col).replace('"', '""')
        pa_type = d.pyarrow_dtype

        if pa.types.is_string(pa_type):
            # NUL characters are not counted toward the width. The pattern
            # is a literal character, so a plain substring replace suffices.
            sr = df[col].str.replace("\x00", "", regex=False)
            # Missing values are not ``str`` and count as zero octets.
            octets = sr.apply(
                lambda x: len(x.encode("utf-8")) if isinstance(x, str) else 0
            )
            # ``default=0`` keeps a zero-row column from raising ValueError.
            max_octets = max(octets, default=0)
            if max_octets <= 10:
                # CHAR needs a width of at least 1.
                sql_cols.append(f'"{name}" CHAR({max(max_octets, 1)})')
            else:
                sql_cols.append(f'"{name}" VARCHAR({max_octets + 1})')
        elif pa.types.is_null(pa_type):
            sql_cols.append(f'"{name}" VARCHAR(10)')
        elif pa.types.is_boolean(pa_type):
            sql_cols.append(f'"{name}" BOOLEAN')
        elif pa.types.is_integer(pa_type):
            sql_cols.append(f'"{name}" INT')
        elif pa.types.is_floating(pa_type):
            sql_cols.append(f'"{name}" FLOAT')
        elif pa.types.is_date(pa_type):
            sql_cols.append(f'"{name}" DATE')
        elif pa.types.is_timestamp(pa_type):
            sql_cols.append(f'"{name}" TIMESTAMP')
        else:
            # Fallback: size a VARCHAR from the textual form of each value,
            # with 10 octets of headroom. ``default=0`` covers zero rows.
            octets = df[col].apply(lambda x: len(str(x).encode()))
            max_octets = max(octets, default=0)
            sql_cols.append(f'"{name}" VARCHAR({max_octets + 10})')
    return sql_cols

## How to Use

In [None]:
# Connection string (SQLAlchemy URL) used to connect to Vertica.
vertica_dsn: str = "vertica_python+vertica://username:password@localhost:5432/VMart"

# Create the SQLAlchemy engine from `vertica_dsn`.
vertica_engine: sa.Engine = create_engine(vertica_dsn)

# Names of the target schema and table.
table: str = "my_table"
schema: str = "my_schema"


# Obtain the pandas DataFrame to load.
# Example placeholder -- replace with your own code.
df: DataFrame = DataFrame()  # change code

# Convert dtypes to pyarrow-backed types; `type_pyarrow_to_vertica` reads
# `pyarrow_dtype` from every column dtype.
df = df.convert_dtypes(dtype_backend="pyarrow")


# Create the helper that copies a DataFrame into Vertica.
# Library: dbhelper
# Install: `pip install git+https://github.com/attapon-th/dbhelper@latest`
v = VerticaCopy(vertica_engine, schema, table)

# Run every statement inside try/finally so the connection is closed even
# when one of the steps raises.
try:
    # Build the CREATE TABLE statement for the DataFrame.
    sql_ddl: str = get_ddl(df, schema, table)

    # Execute Vertica SQL:
    # drop the table if it already exists ...
    v.execute(f"DROP TABLE IF EXISTS {v.schema}.{table}")
    # ... then create it from the generated DDL.
    v.execute(sql_ddl)

    # Copy the DataFrame into Vertica as CSV text (without the index).
    v.copy(df.to_csv(index=False))

    # Drop the reject table (presumably created by the copy step -- verify
    # against dbhelper).
    v.execute(f"DROP TABLE IF EXISTS {v.schema}.__REJECT_{table}")
finally:
    # Close the connection to Vertica.
    v.close()