# Integrating Vertica with Dask
## Imports and basic setup

In [None]:
import numpy as np
import pandas as pd
import dask
import dask.dataframe as dd
from distributed.client import Client

import vertica_python
from verticapy.vdataframe import vDataFrame
from dask_vertica import (
    to_vertica,
    read_vertica,
)


# Vertica schema used for the demo table in the cells below.
schema = "coiled_schema"

# Connection settings handed to to_vertica / read_vertica.
# The host, user, password and database values are placeholders: replace them
# with real values before running the notebook.
vdb = dict(
    host='xx.xx.xx.xx',
    port=5433,
    user='<vertica_db_username>',
    password='<vertica_db_password>',
    database='<vertica_db>',
    connection_load_balance=True,
    session_label='py',
    unicode_error='strict',
)
# Create a Dask distributed client with 4 workers. No scheduler address is
# given, so this presumably starts a local cluster -- confirm for your setup.
client = Client(n_workers=4)
client  # last expression in the cell, so the notebook displays the client

## Create a large(-ish) time series dataframe 
(1 year of 30-sec data)

In [None]:
# larger DF with more partitions
# Synthetic Dask time series covering 2000-01-01 through 2000-12-31 with one
# row every 30 seconds (freq) and one partition per week (partition_freq).
# The frame is used below as the upload payload for Vertica.
demo_ts = dd.demo.make_timeseries(
    start="2000-01-01",
    end="2000-12-31",
    freq="30s",
    partition_freq="1W"
)

## Upload the dataframe to a Vertica database
(this will overwrite any existing table of the same name)

In [None]:
# Write the Dask dataframe to table "demo_ts" in `schema`, connecting with the
# `vdb` settings. if_exists="overwrite" replaces any existing table of the
# same name.
to_vertica(demo_ts, vdb, "demo_ts", schema=schema, if_exists="overwrite")

## Try the upload again, this time with `if_exists="error"` to prevent overwriting

In [None]:
# Repeat the upload with if_exists="error". The "demo_ts" table was written by
# the previous upload cell, so this call is expected to raise RuntimeError
# instead of overwriting it.
try:
    to_vertica(demo_ts, vdb, "demo_ts", schema=schema, if_exists="error")
except RuntimeError:
    print("Successfully avoided disaster")
else:
    # Nothing was raised, so the safeguard did not trigger; report it rather
    # than letting the cell finish silently.
    print("No RuntimeError raised: the upload was not blocked")

## Now fetch the table we uploaded as a dataframe and do some calculations

In [None]:
# Load the uploaded "demo_ts" table back from Vertica as a dataframe.
# NOTE(review): the positional 50 is presumably the number of partitions to
# read into -- confirm against the read_vertica signature.
demo_df = read_vertica(vdb, "demo_ts", 50, schema=schema)

# Mean of column "y" within each "name" group.
demo_means = demo_df.groupby("name")["y"].mean()
demo_means  # dask series, uncomputed

### Now do the computation

In [None]:
# Run the uncomputed groupby/mean built in the previous cell and show the
# resulting values.
demo_means.compute()