In [0]:
%sh
# Upgrade pip, then install the tableauhyperapi and tableauserverclient
# packages using the pip located under /databricks/python3.
# NOTE(review): none of these installs pin a version, so a rerun may pick up
# different releases — consider pinning once known-good versions are chosen.
/databricks/python3/bin/pip install --upgrade pip;
/databricks/python3/bin/pip install tableauhyperapi;
/databricks/python3/bin/pip install tableauserverclient;

In [0]:
import sys

# Location of the hyperleaup sources inside a Repos checkout; edit this to
# point at your own checkout.
hyperleaup_src = '/Workspace/Repos/dustin.vannoy@databricks.com/hyperleaup/src'

# Insert at index 0 so this directory is searched before the rest of sys.path.
sys.path.insert(0, hyperleaup_src)

In [0]:
from hyperleaup import HyperFile

In [0]:
# Sample lookup rows, one tuple per eye color: (id, eye_color_code, eye_color_desc).
# Every id maps to a distinct code, so the lookup contains no duplicate entries.
eye_colors = [
  (1, 'BLK', 'Black'),
  (2, 'BLU', 'Blue'),
  (3, 'BRO', 'Brown'),
  (4, 'GRN', 'Green'),
  (5, 'GRY', 'Gray'),
  (6, 'HAZ', 'Hazel'),
  (7, 'MAR', 'Maroon'),
  (8, 'XXX', 'Unknown')
]

# Register the rows as the temp view `eye_colors` so they can be queried with SQL.
(
  spark.createDataFrame(eye_colors, ['id', 'eye_color_code', 'eye_color_desc'])
  .createOrReplaceTempView('eye_colors')
)

In [0]:
# SQL that selects every column and row from the `eye_colors` temp view.
query = "SELECT * FROM eye_colors"

In [0]:
# Create a HyperFile named "eye_colors" from the SQL held in `query`.
hf = HyperFile(
    name="eye_colors",
    sql=query,
    is_dbfs_enabled=True,
)

In [0]:
# The same sample rows as a DataFrame, passed to HyperFile through `df=`
# instead of a SQL string.
df = spark.createDataFrame(
    eye_colors,
    ['id', 'eye_color_code', 'eye_color_desc'],
)
hf_2 = HyperFile(
    name="more_eye_colors",
    df=df,
    is_dbfs_enabled=True,
)

In [0]:
# Hyperleaup supports 4 creation modes:
#   'parquet'   - (default) save to a single Parquet file, then copy
#   'copy'      - use CSV format, then copy
#   'insert'    - more forgiving for null values
#   'largefile' - save to multiple Parquet files, then copy
hf_3 = HyperFile(name="even_more_eye_colors",
                 df=df,
                 is_dbfs_enabled=True,
                 creation_mode='insert')

In [0]:
# Hyperleaup supports options for how data types and null values are handled.
# They are set by passing a HyperFileConfig object to HyperFile.
#
#   timestamp_with_timezone: bool (default=False)
#     True to use the timestamptz datatype with HyperFile. Enable it when
#     using timestamp values with the Parquet creation mode.
#
#   allow_nulls: bool (default=False)
#     True to skip the default behavior of replacing null numeric and string
#     values with non-null values.
#
#   convert_decimal_precision: bool (default=False)
#     True to automatically convert decimals with precision over 18 down to 18.
#     This risks truncating data, so manually testing your decimals is
#     suggested before use.

from hyperleaup import HyperFileConfig

hf_config = HyperFileConfig(
    timestamp_with_timezone=True,
    allow_nulls=True,
    convert_decimal_precision=True,
)

hf_4 = HyperFile(
    name="plus_even_more_eye_colors",
    df=df,
    is_dbfs_enabled=True,
    config=hf_config,
)

In [0]:
# Print the Hyper file's table definition (going by the method name).
hf.print_table_def()

In [0]:
# Show the Hyper file's name — presumably the `name=` value given at construction.
print(hf.name)

In [0]:
# Show the SQL held by the Hyper file — presumably the `query` string passed as `sql=`.
print(hf.sql)

In [0]:
# Print rows from the Hyper file.
# NOTE(review): confirm whether this prints every row or only a sample.
hf.print_rows()

In [0]:
# Render the object exposed as `hf.df` with the notebook's `display` helper.
display(hf.df)

In [0]:
# Show the Hyper file's `path` attribute — presumably where the file is stored.
print(hf.path)

In [0]:
# Add your Tableau Server details here
# Note: you must have a site and project created before publishing
# NOTE(review): do not commit real credentials in this cell — prefer reading
# the username and password from a secret store or environment variable.
username = ''
password = ''
tableau_server = ''
site_id = ''
project_name = ''
datasource_name = ''

# Publish the Hyper File!
# The value returned by `publish` is reported below as the new datasource's luid.
luid = hf.publish(tableau_server_url=tableau_server,
                  username=username,
                  password=password,
                  site_id=site_id,
                  project_name=project_name,
                  datasource_name=datasource_name)
print(f'Published Hyper File as new datasource luid: {luid}')

In [0]:
# Save the Hyper file to /tmp/demo/ (the later load step reads
# /tmp/demo/eye_colors.hyper).
hf.save('/tmp/demo/')

In [0]:
%fs ls /tmp/demo/

In [0]:
# Load the saved Hyper file back from disk; this rebinds `hf` to the loaded object.
hf = HyperFile.load(path='/tmp/demo/eye_colors.hyper', is_dbfs_enabled=True)

In [0]:
# Show the `path` attribute of the reloaded Hyper file.
print(hf.path)

In [0]:
# Create new data
# Note: this rebinds `eye_colors` and `df`, replacing the sample rows and
# DataFrame defined earlier in the notebook. The column names are unchanged.
eye_colors = [
  (9, 'PNK', 'Pink'),
  (10, 'PUR', 'Purple'),
  (11, 'YEL', 'Yellow')
]
df = spark.createDataFrame(eye_colors, ['id', 'eye_color_code', 'eye_color_desc'])

In [0]:
# Append the new rows in `df` to the existing Hyper file held by `hf`.
hf.append(df=df)

In [0]:
# Print the rows again to check the result of the append.
hf.print_rows()