In [1]:
# Support async code in notebooks. nest_asyncio patches asyncio to allow nested
# event loops; read more about why this is needed here:
# https://pypi.org/project/nest-asyncio/
import nest_asyncio

# Apply the patch up front, in the first cell of the notebook.
nest_asyncio.apply()

### Logging

In [2]:
# Configure the root logger: INFO and above, rendered as
# "timestamp - logger name - level - message".
import logging

LOG_FORMAT = "{asctime} - {name} - {levelname} - {message}"

# style="{" tells logging that LOG_FORMAT uses str.format-style placeholders.
logging.basicConfig(level="INFO", style="{", format=LOG_FORMAT)

### Connection

The ingestion library requires a connection object from the Firebolt Python SDK. The following cells demonstrate how to create one from the SDK's ResourceManager. Please refer to the SDK documentation for additional ways to generate a connection object. If you already have a connection object, you can skip this section!

Create a `.env` file in the same directory as this notebook with the following contents (fill in the values):
```
FIREBOLT_USER=''
FIREBOLT_PASSWORD=''
FIREBOLT_SERVER=''
FIREBOLT_DEFAULT_REGION=''
```


In [3]:
from firebolt.service.manager import ResourceManager
from firebolt.common import Settings  # NOTE(review): not used in the visible cells

# No settings are passed explicitly; presumably the FIREBOLT_* values from the
# .env file described above are picked up automatically - TODO confirm.
# The recorded output of this cell suggests passing an Auth object through
# Settings(auth=...) instead.
rm = ResourceManager()

 Please consider passing Auth object instead.
 Examples:
  >>> from firebolt.client.auth import UsernamePassword
  >>> ...
  >>> settings = Settings(auth=UsernamePassword(username, password), ...)
 or
  >>> from firebolt.client.auth import Token
  >>> ...
  >>> settings = Settings(auth=Token(access_token), ...)


In [4]:
# Show the object's repr as the cell output to confirm it was constructed.
rm

<firebolt.service.manager.ResourceManager at 0x7fd019dfaa30>

In [5]:
import time

# Optionally replace "" with the name of an EXISTING database / engine to connect
# to it. Anything left as "" falls back to a unique temporary name, and the
# matching resource is created for demo purposes.
# (Supply both names when reusing an existing engine; otherwise an unused
# temporary database is created alongside it.)
default_name = f"temp_{int(time.time())}"
database_name = "" or default_name
engine_name = "" or default_name
table_name = "" or default_name

# A resource is only created when its name was left blank. Previously create()
# was called unconditionally, so a user-supplied name of an existing
# database/engine was never actually connected to.
create_database = database_name == default_name
create_engine = engine_name == default_name

if create_database:
    database = rm.databases.create(name=database_name, region="us-east-1")
else:
    database = rm.databases.get_by_name(name=database_name)

if create_engine:
    engine = rm.engines.create(name=engine_name)
    # Attach using the object already held instead of re-fetching it by name.
    engine.attach_to_database(database=database)
else:
    # An existing engine keeps the database it is already attached to.
    engine = rm.engines.get_by_name(name=engine_name)

# Starting the engine took about five minutes in the recorded run of this cell.
engine = engine.start()
connection = engine.get_connection()

2022-07-01 12:32:04,112 - firebolt.service.database - INFO - Creating Database (name=temp_1656671523)
2022-07-01 12:32:05,548 - firebolt.service.engine - INFO - Creating Engine (name=temp_1656671523)
2022-07-01 12:32:09,261 - firebolt.service.binding - INFO - Attaching Engine (engine_id=4877cbd3-5243-4847-96ed-1123999d9354, name=temp_1656671523) to Database (database_id=79bab674-096d-4ddd-ba8c-53ebc33f7372, name=temp_1656671523)
2022-07-01 12:32:13,838 - firebolt.model.engine - INFO - Starting Engine (engine_id=4877cbd3-5243-4847-96ed-1123999d9354, name=temp_1656671523)
2022-07-01 12:34:25,745 - firebolt.model.engine - INFO - Engine status_summary=ENGINE_STATUS_SUMMARY_STARTING_INITIALIZING
2022-07-01 12:37:07,127 - firebolt.model.engine - INFO - Engine status_summary=ENGINE_STATUS_SUMMARY_RUNNING


### Table configuration - Python


Define a table using native Python.

In [6]:
from firebolt_ingest.table_model import Table, Column, Partition

# Two-column demo schema: a STRING column and an INT column.
demo_columns = [
    Column(name="col_1", type="STRING"),
    Column(name="col_2", type="INT"),
]

# Table definition: indexed on col_1, PARQUET file type, "*.parquet" object pattern.
table = Table(
    table_name=table_name,
    columns=demo_columns,
    primary_index=["col_1"],
    file_type="PARQUET",
    object_pattern=["*.parquet"],
)

### Table configuration - YAML

Define a table in YAML. The first cell uses the previous table object and converts it into YAML. The second cell shows how to take a YAML file and parse it into a `Table` object.

In [7]:
import yaml

# Serialize the Table's dict form to a YAML string.
table_as_dict = table.dict()
table_yaml_config = yaml.dump(table_as_dict)

# print() rather than a bare expression so the newlines render as YAML.
print(table_yaml_config)

columns:
- alias: null
  extract_partition: null
  name: col_1
  nullable: null
  type: STRING
  unique: null
- alias: null
  extract_partition: null
  name: col_2
  nullable: null
  type: INT
  unique: null
compression: null
csv_skip_header_row: null
file_type: PARQUET
json_parse_as_text: null
object_pattern:
- '*.parquet'
partitions: []
primary_index:
- col_1
table_name: temp_1656671523



In [8]:
# Round-trip: build a Table from the YAML string produced in the previous cell.
# NOTE(review): this rebinds `table`, replacing the object defined in Python above.
table = Table.parse_yaml(table_yaml_config)
# Bare last expression so the notebook displays the parsed Table's repr.
table

Table(table_name='temp_1656671523', columns=[Column(name='col_1', alias=None, type='STRING', extract_partition=None, nullable=None, unique=None), Column(name='col_2', alias=None, type='INT', extract_partition=None, nullable=None, unique=None)], primary_index=['col_1'], partitions=[], file_type='PARQUET', object_pattern=['*.parquet'], compression=None, csv_skip_header_row=None, json_parse_as_text=None)

### Create internal table

In [9]:
from firebolt_ingest.table_service import TableService

# Bind the table definition to the connection obtained from the engine above.
ts = TableService(table, connection)
# Presumably issues the DDL that creates the internal table - TODO confirm.
ts.create_internal_table()

2022-07-01 12:37:09,880 - firebolt.async_db.cursor - INFO - Query fetched 2 rows in 1.1922521591186523 seconds
