In [None]:
# Support async code inside the notebook by allowing nested use of the asyncio
# event loop. Background on why this is needed: https://pypi.org/project/nest-asyncio/
import nest_asyncio

nest_asyncio.apply()

### Connection

The ingestion library requires a connection object from the Firebolt Python SDK. The following cells demonstrate how to create one from the SDK's ResourceManager. Please refer to the SDK documentation for additional ways to generate a connection object. If you already have a connection object, you can skip this section!

Create a `.env` file in the same directory as this notebook with the following contents (fill in the values):

```text
FIREBOLT_USER=''
FIREBOLT_PASSWORD=''
FIREBOLT_SERVER=''
FIREBOLT_ENGINE=''
FIREBOLT_DATABASE=''
FIREBOLT_ACCOUNT=''
```


In [None]:
%load_ext dotenv
%dotenv

import os

e = os.getenv("FIREBOLT_DATABASE")
print(e)

### Firebolt 1.0 

In [None]:
from firebolt.service.manager import ResourceManager
from firebolt.client.auth import UsernamePassword

# Username/password credentials, taken from the environment.
auth = UsernamePassword(
    os.getenv("FIREBOLT_USER"),
    os.getenv("FIREBOLT_PASSWORD"),
)

# The resource manager is tied to the configured account and API endpoint.
resource_manager_kwargs = {
    "auth": auth,
    "account_name": os.getenv("FIREBOLT_ACCOUNT"),
    "api_endpoint": os.getenv("FIREBOLT_SERVER"),
}
rm = ResourceManager(**resource_manager_kwargs)

If the FIREBOLT_ENGINE engine and the FIREBOLT_DATABASE database already exist:

In [None]:
# Look up the engine named by FIREBOLT_ENGINE; os.environ[...] raises KeyError
# when the variable is not set.
engine_name = os.environ["FIREBOLT_ENGINE"]
engine = rm.engines.get_by_name(name=engine_name)

# Start the engine; whatever start() returns is shown as the cell output.
engine.start()

If the FIREBOLT_ENGINE engine and the FIREBOLT_DATABASE database don't exist yet:

In [None]:
# Read both names before creating anything: os.environ[...] raises a KeyError
# naming the missing variable, so a forgotten FIREBOLT_ENGINE no longer leaves
# a freshly created database behind, and None is never passed on to the SDK.
database_name = os.environ["FIREBOLT_DATABASE"]
engine_name = os.environ["FIREBOLT_ENGINE"]

database = rm.databases.create(name=database_name, region="us-east-1")
engine = rm.engines.create(name=engine_name)

# Attach the engine to the database object returned by create() above rather
# than fetching the same database again by name.
engine.attach_to_database(database=database)

# Keep the object returned by start() as the engine handle.
engine = engine.start()

Get a connection to FIREBOLT_DATABASE through FIREBOLT_ENGINE:

In [None]:
# The imports and the auth object are repeated here so this cell also works
# when the ResourceManager cells above were skipped.
from firebolt.client.auth import UsernamePassword
from firebolt.db.connection import connect
import os

auth = UsernamePassword(os.getenv("FIREBOLT_USER"), os.getenv("FIREBOLT_PASSWORD"))

connect_kwargs = {
    "account_name": os.getenv("FIREBOLT_ACCOUNT"),
    "engine_name": os.getenv("FIREBOLT_ENGINE"),
    "database": os.getenv("FIREBOLT_DATABASE"),
    "auth": auth,
}

# Connect to the same API endpoint the ResourceManager uses (FIREBOLT_SERVER).
# Without it, connect() falls back to the SDK's default endpoint, which differs
# from the one the engine was started on whenever FIREBOLT_SERVER is not the
# default. The argument is only passed when the variable is set, so the
# SDK default still applies otherwise.
api_endpoint = os.getenv("FIREBOLT_SERVER")
if api_endpoint:
    connect_kwargs["api_endpoint"] = api_endpoint

connection = connect(**connect_kwargs)

### Table configuration - Python


Define a table using native Python.

In [None]:
from firebolt_ingest.table_model import Table, Column
import time

# Put a name between the quotes to choose the table name yourself; while it is
# left empty, a timestamp-based temporary name is used instead.
custom_name = ""
default_name = f"temp_{int(time.time())}"
table_name = custom_name if custom_name else default_name

# Two example columns; the first one doubles as the primary index below.
columns = [
    Column(name="col_1", type="STRING"),
    Column(name="col_2", type="INT"),
]

table = Table(
    table_name=table_name,
    columns=columns,
    primary_index=["col_1"],
    file_type="PARQUET",
    object_pattern="*.parquet",
)

### Table configuration - YAML

Define a table in YAML. 

YAML config format:
```yaml
table_name: <Table Name>
primary_index:
  - <Primary Index Column>
columns:
  - name: <Primary Index Column>
    type: <Firebolt Column Type>
    nullable: false
  - name: <Column Name>
    type(Optional): <Firebolt Column Type> # default: TEXT
    nullable: true/false
    extract_partition(Optional): <Column Partition Expression>
    alias(Optional): <Alias Column Name>
  ...
s3_url(Optional): s3://<bucket>/<key>
object_pattern: <Object Pattern>
file_type: <Supported Firebolt File Type>
sync_mode(Optional): overwrite/append
partitions(Optional):
  - <Partition Expression>
```

- Firebolt [types](https://docs.firebolt.io/general-reference/data-types.htm)
- Supported file [types](https://docs.firebolt.io/sql-reference/commands/create-external-table.html#type)
- URL and object [pattern](https://docs.firebolt.io/sql-reference/commands/create-external-table.html#url-and-object_pattern)
- [Partitions](https://docs.firebolt.io/sql-reference/commands/create-external-table.html#partition) in Firebolt
- Working with [partitions](https://docs.firebolt.io/working-with-partitions.html)

In [None]:
from firebolt_ingest.table_model import Table

# change example_parquet.yaml as you need
# The file is read as UTF-8 explicitly so the result does not depend on the
# platform's default encoding.
with open("example_parquet.yaml", "r", encoding="utf-8") as stream:
    data_loaded = stream.read()

# Build the table definition from the YAML text.
table = Table.parse_yaml(data_loaded)

### Create external table

To create an external table you must provide an s3_url that points to your data in AWS S3.
1. You can define s3_url in your YAML config.
2. You can provide s3_url in the AWSSettings object.

If s3_url is set both in your YAML config and in the AWSSettings object, the value from the YAML file is used.

In [None]:
# Install firebolt_cli into the kernel's environment; the next cell imports
# create_aws_creds_from_environ from firebolt_cli.utils.
%pip install firebolt_cli

In [None]:
from firebolt_cli.utils import (
    create_aws_creds_from_environ,
)
from firebolt_ingest.aws_settings import AWSSettings

AWSSettings without s3_url:

In [None]:
# Credentials only; no s3_url is supplied in this variant.
aws_credentials = create_aws_creds_from_environ()
aws_settings = AWSSettings(aws_credentials=aws_credentials)

AWSSettings with s3_url:

In [None]:
# Replace <BUCKET> and <KEY> with your own values. The placeholder carries the
# s3:// scheme so it matches the s3_url format shown in the YAML template.
aws_settings = AWSSettings(
    aws_credentials=create_aws_creds_from_environ(),
    s3_url="s3://<BUCKET>/<KEY>",
)

In [None]:
from firebolt_ingest.table_service import TableService

# Bind the table definition to the open connection; `ts` is the object every
# remaining cell calls.
ts = TableService(table, connection)

### Create external table

In [None]:
# Create the external table, passing the AWS settings built in one of the
# AWSSettings cells.
ts.create_external_table(aws_settings)

### Create internal table

In [None]:
# Create the internal table for the configured table definition.
ts.create_internal_table()

### Check if the tables exist

In [None]:
# Check for the external table; the call's return value is shown as the cell output.
ts.does_external_table_exist()

In [None]:
# Check for the internal table; the call's return value is shown as the cell output.
ts.does_internal_table_exist()

### Insert data from external table to internal table

In [None]:
# Insert the data from the external table into the internal table.
ts.insert()

### Verify ingestion

In [None]:
# Run the service's ingestion verification; its return value is shown as the cell output.
ts.verify_ingestion()

### Drop external and internal table

In [None]:
# Clean up: drop the external and internal tables.
ts.drop_tables()