### **Load environment variables from the .env file**

In [1]:
from dotenv import load_dotenv
import os
from IPython.display import display, HTML, JSON, Markdown, Image
import pandas as pd
from azure.kusto.data import KustoClient, KustoConnectionStringBuilder
from azure.kusto.data.exceptions import KustoServiceError
from azure.kusto.data.helpers import dataframe_from_result_table

# Load the .env file, then read the Azure AD / Kusto (ADX) settings from the
# process environment. Any variable that is not set is read back as None.
load_dotenv()

# Azure AD tenant used for application-key authentication
AAD_TENANT_ID = os.environ.get("AAD_TENANT_ID")

# ADX cluster, database and table settings
KUSTO_CLUSTER = os.environ.get("KUSTO_CLUSTER")
KUSTO_DATABASE = os.environ.get("KUSTO_DATABASE")
KUSTO_TABLE = os.environ.get("KUSTO_TABLE")

# Application id and secret used to authenticate against the cluster
KUSTO_MANAGED_IDENTITY_APP_ID = os.environ.get("KUSTO_MANAGED_IDENTITY_APP_ID")
KUSTO_MANAGED_IDENTITY_SECRET = os.environ.get("KUSTO_MANAGED_IDENTITY_SECRET")


In [2]:
# Read the pre-processed entities and embeddings CSV files into pandas DataFrames.
# `pd` is provided by the notebook's import cell at the top, so it is not
# re-imported here (all imports live in one place).
entities_df = pd.read_csv("./data/processed/entities.csv")
embeddings_df = pd.read_csv("./data/processed/embeddings.csv")

In [3]:
# Connect to ADX using an AAD app registration (application id + secret).
#
# Fail fast with a readable message when a required setting was not found in the
# environment/.env file: os.getenv returns None for unset variables, and passing
# None on to the connection-string builder produces a much less obvious error.
_required_settings = {
    "KUSTO_CLUSTER": KUSTO_CLUSTER,
    "KUSTO_DATABASE": KUSTO_DATABASE,
    "KUSTO_MANAGED_IDENTITY_APP_ID": KUSTO_MANAGED_IDENTITY_APP_ID,
    "KUSTO_MANAGED_IDENTITY_SECRET": KUSTO_MANAGED_IDENTITY_SECRET,
    "AAD_TENANT_ID": AAD_TENANT_ID,
}
_missing_settings = [name for name, value in _required_settings.items() if not value]
if _missing_settings:
    raise ValueError(
        "Missing required environment variable(s): " + ", ".join(_missing_settings)
    )

cluster = KUSTO_CLUSTER
kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(
    cluster,
    KUSTO_MANAGED_IDENTITY_APP_ID,
    KUSTO_MANAGED_IDENTITY_SECRET,
    AAD_TENANT_ID,
)
client = KustoClient(kcsb)

# Database and table names used by the cells below
kusto_db = KUSTO_DATABASE
embeddings_table = "aviationIncidentsEmbeddings"
entities_table = "aviationIncidentsEntities"

In [4]:
# Create the embeddings table in ADX. All four columns, including the embedding
# itself, are declared as string.
createTableCommand = f".create table {embeddings_table} (document_id:string, document_name:string, content:string, embedding:string)"
response = client.execute_mgmt(
    KUSTO_DATABASE,
    createTableCommand,
)

# Show the management command's primary result as a DataFrame
create_result = response.primary_results[0]
dataframe_from_result_table(create_result)


Unnamed: 0,TableName,Schema,DatabaseName,Folder,DocString
0,aviationIncidentsEmbeddings,"{""Name"":""aviationIncidentsEmbeddings"",""Ordered...",embeddings,,


In [5]:
# Create the entities table in ADX. The entities column is declared as dynamic;
# the two document columns are strings.
createTableCommand = f".create table {entities_table} (document_id:string, document_name:string, entities:dynamic)"
response = client.execute_mgmt(
    KUSTO_DATABASE,
    createTableCommand,
)

# Show the management command's primary result as a DataFrame
create_result = response.primary_results[0]
dataframe_from_result_table(create_result)

Unnamed: 0,TableName,Schema,DatabaseName,Folder,DocString
0,aviationIncidentsEntities,"{""Name"":""aviationIncidentsEntities"",""OrderedCo...",embeddings,,


In [6]:
# Ingest the embeddings DataFrame into its ADX table with an inline ingest command.
# The DataFrame is serialised to CSV without the index; the CSV header row is
# skipped on the ADX side via ignoreFirstRecord=true.
# NOTE(review): the Kusto docs describe inline ingestion as intended for ad hoc
# testing, and re-running this cell presumably appends the rows again - confirm
# before reusing this pattern for larger or repeated loads.
embeddings_csv = embeddings_df.to_csv(index=False)
ingestTableCommand = f".ingest inline into table {embeddings_table} with (ignoreFirstRecord=true) <| {embeddings_csv} "
response = client.execute(
    KUSTO_DATABASE,
    ingestTableCommand,
)

# Show the ingestion result as a DataFrame
ingest_result = response.primary_results[0]
dataframe_from_result_table(ingest_result)

Unnamed: 0,ExtentId,ItemLoaded,Duration,HasErrors,OperationId
0,13c77a54-653a-4f32-b0fe-c09cc5e6f017,inproc:20e78e8e-6981-494c-b234-5e77ad1168d8,0 days 00:00:00.407092400,False,8838491c-61fc-4077-88a4-b0ad8ba19340


In [7]:
# Ingest the entities DataFrame into its ADX table with an inline ingest command.
# The DataFrame is serialised to CSV without the index; the CSV header row is
# skipped on the ADX side via ignoreFirstRecord=true.
# NOTE(review): the Kusto docs describe inline ingestion as intended for ad hoc
# testing, and re-running this cell presumably appends the rows again - confirm
# before reusing this pattern for larger or repeated loads.
entities_csv = entities_df.to_csv(index=False)
ingestTableCommand = f".ingest inline into table {entities_table} with (ignoreFirstRecord=true) <| {entities_csv} "
response = client.execute(
    KUSTO_DATABASE,
    ingestTableCommand,
)

# Show the ingestion result as a DataFrame
ingest_result = response.primary_results[0]
dataframe_from_result_table(ingest_result)

Unnamed: 0,ExtentId,ItemLoaded,Duration,HasErrors,OperationId
0,162c7a64-5920-45f9-b572-ded396742586,inproc:a823b7c7-d884-40f2-8103-2983dad58f0b,0 days 00:00:00.106308,False,ed77a770-7132-4dd9-b2b4-85ee76a1b5b4
