### **Import dependencies and load environment variables from the .env file**

In [9]:
from dotenv import load_dotenv
import os
from IPython.display import display, HTML, JSON, Markdown, Image
import pandas as pd
from azure.kusto.data import KustoClient, KustoConnectionStringBuilder
from azure.kusto.data.exceptions import KustoServiceError
from azure.kusto.data.helpers import dataframe_from_result_table

# Read the .env file via python-dotenv, then look up each connection setting
# in the process environment. A variable that is not set comes back as None.
load_dotenv()

# Azure AD tenant used as the authentication authority.
AAD_TENANT_ID = os.environ.get("AAD_TENANT_ID")

# Target ADX (Kusto) cluster, database and table.
KUSTO_CLUSTER = os.environ.get("KUSTO_CLUSTER")
KUSTO_DATABASE = os.environ.get("KUSTO_DATABASE")
KUSTO_TABLE = os.environ.get("KUSTO_TABLE")

# Application id and secret used to authenticate against the cluster.
KUSTO_MANAGED_IDENTITY_APP_ID = os.environ.get("KUSTO_MANAGED_IDENTITY_APP_ID")
KUSTO_MANAGED_IDENTITY_SECRET = os.environ.get("KUSTO_MANAGED_IDENTITY_SECRET")


In [10]:
# Load the pre-processed entities and embeddings CSV files into pandas dataframes.
import pandas as pd

entities_csv_path = "./data/processed/entities.csv"
embeddings_csv_path = "./data/processed/embeddings.csv"

entities_df = pd.read_csv(entities_csv_path)
embeddings_df = pd.read_csv(embeddings_csv_path)

In [16]:
# Names used by the cells below: the cluster/database aliases and the two
# ADX tables that receive the embeddings and the extracted entities.
cluster = KUSTO_CLUSTER
kusto_db = KUSTO_DATABASE
embeddings_table = "aviationIncidentsEmbeddings"
entities_table = "aviationIncidentsEntities"

# Authenticate with the AAD application (app id + secret) in the configured
# tenant, then open the Kusto client that the later cells send commands through.
kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(
    cluster,
    KUSTO_MANAGED_IDENTITY_APP_ID,
    KUSTO_MANAGED_IDENTITY_SECRET,
    AAD_TENANT_ID,
)
client = KustoClient(kcsb)

In [12]:
# Create the embeddings table in ADX. Every column, including the embedding
# itself, is declared as a string.
embeddings_columns = (
    "document_id:string",
    "document_name:string",
    "content:string",
    "embedding:string",
)
embeddings_schema = ", ".join(embeddings_columns)
createTableCommand = f".create table {embeddings_table} ({embeddings_schema})"

# `.create table` is a management command, hence execute_mgmt.
response = client.execute_mgmt(KUSTO_DATABASE, createTableCommand)

# Show the table description returned by the service.
dataframe_from_result_table(response.primary_results[0])


Unnamed: 0,TableName,Schema,DatabaseName,Folder,DocString
0,avationIncidentsEmbeddings,"{""Name"":""avationIncidentsEmbeddings"",""OrderedC...",embeddings,,


In [13]:
# Create the entities table in ADX. The extracted entities are stored in a
# `dynamic` column next to the document id and name.
entities_columns = (
    "document_id:string",
    "document_name:string",
    "entities:dynamic",
)
entities_schema = ", ".join(entities_columns)
createTableCommand = f".create table {entities_table} ({entities_schema})"

# `.create table` is a management command, hence execute_mgmt.
response = client.execute_mgmt(KUSTO_DATABASE, createTableCommand)

# Show the table description returned by the service.
dataframe_from_result_table(response.primary_results[0])

Unnamed: 0,TableName,Schema,DatabaseName,Folder,DocString
0,avationIncidentsEntities,"{""Name"":""avationIncidentsEntities"",""OrderedCol...",embeddings,,


In [14]:
# Ingest the embeddings dataframe into its ADX table using inline ingestion.
# The dataframe is serialised to CSV with a header row and without the pandas
# index; `ignoreFirstRecord=true` asks ADX to skip that header row.
# NOTE(review): inline ingestion presumably appends to the table, so running
# this cell again would load the same rows a second time — confirm before re-running.
embeddings_csv = embeddings_df.to_csv(index=False)

# `.ingest inline` is a management command, so it is sent through execute_mgmt
# like the `.create table` commands instead of relying on execute() to route it.
# Surrounding whitespace is stripped explicitly so that no trailing blank is
# sent after the CSV payload.
ingestTableCommand = (
    f".ingest inline into table {embeddings_table} with (ignoreFirstRecord=true) <| {embeddings_csv} "
).strip()
response = client.execute_mgmt(KUSTO_DATABASE, ingestTableCommand)

# Show the ingestion summary returned by the service.
dataframe_from_result_table(response.primary_results[0])

Unnamed: 0,ExtentId,ItemLoaded,Duration,HasErrors,OperationId
0,c09c17d4-a7e5-4389-a948-32f710a5eea8,inproc:7a856313-6ac3-46c3-841c-dc5971c33c90,0 days 00:00:00.453986,False,28a4390a-4b29-4ac2-b1c8-e0733cdec36b


In [15]:
# Ingest the entities dataframe into its ADX table using inline ingestion.
# The dataframe is serialised to CSV with a header row and without the pandas
# index; `ignoreFirstRecord=true` asks ADX to skip that header row.
# NOTE(review): inline ingestion presumably appends to the table, so running
# this cell again would load the same rows a second time — confirm before re-running.
entities_csv = entities_df.to_csv(index=False)

# `.ingest inline` is a management command, so it is sent through execute_mgmt
# like the `.create table` commands instead of relying on execute() to route it.
# Surrounding whitespace is stripped explicitly so that no trailing blank is
# sent after the CSV payload.
ingestTableCommand = (
    f".ingest inline into table {entities_table} with (ignoreFirstRecord=true) <| {entities_csv} "
).strip()
response = client.execute_mgmt(KUSTO_DATABASE, ingestTableCommand)

# Show the ingestion summary returned by the service.
dataframe_from_result_table(response.primary_results[0])

Unnamed: 0,ExtentId,ItemLoaded,Duration,HasErrors,OperationId
0,f564cc69-ec31-4c65-b698-f1af6a59391a,inproc:9414c04d-9945-4bb3-b8da-2e99a51ea67c,0 days 00:00:00.140638500,False,51508124-8730-4a86-8520-0f5556b5cf88
