### DESCRIPTION:
    This example shows how to generate KQL (Kusto Query Language) from user input using the OpenAI GPT-3.5 completion model.
    We will query Azure Data Explorer (Kusto) using the user's input in plain English.
    
### REQUIREMENTS:
    Create a .env file in the root directory of this project containing your OpenAI API key and the following settings:
    OPENAI_DEPLOYMENT_ENDPOINT ="<your openai endpoint>" 
    OPENAI_API_KEY = "<your openai api key>"
    OPENAI_DEPLOYMENT_NAME = "<your gpt35 deployment name>"
    OPENAI_DEPLOYMENT_VERSION = "<gpt35 api version>"
    OPENAI_MODEL_NAME="<gpt35 model name>"


In [1]:
from azure.kusto.data import KustoClient, KustoConnectionStringBuilder
from azure.kusto.data.exceptions import KustoServiceError
from azure.kusto.data.helpers import dataframe_from_result_table
import utils

In [2]:
# Build a Kusto client for the Azure Data Explorer cluster, authenticating with
# an AAD application key. The cluster URL, application id, secret and tenant id
# are all read from the project-local `utils` module.
cluster = utils.KUSTO_CLUSTER
kcsb = KustoConnectionStringBuilder.with_aad_application_key_authentication(
    cluster,
    utils.KUSTO_MANAGED_IDENTITY_APP_ID,
    utils.KUSTO_MANAGED_IDENTITY_SECRET,
    utils.AAD_TENANT_ID,
)
client = KustoClient(kcsb)

In [3]:
# Smoke-test the Kusto connection: take 10 rows from the `sales` table in the
# `retail` database and print the item description of each returned row.
query = "sales | take 10"

response = client.execute("retail", query)
item_descriptions = (row["item_description"] for row in response.primary_results[0])
for description in item_descriptions:
    print("Item:{}".format(description))

Item:BOOTLEG RED - 750ML
Item:MOMENT DE PLAISIR - 750ML
Item:S SMITH ORGANIC PEAR CIDER - 18.7OZ
Item:SCHLINK HAUS KABINETT - 750ML
Item:SANTORINI GAVALA WHITE - 750ML
Item:CORTENOVA VENETO P/GRIG - 750ML
Item:SANTA MARGHERITA P/GRIG ALTO - 375ML
Item:JACK DANIELS COUNTRY COCKTAIL SOUTHERN PEACH - 10.OZ-NR
Item:KNOB CREEK BOURBON 9YR - 100P - 375ML
Item:KSARA CAB - 750ML


In [4]:
from dotenv import load_dotenv
import pandas as pd
import utils
import os
import openai
import json 

# Load the .env file into the process environment, then read the Azure OpenAI
# settings from it. A setting that is absent from the environment is read as None.
load_dotenv()
OPENAI_DEPLOYMENT_ENDPOINT = os.environ.get("OPENAI_DEPLOYMENT_ENDPOINT")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_DEPLOYMENT_NAME = os.environ.get("OPENAI_DEPLOYMENT_NAME")
OPENAI_DEPLOYMENT_VERSION = os.environ.get("OPENAI_DEPLOYMENT_VERSION")
OPENAI_MODEL_NAME = os.environ.get("OPENAI_MODEL_NAME")

# Point the openai SDK at the Azure deployment described by the settings above.
openai.api_type = "azure"
openai.api_base = OPENAI_DEPLOYMENT_ENDPOINT
openai.api_version = OPENAI_DEPLOYMENT_VERSION
openai.api_key = OPENAI_API_KEY

In [5]:
def call_openai(template_prefix, text):
    """Run one completion request and return the post-processed reply text.

    The prompt is ``template_prefix + text + template_sufix``; ``template_sufix``
    is a module-level name that must be defined before this function is called.
    The request is sent to the deployment named by ``utils.OPENAI_DEPLOYMENT_NAME``.

    Args:
        template_prefix: Text placed before the user's input in the prompt.
        text: The user's input.

    Returns:
        The ``text`` field of the first returned choice after it has been passed,
        in order, through ``utils.remove_chars("\\n", ...)``,
        ``utils.start_after_string("Answer:", ...)`` and
        ``utils.remove_tail_tags("<|im_end|>", ...)``. Those helpers are defined
        in ``utils`` and are not shown in this notebook.
    """
    full_prompt = template_prefix + text + template_sufix
    completion = openai.Completion.create(
        engine=utils.OPENAI_DEPLOYMENT_NAME,
        prompt=full_prompt,
        temperature=0,
        max_tokens=4096,
        top_p=0.95,
        frequency_penalty=0,
        presence_penalty=0,
        stop=["<|im_end|>"],
    )

    # Only the first choice is used.
    raw_reply = completion["choices"][0]["text"]

    # Clean-up steps, applied in the same order as the helper names read.
    no_newlines = utils.remove_chars("\n", raw_reply)
    after_marker = utils.start_after_string("Answer:", no_newlines)
    return utils.remove_tail_tags("<|im_end|>", after_marker)

def call_openai_kql_response(template_prefix, text):
    """Return the reply of ``call_openai`` for the given prompt prefix and user text.

    Args:
        template_prefix: Text placed before the user's input in the prompt.
        text: The user's input.

    Returns:
        Whatever ``call_openai(template_prefix, text)`` returns, unchanged.
    """
    return call_openai(template_prefix, text)

In [6]:
# Prompt prefix for KQL generation. It opens a system section with the
# <|im_start|> marker, lists the table's column names, and asks for a KQL-only
# answer; the user's request is appended directly after "user input:".
# Note that the prompt lists the columns but does not name the table.
kusto_query_template_prefix = """
<|im_start|>system
I have an Azure Data Explorer (Kusto) table containing the following columns: 
year, month, supplier, item_code, item_description,item_type,retail_sales,retail_transfers,warehouse_sales

Write an KQL query based on the user input below.
Answer in a concise KQL query format only with the KQL command, no extra text.

user input: 
"""
# Appended after the user's text by call_openai, which reads this name as a
# module-level global (so the spelling must stay `template_sufix`). It closes
# the section opened above and opens the assistant section.
template_sufix = "<|im_end|>\n<|im_start|>assistant"

In [7]:
# Ask the model to turn a plain-English request into KQL and print the reply as returned.
user_request = "I would like to get the total retail sales for 2020 by months"
query = call_openai_kql_response(kusto_query_template_prefix, user_request)
print(query)

```    T    | where year == 2020    | summarize total_retail_sales = sum(retail_sales) by month```


In [23]:
# The prompt does not name the table, so the generated query starts with a
# placeholder table `T` and may be wrapped in ``` fences. Strip the fences and
# swap only that leading placeholder for the real `sales` table: a blanket
# str.replace("T", "sales") would also rewrite every capital "T" that appears
# later in the query (for example inside a string literal).
query = query.replace("```", "").strip()
table_token, pipe, pipeline = query.partition("|")
if table_token.strip() == "T":
    # Keep everything from the first pipe onward exactly as generated.
    query = ("sales " + pipe + pipeline) if pipe else "sales"

# Run the query against the `retail` database and load the first result table
# into a DataFrame.
response = client.execute("retail", query)
df = dataframe_from_result_table(response.primary_results[0])
print(df)

   month  total_retail_sales
0      1            74318.77
1      7            94538.96
2      3           109411.29
3      9            82108.68


In [24]:
import plotly.express as px

# Bar chart of the monthly totals returned by the generated query. The title and
# axis labels let the figure be read on its own, without the surrounding cells.
fig = px.bar(
    df,
    x="month",
    y="total_retail_sales",
    title="Total retail sales by month (2020)",
    labels={"month": "Month", "total_retail_sales": "Total retail sales"},
)
fig.show()
