In [None]:
!python -m pip install semantic-kernel==0.9.4b1

In [2]:
import semantic_kernel as sk

# NOTE(review): `kernel` is re-assigned to a fresh sk.Kernel() at the top of the
# service-configuration cell, so this first instance is never used afterwards.
kernel = sk.Kernel()

In [3]:
from services import Service

# Select a service to use for this notebook (available services: OpenAI, AzureOpenAI, HuggingFace)
# NOTE(review): the configuration cell that follows only registers a chat service
# for Service.OpenAI and Service.AzureOpenAI.
selectedService = Service.AzureOpenAI

In [4]:
# Build a fresh kernel and register exactly one chat-completion service on it,
# chosen by `selectedService`. Settings come from the sk.*_settings_from_dot_env helpers.
kernel = sk.Kernel()

service_id = None
if selectedService == Service.OpenAI:
    from semantic_kernel.connectors.ai.open_ai import OpenAIChatCompletion

    api_key, org_id = sk.openai_settings_from_dot_env()
    # service_id is only the local label under which the service is registered.
    service_id = "gpt4-32k"
    kernel.add_service(
        # The model id published by OpenAI is spelled "gpt-4-32k" (with a hyphen after "gpt").
        OpenAIChatCompletion(service_id=service_id, ai_model_id="gpt-4-32k", api_key=api_key, org_id=org_id),
    )
elif selectedService == Service.AzureOpenAI:
    from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion

    deployment, api_key, endpoint = sk.azure_openai_settings_from_dot_env()
    service_id = "aoai_chat_completion"
    kernel.add_service(
        AzureChatCompletion(service_id=service_id, deployment_name=deployment, endpoint=endpoint, api_key=api_key),
    )
else:
    # Fail here with a clear message instead of leaving the kernel without any
    # service (and service_id as None), which would only surface later at invoke time.
    raise ValueError(
        f"Unsupported service {selectedService!r}: this notebook only configures "
        "Service.OpenAI and Service.AzureOpenAI"
    )

In [5]:
# note: using plugins from the samples folder
# The path is relative, so it is resolved against the notebook's working directory.
plugins_directory = "plugins"

# Import the prompt-based plugin named "DataPlugin" from that directory.
dataFunctions = kernel.import_plugin_from_prompt_directory(plugins_directory, "DataPlugin")

# The "DatabaseDescriptor" function is the one the later cells invoke with
# `data_schema` and `query` arguments.
descriptorFunction = dataFunctions["DatabaseDescriptor"]

In [6]:
def read_data_schema_from_file(file_path):
    """Return the full text content of the file at ``file_path``.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the platform's default locale encoding.

    Args:
        file_path: Path of the text file to read.

    Returns:
        The content of the file as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    with open(file_path, "r", encoding="utf-8") as file:
        return file.read()

# Load the schema text; it is passed to the prompt function as `data_schema` in the query cells.
file_path = "data_schema.txt"
data_schema = read_data_schema_from_file(file_path)


In [7]:
query = "what are the best performance products in 2024?"

result = await kernel.invoke(descriptorFunction, sk.KernelArguments(data_schema=data_schema, query= query))
print(result)

Error parsing XML of prompt: not well-formed (invalid token): line 339, column 98


<sql>
SELECT p.product_id, p.product_name, SUM(oi.quantity) as total_quantity
FROM products p
JOIN order_items oi ON p.product_id = oi.product_id
JOIN orders o ON oi.order_id = o.order_id
WHERE o.order_date BETWEEN '2024-01-01' AND '2024-12-31'
GROUP BY p.product_id, p.product_name
ORDER BY total_quantity DESC
LIMIT 10;
</sql>


In [8]:
import re

def parse_text_between_tags(text, start_tag, end_tag):
    """Collect every substring of ``text`` enclosed by ``start_tag`` and ``end_tag``.

    Both tags are matched literally (they are regex-escaped). Matching is
    non-greedy and the enclosed text may span multiple lines.

    Args:
        text: String to search.
        start_tag: Literal opening delimiter.
        end_tag: Literal closing delimiter.

    Returns:
        A list with the text found between each delimiter pair, in order of
        appearance and without the delimiters; empty when nothing matches.
    """
    opening = re.escape(start_tag)
    closing = re.escape(end_tag)
    # DOTALL lets "." cross newlines so multi-line payloads are captured whole.
    tag_regex = re.compile(opening + "(.*?)" + closing, re.DOTALL)
    return [hit.group(1) for hit in tag_regex.finditer(text)]


In [9]:
# Extract the string data from the result object
result_string = result.data if hasattr(result, 'data') else str(result)

matches = parse_text_between_tags(result_string,"<sql>", "</sql>")
print(matches[0])


SELECT p.product_id, p.product_name, SUM(oi.quantity) as total_quantity
FROM products p
JOIN order_items oi ON p.product_id = oi.product_id
JOIN orders o ON oi.order_id = o.order_id
WHERE o.order_date BETWEEN '2024-01-01' AND '2024-12-31'
GROUP BY p.product_id, p.product_name
ORDER BY total_quantity DESC
LIMIT 10;



In [10]:
import sqlite3
import pandas as pd
import os

def run_sql_query(query, db_path=None):
    """Run ``query`` against a SQLite database and return the result as a DataFrame.

    Args:
        query: SQL text to execute.
        db_path: Optional path of the SQLite database file. When omitted, the
            ``DB_CONNECTION_STRING`` environment variable is used, falling back
            to ``"BikeStore.db"`` if it is unset or empty.

    Returns:
        A pandas DataFrame holding the rows produced by the query.

    Raises:
        pandas.errors.DatabaseError: If the SQL cannot be executed.
    """
    # NOTE(security): in this notebook the SQL text comes from a model response,
    # so it is untrusted input; it is executed as-is here.
    if db_path is None:
        # The connection string is deliberately not printed: it may hold sensitive data.
        db_path = os.getenv("DB_CONNECTION_STRING") or "BikeStore.db"
    conn = sqlite3.connect(db_path)
    try:
        return pd.read_sql_query(query, conn)
    finally:
        # Always release the connection, even when the query fails.
        conn.close()


In [11]:
# Execute the first SQL statement extracted from the model response and preview the leading rows.
df = run_sql_query(matches[0])
df.head()

BikeStore.db


Unnamed: 0,product_id,product_name,total_quantity


In [12]:
query ="display graph of product with name 'Surly Straggler' sales graph in years"

result = await kernel.invoke(descriptorFunction, sk.KernelArguments(data_schema=data_schema, query= query))
print(result)

Error parsing XML of prompt: not well-formed (invalid token): line 339, column 98


<sql>
SELECT p.product_name, o.order_date, oi.quantity
FROM products p
JOIN order_items oi ON p.product_id = oi.product_id
JOIN orders o ON oi.order_id = o.order_id
WHERE p.product_name = 'Surly Straggler'
</sql>

<python>
import matplotlib.pyplot as plt
import pandas as pd

# Assuming df is the result of the above SQL query
df['order_date'] = pd.to_datetime(df['order_date'])
df.set_index('order_date', inplace=True)
df.resample('Y').sum()['quantity'].plot(kind='line')

plt.title('Sales of Surly Straggler Over Years')
plt.xlabel('Year')
plt.ylabel('Quantity Sold')
plt.show()
</python>
