# LlamaIndex Implementation
### DB search using English (and ChatGPT)


In [205]:
import os

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine, text
from sqlalchemy.engine import URL

In [227]:
# Connection settings come from the environment (or a local .env file) so that no
# credentials are stored in the notebook. Only the password is mandatory; the other
# settings default to the local development database.
load_dotenv()

db_password = os.getenv("KMFL_DB_PASSWORD")
if not db_password:
    raise RuntimeError(
        "KMFL_DB_PASSWORD is not set; define it in the environment or in a .env file"
    )

# URL.create escapes special characters (e.g. '@' in a password) correctly,
# which hand-built connection strings do not.
db_url = URL.create(
    "postgresql+psycopg2",
    username=os.getenv("KMFL_DB_USER", "postgres"),
    password=db_password,
    host=os.getenv("KMFL_DB_HOST", "127.0.0.1"),
    port=int(os.getenv("KMFL_DB_PORT", "5432")),
    database=os.getenv("KMFL_DB_NAME", "kmfl_llama"),
)
engine = create_engine(db_url)

# Connectivity smoke test: preview a handful of rows instead of dumping the whole
# result set into the notebook output.
with engine.connect() as conn:
    result = conn.execute(
        text("SELECT * FROM tbl_kmfl WHERE open_whole_day = true LIMIT 5")
    )
    for row in result:
        print(row)

(3, 'Al Shafic Nursing Home Limited', 'Al Shafic Nursing Home Limited', 'Level 3', 'Nursing Homes', 'Wajir', 'Wajir East', 'Private Practice', 'Admitting  General Patients Only', 'Operational', True, True, True, True, "['Accident and Emergency casualty Services', 'Basic Occupational Therapy', 'Basic Services for Gender Based Violence Survivors', 'Basic- Perform basi ... (286 characters truncated) ... ections', 'Basic Emergency Preparedness', 'HIV Testing Services', 'Outpatient', 'Inpatient', 'Basic dental services', 'Basic Mental Health Services']", 40.04488825, 1.7440496, "['24fd65c9-56ea-497c-82ec-3fdf0ac9d006', '36d2f7a8-41b3-40f8-8943-e513fa28c8db', 'f312d0b2-5e41-4587-b54d-ff1d1bc128c6', '4a5acc1b-894f-4405-b80e-8e1 ... (382 characters truncated) ... 1695-486e-85bd-4df0c4571b01', '3a7cb460-2ecb-450b-b21a-4c67e90ba6d2', '6f8c6c75-135c-4911-848d-8d3467c00605', '3907645f-f8f9-44da-b66c-ebf73bedacac']", 'Private Practice - Clinical Officer')
(24, 'St.Nkiria Medical Centre Nanyuki', 'S

In [224]:
from llama_index import SQLDatabase

# Short human-readable description of each table, keyed by table name.
# These strings are passed on as `context_str` when the table schemas are built.
table_details = dict(
    tbl_kmfl='Kenya facility list - list of hospitals, dispensaries, clinics etc',
    service_category='categories of services offered in facilities',
    service='services offered in facilities',
    pharmacies='pharmacies',
)

# Model name shared by the LLM and the token counter.
# Alternative noted by the author: 'gpt-3.5-turbo-instruct'.
ze_model = 'gpt-3.5-turbo'

# llama_index wrapper around the SQLAlchemy engine.
sql_db = SQLDatabase(engine, sample_rows_in_table_info=2)

In [208]:
# Show the table names the wrapper discovered in the database.
# For the full table info, use: print(sql_db.table_info)
[*sql_db._all_tables]

['pharmacies', 'service', 'tbl_kmfl', 'service_category']

In [209]:
import os
import openai
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# Prefer the project-specific variable, then fall back to the standard OpenAI one.
# Fail fast with a clear message rather than assigning None and hitting an opaque
# authentication error at the first API call.
openai.api_key = os.getenv("OPEN_AI_KEY") or os.getenv("OPENAI_API_KEY")
if not openai.api_key:
    raise RuntimeError(
        "No OpenAI API key found; set OPEN_AI_KEY (or OPENAI_API_KEY) "
        "in the environment or in a .env file"
    )


In [210]:
import tiktoken
from llama_index.callbacks import CallbackManager, TokenCountingHandler

# Token counting: the handler is given the `encode` function of the tiktoken
# encoding that corresponds to `ze_model`.
model_encoding = tiktoken.encoding_for_model(ze_model)
token_counter = TokenCountingHandler(tokenizer=model_encoding.encode)

# The callback manager is what gets handed to the service context.
callback_manager = CallbackManager([token_counter])

In [211]:
from llama_index import ServiceContext, LLMPredictor, OpenAIEmbedding, PromptHelper
from llama_index.llms import OpenAI

# Low temperature keeps the model's output close to deterministic.
llm = OpenAI(model=ze_model, temperature=0.1)

# Bundle the LLM with the token-counting callbacks for use by indices and query engines.
service_ctx = ServiceContext.from_defaults(callback_manager=callback_manager, llm=llm)

In [212]:
from llama_index.indices.struct_store.sql_query import NLSQLTableQueryEngine

# Baseline natural-language-to-SQL engine over the whole database.
# NOTE: `query_engine` is rebound further down to a SQLTableRetrieverQueryEngine.
query_engine = NLSQLTableQueryEngine(service_context=service_ctx, sql_database=sql_db)


In [213]:
# query_str = "how many tables are we querying, what are the table names?"
# response  = query_engine.query(query_str)

In [214]:
# print(response)
# print(token_counter.total_llm_token_count)

#### More efficient querying: retrieve only the relevant table schemas for each question

In [215]:
from llama_index.objects import ObjectIndex, SQLTableNodeMapping, SQLTableSchema

# Use the wrapper's public accessor instead of the private `_all_tables` attribute.
tables = list(sql_db.get_usable_table_names())
table_node_mapping = SQLTableNodeMapping(sql_db)

# One schema object per table, with its description as context.
# `.get` yields None for a table that has no entry in `table_details`, so a new or
# undocumented table no longer aborts the cell with a KeyError.
table_schema_objs = [
    SQLTableSchema(table_name=table, context_str=table_details.get(table))
    for table in tables
]

In [216]:
# Display the schema objects built above (table name + description) as a sanity check.
table_schema_objs

[SQLTableSchema(table_name='pharmacies', context_str='pharmacies'),
 SQLTableSchema(table_name='service', context_str='services offered in facilities'),
 SQLTableSchema(table_name='tbl_kmfl', context_str='Kenya facility list - list of hospitals, dispensaries, clinics etc'),
 SQLTableSchema(table_name='service_category', context_str='categories of services offered in facilities')]

In [219]:
from llama_index.indices.struct_store import SQLTableRetrieverQueryEngine
from llama_index import VectorStoreIndex

# Build a vector index over the table schema objects so that tables can be looked up
# by similarity to the question.
obj_index = ObjectIndex.from_objects(
    table_schema_objs,
    table_node_mapping,
    VectorStoreIndex,
    service_context=service_ctx,
)

# Rebinds `query_engine`: from here on, queries go through the retriever-backed engine,
# which is handed the 3 most similar table schemas.
query_engine = SQLTableRetrieverQueryEngine(
    sql_db,
    obj_index.as_retriever(similarity_top_k=3),
    service_context=service_ctx,
)



In [223]:
# Ask a question in plain English; the engine generates and runs the SQL.
resp = query_engine.query('how many facilities do we have that operate 24hrs')

# Report the natural-language answer, then the generated SQL and its raw result.
print('response: ', resp)
for metadata_key in ('sql_query', 'result'):
    print('metadata: ', resp.metadata[metadata_key])

# Running LLM token total as tracked by the counting handler.
print('tokens: ', token_counter.total_llm_token_count)

response:  We have a total of 3020 facilities that operate 24 hours.
metadata:  SELECT COUNT(*) FROM tbl_kmfl WHERE open_whole_day = true
metadata:  [(3020,)]
tokens:  2558
