# LLama Index Implementation
### DB search using English (and ChatGPT)


In [1]:
from sqlalchemy import create_engine, text
import pandas as pd

In [6]:
engine = create_engine("postgresql+psycopg2:///?user=postgres&password=123@Team&host=127.0.0.1&port=5432&database=kmfl_llama")
# dbConnection = engine.connect()

with engine.connect() as conn:
    result = conn.execute(text("SELECT * FROM tbl_kmfl WHERE open_whole_day = true"))
    # for row in result:
    print(result.first())

(3, 'Al Shafic Nursing Home Limited', 'Al Shafic Nursing Home Limited', 'Level 3', 'Nursing Homes', 'Wajir', 'Wajir East', 'Private Practice', 'Admitting  General Patients Only', 'Operational', True, True, True, True, "['Accident and Emergency casualty Services', 'Basic Occupational Therapy', 'Basic Services for Gender Based Violence Survivors', 'Basic- Perform basi ... (286 characters truncated) ... ections', 'Basic Emergency Preparedness', 'HIV Testing Services', 'Outpatient', 'Inpatient', 'Basic dental services', 'Basic Mental Health Services']", 40.04488825, 1.7440496, "['24fd65c9-56ea-497c-82ec-3fdf0ac9d006', '36d2f7a8-41b3-40f8-8943-e513fa28c8db', 'f312d0b2-5e41-4587-b54d-ff1d1bc128c6', '4a5acc1b-894f-4405-b80e-8e1 ... (382 characters truncated) ... 1695-486e-85bd-4df0c4571b01', '3a7cb460-2ecb-450b-b21a-4c67e90ba6d2', '6f8c6c75-135c-4911-848d-8d3467c00605', '3907645f-f8f9-44da-b66c-ebf73bedacac']", 'Private Practice - Clinical Officer')


In [11]:
from llama_index import SQLDatabase

table_details={
    'tbl_kmfl': 'Kenya facility list - list of hospitals, dispensaries, clinics etc',
    'service_category': 'categories of services offered in facilities',
    'service': 'services offered in facilities',
    # 'serv_cat_fac_mapper': 'service-category to facility mapper',
    # 'serv_fac_mapper': 'service to facility mapper',
    # 'pharmacies': 'pharmacies'
}

ze_model = 'gpt-3.5-turbo' 

sql_db  = SQLDatabase(engine, include_tables=['tbl_kmfl'], sample_rows_in_table_info=5 )

In [12]:
list(sql_db._all_tables)
# print(sql_db.table_info)

['tbl_kmfl', 'service_category', 'service']

In [13]:
import os
import openai
from dotenv import load_dotenv
load_dotenv()

openai.api_key = os.getenv("OPEN_AI_KEY")


In [14]:
import tiktoken
from llama_index.callbacks import CallbackManager, TokenCountingHandler

token_counter = TokenCountingHandler(
    tokenizer= tiktoken.encoding_for_model(ze_model).encode
)

callback_manager = CallbackManager([token_counter])

In [15]:
from llama_index import ServiceContext, LLMPredictor, OpenAIEmbedding, PromptHelper
from llama_index.llms import OpenAI

llm = OpenAI(temperature=0.1, model=ze_model)

service_ctx = ServiceContext.from_defaults(
    llm=llm, callback_manager=callback_manager
)

In [16]:
from llama_index.indices.struct_store.sql_query import NLSQLTableQueryEngine

query_engine = NLSQLTableQueryEngine(
    sql_database=sql_db,
    service_context=service_ctx
)


In [19]:
# query_str = "how many tables are we querying from public schema?"
# # response = query_engine.query(query_str)
# response  = query_engine.query('facilities that have child services; produce postgres sql using "like" statement where possible')

In [20]:
# print(response)
# print(response.metadata['sql_query'])
# print(token_counter.total_llm_token_count)

#### Efficient querying?

In [21]:
from llama_index.objects import ObjectIndex, SQLTableNodeMapping, SQLTableSchema

tables = list(sql_db._all_tables)
table_node_mapping = SQLTableNodeMapping(sql_db)
table_schema_objs=[]

tables

for table in tables:
    table_schema_objs.append(SQLTableSchema(table_name=table, context_str=table_details[table]))

In [22]:
table_schema_objs

[SQLTableSchema(table_name='tbl_kmfl', context_str='Kenya facility list - list of hospitals, dispensaries, clinics etc'),
 SQLTableSchema(table_name='service_category', context_str='categories of services offered in facilities'),
 SQLTableSchema(table_name='service', context_str='services offered in facilities')]

In [23]:
from llama_index.indices.struct_store import SQLTableRetrieverQueryEngine
from llama_index import VectorStoreIndex, GPTVectorStoreIndex

obj_index = ObjectIndex.from_objects(
    table_schema_objs, table_node_mapping, VectorStoreIndex, service_context = service_ctx
)

query_engine = SQLTableRetrieverQueryEngine(sql_db, obj_index.as_retriever(similarity_top_k=3), service_context=service_ctx)

In [81]:
resp = query_engine.query('top 12 facilities which are above level 1 and private; produce psql using "ilike" statement where possible')
# ; produce psql using "like" statement where possible
# ; in the produced sql query replace "like" with "ilike" 

# facilities open weekend -> very slow... interesting -> add record limit for faster output
### top 10 facilities that open facilities open weekend in kisii
# which are the level 4 facilities? -> very slow

# Seems chatting features are optimized for summaries
# how many facilities are level 4; produce psql using "like" statement where possible -> fast
# how many facilities operate 24hrs -> fast
# how many facilities do we have that are open weekend -> quite fast, seems queries should be more verbose

print('response: ', resp)
print('metadata: ', resp.metadata['sql_query'])
# print('metadata: ', resp.metadata['result'])
print('tokens: ', token_counter.total_llm_token_count)

response:  Here are the top 12 private facilities that are above level 1:

1. Komarock Healthcare-Kathwana - Level 5
2. Jacaranda Maternity Hospital - Level 5
3. Lifecare Hospitals Meru - Level 5
4. AAR Hospital - Muthaiga - Level 5
5. Aga Khan Hospital - Level 5
6. Nairobi Hospital - Level 5
7. 3rd Park Hospital Limited - Level 5
8. St. Newreb Hospital - Level 5
9. Afya Bora Hospital Annex - Level 5
10. Citymed Cottage Hospital Nyandarua - Level 4
11. Lifecare Hospital, Eldoret - Level 4
12. Care Giver Community Hospital - Level 4
metadata:  SELECT id, name, keph_level_name, owner_type_name
FROM tbl_kmfl
WHERE keph_level_name ILIKE 'level%'
AND owner_type_name ILIKE 'private%'
ORDER BY keph_level_name DESC
LIMIT 12;
tokens:  134962
