# LlamaIndex Implementation
### DB search using English (and ChatGPT)


In [89]:
from sqlalchemy import create_engine, text
import pandas as pd

In [90]:
# Build the connection URL from environment variables so the database password
# is never committed with the notebook. Non-secret settings fall back to the
# local development values this notebook was originally written against.
import os

from dotenv import load_dotenv
from sqlalchemy.engine import URL

load_dotenv()  # pick up KMFL_DB_* values from a local .env file, if one exists

db_password = os.getenv("KMFL_DB_PASSWORD")
if not db_password:
    raise RuntimeError(
        "KMFL_DB_PASSWORD is not set; export it or add it to a .env file before running this notebook."
    )

db_url = URL.create(
    drivername="postgresql+psycopg2",
    username=os.getenv("KMFL_DB_USER", "postgres"),
    password=db_password,
    host=os.getenv("KMFL_DB_HOST", "127.0.0.1"),
    port=int(os.getenv("KMFL_DB_PORT", "5432")),
    database=os.getenv("KMFL_DB_NAME", "kmfl_llama"),
)
engine = create_engine(db_url)

# Smoke test: fetch one facility flagged as open the whole day to confirm the
# connection works and the table is reachable. The connection is closed when
# the `with` block exits.
with engine.connect() as conn:
    result = conn.execute(text("SELECT * FROM tbl_kmfl WHERE open_whole_day = true"))
    print(result.first())

(8298, 'Jamia Med Clinic', 'Jamia Medical Clinic', 2, 'Medical Clinic', 'Uasin Gishu', 'Kapseret', 'Private Practice', '', 'Operational', True, False, False, False, '[]', None, None, None, 'Private Practice - Private Institution Academic', 6, 2, 'Kapseret', 'Kipkenyo')


In [91]:
from llama_index import SQLDatabase

# Human-readable description of each table. These strings are attached to the
# table schema objects built later in the notebook (see `table_schema_objs`).
table_details = dict(
    tbl_kmfl='Kenya facility list - list of hospitals, dispensaries, clinics etc; this is the primary table',
    service_category='categories of services offered in facilities',
    service='services offered in facilities',
    # Tables not described yet:
    # service_cat_fac_mapper='service-category to facility mapper',
    # service_fac_mapper='service to facility mapper',
    # pharmacies='pharmacies',
)

# OpenAI model name; reused below for the tiktoken tokenizer and for the LLM.
ze_model = 'gpt-3.5-turbo'

# Wrap the SQLAlchemy engine for LlamaIndex, restricted to the facility table.
# sample_rows_in_table_info presumably controls how many example rows are
# embedded in the table description -- TODO confirm against the library docs.
sql_db = SQLDatabase(
    engine,
    include_tables=['tbl_kmfl'],
    sample_rows_in_table_info=5,
)

In [92]:
# Show the table names held in the wrapper's private `_all_tables` collection.
# NOTE(review): the recorded output lists three tables even though
# `include_tables` names only `tbl_kmfl`.
# Uncomment to dump the table info string instead: print(sql_db.table_info)
[*sql_db._all_tables]

['service', 'tbl_kmfl', 'service_category']

In [93]:
import os
import openai
from dotenv import load_dotenv
# Load variables from a local .env file, then hand the OpenAI key (stored
# under the name OPEN_AI_KEY) to the openai module. The value is None when the
# variable is not defined.
load_dotenv()
openai.api_key = os.environ.get("OPEN_AI_KEY")


In [94]:
import tiktoken
from llama_index.callbacks import CallbackManager, TokenCountingHandler

# Count tokens with the tiktoken encoding that matches the configured model.
model_encoding = tiktoken.encoding_for_model(ze_model)
token_counter = TokenCountingHandler(tokenizer=model_encoding.encode)

# The callback manager carries the counter into the service context below.
callback_manager = CallbackManager([token_counter])

In [95]:
from llama_index import ServiceContext, LLMPredictor, OpenAIEmbedding, PromptHelper
from llama_index.llms import OpenAI

# Chat model used for query generation and answers.
# NOTE(review): temperature=0.1 is presumably chosen to keep the generated SQL
# stable between runs -- confirm intent.
llm = OpenAI(
    model=ze_model,
    temperature=0.1,
)

# Bundle the LLM with the token-counting callbacks; both query engines below
# receive this context.
service_ctx = ServiceContext.from_defaults(
    callback_manager=callback_manager,
    llm=llm,
)

In [96]:
from llama_index.indices.struct_store.sql_query import NLSQLTableQueryEngine

# Baseline natural-language-to-SQL engine over `sql_db`.
# Note: `query_engine` is rebound further down to a retriever-based engine,
# so this instance is only used by the (currently commented-out) cells below.
query_engine = NLSQLTableQueryEngine(service_context=service_ctx, sql_database=sql_db)


In [97]:
# query_str = "how many tables are we querying from public schema?"
# # response = query_engine.query(query_str)
# response  = query_engine.query('facilities that have child services; produce postgres sql using "like" statement where possible')

In [98]:
# print(response)
# print(response.metadata['sql_query'])
# print(token_counter.total_llm_token_count)

#### Efficient querying?

In [99]:
from llama_index.objects import ObjectIndex, SQLTableNodeMapping, SQLTableSchema

# Table names known to the SQL wrapper, and the mapping that turns table schema
# objects into index nodes.
tables = list(sql_db._all_tables)
table_node_mapping = SQLTableNodeMapping(sql_db)

# One schema object per table, annotated with its description from
# `table_details`. `.get()` is used so that a table without a description is
# still indexed (with context_str=None) instead of aborting the cell with a
# KeyError.
table_schema_objs = [
    SQLTableSchema(table_name=table, context_str=table_details.get(table))
    for table in tables
]

In [100]:
# Display the schema objects built in the previous cell.
table_schema_objs

[SQLTableSchema(table_name='service', context_str='services offered in facilities'),
 SQLTableSchema(table_name='tbl_kmfl', context_str='Kenya facility list - list of hospitals, dispensaries, clinics etc; this is the primary table'),
 SQLTableSchema(table_name='service_category', context_str='categories of services offered in facilities')]

In [101]:
from llama_index.indices.struct_store import SQLTableRetrieverQueryEngine
from llama_index import VectorStoreIndex, GPTVectorStoreIndex

# Index the table schema objects in a vector store so that tables can be
# retrieved by similarity to the question.
obj_index = ObjectIndex.from_objects(
    table_schema_objs,
    table_node_mapping,
    VectorStoreIndex,
    service_context=service_ctx,
)

# Retrieve up to three table schemas per question, then rebind `query_engine`
# to the retriever-backed engine (this replaces the earlier baseline engine).
table_retriever = obj_index.as_retriever(similarity_top_k=3)
query_engine = SQLTableRetrieverQueryEngine(
    sql_db,
    table_retriever,
    service_context=service_ctx,
)

In [108]:
# Natural-language question sent to the engine. The trailing instructions
# nudge the model towards case-insensitive matching in the generated SQL.
question = 'top 20 facilities in the county level 5 and above - show facility and level; the higher the level number the bigger the hospital, above a level means hospital with higher number; produce psql using "ilike" statement where possible;'
resp = query_engine.query(question)

# Alternative suffixes tried:
#   ; produce psql using "like" statement where possible
#   ; in the produced sql query replace "like" with "ilike"
#
# Observations from earlier runs:
#   - "facilities open weekend"                          -> very slow; adding a record limit speeds it up
#   - "top 10 facilities that open facilities open weekend in kisii"
#   - "which are the level 4 facilities?"                -> very slow
#   - the chat features seem to be optimised for summaries
#   - "how many facilities are level 4; produce psql using "like" statement where possible" -> fast
#   - "how many facilities operate 24hrs"                -> fast
#   - "how many facilities do we have that are open weekend" -> quite fast; more verbose queries seem to help

# Report the row count (when present), the generated SQL, the running token
# total, and the synthesised answer.
response_meta = resp.metadata
if 'result' in response_meta:
    print('recordslength: ', len(response_meta['result']))

print('metadata: ', response_meta['sql_query'])
print('tokens: ', token_counter.total_llm_token_count)
print('response:\n', resp)


recordslength:  20
metadata:  SELECT name, keph_level
FROM tbl_kmfl
WHERE keph_level >= 5
ORDER BY keph_level DESC
LIMIT 20;
tokens:  8305
response:
 Here are the top 20 facilities in the county with level 5 and above:

1. National Spinal Injury Hospital - Level 6
2. KNH Othaya Annex - Level 6
3. Kenyatta University Teaching Referral and Research Hospital - Level 6
4. Moi Teaching Referral Hospital - Level 6
5. Kenyatta National Hospital - Level 6
6. Mathari Hospital - Level 6
7. Mama Margaret Uhuru Hospital - Level 5
8. AAR Hospital - Muthaiga - Level 5
9. Komarock Healthcare-Kathwana - Level 5
10. Nairobi Hospital - Level 5
11. Embu Level 5 Hospital - Level 5
12. Meru Teaching And Referral Hospital - Level 5
13. Kijabe (AIC) Hospital - Level 5
14. Garissa Level 5 Teaching and Referral Hospital - Level 5
15. Chogoria (PCEA) Hospital - Level 5
16. Coast General Teaching and Referral Hospital - Level 5
17. Tabaka Mission Hospital - Level 5
18. Thika Level 5 Hospital - Level 5
19. Kakame

#### Query Todos

###### Replacements
- kenya, near me, my vicinity

###### Prompts
- add a 'top 20' limit to facility queries when the user has not specified one

###### user instructions
- for locations, add admin unit
- specify the service name; if the query is empty, rewrite it