In [32]:
path_vector_db = "./data/spades/"

import os
import json
import openai
import sys
import flask
import telebot

from llama_index import VectorStoreIndex, SimpleDirectoryReader, LangchainEmbedding, PromptHelper, LLMPredictor, ServiceContext, set_global_service_context
from llama_index import load_index_from_storage, StorageContext, QuestionAnswerPrompt
from llama_index.llms import OpenAI
from llama_index.indices.postprocessor.node import SimilarityPostprocessor
from IPython.display import Markdown, display

from llama_index import Document

In [37]:

# Create a document with filename in metadata
document = Document(
    text='text',
    metadata={
        'filename': '<doc_file_name>',
        'category': '<category>'
    }
)

document.metadata = {'filename': '<doc_file_name>'}
filename_fn = lambda filename: {'file_name': filename}

from flask import Flask, request, jsonify
from fuzzywuzzy import fuzz, process
from dotenv import load_dotenv

# Set up GPT3.5-Turbo

# set up path which is parent directory of this file
path = os.getcwd()
load_dotenv(f"{path}/.env")
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
service_context = ServiceContext.from_defaults(llm=OpenAI(model="gpt-3.5-turbo", temperature=0, max_tokens= 100))
set_global_service_context(service_context)

BOT_TOKEN = os.getenv("BOT_TOKEN")
print(BOT_TOKEN)

# Prompt will be used to generate rich response,
# Modify this prompt to add instruction

QA_PROMPT_TMPL = (
    "You are DoctorAI and an Ultrasound Physician Assistant. You will use information immediately to suggest an ultrasound imaging procedure appropriate for improving the overall assessment if necessary.\n"
    "We have provided context information below. \n"
    "---------------------\n"
    "{context_str}"
    "\n---------------------\n"
    "Given this information, Explain and add conclusion at the end: {query_str}\n"
)

QA_PROMPT = QuestionAnswerPrompt(QA_PROMPT_TMPL)

# Define a list of questions and their corresponding answers

COMMON_QUESTIONS = {
  "Hello.": "Hi, I'm Doctor AI, your private medical advisor. The more details you provide about your symptoms, the more accurate advice I can offer. Please feel free to ask me any questions.",
  "Hi.": "Hi, I'm Doctor AI, your private medical advisor. The more details you provide about your symptoms, the more accurate advice I can offer. Please feel free to ask me any questions."
}

# rebuild storage context

storage_context = StorageContext.from_defaults(persist_dir=path_vector_db)
# load index. Ensure service_context add here to use customized LLM
index = load_index_from_storage(storage_context=storage_context,service_context=service_context)

# add in prompt template and synthesizer...
query_engine = index.as_query_engine(
    service_context=service_context,
    text_qa_template=QA_PROMPT,
    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.75)],
    verbose=True,
)

bot = telebot.TeleBot(BOT_TOKEN)

app = flask.Flask(__name__)

# Process webhook calls

@app.route('/', methods=['POST'])
def webhook():
    if flask.request.headers.get('content-type') == 'application/json':
        json_string = flask.request.get_data().decode('utf-8')
#        logger.debug(f"Request received: {json_string}")

        update = telebot.types.Update.de_json(json_string)
        bot.process_new_updates([update])
        return ('', 204)
    else:
        return ('Bad request', 400)

@bot.message_handler(func=lambda message: True)
def echo_message(message):

    # Check if the user typed "/start"
    if message.text == "/start":
        welcome_message = '''
Welcome to EduAI - Your 24/7 Education Advisor!

We are thrilled to have you join us in this revolutionary educational journey. At EduAI, our mission is to provide expert educational support to anyone, anytime, anywhere.

Powered by a state-of-the-art language model enriched with educational knowledge grounded by the relevant course notes, we aim to be your trusted companion for all your health-related inquiries.
        '''
        bot.reply_to(message, welcome_message)
        return

    # Check if the user typed "/start"
    if message.text == "/disclaimer":

        disclaimer_context = '''

DoctorAI Disclaimer

The information provided in response to patient questions on this Telegram channel is generated using a language model that incorporates content from the NHS website, which is publicly available and licensed under the Open Government Licence v3.0.

Please note the following:

Source Attribution: The information provided contains public sector information licensed under the Open Government Licence v3.0. It is derived from the NHS Website Content, but it is not directly cited from the NHS website or endorsed by the NHS specifically.
Content Adaptation: Some of the information presented here may be adapted or modified as part of the language model's responses. While efforts have been made to ensure accuracy, changes in wording or context may occur, which could affect the original meaning or impact of the content.
Risk and Responsibility: Any adaptation of NHS Website Content or use of non-refreshed NHS Website Content may invalidate its formal clinical approval.
Liability: As the provider of this information, I do not bear responsibility for the accuracy, completeness, or validity of the responses generated by the language model.
Seeking Professional Advice: The information provided here should not be considered a substitute for professional medical advice or consultation. If you have specific health concerns or require medical guidance, please consult a qualified healthcare professional or visit official NHS resources.
Independent Verification: It is advisable to independently verify critical information from official NHS sources before making any decisions based on the responses provided on this channel.
Personal Data: No personal data will be collected from the Telegram channel.

By using this Telegram channel, you acknowledge and agree to the above disclaimer. If you do not agree, please refrain from using the information provided here.

        '''

        bot.reply_to(message, disclaimer_context)
        return

    query = message.text
    print(query)

    scores = {}
    for question in COMMON_QUESTIONS:
        score = fuzz.token_sort_ratio(query, question)
        scores[question] = score
    # Find the question with the highest match score
    best_match = process.extractOne(query, COMMON_QUESTIONS.keys())

    # If the match score is above a certain threshold, provide the corresponding answer
    if best_match[1] > 75:
        answer = COMMON_QUESTIONS[best_match[0]]
        bot.reply_to(message, answer)
        return

    bot.reply_to(message, "Message received. Please wait for response...")

    response = query_engine.query(query)
    print(len(response.source_nodes))

    response_ext = Markdown(f"{response.response}").data

    response_ext = response_ext + "\n\nReference used to form this response:\n"

    # Assuming 'response' is the response from your query
    for i, source_node in enumerate(response.source_nodes):
        print(i)
        filename = source_node.node.metadata.get("file_path", None)
        print(filename)
        filename = filename.split('\\')[-1].replace('.txt', 'pdf')
        print(filename)
        # url = "https://www.nhs.uk/conditions/" + filename + "/"
        response_ext = response_ext + filename + "\n"

    # print(response_ext)

    if response_ext == "None":
        response_ext = "We were unable to find the answer in our current knowledge database. Kindly ask another question."

    bot.reply_to(message, response_ext)
    

if __name__ == '__main__':
    PORT = int(os.getenv("PORT")) if os.getenv("PORT") else 8080
    app.run(host="0.0.0.0", port=PORT, debug=True, use_reloader=False)

6599815538:AAE101fdtSmcMO5vMgjg8VTGcIgAl6-qlSc
 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:8080
 * Running on http://10.12.1.33:8080
Press CTRL+C to quit
127.0.0.1 - - [09/Nov/2023 15:55:11] "POST / HTTP/1.1" 204 -


what is the minimum setback
2
0
data\spades\Setback_soup.html
Setback_soup.html
1
data\spades\Setback_soup.html
Setback_soup.html


127.0.0.1 - - [09/Nov/2023 15:56:23] "POST / HTTP/1.1" 204 -


what is the minimum setback for a 10 storey that is near a road
2
0
data\spades\Setback_soup.html
Setback_soup.html
1
data\spades\Setback_soup.html
Setback_soup.html


127.0.0.1 - - [09/Nov/2023 15:58:31] "POST / HTTP/1.1" 204 -


what is the minimum setback for a 10 storey residential building that is near a category 1 road
2
0
data\spades\Setback_soup.html
Setback_soup.html
1
data\spades\Setback_soup.html
Setback_soup.html


127.0.0.1 - - [09/Nov/2023 16:01:20] "POST / HTTP/1.1" 204 -


what is the maximum site coverage for residential buildings
2
0
data\spades\Site-Coverage_soup.html
Site-Coverage_soup.html
1
data\spades\Maximum-Number-DU_soup.html
Maximum-Number-DU_soup.html


In [36]:
path_vector_db

'./data/spades/'

In [33]:
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

# # Load the document, split it into chunks, embed each chunk and load it into the vector store.
# raw_documents = TextLoader('../../../state_of_the_union.txt').load()
# text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
# documents = text_splitter.split_documents(raw_documents)
# db = FAISS.from_documents(documents, OpenAIEmbeddings())

In [34]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader

documents = SimpleDirectoryReader(path_vector_db).load_data()
service_context = ServiceContext.from_defaults(chunk_size=1000)
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
index.storage_context.persist(path_vector_db)


In [38]:
query_engine = index.as_query_engine()
response = query_engine.query("What is the setback for category 1 road")
print(response)

The setback for Category 1 road is 15m (inclusive of Green Buffer indicated in brackets).


In [25]:
filename = response.source_nodes[0].node.metadata.get("file_path", None)
filename = filename.split('/')[-1].replace('.txt', 'pdf')

In [24]:
import requests

bot_token = os.getenv("BOT_TOKEN")
webhook_url = 'https://9ad0-202-94-70-51.ngrok-free.app'

# Make an HTTP POST request to set the webhook
response = requests.post(
    f'https://api.telegram.org/bot{bot_token}/setWebhook',
    data={'url': webhook_url}
)

# Print the response to check if it was successful
print(response.text)


{"ok":true,"result":true,"description":"Webhook was set"}
