In [10]:
!pip install --upgrade PyMuPDF streamlit openai faiss-cpu
import os
import fitz
import openai
import faiss
import numpy as np
# PyMuPDF: for accessing PDF files, extracting text, images, and metadata, and performing various other operations on PDF documents.
# streamlit: framework for creating interactive web applications for data science and machine learning.
# openai: official Python client library for the OpenAI API.
# faiss-cpu: FAISS (Facebook AI Similarity Search) is a library for efficient similarity search and clustering of dense vectors.



# **Extract Text from PDF**

In [3]:
# Colab-only: mount the user's Google Drive so its files are reachable from the runtime.
from google.colab import drive
drive.mount('/content/drive')
# Folder inside the mounted Drive that is scanned for PDF files further down.
folder_path = '/content/drive/My Drive/pdfs/'

Mounted at /content/drive


We'll be working only with the PDFs for now.

In [11]:
"""fitz is a Python binding for the MuPDF library, which allows you to work with PDF files. """

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    The file is opened with ``fitz`` (PyMuPDF, the Python binding for MuPDF).

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        A single string holding the text of all pages (empty for a PDF
        without extractable text).
    """
    # The context manager closes the document (and its file handle) even when
    # extraction fails part-way through; the original never closed it.
    with fitz.open(pdf_path) as document:
        # join() avoids the quadratic cost of growing a string with "+=".
        return "".join(page.get_text() for page in document)
# extracts text from each page of a PDF and concatenate it into a single string.
# List all PDF files in the folder.
# - sorted(): os.listdir() returns entries in arbitrary order, so sorting keeps
#   the order of the extracted text (and of the chunks indexed later) reproducible.
# - lower(): the extension check is case-insensitive so "*.PDF" files are not
#   silently skipped.
pdf_files = sorted(os.listdir(folder_path))
pdf_paths = [
    os.path.join(folder_path, pdf_file)
    for pdf_file in pdf_files
    if pdf_file.lower().endswith('.pdf')
]

# Extract text from each PDF. The pieces are collected and joined once at the
# end, so `pdf_text` is only (re)bound after every file was read successfully
# instead of being left half-filled when the loop is interrupted.
extracted_texts = []
for pdf_path in pdf_paths:
    extracted_texts.append(extract_text_from_pdf(pdf_path))
    print(f"PDF: {pdf_path} extracted")
pdf_text = "".join(extracted_texts)

PDF: /content/drive/My Drive/pdfs/Berrada Karim .pdf extracted


KeyboardInterrupt: 

# **Index the Extracted Text**

In [19]:
from getpass import getpass

from openai import OpenAI

# Credentials must never be written into the notebook: the key is taken from
# the OPENAI_API_KEY environment variable, and only if that is unset is the
# user prompted for it (getpass does not echo the value into the cell output).
# NOTE: the key that used to be embedded in this cell has been exposed and
# must be revoked in the OpenAI dashboard.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: "),
)

In [25]:
# Configure the module-level OpenAI client with the key already held by
# `client` instead of repeating the secret as a string literal.
# NOTE: the literal key previously written on this line has been exposed and
# must be revoked.
openai.api_key = client.api_key
def get_openai_embeddings(text_list, batch_size=256):
    """Embed a list of texts with the ``text-embedding-3-small`` model.

    Texts are sent to the API in batches rather than one request per text,
    which removes most of the network round-trips when indexing a corpus.

    Args:
        text_list: Iterable of strings to embed.
        batch_size: Maximum number of texts sent in one API request.

    Returns:
        A list of embedding vectors (lists of floats), one per input text and
        in the same order as ``text_list``. Empty input yields an empty list.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    texts = list(text_list)
    embeddings = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        response = client.embeddings.create(input=batch, model="text-embedding-3-small")
        # Each returned item carries the position of its input within the
        # batch; sorting on it keeps output order tied to input order.
        ordered_items = sorted(response.data, key=lambda item: item.index)
        embeddings.extend(item.embedding for item in ordered_items)
    return embeddings
def index(name):
    """Embed the lines of the global ``pdf_text`` and persist a FAISS index.

    Two files are written to the current directory:
    ``{name}.faiss`` (the L2 index) and ``{name}_chunks.txt`` (one chunk per
    line, in the same order as the vectors in the index).

    Args:
        name: Base name used for both output files.

    Raises:
        ValueError: If ``pdf_text`` contains no non-blank line to index.
    """
    # splitlines() (rather than split('\n')) guarantees that no chunk contains
    # any line-boundary character, so reading the chunk file back with
    # read().splitlines() yields exactly these chunks in the same positions as
    # their vectors. Blank lines are dropped: they carry no information and an
    # empty string is not a valid input for the embeddings endpoint.
    corpus_chunks = [chunk for chunk in pdf_text.splitlines() if chunk.strip()]
    if not corpus_chunks:
        raise ValueError("No non-empty text chunks to index; extract the PDF text first.")

    # Generate embeddings using the OpenAI API; FAISS operates on float32.
    corpus_embeddings = np.array(get_openai_embeddings(corpus_chunks), dtype=np.float32)

    # Create and populate the FAISS index (dimension = embedding length).
    faiss_index = faiss.IndexFlatL2(corpus_embeddings.shape[1])
    faiss_index.add(corpus_embeddings)

    # Save the index
    faiss.write_index(faiss_index, f'{name}.faiss')

    # Save the chunks, one per line
    with open(f'{name}_chunks.txt', 'w') as f:
        f.writelines(f"{chunk}\n" for chunk in corpus_chunks)


# **Retrieve Relevant Information**

In [21]:
def retrieve_relevant_chunks(query, index, chunks, k):
    """Return the chunks whose embeddings are closest to the query.

    Args:
        query: Text to search for.
        index: FAISS index whose vector ids are positions in ``chunks``.
        chunks: List of text chunks, in the order they were added to ``index``.
        k: The number of relevant chunks to retrieve.

    Returns:
        Up to ``k`` chunks, nearest first. Fewer are returned when fewer are
        available; an empty list when ``k`` is not positive or there are no
        chunks.
    """
    # Asking for more neighbours than there are chunks makes FAISS pad the
    # result with -1, which as a Python index would silently return the LAST
    # chunk; clamp k and filter the ids to avoid that.
    k = min(k, len(chunks))
    if k <= 0:
        return []

    # FAISS operates on float32 matrices of shape (n_queries, dim).
    query_embedding = np.array(get_openai_embeddings([query]), dtype=np.float32)
    _, indices = index.search(query_embedding, k)
    return [chunks[idx] for idx in indices[0] if 0 <= idx < len(chunks)]

# **Integrate with OpenAI GPT-4**

https://stackoverflow.com/questions/75774873/openai-api-error-this-is-a-chat-model-and-not-supported-in-the-v1-completions


https://stackoverflow.com/questions/77469966/openai-api-error-you-tried-to-access-openai-completion-but-this-is-no-longer

In [41]:
# Build the FAISS index and the chunk file only when they are not on disk yet:
# index() re-embeds the whole corpus through the OpenAI API, which is slow and
# billed, so it should not run on every execution of this cell.
# After the PDFs change, delete the two files (or call index(Corpus_name)
# yourself) to rebuild.
Corpus_name = 'DATA'  # Replace with your actual corpus name
index_path = f'{Corpus_name}.faiss'
chunks_path = f'{Corpus_name}_chunks.txt'
if not (os.path.exists(index_path) and os.path.exists(chunks_path)):
    index(Corpus_name)

# Load the corpus chunks
with open(chunks_path, 'r') as f:
    corpus_chunks = f.read().splitlines()

# Load the corpus index
corpus_index = faiss.read_index(index_path)

# Vector ids are used as positions in corpus_chunks, so the two artifacts must
# describe the same number of chunks; otherwise retrieval would return the
# wrong text for a match.
if corpus_index.ntotal != len(corpus_chunks):
    raise RuntimeError(
        f"{index_path} holds {corpus_index.ntotal} vectors but {chunks_path} "
        f"holds {len(corpus_chunks)} chunks; delete both files and rebuild the index."
    )

# Define context
# Instruction text (task, persona, format, tone, reminder) that get_response()
# places at the start of every prompt sent to the model.
# NOTE(review): the text allows answers of "up to 1000 lines" while
# get_response() caps replies at max_tokens=500 — confirm which limit is intended.
context = (
    """Task: I want you to answer the questions of doctors concerning medical knowledge and patient information. Identify symptoms and provide potential diagnoses along with prescription of Moroccan medication. Specify at the end that your suggestions are not 100% accurate and must be validated by a healthcare professional.
Persona: You are an expert in healthcare, both physical and mental. You are familiar with Moroccan medication and will only prescribe those. Respond in the language of the question.

Format:
- Use the language of the query to formulate your response.
- Provide concise responses related to the user's prompts, limited to 4 lines in clear bullet points unless more detail is requested.
- If more detailed information is requested, elaborate up to 1000 lines.

Tone:
- Maintain a professional yet friendly tone.
- Use clear and elegant language.

Reminder:
Always specify at the end of your response that the provided suggestions are not definitive and should not be acted upon without the supervision of a healthcare professional.
"""
)

# Function to get a response from GPT-4
def get_response(query):
    """Answer a question with GPT-4, grounded on chunks retrieved from the corpus.

    The three chunks closest to ``query`` are fetched from ``corpus_index`` and
    passed to the model together with the question.

    Args:
        query: The user's question.

    Returns:
        The model's reply with surrounding whitespace removed; an empty string
        if the model returned no text content.
    """
    # Retrieve relevant chunks
    relevant_chunks = retrieve_relevant_chunks(query, corpus_index, corpus_chunks, 3)
    retrieved_text = "\n\n".join(relevant_chunks)

    # The instructions go in the system message and the retrieved material plus
    # the question in the user message. Sending everything with role
    # "assistant" (as before) presents the prompt as something the model itself
    # already said rather than as instructions and a question to answer.
    # The call goes through `client`, the same configured client that the
    # embedding helper uses.
    response = client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": context},
            {
                "role": "user",
                "content": f"Reference material:\n{retrieved_text}\n\nQuestion: {query}",
            },
        ],
        max_tokens=500,
        temperature=0.7,
    )
    # message.content can be None (no text in the reply); avoid calling
    # .strip() on None.
    reply = response.choices[0].message.content
    return (reply or "").strip()

# Chat function
def chat():
    """Run an interactive question/answer loop on stdin/stdout.

    Each non-blank line typed by the user is passed to ``get_response`` and the
    reply is printed. The loop ends when the user types ``exit`` (any case,
    surrounding spaces ignored), when stdin is closed, or when the prompt is
    interrupted.
    """
    print("💬 Healthcare Chatbot")
    print("Welcome to the e-ESJ Chatbot! Use this tool to search patient information or enter the patient's symptoms and medical history to receive a potential diagnosis along with medication suggestions.")
    print("Type 'exit' to end the chat.")

    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # stdin was closed or the cell was interrupted while waiting for
            # input: leave the loop instead of surfacing a traceback.
            print()
            break
        if not user_input:
            # Nothing to ask; an empty query would only trigger a failing
            # embedding request.
            continue
        if user_input.lower() == 'exit':
            break
        response = get_response(user_input)
        print(f"Chatbot: {response}")

# Start the chat: runs the interactive loop, which keeps prompting until the user types 'exit'.
chat()

💬 Healthcare Chatbot
Welcome to the e-ESJ Chatbot! Use this tool to search patient information or enter the patient's symptoms and medical history to receive a potential diagnosis along with medication suggestions.
Type 'exit' to end the chat.
You: hi
Chatbot: Hello! How can I assist you today?
You: I wanted to know more about Parkinson's disease
Chatbot: - La maladie de Parkinson est une maladie neurologique progressive qui affecte le mouvement.
- Elle est caractérisée par des symptômes tels que des tremblements, une rigidité, des problèmes de coordination et d'équilibre.
- En Maroc, on peut prescrire des médicaments tels que la Lévodopa ou les agonistes de la dopamine pour aider à contrôler les symptômes.
- Cependant, il faut noter que ces suggestions ne sont pas 100% précises et doivent être validées par un professionnel de la santé.
You: exit
