In [26]:
# !pip install openai
# !pip install sentence_transformers
# !pip install faiss-cpu

In [15]:
import pandas as pd
import numpy as np
import re, json, textwrap, os, openai
import requests
import time
import requests

from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts.prompt import PromptTemplate
from langchain.schema import messages_from_dict, messages_to_dict
from langchain.callbacks import get_openai_callback

from sentence_transformers import SentenceTransformer
import faiss
import torch

pd.set_option('display.max_colwidth', None)

### OpenAI API call authentication

In [12]:
def read_secret_key_from_file(filename):
    """Return the text stored in ``filename`` with surrounding whitespace removed.

    Args:
        filename: Path of the text file to read.

    Returns:
        The full file content after ``str.strip()``.
    """
    with open(filename, 'r') as key_file:
        return key_file.read().strip()

api_key = read_secret_key_from_file('./final/config.txt')

In [3]:
path = '../mashqa_data/'

### Data Preprocessing

In [4]:
# Load the training JSON file. The encoding is pinned to UTF-8 so decoding does
# not depend on the platform's default locale encoding (which can fail on, or
# mis-decode, non-ASCII characters).
with open(f'{path}train_webmd_squad_v2_consec.json', 'r', encoding='utf-8') as json_file:
    data = json.load(json_file)

In [5]:
def extract_question_context_answer_triples(data):
    """Flatten a nested QA dictionary into ``(question, context, answer)`` tuples.

    Walks ``data['data']`` -> ``'paragraphs'`` -> ``'qas'`` -> ``'answers'`` and
    emits one tuple per answer text, pairing it with its question and with the
    context of the paragraph it belongs to. A question with an empty answer
    list contributes no tuples.

    Args:
        data: Dictionary with the nesting described above.

    Returns:
        List of ``(question, context, answer)`` tuples in document order.
    """
    triples = []
    for article in data['data']:
        for para in article['paragraphs']:
            ctx = para['context']
            for qa_entry in para['qas']:
                q_text = qa_entry['question']
                triples.extend(
                    (q_text, ctx, ans['text']) for ans in qa_entry['answers']
                )
    return triples

In [6]:
question_context_answer_triples = extract_question_context_answer_triples(data)

In [7]:
df = pd.DataFrame(question_context_answer_triples, columns=['Question', 'Context', 'Answer'])

In [8]:
df.head(1)

Unnamed: 0,Question,Context,Answer
0,What surgical techniques are used to treat glaucoma?,"Treatment of open-angle glaucoma -- the most common form of the disease -- requires lowering the eye's pressure by increasing the drainage of aqueous humor fluid or decreasing the production of that fluid. Medications can accomplish both of these goals. Surgery and laser treatments are directed at improving the eye's aqueous drainage. If not diagnosed early, open-angle glaucoma may significantly damage vision and even cause blindness. That is why it's so important to have your eye doctor test you regularly for glaucoma. Once diagnosed, glaucoma is usually controlled with eye drops that reduce eye pressure. Glaucoma is a life-long condition and needs continual follow-up with your eye doctor. Both drugs and surgery have high rates of success in treating chronic open-angle glaucoma, but you can help yourself by carefully following the doctor's treatment plan. Some patients may find it difficult to follow a regimen involving two or three different eye drops. Be candid and tell the doctor if you cannot follow the medication schedule or if the eye drops cause unwanted side effects. There are frequently alternative treatments. Because of potential drug interactions, be sure to tell your doctor about any other medical problems you have or other medications you take. If glaucoma drops causes the eyes to become chronically red, consult your doctor about switching to preservative-free glaucoma drops that may alleviate the redness from preservatives. Acute angle-closure glaucoma is different from chronic open-angle glaucoma in several important ways: The symptoms usually occur with relative suddenness; the eye is painful and red. If the high pressure in the eye is not relieved quickly, blindness can occur. On the other hand, treatments for acute angle-closure glaucoma -- usually laser treatment -- are permanent and do not require long-term therapy. 
For this type of glaucoma, making a hole in the iris to allow fluid to drain, called an iridectomy, is the standard treatment to cure it. The unaffected eye also is usually treated to prevent a future attack. However, it's important to get your eyes checked regularly, as some people may develop a case of chronic angle-closure glaucoma later in life, even after laser treatment. If the glaucoma does not respond to medication, or if you cannot tolerate the side effects, your doctor may change medications or recommend one of several surgical techniques: Laser trabeculoplasty creates small laser burns in the area where the fluid drains, improving the outflow rate of aqueous fluid. This relatively brief procedure can often be done in an ophthalmologist's clinic. Trabeculectomy is a surgical procedure that creates a new channel for fluid outflow in cases in which the intraocular pressure is high and the optic nerve damage progresses. Long-term results vary, but generally, the success rate is good. Surgical implants that shunt fluid out of the eye may also be used to decrease pressure in the eye. Remember, all forms of medical or surgical treatment have potential benefits and risks. Before giving your consent, always ask the surgeon to clearly explain any treatment or surgery as well as the proposed benefits, effective alternatives, and potential risks or complications.","If the glaucoma does not respond to medication, or if you cannot tolerate the side effects, your doctor may change medications or recommend one of several surgical techniques: Laser trabeculoplasty creates small laser burns in the area where the fluid drains, improving the outflow rate of aqueous fluid. This relatively brief procedure can often be done in an ophthalmologist's clinic. Trabeculectomy is a surgical procedure that creates a new channel for fluid outflow in cases in which the intraocular pressure is high and the optic nerve damage progresses. 
Long-term results vary, but generally, the success rate is good. Surgical implants that shunt fluid out of the eye may also be used to decrease pressure in the eye. Remember, all forms of medical or surgical treatment have potential benefits and risks. Before giving your consent, always ask the surgeon to clearly explain any treatment or surgery as well as the proposed benefits, effective alternatives, and potential risks or complications."


In [9]:
df.shape

(19989, 3)

### Prompt Engineering

#### 1. Zero-shot

In [17]:
def get_template():
    """Build the zero-shot conversation prompt.

    Returns:
        A ``(template, PROMPT)`` pair: the raw template string and a
        ``PromptTemplate`` created from it with the input variables
        ``history`` and ``input``.
    """
    # The template contains only a system-style instruction plus the
    # ``{history}`` and ``{input}`` placeholders; no examples or context.
    template = """
    
    You are a chat assistant specialised in answering health questionnaire. 

    Current conversation:
    {history}

    Human: {input}
    Assistant:
    """

    PROMPT = PromptTemplate(
            input_variables=["history", "input"],
            template=template
                )
    
    return template, PROMPT

template, PROMPT = get_template()

In [18]:
# Chat model used by the conversation chain below (low temperature, key read
# from the config file earlier in the notebook).
llm = ChatOpenAI(temperature=0.1,
            openai_api_key=api_key,
            model_name = "gpt-3.5-turbo",
            #model_name = 'gpt-3.5-turbo-0613',
            #model_name = 'gpt-3.5-turbo-16k-0613',
            verbose=False)

# Conversation chain built from the object currently bound to PROMPT, with a
# buffer memory that supplies the `history` variable.
# NOTE(review): the chain receives the PROMPT object that exists at this point;
# rebinding the name PROMPT in a later cell presumably does not change this
# chain, and the memory keeps accumulating turns across experiments -- confirm
# and rebuild the chain after switching templates.
Conversation = ConversationChain(
        llm=llm, 
        prompt=PROMPT, 
        verbose=False,
        memory=ConversationBufferMemory())

In [17]:
def interact_with_chat(user_input):
    """Send ``user_input`` to the notebook-level ``Conversation`` chain.

    Args:
        user_input: Text passed to the chain as its ``input`` variable.

    Returns:
        Whatever ``Conversation.run`` returns for that input.
    """
    return Conversation.run(input=user_input)

In [14]:
# Example user input
user_input = "How can you be smart with antibiotics?"

# Get AI response
response = interact_with_chat(user_input)
print(response)

To be smart with antibiotics, it is important to follow these guidelines:

1. Take antibiotics only when prescribed: Antibiotics are effective against bacterial infections, but they do not work against viral infections like the common cold or flu. Taking antibiotics unnecessarily can lead to antibiotic resistance, where bacteria become resistant to the drugs.

2. Complete the full course: Even if you start feeling better, it is crucial to complete the full course of antibiotics as prescribed by your healthcare provider. This ensures that all the bacteria causing the infection are eliminated, reducing the risk of recurrence or antibiotic resistance.

3. Don't share antibiotics: Antibiotics are prescribed based on specific infections and individual factors. Sharing antibiotics with others can lead to inappropriate use and may not effectively treat the infection.

4. Avoid using leftover antibiotics: If you have any leftover antibiotics from a previous prescription, do not use them withou

#### 2. Few-shot

In [48]:
def get_few_shot_template():
    """Build the few-shot conversation prompt.

    Three fixed question/answer examples are spliced into the template text
    with an f-string; ``{history}`` and ``{input}`` are left as placeholders
    (written with doubled braces so the f-string does not consume them).

    Returns:
        A ``(template, PROMPT)`` pair: the rendered template string and a
        ``PromptTemplate`` created from it with the input variables
        ``history`` and ``input``.
    """
    # Example questions and answers (few-shot examples)
    few_shot_examples = """
    Q: What surgical techniques are used to treat glaucoma?
    A: If the glaucoma does not respond to medication, or if you cannot tolerate the side effects, your doctor may change medications or recommend one of several surgical techniques: Laser trabeculoplasty creates small laser burns in the area where the fluid drains, improving the outflow rate of aqueous fluid. This relatively brief procedure can often be done in an ophthalmologist's clinic. Trabeculectomy is a surgical procedure that creates a new channel for fluid outflow in cases in which the intraocular pressure is high and the optic nerve damage progresses. Long-term results vary, but generally, the success rate is good. Surgical implants that shunt fluid out of the eye may also be used to decrease pressure in the eye. Remember, all forms of medical or surgical treatment have potential benefits and risks. Before giving your consent, always ask the surgeon to clearly explain any treatment or surgery as well as the proposed benefits, effective alternatives, and potential risks or complications.
    
    Q: What are the best ways to treat glaucoma?
    A: Both drugs and surgery have high rates of success in treating chronic open-angle glaucoma, but you can help yourself by carefully following the doctor's treatment plan. Some patients may find it difficult to follow a regimen involving two or three different eye drops. Be candid and tell the doctor if you cannot follow the medication schedule or if the eye drops cause unwanted side effects. There are frequently alternative treatments. Because of potential drug interactions, be sure to tell your doctor about any other medical problems you have or other medications you take. If glaucoma drops causes the eyes to become chronically red, consult your doctor about switching to preservative-free glaucoma drops that may alleviate the redness from preservatives.
    
    Q: Is surgery for glaucoma dangerous?
    A: Before giving your consent, always ask the surgeon to clearly explain any treatment or surgery as well as the proposed benefits, effective alternatives, and potential risks or complications.
    
    """

    # Template with few-shot examples
    template = f"""
    You are a chat assistant specialised in answering health questions. Below are some examples of questions and answers:

    {few_shot_examples}

    Current conversation:
    {{history}}

    Human: {{input}}
    Assistant:
    """

    PROMPT = PromptTemplate(
        input_variables=["history", "input"],
        template=template
    )

    return template, PROMPT

# Use the new few-shot template.
# A ConversationChain keeps the prompt object it was constructed with, so
# rebinding PROMPT alone would leave the existing chain on the previous prompt.
# Rebuild the chain, with a fresh memory so turns from the previous experiment
# do not leak into this one.
template, PROMPT = get_few_shot_template()
Conversation = ConversationChain(
    llm=llm,
    prompt=PROMPT,
    verbose=False,
    memory=ConversationBufferMemory())

In [51]:
# Example user input
user_input = "How can you be smart with antibiotics?"

# Get AI response
response = interact_with_chat(user_input)
print(response)

To be smart with antibiotics, it is important to use them only when necessary and as prescribed by a healthcare professional. Here are some tips:

1. Take antibiotics exactly as prescribed: Follow the dosage instructions and complete the full course of treatment, even if you start feeling better. This helps ensure that all bacteria are eliminated and reduces the risk of antibiotic resistance.

2. Do not share antibiotics: Antibiotics are prescribed based on specific conditions and individual needs. Sharing antibiotics can lead to inappropriate use and may contribute to antibiotic resistance.

3. Do not save antibiotics for later use: Unused antibiotics should not be saved for future use or shared with others. Always consult a healthcare professional for a new prescription if needed.

4. Avoid using antibiotics for viral infections: Antibiotics are ineffective against viral infections such as the common cold or flu. They only work against bacterial infections. Using antibiotics unnecess

#### 3. Chain of Thought

In [52]:
def get_context_for_specific_question(df, question):
    """Look up the context stored for an exact question string.

    Args:
        df: DataFrame with ``'Question'`` and ``'Context'`` columns.
        question: Question text; compared to the column with ``==``.

    Returns:
        The ``'Context'`` value of the first row whose ``'Question'`` equals
        ``question``, or a fixed "not found" message when no row matches.
    """
    matches = df.loc[df['Question'] == question]
    if matches.empty:
        return "No context found for the specified question."
    return matches.iloc[0]['Context']

# Call the function with the specific question
specific_question = "Is exercise OK if you have asthma?"
context_1 = get_context_for_specific_question(df, specific_question)

print(context_1)

There are different types of inhalers that serve different purposes and require different techniques. These inhalers help prevent flares and keep symptoms from getting worse. They're called control inhalers because they have medicine that controls inflammation. Use yours as often as your doctor tells you to, usually once or twice a day: Whether or not you're having symptoms Even if you feel like you're doing better If you're supposed to use it two times a day, aim for 12 hours apart. When you begin using this kind of inhaler, it may be 2 to 4 weeks before you notice the drugs start to work. Rescue or relief inhalers quickly bring back normal breathing when you are: Short of breath Wheezing Feeling tight in your chest Coughing You should keep a rescue inhaler with you all the time. Use it: When you have a flare of symptoms Before you're going to be around your asthma triggers When you run into unexpected triggers A rescue inhaler is for short-term symptom relief, not to control your ast

In [53]:
# Find the first question that mentions one keyword but not another
# (defaults: contains 'OCD' and does not contain 'BDD').
def get_question_and_context_for_asthma_excluding_bdd(df, include_keyword='OCD', exclude_keyword='BDD'):
    """Return the first question (and its context) matching one keyword but not another.

    The function name is historical: the search is not asthma-specific. With
    the default arguments it looks for questions containing the whole word
    ``OCD`` and not containing the whole word ``BDD``.

    Args:
        df: DataFrame with ``'Question'`` and ``'Context'`` columns.
        include_keyword: Word or phrase the question must contain
            (case-insensitive, matched on word boundaries).
        exclude_keyword: Word or phrase the question must not contain
            (case-insensitive, matched on word boundaries).

    Returns:
        ``(question, context)`` of the first matching row, or
        ``(message, None)`` when no row matches.
    """
    questions = df['Question']
    # re.escape keeps keywords literal even if they contain regex metacharacters.
    include_pattern = rf'\b{re.escape(include_keyword)}\b'
    exclude_pattern = rf'\b{re.escape(exclude_keyword)}\b'

    # na=False: a missing question counts as "no match" instead of putting NaN
    # into the boolean mask.
    has_included = questions.str.contains(include_pattern, case=False, regex=True, na=False)
    has_excluded = questions.str.contains(exclude_pattern, case=False, regex=True, na=False)
    matching_rows = df[has_included & ~has_excluded]

    if matching_rows.empty:
        return f"No question with the keyword '{include_keyword}' found, excluding '{exclude_keyword}'.", None

    first_match = matching_rows.iloc[0]
    return first_match['Question'], first_match['Context']

# Call the function and print the results
question_with_asthma, context_2 = get_question_and_context_for_asthma_excluding_bdd(df)
# print("Question:", question_with_asthma)
# print("Context:", context_2)

print(context_2)

Does it take you forever to make a doctor's appointment, clean out your garage, or do your taxes? Putting off something that needs to be done is called procrastination. We all do it sometimes. But if you constantly struggle to finish tasks, there may be a bigger problem at play. Once you figure out your reasons, you can work on making the most of your time. When you're facing something you don't want to do, it can make you feel tense. Putting off that dreaded chore is a way to get some short-term relief. The downside is that you'll still have to tackle your task in the future, which may make you feel guilty or angry -- and cause your stress to rise more. If the tension's bad enough to keep you from getting things done, you might notice it affects you in other ways, too, like: Trouble sleeping Racing thoughts Lack of energy or trouble focusing Headaches or muscle tension Keep your mood in check with regular exercise, limit alcohol and caffeine (which can make stress worse), and get enou

In [54]:
# Find the first question that mentions a keyword (default: 'blood clot').
def get_question_and_context_for_asthma(df, keyword='blood clot'):
    """Return the first question (and its context) that contains ``keyword``.

    The function name is historical: the search is not asthma-specific. The
    keyword is matched case-insensitively on word boundaries.

    Args:
        df: DataFrame with ``'Question'`` and ``'Context'`` columns.
        keyword: Word or phrase to look for in the ``'Question'`` column.

    Returns:
        ``(question, context)`` of the first matching row, or
        ``(message, None)`` when no row matches.
    """
    # re.escape keeps the keyword literal; na=False treats missing questions
    # as non-matching instead of putting NaN into the boolean mask.
    pattern = rf'\b{re.escape(keyword)}\b'
    matching_rows = df[df['Question'].str.contains(pattern, case=False, regex=True, na=False)]

    if matching_rows.empty:
        return f"No question with the keyword '{keyword}' found.", None

    first_match = matching_rows.iloc[0]
    return first_match['Question'], first_match['Context']

# Call the function and print the results
question_with_asthma, context_3 = get_question_and_context_for_asthma(df)
print("Question:", question_with_asthma)
print("Context:", context_3)

Question: What will happen if blood clot moves to your lungs?
Context: After hurting your leg, you're probably dealing with some discomfort and inconvenience. But that's not all you have to be concerned about. This type of injury raises your chances for getting a blood clot. Any time a blood vessel gets damaged, the nearby blood can thicken and organize into a sticky clump, or clot. Some clots only affect veins near your skin's surface. This condition, called superficial thrombophlebitis, typically doesn't lead to serious problems. When a blood clot forms farther inside your leg, it's known as deep vein thrombosis ( DVT). These clots can be dangerous if they break loose and travel to your lungs. Doctors call this a pulmonary embolism ( PE). Trauma could result from a car accident, a sports injury, or even a fall. Common mishaps that may lead to a clot include: Broken bones Bad bumps Severe bruises Severe muscle injuries A 2008 study revealed even minor leg injuries -- ones that don't n

In [55]:
# Find the first question that mentions a keyword (default: 'prevent back pain').
def get_question_and_context_for_asthma(df, keyword='prevent back pain'):
    """Return the first question (and its context) that contains ``keyword``.

    The function name is historical: the search is not asthma-specific. The
    keyword is matched case-insensitively on word boundaries.

    Args:
        df: DataFrame with ``'Question'`` and ``'Context'`` columns.
        keyword: Word or phrase to look for in the ``'Question'`` column.

    Returns:
        ``(question, context)`` of the first matching row, or
        ``(message, None)`` when no row matches.
    """
    # re.escape keeps the keyword literal; na=False treats missing questions
    # as non-matching instead of putting NaN into the boolean mask.
    pattern = rf'\b{re.escape(keyword)}\b'
    matching_rows = df[df['Question'].str.contains(pattern, case=False, regex=True, na=False)]

    if matching_rows.empty:
        return f"No question with the keyword '{keyword}' found.", None

    first_match = matching_rows.iloc[0]
    return first_match['Question'], first_match['Context']

# Call the function and print the results
question_with_asthma, context_4 = get_question_and_context_for_asthma(df)
print("Question:", question_with_asthma)
print("Context:", context_4)

Question: How can you prevent back pain?
Context: There's a lot riding on your spinal column. It's your body's main structural support. It needs to keep you stable enough to stand upright but flexible enough for movement. So it's no surprise that many people have back problems from time to time. The hurt can stem from sore muscles, ligaments, and tendons, or from herniated disks, fractures, and other problems in your upper, middle, and lower back. Sometimes you feel the effects right away. But in many cases, back problems develop over time. We often bring on our back problems through bad habits, such as: Poor posture, like sitting incorrectly at a desk or behind the steering wheel Repeating the same motion or overdoing it Pushing, pulling, and lifting things carelessly The spine is actually a stack of 24 bones called vertebrae. A healthy spine is S-shaped when viewed from the side. It curves back at your shoulders and inward at your neck and small of your back. It houses and protects y

In [56]:
# Call the function with the specific question
specific_question = "Do most people know that antibiotics are not effective for colds or flu?"
context_5 = get_context_for_specific_question(df, specific_question)

print(context_5)

Looking for an effective flu treatment and wondering if antibiotics will work? Antibiotics are medications that fight infections caused by bacteria, but the flu is cause by a virus. Taking antibiotics when you have a virus may do more harm than good. Taking antibiotics when they are not needed increases your risk of getting an infection later that may resist antibiotic treatment. Antibiotics only cure certain infections due to bacteria -- and if taken carelessly, you may get more serious health problems than you bargained for. With any illness, it is critical to address the underlying cause, whether it's bacterial or viral. Antibiotics will not kill cold or flu viruses. Not at all. Antibiotics can save people's lives, and if you need them, you should get them as quickly as you can. Since only a doctor can prescribe antibiotics, this means that you should talk to your doctor if you think you might need them (as opposed to taking your friend's leftover antibiotics from last winter's illn

In [94]:
def get_chain_of_thought_template(context=None):
    """Build the chain-of-thought conversation prompt around a context passage.

    Args:
        context: Passage to embed in the prompt. When omitted, the
            notebook-level ``context_5`` is used, which is what this function
            always used before the parameter existed.

    Returns:
        A ``(template, PROMPT)`` pair: the rendered template string and a
        ``PromptTemplate`` created from it with the input variables
        ``history`` and ``input``.
    """
    if context is None:
        # Backward-compatible default: the context looked up earlier in the notebook.
        context = context_5

    # The context is spliced into the template text, which is then handed to
    # PromptTemplate as a format string. Double any literal braces so they are
    # kept as text instead of being read as template variables.
    escaped_context = context.replace("{", "{{").replace("}", "}}")

    # Template with the context passage and a step-by-step reasoning instruction
    template = f"""
    You are a chat assistant specialized in answering health questions by thinking and aswering briefly. 
    Here is the context for the question:
    {escaped_context}

    Current conversation:
    {{history}}

    Using this information, think through the following question step by step and provide a brief answer:
    Question: {{input}}
    Reasoning:
    """

    PROMPT = PromptTemplate(
        input_variables=["history", "input"],
        template=template
    )

    return template, PROMPT

# Use the chain-of-thought template.
# A ConversationChain keeps the prompt object it was constructed with, so
# rebinding PROMPT alone would leave the existing chain on the previous prompt.
# Rebuild the chain, with a fresh memory so turns from the previous experiment
# do not leak into this one.
template, PROMPT = get_chain_of_thought_template()
Conversation = ConversationChain(
    llm=llm,
    prompt=PROMPT,
    verbose=False,
    memory=ConversationBufferMemory())

In [97]:
# Example user input
user_input = "How can you be smart with antibiotics?"

# Get AI response
response = interact_with_chat(user_input)
print(response)

To be smart with antibiotics, it is important to understand that antibiotics only work against bacterial infections, not viral infections like the flu. Taking antibiotics when they are not needed can lead to antibiotic resistance, making it harder to treat infections in the future. It is crucial to only take antibiotics as prescribed by a doctor, complete the full course of treatment, and not share antibiotics with others. Additionally, preventing the flu through vaccination and practicing good hygiene, such as frequent handwashing, can help reduce the need for antibiotics.


#### 4. RAG (Retrieval Augmented Generation)

In [100]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [114]:
# Build the retrieval corpus from the distinct contexts. `df` holds one row per
# (question, context, answer) triple, so the same passage appears once for
# every question asked about it; indexing those repeats wastes encoding time
# and lets a single passage occupy every top-k slot at query time.
context_texts = df['Context'].drop_duplicates().tolist()

# Embedding the context texts
model = SentenceTransformer('sentence-transformers/all-MiniLM-l6-v2')
embeddings = model.encode(context_texts, convert_to_numpy=True)

# Building FAISS index (exact L2 search over the passage embeddings)
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)

def retrieve_context(question, k=3):
    """Return the passages nearest to ``question``, joined into one string.

    The question is embedded with the notebook-level ``model``, the
    notebook-level FAISS ``index`` is searched for the ``k`` nearest vectors,
    and the corresponding entries of ``context_texts`` are joined with single
    spaces in best-first order.

    Args:
        question: Question text to embed and search with.
        k: Number of neighbours requested from the index.

    Returns:
        The distinct retrieved passages joined by ``' '``. May contain fewer
        than ``k`` passages when the index returns padding or repeated texts.
    """
    question_embedding = model.encode([question])
    _, indices = index.search(question_embedding, k)

    # FAISS pads the result with -1 when fewer than k vectors are available; a
    # negative index would silently pick a passage from the end of the list.
    hits = [context_texts[i] for i in indices[0] if i >= 0]

    # Drop repeated passages while keeping best-first order, so the prompt is
    # not filled with copies of the same text.
    return ' '.join(dict.fromkeys(hits))

# def ask_gpt_turbo(question, context, api_key):
#     openai.api_key = api_key
#     prompt = f"Context: {context}\nQuestion: {question}\nAnswer:"
#     response = openai.Completion.create(model="gpt-3.5-turbo", prompt=prompt, temperature=0.7)
#     return response.choices[0].text.strip()

In [115]:
def ask_gpt_turbo(question, context, api_key, ft_model_id=None):
    """Ask an OpenAI chat model a question, optionally grounded in a context.

    Sets ``openai.api_key`` (a module-level setting) and sends one system
    message plus one user message built from ``context`` and ``question``.

    Args:
        question: The question to answer.
        context: Supporting text placed before the question in the prompt;
            may be an empty string.
        api_key: OpenAI API key.
        ft_model_id: Optional model identifier (e.g. a fine-tuned model) to
            use instead of the default ``"gpt-3.5-turbo"``.

    Returns:
        The content of the first choice's message.
    """
    openai.api_key = api_key
    prompt = f"Context: {context}\nQuestion: {question}\nAnswer:"

    response = openai.ChatCompletion.create(
        # Fall back to the base model when no fine-tuned model is given.
        model=ft_model_id or "gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a chat assistant specialized in answering health questions. Give a brief to the point answer"},
            {"role": "user", "content": prompt}
        ]
    )
    return response.choices[0].message['content']

# Example usage
question = "How should you lift objects to prevent back pain?"
retrieved_context = retrieve_context(question)

# Query GPT-3.5 Turbo with the retrieved context
answer = ask_gpt_turbo(question, retrieved_context, api_key)
print(answer)

To prevent back pain when lifting objects: 
1. Stand close to the object you are lifting and keep your feet shoulder-width apart. 
2. Bend at your knees and hips, not your waist, to squat down and grip the object firmly.
3. Keep your back straight and tighten your core muscles.
4. Lift using the strength of your legs, not your back, and slowly rise to a standing position.
5. Hold the object close to your body and avoid twisting or bending while lifting.
6. Take small steps and pivot with your feet when changing direction.
7. If the object is too heavy, use a dolly, ask for help, or break it into smaller, manageable loads.


### Instruction Tuning

In [109]:
# Randomly select 10% of the DataFrame
df_sample = df.sample(frac=0.1, random_state=42)  # random_state for reproducibility

# Convert your DataFrame to the required format
def convert_to_openai_format(df):
    """Serialise each row of ``df`` as one chat-format JSON string.

    Every row becomes a JSON object with a ``"messages"`` list holding a fixed
    system message, a user message with the row's ``'Question'`` and an
    assistant message with the row's ``'Answer'``.

    Args:
        df: DataFrame with ``'Question'`` and ``'Answer'`` columns.

    Returns:
        List of JSON strings, one per row, in row order.
    """
    return [
        json.dumps({
            "messages": [
                {"role": "system", "content": "You are a chat assistant specialized in answering health questions."},
                {"role": "user", "content": record['Question']},
                {"role": "assistant", "content": record['Answer']},
            ]
        })
        for _, record in df.iterrows()
    ]

# Write to a JSON Lines file
with open('fine_tune_data.jsonl', 'w') as file:
    for line in convert_to_openai_format(df_sample):
        file.write(f"{line}\n")

In [None]:
def upload_file(api_key, file_path):
    """Upload a file to the OpenAI files endpoint with purpose ``fine-tune``.

    Args:
        api_key: OpenAI API key, sent as a bearer token.
        file_path: Path of the file to upload; opened in binary mode.

    Returns:
        The decoded JSON body of the HTTP response.
    """
    headers = {"Authorization": f"Bearer {api_key}"}
    # Open the file in a context manager so the handle is closed once the
    # request has been sent, instead of being left open.
    with open(file_path, "rb") as upload_handle:
        response = requests.post(
            "https://api.openai.com/v1/files",
            headers=headers,
            files={"file": upload_handle, "purpose": (None, "fine-tune")}
        )
    return response.json()

upload_response = upload_file(api_key, 'fine_tune_data.jsonl')
print(upload_response)

In [None]:
def create_fine_tuning_job(api_key, file_id):
    """Request a fine-tuning job for ``gpt-3.5-turbo`` on an uploaded file.

    Args:
        api_key: OpenAI API key, sent as a bearer token.
        file_id: Identifier of the training file.

    Returns:
        The decoded JSON body of the HTTP response.
    """
    payload = {"training_file": file_id, "model": "gpt-3.5-turbo"}
    auth_headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    reply = requests.post(
        "https://api.openai.com/v1/fine_tuning/jobs",
        headers=auth_headers,
        json=payload,
    )
    return reply.json()

fine_tune_response = create_fine_tuning_job(api_key, upload_response['id'])
print(fine_tune_response)

In [None]:
# Example usage of the fine-tuned model
ft_model_id = 'ft:gpt-3.5-turbo-0613:personal::xxxx'  # Replace with your fine-tuned model ID

response = ask_gpt_turbo("How is obsessive-compulsive disorder diagnosed?", '', api_key, ft_model_id=ft_model_id)
print(response)

In [None]:
def check_fine_tuning_status(api_key, job_id):
    """Fetch the current record of a fine-tuning job.

    Args:
        api_key: OpenAI API key, sent as a bearer token.
        job_id: Identifier of the fine-tuning job.

    Returns:
        The decoded JSON body of the HTTP response.
    """
    endpoint = f"https://api.openai.com/v1/fine_tuning/jobs/{job_id}"
    auth_headers = {"Authorization": f"Bearer {api_key}"}
    return requests.get(endpoint, headers=auth_headers).json()

# Use this function to check the status
job_id = 'ftjob-xxxx'  # Replace with your actual job ID

# Poll until the job reaches a terminal state. The loop also stops when the
# response carries no "status" (e.g. an API error payload for an unknown job id
# or a bad key) and after a bounded number of attempts, so the cell cannot spin
# forever.
POLL_INTERVAL_SECONDS = 60
MAX_STATUS_CHECKS = 24 * 60  # at most about 24 hours of polling

for _ in range(MAX_STATUS_CHECKS):
    status_response = check_fine_tuning_status(api_key, job_id)
    status = status_response.get("status")
    print(f"Current status: {status}")
    if status == "succeeded":
        ft_model_id = status_response.get("fine_tuned_model")
        print(f"Fine-tuned model ID: {ft_model_id}")
        break
    elif status in ["failed", "cancelled"]:
        print("Fine-tuning job failed or was cancelled.")
        break
    elif status is None:
        # No status field: show the payload rather than retrying blindly.
        print(f"Unexpected response, stopping: {status_response}")
        break
    time.sleep(POLL_INTERVAL_SECONDS)  # Wait before checking again
else:
    print("Stopped polling: the fine-tuning job did not finish within the polling limit.")