# Gen AI RAG Project

# Import Necessary Libraries

In [1]:
import pandas as pd
import numpy as np

from openai import OpenAI

## OPEN AI EMBEDDINGS:
from langchain_openai import OpenAIEmbeddings
import os
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from langchain_text_splitters import RecursiveCharacterTextSplitter, CharacterTextSplitter
#from langchain_experimental.text_splitter import SemanticChunker

from IPython.display import display, HTML, Markdown




# Initialize Embeddings from OpenAI

In [None]:
# Read the OpenAI key from the environment rather than hard-coding it in the
# notebook, so the secret is never saved with the file. Falls back to an empty
# string (the previous literal value) when the variable is not set.
API_KEY = os.environ.get("OPENAI_API_KEY", "")

# Create the embeddings function
embeddings = OpenAIEmbeddings(model="text-embedding-3-small", api_key=API_KEY)

# Create a text splitter: chunks of up to 700 characters with 50 characters of overlap
text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=50)
print('Cell finished')

Cell finished


## 1. Load Data

In [3]:
# Location of the source PDF used by the rest of the notebook.
document_dir, filename = "./data", "allergies_ok.pdf"
file_path = os.path.join(document_dir, filename)



# 2. EDA

# 3. Divide into chunks

In [None]:
# Load the PDF as one Document per page. `load()` is used instead of
# `load_and_split()`: the latter already runs a default text splitter, so the
# text would be chunked twice with two different configurations.
pages = PyPDFLoader(file_path).load()

# Split the pages into chunks with the splitter configured above.
docs = text_splitter.split_documents(pages)


In [5]:
# Inspect the first chunk (text plus metadata) as a sanity check.
print(docs[0])

page_content='Overview 
Millions of Americans have an allergy of some kind. You probably know one of those 
people or are one yourself. Almost 6% of U.S. adults and children have a food allergy. 
Food allergy symptoms are most common in babies and children, but they can appear at 
any age. You can even develop an allergy to foods you have eaten for years with no 
problems. 
 
Signs of Allergies 
The body’s immune system keeps you healthy by fighting off infections and other dangers 
to good health. A food allergy reaction occurs when your immune system overreacts to a 
food or a substance in a food, identifying it as a danger and triggering a protective 
response.' metadata={'source': './data\\allergies_ok.pdf', 'page': 0}


In [6]:

# Show the class of a chunk object.
print(type(docs[0]))



<class 'langchain_core.documents.base.Document'>


# 4. Embeddings into Chroma DB

In [7]:
# Embed every chunk and store the vectors in a Chroma collection persisted on disk.
# NOTE(review): re-running this cell against an existing directory may append the
# same chunks again — confirm before re-executing.
db = Chroma.from_documents(
    documents=docs,
    embedding=embeddings,
    persist_directory="./allergy_chroma_db",
)
print('Cell finished')

Cell finished


# 5. Obtain the k number of most similar results to the user's query

In [8]:
# Ask the user for a question, then fetch the three chunks most similar to it.
# Note: this rebinds `docs` to the search hits.
user_question = input("Ask a question about allergies: ")
docs = db.similarity_search(query=user_question, k=3)

# 6. Build the prompt based on the similarity search results

### Build function to create the content for the prompt

In [9]:
def _get_document_context(docs):
    """
    Concatenates the text of the given documents into a single context string.

    Arguments: docs: iterable of objects exposing a `page_content` string

    Returns: str that starts with a newline and then holds one
    "Context:" section per document, in the order received
    """
    sections = ['\nContext:\n' + doc.page_content + '\n\n' for doc in docs]
    return '\n' + ''.join(sections)

### Build a dynamic prompt including the context based on the results from the query

In [27]:
def generate_prompt(user_question, docs):
    """
    Renders the full LLM prompt for a user question from a fixed template.

    The template combines the role description, the retrieved context
    (formatted by _get_document_context), the restrictions, two worked
    examples, the expected response structure and the user's question.

    Arguments: user_question: str, docs: retrieved documents (objects with a `page_content` attribute)

    Returns: str, the rendered prompt
    """
    # Format the retrieved chunks once, then drop them into the template.
    document_context = _get_document_context(docs)
    return f"""
        INTRODUCTION
        You are a knowledgeable assistant trained to answer questions about allergies, symptoms, and management strategies. Your responses should be clear, concise, and focused on accurate information.

        The user asked: "{user_question}"

        CONTEXT
        Technical documentation for allergies, symptoms, and management of allergen ingestion:
        '''
        {document_context}
        '''

        RESTRICTIONS
        Always refer to products or allergens by their specific names as mentioned in the documentation.
        Stick to facts and provide clear, evidence-based responses; avoid opinions or interpretations.
        Only respond if the answer can be found within the context. If not, let the user know that the information is not available.
        Do not engage in topics outside allergies, symptoms, and related health matters. Avoid humor, sensitive topics, and speculative discussions.
        If the user’s question lacks sufficient details, request clarification rather than guessing the answer. For example, if the user does not ask anything related to allergies, allergies symptoms, or allergies management, you should request clarification.
        EXAMPLE:
            example 1:
                User: 'I ate eggs'
                Agent: 'I hope they tasted amazing. Are you allergic to eggs?'

            example 2: 
                User: 'I think I have an allergy to eggs'
                Agent: 'Egg allergies are common and can cause a range of symptoms, from mild to more severe reactions. Here are some typical signs and management steps:
                        Symptoms of an Egg Allergy
                        Mild Reactions: Skin reactions like hives, eczema, or redness; digestive issues such as cramps, nausea, or vomiting; and runny nose or sneezing.
                        Severe Reactions (Anaphylaxis): Difficulty breathing, swelling of the throat, rapid pulse, dizziness, or loss of consciousness.
                        If you experience severe symptoms, you should seek medical help immediately, as anaphylaxis requires prompt treatment.

                        Management and Avoidance Tips
                        Avoid Egg-Based Foods: Eggs can be hidden in foods, so check labels for ingredients like “albumin” or “lysozyme” that indicate eggs.
                        Consider Egg Substitutes: For baking, substitutes like applesauce, banana, or commercial egg replacers can be helpful.
                        Discuss with Your Doctor: They may suggest an allergy test to confirm the allergy or advise on an emergency plan, such as carrying an epinephrine auto-injector if needed.
                        If you’re experiencing ongoing symptoms or suspect an allergy, consulting with an allergist is recommended for personalized advice and treatment.
        TASK
        Provide a direct answer based on the user’s question, if possible.
        Guide the user to relevant sections of the documentation if additional context is needed.

        EXAMPLES:
        RESPONSE STRUCTURE:
        '''
        # [Answer Title]
        [answer text]
        '''
        CONVERSATION:
        User: {user_question}
        Agent:
        """

# 7. Initialize OpenAI client/Assistant

In [15]:
# OpenAI client used for chat completions and the Assistants API calls.
client = OpenAI(api_key=API_KEY)

# Settings passed as keyword arguments to the chat-completion request.
model_params = dict(model='gpt-4o-mini', temperature=0.4, max_tokens=200)

# Single-question generation, currently disabled:
#messages = [{'role':'user', 'content':prompt}]
#completion = client.chat.completions.create(messages=messages, **model_params, timeout=120)
#answer = completion.choices[0].message.content
#model = completion.model

In [None]:
from IPython.display import display, HTML, Markdown

# Render the question as a Markdown heading, then the model's reply below it.
# NOTE(review): `answer` must already exist in the session; the single-question
# generation code is commented out — confirm which cell is expected to set it.
query = f"### Question: _{user_question}_"
display(Markdown(query))
display(Markdown(answer))

### Question: _sesame allergies_

'''
# Sesame Allergies
Sesame is the 9th most common food allergen and can be found in many popular dishes, including hummus (under the name "tahini"). According to the FDA, sesame was added as the 9th major food allergen effective January 1, 2023, under the FASTER Act of 2021. Before this date, manufacturers were not required to list it as an allergen, although it typically appears in the ingredient statement unless it is part of a natural flavoring or spice.

If you suspect you have a sesame allergy, it is important to consult with a board-certified allergist for proper testing and management strategies. Symptoms can range from mild reactions to severe anaphylaxis, and having an emergency action plan is crucial for those at risk.
'''

# 8. OpenAI assistant (LLM as a judge)

### Initialize OpenAI assistant

In [None]:
# Register an assistant with the file_search tool enabled.
assistant = client.beta.assistants.create(
    model="gpt-4o-mini",
    name="Food allergies expert",
    instructions="You are an expert in food allergies",
    tools=[{"type": "file_search"}],
)

### Create the vector store for the PDF we are using

In [35]:
# Create a vector store named "allergies_document"
vector_store = client.beta.vector_stores.create(name="allergies_document")

### Upload the PDF file to the new vector store

In [36]:
from contextlib import ExitStack

# Files to upload to the vector store.
file_paths = [file_path]

# Open every file inside an ExitStack so all handles are closed once the upload
# has finished (or failed), instead of staying open for the rest of the session.
with ExitStack() as stack:
    file_streams = [stack.enter_context(open(path, "rb")) for path in file_paths]

    # Upload the files and block until the batch has been processed.
    file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
        vector_store_id=vector_store.id, files=file_streams
    )

### Update the OpenAI assistant with the new tool (vector store)

In [None]:
# Attach the vector store to the assistant created earlier in this notebook.
# The id is taken from that assistant object instead of a hard-coded literal,
# which would point at an assistant from a previous run.
assistant = client.beta.assistants.update(
    assistant_id=assistant.id,
    tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}},
)

In [12]:
# Load the evaluation set (question / reference-answer pairs) and preview it.
references = pd.read_csv('./data/References for Evaluation.csv')
references.head()

Unnamed: 0,Question,Answer
0,What are the most common food allergens?,"The most common food allergens include milk, e..."
1,Can you outgrow food allergies?,"Yes, children may outgrow allergies to milk, e..."
2,How is a food allergy diagnosed?,"Diagnosis involves a medical history review, s..."
3,What is anaphylaxis?,"Anaphylaxis is a severe, life-threatening alle..."
4,How can I prevent food allergies?,Prevention strategies include delaying the int...


In [13]:
# Keep handles to the question column and the reference-answer column.
references_questions, references_answers = references['Question'], references['Answer']


In [28]:
# Replace the 'Answer' column with longer reference answers, one string per row,
# in the same order as the questions.
# NOTE(review): the list holds 20 entries, so this assignment only works when
# the DataFrame has exactly 20 rows — confirm against the CSV.
references['Answer'] = [
    "The most common food allergens, often referred to as the 'Big Eight,' include milk, eggs, peanuts, tree nuts, fish, shellfish, soy, and wheat. These allergens are responsible for the majority of allergic reactions in the population. Each of these foods can provoke a range of symptoms, from mild reactions like hives to severe anaphylactic responses. It's essential for individuals with food allergies to read labels carefully and avoid these allergens to prevent adverse reactions. Awareness and education about these common allergens are crucial for managing food allergies effectively.",
    
    "Yes, some individuals can outgrow food allergies, particularly allergies to milk, eggs, soy, and wheat. Studies indicate that a significant percentage of children with these allergies may become tolerant as they age. However, allergies to peanuts, tree nuts, fish, and shellfish are less likely to be outgrown. Regular follow-ups with an allergist can help monitor changes in allergy status and determine if it’s safe to reintroduce certain foods into the diet.",
    
    "Food allergies are typically diagnosed through a combination of patient history, skin prick tests, and blood tests that measure specific IgE antibodies. An allergist may also recommend an oral food challenge, where the patient consumes the suspected allergen under medical supervision to observe for any reactions. Accurate diagnosis is crucial for effective management and to avoid unnecessary dietary restrictions.",
    
    "Anaphylaxis is a severe, potentially life-threatening allergic reaction that can occur within minutes of exposure to an allergen. Symptoms may include difficulty breathing, swelling of the throat, rapid heartbeat, and a drop in blood pressure. Immediate treatment with an epinephrine auto-injector is essential, as it can reverse the symptoms and save lives. Individuals at risk of anaphylaxis should carry an epinephrine auto-injector at all times and have an action plan in place.",
    
    "While not all food allergies can be prevented, certain strategies can reduce the risk. Introducing allergenic foods to infants at an early age, particularly for high-risk children, may help prevent allergies. It’s also important to avoid known allergens and educate family and caregivers about food allergies. Reading food labels carefully and communicating with restaurants about dietary restrictions can further help in preventing accidental exposure.",
    
    "Currently, the primary treatment for food allergies is strict avoidance of the allergenic food. In cases of accidental exposure, antihistamines can alleviate mild reactions, while epinephrine is necessary for severe reactions like anaphylaxis. Ongoing research is exploring immunotherapy options, which may help desensitize individuals to specific allergens over time, but these treatments are still under investigation.",
    
    "Yes, food allergens can remain on surfaces, utensils, and even in the air, posing a risk for cross-contamination. For example, traces of peanut butter on a knife can transfer to other foods. It’s crucial for individuals with food allergies to practice strict hygiene, including washing hands and surfaces thoroughly, to minimize the risk of accidental exposure.",
    
    "Yes, it is possible to develop food allergies as an adult, even if you have previously consumed the food without any issues. Adult-onset food allergies can be triggered by various factors, including changes in the immune system or exposure to new allergens. Symptoms may vary and can sometimes be more severe than those experienced in childhood allergies.",
    
    "Symptoms of a food allergy can vary widely and may include hives, swelling, abdominal pain, nausea, vomiting, diarrhea, and respiratory issues like wheezing or difficulty breathing. In severe cases, anaphylaxis can occur. It’s important to recognize these symptoms and seek medical attention if an allergic reaction is suspected.",
    
    "Food allergy symptoms can appear within minutes to a few hours after exposure to the allergen. In some cases, symptoms may be delayed and can take several hours to manifest, making it challenging to identify the trigger. Monitoring symptoms and keeping a food diary can help in recognizing patterns and identifying allergens.",
    
    "Oral allergy syndrome (OAS) is a condition where individuals with pollen allergies experience allergic reactions to certain raw fruits, vegetables, or nuts due to cross-reacting proteins. Symptoms typically include itching or swelling in the mouth and throat shortly after eating these foods. OAS is generally mild and resolves quickly, as the proteins involved are similar to those found in pollen.",
    
    "While many people mistakenly refer to gluten intolerance as a gluten allergy, the correct term is celiac disease, which is an autoimmune disorder. Celiac disease affects a small percentage of the population, but non-celiac gluten sensitivity is more common. Individuals with these conditions must adhere to a strict gluten-free diet to avoid symptoms and complications.",
    
    "Managing food allergies involves strict avoidance of the allergenic foods, educating oneself and others about the allergy, and having an emergency action plan in place. Carrying an epinephrine auto-injector is crucial for those at risk of anaphylaxis. Regular consultations with an allergist can help monitor the condition and provide guidance on managing allergies effectively.",
    
    "Using an epinephrine auto-injector is straightforward. First, remove the cap and hold the injector in your fist, with the tip pointing down. Place the tip against the outer thigh and press firmly until you hear a click. Hold it in place for about 3 seconds, then remove it and massage the injection site for 10 seconds. Seek emergency medical help immediately after using the injector, as further treatment may be necessary.",
    
    "The cost of food allergy testing can vary widely depending on the type of tests performed and the healthcare provider. Skin prick tests and blood tests can range from $100 to several hundred dollars. Insurance coverage may help offset some costs, but it’s essential to check with your provider beforehand. Regular follow-ups and consultations with an allergist can also contribute to overall costs.",
    
    "Yes, individuals with food allergies must adhere to strict dietary restrictions to avoid allergens. This includes reading food labels carefully, avoiding cross-contamination, and being cautious when dining out. It’s important to communicate dietary restrictions clearly to family, friends, and restaurant staff to ensure safety.",
    
    "Cross-reactive allergens occur when proteins in one substance are similar to those in another, leading to allergic reactions. For example, individuals allergic to certain pollens may also react to specific fruits and vegetables due to similar protein structures. Understanding cross-reactivity is important for managing allergies and avoiding unexpected reactions.",
    
    "Yes, food allergies can lead to gastrointestinal issues such as nausea, vomiting, abdominal pain, and diarrhea. These symptoms can occur shortly after consuming the allergenic food and may vary in severity. It’s essential to differentiate between food allergies and intolerances, as the management strategies differ.",
    
    "Precautionary labeling statements, such as 'may contain' or 'processed in a facility that handles,' are used by manufacturers to indicate potential cross-contamination with allergens. While these labels are not mandatory, they serve as a warning for individuals with food allergies. It’s crucial for consumers to take these labels seriously and avoid products that may pose a risk.",
    
    "In case of a severe allergic reaction, such as anaphylaxis, administer an epinephrine auto-injector immediately and call emergency services. Lay the person down and elevate their legs if they are feeling faint. Monitor their symptoms and be prepared to administer a second dose of epinephrine if symptoms do not improve within 5 to 15 minutes. Ensure that the person receives medical attention as soon as possible."
]

In [None]:
# Handle to the current contents of the 'Answer' column.
new_reference_answers = references['Answer']

0     The most common food allergens, often referred...
1     Yes, some individuals can outgrow food allergi...
2     Food allergies are typically diagnosed through...
3     Anaphylaxis is a severe, potentially life-thre...
4     While not all food allergies can be prevented,...
5     Currently, the primary treatment for food alle...
6     Yes, food allergens can remain on surfaces, ut...
7     Yes, it is possible to develop food allergies ...
8     Symptoms of a food allergy can vary widely and...
9     Food allergy symptoms can appear within minute...
10    Oral allergy syndrome (OAS) is a condition whe...
11    While many people mistakenly refer to gluten i...
12    Managing food allergies involves strict avoida...
13    Using an epinephrine auto-injector is straight...
14    The cost of food allergy testing can vary wide...
15    Yes, individuals with food allergies must adhe...
16    Cross-reactive allergens occur when proteins i...
17    Yes, food allergies can lead to gastrointe

In [30]:
# Generate one answer per reference question: retrieve context, build the prompt,
# call the chat model, and collect the reply text.
generated_answers = []
for question in references_questions:
    # Fetch the three most similar chunks for this question
    # (this rebinds the notebook-level `docs` name on every iteration).
    docs = db.similarity_search(question, k=3)
    prompt = generate_prompt(question, docs)
    # The whole prompt is sent as a single user message.
    messages = [{'role':'user', 'content':prompt}]
    completion = client.chat.completions.create(messages=messages, **model_params, timeout=120)
    # Keep only the text of the first returned choice.
    answer = completion.choices[0].message.content
    generated_answers.append(answer)

print(generated_answers)

["```\n# Most Common Food Allergens\nThe most common food allergens include:\n- Milk\n- Eggs\n- Peanuts\n- Tree nuts\n- Fish\n- Shellfish\n- Wheat\n- Soy\n- Sesame (the 9th most common food allergen)\n\nIn adults, additional allergens may include fruit and vegetable pollen, which can cause oral allergy syndrome. It's important to note that individuals allergic to one type of nut may also react to related nuts, and those allergic to shrimp may have reactions to crab and lobster.\n```", "'''\n# Can You Outgrow Food Allergies?\nYes, it is possible to outgrow food allergies. Children generally, but not always, outgrow allergies to milk, egg, soy, and wheat. Research indicates that up to 25 percent of children may outgrow their peanut allergy, with slightly fewer expected to outgrow a tree nut allergy. However, food allergies that develop in adulthood tend to be lifelong, and the chances of outgrowing them are much lower.\n'''", "```\n# Food Allergy Diagnosis\nFood allergies are diagnosed t

In [31]:
# Store the generated answers next to their questions and reference answers.
references['Generated Answers'] = generated_answers

In [18]:
# Preview the first rows of the evaluation table.
references.head()

Unnamed: 0,Question,Answer,Generated Answers
0,What are the most common food allergens?,"The most common food allergens include milk, e...",```\n# Most Common Food Allergens\nThe most co...
1,Can you outgrow food allergies?,"Yes, children may outgrow allergies to milk, e...","```\n# Can You Outgrow Food Allergies?\nYes, i..."
2,How is a food allergy diagnosed?,"Diagnosis involves a medical history review, s...",```\n# Diagnosis of Food Allergy\nA food aller...
3,What is anaphylaxis?,"Anaphylaxis is a severe, life-threatening alle...",```\n# What is Anaphylaxis?\nAnaphylaxis is a ...
4,How can I prevent food allergies?,Prevention strategies include delaying the int...,```\n# Preventing Food Allergies\nPreventing f...


In [21]:
# Load the ROUGE metric through the `evaluate` library.
import evaluate
rouge = evaluate.load('rouge')


Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

In [32]:
# Score the generated answers against the 'Answer' column with ROUGE.
LLM_score = rouge.compute(
    predictions=references['Generated Answers'],
    references=references['Answer'],
    use_aggregator=True,  # return aggregate scores rather than one score per pair
    use_stemmer=True,  # stem words before matching
)

In [23]:
# Show the ROUGE results.
print(LLM_score)

{'rouge1': 0.2544145908328652, 'rouge2': 0.09873737735431562, 'rougeL': 0.19508621570141554, 'rougeLsum': 0.20534595858380839}


In [None]:
# Same ROUGE computation as the earlier scoring cell, with the result printed here.
# NOTE(review): the printed scores differ from the earlier run, presumably because
# the 'Answer' column was replaced in between — confirm the execution order.
LLM_score = rouge.compute(
    predictions=references['Generated Answers'],
    references=references['Answer'],
    use_aggregator=True,
    use_stemmer=True,
)
print(LLM_score)

{'rouge1': 0.443993312135548, 'rouge2': 0.18149965422323014, 'rougeL': 0.310653516989158, 'rougeLsum': 0.3188974416767258}


In [25]:
# List every evaluation question.
display(references['Question'])

0              What are the most common food allergens?
1                       Can you outgrow food allergies?
2                      How is a food allergy diagnosed?
3                                  What is anaphylaxis?
4                     How can I prevent food allergies?
5     What treatments are available for food allergies?
6                 Can food allergens remain on objects?
7           Can you develop food allergies as an adult?
8                What symptoms indicate a food allergy?
9     How long do food allergy symptoms take to appear?
10                       What is oral allergy syndrome?
11                            Is gluten allergy common?
12                     How can I manage food allergies?
13           How do I use an epinephrine auto-injector?
14               How expensive is food allergy testing?
15    Are there any dietary restrictions for allergens?
16                   What are cross-reactive allergens?
17    Can food allergies cause gastrointestinal 

In [1]:
from unstructured.partition.pdf import partition_pdf
from pdf2image import convert_from_path

In [None]:

# Print the reference answer and the generated answer, framed by separator lines.
# NOTE(review): `test_answer` is not assigned in any visible cell — confirm where it is set.
dash_line = '------------------------'
print(
    dash_line,
    f'BASELINE REFERENCE:\n{test_answer}',
    dash_line,
    f'OUR ANSWERS:\n{answer}',
    dash_line,
    sep='\n',
)


------------------------
BASELINE REFERENCE:
Anaphylaxis is a severe, life-threatening allergic reaction that can impair breathing, cause a drop in blood pressure, and may be fatal without prompt treatment.
------------------------
OUR ANSWERS:
'''
# What is Anaphylaxis?
Anaphylaxis is a potentially life-threatening allergic reaction that can occur within seconds or minutes after exposure to an allergen. It is characterized by symptoms such as swelling of the airways, which impairs breathing, and a sudden drop in blood pressure, leading to dizziness and fainting. This severe reaction is caused by the whole-body release of chemicals that can send the body into shock. 

In the U.S., food allergies are the leading cause of anaphylaxis outside the hospital setting. The first-line treatment for anaphylaxis is epinephrine (adrenaline), which is administered via an auto-injector. It is crucial for individuals with known allergies to carry an auto-injector and to be educated on its use, as ana