In [44]:
from sentence_transformers import SentenceTransformer
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_community.llms import Ollama
import faiss
import pandas as pd
import numpy as np
import pickle
import time
from tqdm import tqdm

In [4]:
# Load the prebuilt FAISS index from disk; search_faiss() queries this
# module-level `index` object.
index = faiss.read_index("database/pdf_sections_index.faiss")

In [5]:
# Sentence-embedding model that search_faiss() uses to encode each query.
# NOTE(review): presumably the same model that produced the vectors stored in
# the FAISS index — confirm against the indexing script.
model = SentenceTransformer('all-MiniLM-L6-v2')

In [15]:
def search_faiss(query, k=3):
    """Return up to ``k`` stored sections nearest to ``query``.

    The query is embedded with the module-level ``model``, searched in the
    module-level FAISS ``index``, and every hit is resolved to its record in
    the module-level ``sections_data`` list.

    Args:
        query: Text to search for.
        k: Maximum number of neighbours to request from the index.

    Returns:
        A list of dicts with keys ``'distance'``, ``'content'`` and
        ``'metadata'``, in the order the index returned them. The list holds
        fewer than ``k`` items when the index cannot supply ``k`` neighbours.
    """
    query_vector = model.encode([query])[0].astype('float32')
    # index.search expects a 2-D batch, so wrap the single vector as (1, dim).
    query_vector = np.expand_dims(query_vector, axis=0)
    distances, indices = index.search(query_vector, k)

    results = []
    for dist, idx in zip(distances[0], indices[0]):
        # FAISS pads missing neighbours with index -1. Without this guard,
        # sections_data[-1] would silently return the *last* section as a hit.
        if idx < 0:
            continue
        section = sections_data[idx]
        results.append({
            'distance': dist,
            'content': section['content'],
            'metadata': section['metadata']
        })

    return results

In [33]:
# Prompt text for the QA chain. `{context}` and `{question}` are the two
# placeholders that answer_question() fills in on every call.
prompt_template = """
You are an AI assistant specialized in dietary guidelines. Use the following pieces of context to answer the question at the end under 30 words. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context:
{context}

Question: {question}

Answer:"""

# Wrap the raw text so the chain knows which variables it must be given.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# LLM reached through Ollama, using the "llama3" model.
llm = Ollama(model="llama3")

In [34]:
# Bind the prompt to the LLM; answer_question() runs this chain via chain.run().
chain = LLMChain(prompt=prompt, llm=llm)

def answer_question(query, k=3):
    """Answer ``query`` using retrieved sections as context for the LLM chain.

    Args:
        query: The question to answer.
        k: Number of sections to request from ``search_faiss``. Defaults to 3,
            which is the ``search_faiss`` default the function relied on
            before this parameter existed.

    Returns:
        Whatever ``chain.run`` returns for the filled-in prompt.
    """
    # Retrieve the sections most similar to the question.
    search_results = search_faiss(query, k=k)

    # Separate sections with a blank line so they stay distinct in the prompt.
    context = "\n\n".join(result['content'] for result in search_results)

    # Fill the prompt's {context} and {question} slots and run the chain.
    return chain.run(context=context, question=query)

In [32]:
# Load the benchmark questions and reference answers. The file's first column
# is a saved row index; index_col=0 uses it as the index instead of letting it
# appear as a spurious "Unnamed: 0" data column.
qa_pairs = pd.read_csv('data/QA_pairs.csv', index_col=0)

In [45]:
# Run every benchmark question through answer_question(), recording each
# answer and how long the call took in seconds.
time_list = []
response_list = []
for query in tqdm(qa_pairs['Questions']):
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    response = answer_question(query)
    end = time.perf_counter()
    time_list.append(end - start)
    response_list.append(response)

100%|███████████████████████████████████████████| 10/10 [00:46<00:00,  4.68s/it]


In [46]:
# Elapsed seconds per answer_question() call, one entry per benchmark question.
time_list

[1.478445053100586,
 2.386124849319458,
 3.6168229579925537,
 4.447862148284912,
 7.029644012451172,
 3.9442241191864014,
 6.469012022018433,
 5.324092864990234,
 7.501702785491943,
 4.536386728286743]

In [47]:
# Generated answers, in the same order as the questions in qa_pairs.
response_list

['At least every 5 years.',
 'The main purpose of the Dietary Guidelines for Americans is to provide foundational, evidence-based nutrition guidance to help individuals and their families consume a healthy, nutritionally adequate diet.',
 'The US Departments of Agriculture (USDA) and Health and Human Services (HHS).',
 'Less than 10% of total calories per day.',
 'According to the table, it is recommended to have:\n\n* Vegetables (cup eq/day): 2 ½\n* Fruits (cup eq/day): 2\n\nSo, in total, it is generally recommended to have at least 4.5 servings of fruits and vegetables per day (2.5 cups of vegetables + 2 cups of fruits).',
 'The recommended daily intake of whole grains is to be at least half of total grain consumption.',
 'The guidelines recommend that adults who choose to drink limit their daily intakes to 2 drinks or less in a day for men and 1 drink or less in a day for women, on days when alcohol is consumed.',
 'The dietary guidelines recommend that individuals limit their intak

In [None]:
# NOTE(review): `ans` is not assigned in any visible cell and this cell was
# never executed (In [None]) — likely a leftover; confirm and remove.
ans

In [27]:
# Example usage: a single question to send through answer_question().
query = "What are the main dietary guidelines for protein intake?"

In [28]:
# Time one end-to-end answer_question() call. perf_counter() is monotonic, so
# the interval is unaffected by system clock adjustments.
start = time.perf_counter()
answer = answer_question(query)
end = time.perf_counter()

  warn_deprecated(


In [31]:
# print() rather than a bare expression so the answer's line breaks render
# as real newlines instead of "\n" escapes.
print(answer)

Based on the provided context, I found Table A1-2 in Appendix 1 of the Dietary Guidelines for Americans, 2020-2025. The table shows the daily nutritional goals for macronutrients, minerals, and vitamins for different age-sex groups.

According to this table, the main dietary guidelines for protein intake are:

* Macronutrient Distribution Ranges (AMDR): 10-30% of total energy intake from protein for ages 2-3 to 51+, and 10-35% for ages 14-18.
* Recommended Dietary Allowance (RDA) for protein: varies by age group, ranging from 13g to 56g per day.

Please note that these guidelines are based on the average daily intake of protein foods, including meats, poultry, eggs, nuts, seeds, soy products, and seafood.


In [29]:
# Elapsed seconds for the single example query timed above.
end-start

22.07294225692749

In [16]:
# Example usage: a single question used to inspect raw retrieval results.
query = "What are the main dietary guidelines for protein intake?"

In [17]:
# Retrieve the nearest sections for the example query (default k=3).
results = search_faiss(query)

In [18]:
# Each hit is a dict with 'distance', 'content' and 'metadata' keys.
results

[{'distance': 0.7191864,
  'content': 'ADULTS AGES 19-59Figure 4-4\nAverage Intakes of Subgroups  Compared to Recommended Intake Ranges:  Ages 31 Through 59\n50\n4540353025201510\n50OZ EQUIVALENT \noz eq/weekM F\nSeafoodM F\nMeats, Poultry,\nEggsM F\nTotal\nProtein Foods\noz eq/dayM F\nNuts, Seeds,\nSoy Products12\n1110\n9876543210OZ EQUIVALENT \noz eq/dayM F\nRefined GrainsM F\nWhole GrainsM F\nTotal\nGrainsCUP EQUIVALENT 9\n876543210\ncup eq/week\nM  F\nTotal \nVegetables\ncup eq/dayM  F\nDark-\nGreenM  F\nRed &\nOrangeM  F\nBeans,\nPeas,\nLentilsM  F\nStarchyM  F\nOther Average Intakes Recommended Intake Ranges\nVegetables \nGrains\nProtein FoodsAverage intakes of fruits, vegetables, and dairy \nfall below the range of recommended intakes for all adults. Although average total grains intakes meets recommendations, Figures 4-2 and 4-4 \nshow that intake of whole grains is well below recommendations, and intakes of refined grains exceeds the upper end of the recommended intake range f

In [10]:
# NOTE(review): the only visible assignment to `indices` is local to
# search_faiss(); this cell presumably relied on an earlier top-level search
# that is no longer in the notebook — confirm, then restore or remove.
indices

array([[210,  40, 273]])

In [13]:
# Load the section records that search_faiss() looks up by FAISS row index.
# This must run before search_faiss() is called, because that function reads
# the module-level `sections_data`.
# NOTE: pickle.load can execute arbitrary code — only load a trusted file.
with open('database/pdf_sections_data.pkl', 'rb') as f:
    sections_data = pickle.load(f)

In [14]:
# Preview only the first records; displaying the whole list would put every
# section's text into the notebook output.
sections_data[:2]

[{'content': 'Dietary\nGuidelines\nfor Americans \n2020 - 2025\nMake Every \nBite Count With the Dietary Guidelines\nDietaryGuidelines.gov',
  'metadata': {'source': 'Dietary_Guidelines_for_Americans_2020-2025.pdf',
   'page': 0}},
 {'content': 'This publication may be viewed and downloaded from the internet at \nDietaryGuidelines.gov. \nSuggested citation: U.S. Department of Agriculture and U.S. Department of \nHealth and Human Services. Dietary Guidelines for Americans, 2020-2025.   \n9th Edition. December 2020. Available at DietaryGuidelines.gov . \nIn accordance with Federal civil rights law and U.S. Department of Agriculture \n(USDA) and U.S. Department of Health and Human Services (HHS) civil rights \nregulations and policies, their Mission Areas, agencies, staff offices, employees, \nand institutions participating in or administering USDA programs are prohibited \nfrom discriminating based on race, color, national origin, religion, sex, gender \nidentity (including gender expres

In [21]:
import pandas as pd
# Load the benchmark questions and reference answers. The file's first column
# is a saved row index; index_col=0 uses it as the index instead of letting it
# appear as a spurious "Unnamed: 0" data column.
qa_pairs = pd.read_csv('data/QA_pairs.csv', index_col=0)

In [22]:
# Display the question / reference-answer table.
qa_pairs

Unnamed: 0,Questions,Answers
0,How often are the Dietary Guidelines for Ameri...,The Dietary Guidelines for Americans are updat...
1,What is the main purpose of the Dietary Guidel...,To provide science-based advice on what to eat...
2,Which government agencies are responsible for ...,The U.S. Department of Health and Human Servic...
3,What is the recommended daily limit for added ...,The guidelines typically recommend limiting ad...
4,How many servings of fruits and vegetables are...,Most dietary guidelines recommend consuming at...
5,What is the recommendation for whole grain con...,The guidelines often suggest that at least hal...
6,How do the guidelines typically address alcoho...,"For adults who choose to drink, the guidelines..."
7,What is the stance on saturated fats in the diet?,The guidelines generally recommend limiting sa...
8,How do the Dietary Guidelines address sodium i...,They typically recommend limiting sodium intak...
9,"What is the ""MyPlate"" concept often associated...",MyPlate is a visual representation of how to b...


In [None]:
for 