# Url Loader Testing


In [10]:
from PyPDF2 import PdfReader
from langchain.embeddings.cohere import CohereEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS

In [22]:
# Load environment variables via python-dotenv, then read the Cohere API key.
import os

from dotenv import load_dotenv

load_dotenv()

# An unset (or empty) variable yields "" so COHERE_API_KEY is always a str.
COHERE_API_KEY = os.environ.get("COHERE_API_KEY", "")

In [12]:
# Source PDF for this notebook, opened with PyPDF2's PdfReader.
pdf_path = "./pdfs/Sports-And-Exercise-Nutrition.pdf"
reader = PdfReader(pdf_path)

In [13]:
# Extract the text of every page of the PDF and concatenate it into raw_text.
# Pages for which extract_text() returns nothing (None or "") are skipped.
# The page texts are joined in a single pass instead of growing the string
# with += on each iteration, and the unused enumerate() index is gone.
raw_text = "".join(
    page_text
    for page_text in (page.extract_text() for page in reader.pages)
    if page_text
)

In [14]:
# Preview the first 100 characters of raw_text to sanity-check the extraction.
raw_text[:100]

'Lanham_bindex.indd   388Lanham_bindex.indd   388 7/28/2011   4:40:16 PM7/28/2011   4:40:16 PMSport a'

In [15]:
# Split the extracted text into smaller chunks so that, during information
# retrieval, the context handed to the model does not hit token size limits.
# NOTE(review): the recorded output reports chunks longer than the requested
# size; presumably some stretches of text contain no "\n" to split on - confirm.
chunk_size = 1000     # requested chunk length, measured with len()
chunk_overlap = 200   # overlap setting passed to the splitter

text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
    length_function=len,
)
texts = text_splitter.split_text(raw_text)

Created a chunk of size 1398, which is longer than the specified 1000
Created a chunk of size 1433, which is longer than the specified 1000
Created a chunk of size 1419, which is longer than the specified 1000
Created a chunk of size 1102, which is longer than the specified 1000
Created a chunk of size 1230, which is longer than the specified 1000
Created a chunk of size 1248, which is longer than the specified 1000
Created a chunk of size 1496, which is longer than the specified 1000
Created a chunk of size 1050, which is longer than the specified 1000
Created a chunk of size 2026, which is longer than the specified 1000
Created a chunk of size 1118, which is longer than the specified 1000
Created a chunk of size 1029, which is longer than the specified 1000
Created a chunk of size 1571, which is longer than the specified 1000
Created a chunk of size 1631, which is longer than the specified 1000
Created a chunk of size 2110, which is longer than the specified 1000
Created a chunk of s

In [16]:
# Number of chunks produced by the text splitter.
len(texts)

2046

In [17]:
# Inspect the first chunk to see what the split text looks like.
texts[0]

'Lanham_bindex.indd   388Lanham_bindex.indd   388 7/28/2011   4:40:16 PM7/28/2011   4:40:16 PMSport and Exercise Nutrition\nLanham_ffirs.indd   iLanham_ffirs.indd   i 7/28/2011   5:49:59 PM7/28/2011   5:49:59 PMIntroduction to Human Nutrition\nIntroduction to human nutrition: a global \nperspective on food and nutrition\nBody compositionEnergy metabolismNutrition and metabolism of proteins and amino acidsDigestion and metabolism of carbohydratesNutrition and metabolism of lipidsDietary reference standardsThe vitaminsMinerals and trace elementsMeasuring food intakeFood compositionFood and nutrition: policy and regulatory issuesNutrition research methodologyFood safety: a public health issue of growing importanceFood and nutrition-related diseases: the global challenge\nPublic Health Nutrition\nAn overview of public health nutritionNutrition epidemiologyFood choiceAssessment of nutritional status at individual and \n population level'

In [23]:
# Create the Cohere embeddings object, authenticated with the API key read from the environment.
embeddings = CohereEmbeddings(cohere_api_key=COHERE_API_KEY)

In [25]:
# Build a FAISS vector store from the text chunks, using the Cohere embeddings object.
docsearch = FAISS.from_texts(texts, embeddings)

In [26]:
# Display the vector store object to confirm it was created.
docsearch

<langchain.vectorstores.faiss.FAISS at 0x2afcd1d50>

In [28]:
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import Cohere

In [36]:
# Cohere LLM used to answer questions; verbose output is enabled on it.
llm = Cohere(cohere_api_key=COHERE_API_KEY, verbose=True)

# Question-answering chain of type "stuff" on top of that LLM, also verbose.
chain = load_qa_chain(llm, chain_type="stuff", verbose=True)

In [37]:
# Ask a question: fetch the chunks most similar to the query from the FAISS
# store, then run the QA chain with those chunks as input documents.
# The value returned by chain.run is displayed as the cell output.
query = "who are the authors of the textbook?"
docs = docsearch.similarity_search(query)
chain.run(input_documents=docs, question=query)



[1m> Entering new  chain...[0m


[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mUse the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

product names used in this book are trade names, service marks, tra demarks or registered trademarks of their respective 
owners. The publisher is not associated with any product or vendor mentioned in this book. This publication is designed to provide accurate and authoritative information in regard to the subject matter covered. It is sold on the understanding that the publisher is not engaged in rendering professional servic es. If professional advice or other expert assistance is 
required, the services of a competent professional should be sought.Library of Congress Cataloging-in-Publication DataSport and exercise nutrition / edited on behalf of the Nutrition Society by Susan Lanham-New ... [et al.].  p. cm. – (Th

' The editors of the textbook are Dr Samantha Stear, Dr Susan Shirreffs and Dr Adam Collins.'

In [46]:
# Ask a second question with the same retrieve-then-answer steps: similarity
# search over the FAISS store, then the QA chain run on the retrieved chunks.
# The value returned by chain.run is displayed as the cell output.
query = "Tell me about nutrition for weight training"
docs = docsearch.similarity_search(query)
chain.run(input_documents=docs, question=query)



[1m> Entering new  chain...[0m


[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mUse the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

● A sports nutritionist who knows the demands of the specific sport should plan individual nutrition-ally adequate diets. Throughout this process, the role of overall good nutrition practices in optimising performance should be emphasised. The diet should aim to have a protein intake of 1.4–2 g/kg, a carbohy-drate intake of 4–6 g/kg, and 20% fat or more. The focus should be on low-energy/high-nutrient  density foods that provide satiety as well as food variety and a frequent meal pattern making sure that the athlete is not fatigued during training sessions. Emphasise recovery meals containing carbohydrates and protein within 30 min after training sessions to optimise recovery, and include dairy food sources to meet Rec

'\nWeight training requires a high-calorie diet with a focus on nutrient-dense carbohydrates and small serves of high-quality protein at meals and snacks. The protein recommendations range from 1.4 to 1.7 g/kg, aiming for the upper level if there is a substantial energy restriction. This may assist in the maintenance of lean body mass and helps promote satiety at meal times. Fat is needed for insulation and protection, as well as for fat-soluble vitamins and essential fatty acids and should therefore not be totally excluded from the diet.'