In [24]:
from langchain_community.document_loaders import PyPDFLoader
import lancedb
from langchain_community.vectorstores import LanceDB
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter



In [25]:
# Embedding model used to vectorize both the document chunks and the queries.
model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")



In [31]:
# Source PDFs to index (relative paths).
path = [
    "../Built-RAG/nutrition-values.pdf",
    "../Built-RAG/DIfferentnutrients.pdf",
]

In [33]:
# Load every PDF; each loader call yields a list of Documents for that file.
docs = [PyPDFLoader(pdf_path).load() for pdf_path in path]

# Flatten the per-file lists into one list of Documents.
docs_list = [page for file_pages in docs for page in file_pages]

# Split into chunks whose size is measured in tiktoken tokens.
# NOTE(review): 1000-token chunks may exceed the embedding model's maximum
# input length, in which case the tail of each chunk would not be embedded --
# TODO confirm against the embedder's limit.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=1000,
    chunk_overlap=50,
)
doc_splits = text_splitter.split_documents(docs_list)


In [34]:
# Preview only the first few chunks; displaying the whole list floods the
# notebook output and bloats the saved file.
doc_splits[:3]

[Document(metadata={'source': '../Built-RAG/nutrition-values.pdf', 'page': 0}, page_content='Nutrient Value of Some Common Foods \nNutrient-Value_e.indd   1Nutrient-Value_e.indd   1 3/5/2008   12:36:28 AM3/5/2008   12:36:28 AM'),
 Document(metadata={'source': '../Built-RAG/nutrition-values.pdf', 'page': 1}, page_content='Nutrient Value of Some Common Foods\n1Nutrient Value of Some Common Foods\nHealth Canada is the federal department responsible for helping Canadians maintain and improve their health. \nWe assess the safety of drugs and many  consumer products, help improve the safety of food, and provide information to Canadians to help them make healthy  decisions. We provide health  services to First Nations people and to Inuit communities. We work with the provinces to ensure our health care system serves the needs of Canadians.\nPublished by authority of the Minister of Health.Nutrient Value of Some Common Foods\nis available on Internet at the following address: www.healthcanada.

In [47]:
# Connect to a LanceDB database stored at a local path.
db = lancedb.connect("/tmp/lancedb")

# Create the table from a single placeholder row so that it has "vector" and
# "text" columns; mode="overwrite" lets the cell be re-run.
# NOTE(review): the placeholder row appears to remain in the table after
# indexing, so a search could return it -- confirm, and consider removing it.
_seed_text = "Hello World"
table = db.create_table(
    "food_table",
    data=[{"vector": model.embed_query(_seed_text), "text": _seed_text}],
    mode="overwrite",
)

# Embed the chunks and store them in the table.
# NOTE(review): `connection` is given the table object rather than `db`;
# which one is expected depends on the installed langchain_community
# version -- TODO confirm.
vectorstore = LanceDB.from_documents(
    documents=doc_splits, embedding=model, connection=table
)
retriever = vectorstore.as_retriever()

In [44]:
# Run a sample query through the retriever.
# Note: this rebinds `docs`, which the loading cell used for the raw PDF pages.
docs = retriever.invoke("all info regarding peanut butter")

In [53]:
# Inspect the first retrieved Document. In the saved output its metadata
# carries the raw embedding under the 'vector' key, so the repr is very long.
docs[0]


Document(metadata={'vector': [-0.03840845078229904, -0.04442517086863518, -0.07023125141859055, 0.02291584387421608, -0.04054643213748932, 0.01511758379638195, -0.012911302968859673, 0.021167058497667313, -0.05663911998271942, -0.013989065773785114, 0.02919369749724865, -0.06758999079465866, 0.00738611351698637, -0.15883494913578033, 0.0010464005172252655, -0.07244352996349335, 0.07081940025091171, 0.023286882787942886, -0.08654683083295822, -0.10961783677339554, 0.06413275003433228, -0.02185945026576519, -0.006542167626321316, -0.01151907630264759, 0.049961235374212265, 0.030563611537218094, 3.0383869670913555e-05, -0.0010395761346444488, -0.039682503789663315, -0.12163050472736359, -0.060957904905080795, 0.003393468214198947, 0.11423040181398392, 0.020570214837789536, -0.058935947716236115, 0.027678119018673897, -0.04169614985585213, -0.097458116710186, 0.0009452008525840938, -0.02461419254541397, 0.04600341245532036, -0.036549534648656845, 0.06861574947834015, -0.019676897674798965,

In [54]:
# Keep only the text body of each retrieved Document.
text = [hit.page_content for hit in docs]

In [55]:
# Display the page_content strings collected from the retrieved Documents.
text


['37Food Name\nMeasure\nWeight \nEnergyEnergyProteinCarbohydrateTotal SugarTotal Daietary FibreTotal FatSaturated FatMonounsaturated FatPolyunsaturated FatCalciumIronSodiumPotassiumMagnesiumPhosphorusFolate Vitamin B12Vitamin E\ng kcal kJ gggggggg m g m g m g m g m g m g D F E m c g m g\nLegumes, Nuts and Seeds\nLentils, pink, boiled 175mL 179 190 793 14 32 N/A 5.9 1 0.2 0.3 0.6 22 4.1 4 317 39 161 112 0 N/A\n Peas, \n split, boiled 175mL 145 171 715 12 31 4 4.2 1 0.1 0.1 0.2 20 1.9 3 525 52 144 94 0 N/A\n Soybeans, boiled 175mL 127 220 920 21 13 4 8.0 11 1.7 2.5 6.4 130 6.5 1 655 109 312 69 0 N/A\n Peanuts\n Peanut butter, chunk type, fat, sugar and salt added 30mL 32 191 799873 2 . 6 1 6 2 . 6 8 . 0 4 . 8 1 5 0 . 6 1 5 8 2 4 2 5 2 1 0 3 3 00 2 . 0\nPeanut butter, natural 30mL 31 184 770771 2 . 5 1 6 2 . 2 7 . 8 4 . 9 1 7 0 . 72 2 0 7 5 5 1 1 3 4 60 2 . 2\nPeanut butter, smooth type, fat, sugar and salt added 30mL 32 191 798863 1 . 8 1 6 3 . 3 7 . 7 4 . 5 1 4 0 . 6 1 4 9 2 1 0 5 0 1 1