In [1]:
import pandas as pd
import os
from typing import List, Dict, Any
from langchain_openai import ChatOpenAI
from langchain_core.documents import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter, TokenTextSplitter
from langchain_community.document_loaders import TextLoader

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
txtloader = TextLoader("C:\\Users\\abhin\\Downloads\\sampletest.txt")
doc = txtloader.load()

In [3]:
split = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=200)
chunks = split.split_documents(doc)
chunks

[Document(metadata={'source': 'C:\\Users\\abhin\\Downloads\\sampletest.txt'}, page_content='Generative AI is like a digital storyteller with infinite imagination. Unlike traditional AI, which is programmed to follow predefined rules, generative models learn patterns from vast amounts of data and then use that knowledge to create something entirely new. For example, a generative AI trained on millions of images can paint a landscape that has never existed beforeâ€”mountains sculpted by algorithms, skies tinted in colors only a neural network would imagine. When applied to text, it can'),
 Document(metadata={'source': 'C:\\Users\\abhin\\Downloads\\sampletest.txt'}, page_content='on millions of images can paint a landscape that has never existed beforeâ€”mountains sculpted by algorithms, skies tinted in colors only a neural network would imagine. When applied to text, it can craft entire essays, poems, or product descriptions tailored to a specific audience, capturing nuance, tone, and co

In [4]:
from dotenv import load_dotenv
load_dotenv()
os.environ["HUGGINGFACE_API_TOKEN"] = os.getenv("HUGGINGFACE_API_TOKEN")

Embedding the chunks into vectors and storing in Vector DB

In [5]:
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model="all-MiniLM-L6-v2")
vectors = Chroma.from_documents(chunks,embedding=embeddings)

In [6]:
vectors.similarity_search("What is Generative AI?")

[Document(id='68982fb5-b662-4a97-9e3d-c45a4812d90d', metadata={'source': 'C:\\Users\\abhin\\Downloads\\sampletest.txt'}, page_content='Generative AI is like a digital storyteller with infinite imagination. Unlike traditional AI, which is programmed to follow predefined rules, generative models learn patterns from vast amounts of data and then use that knowledge to create something entirely new. For example, a generative AI trained on millions of images can paint a landscape that has never existed beforeâ€”mountains sculpted by algorithms, skies tinted in colors only a neural network would imagine. When applied to text, it can'),
 Document(id='98ae397c-dcb6-4bc8-8075-29381bfd4279', metadata={'source': 'C:\\Users\\abhin\\Downloads\\sampletest.txt'}, page_content='But what makes generative AI remarkable is not just its ability to mimic; itâ€™s the way it amplifies human creativity. It doesnâ€™t replace imaginationâ€”it fuels it, offering fresh perspectives and countless variations at the 

In [7]:
retriever = vectors.as_retriever(
    search_type="similarity",
    search_kwargs={"k":1}
)
retriever.batch(["ai", "generative ai"])

[[Document(id='212b62b2-dbdb-4e96-9653-63162de1d551', metadata={'source': 'C:\\Users\\abhin\\Downloads\\sampletest.txt'}, page_content='In business, generative AI is transforming workflows: marketing teams use it to design campaigns in minutes, developers rely on it to generate code snippets, and filmmakers experiment with AI to create storyboards, scripts, and even animated sequences. In healthcare, it assists scientists by generating potential drug molecules or simulating biological interactions that would take years to test manually.')],
 [Document(id='68982fb5-b662-4a97-9e3d-c45a4812d90d', metadata={'source': 'C:\\Users\\abhin\\Downloads\\sampletest.txt'}, page_content='Generative AI is like a digital storyteller with infinite imagination. Unlike traditional AI, which is programmed to follow predefined rules, generative models learn patterns from vast amounts of data and then use that knowledge to create something entirely new. For example, a generative AI trained on millions of im