## Simple RAG Pipeline for Text Editor

In [1]:

import os
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
## Import text loader 
from langchain_community.document_loaders import TextLoader

loader = TextLoader('docs\AI.txt')

text = loader.load()

text

[Document(metadata={'source': 'docs\\AI.txt'}, page_content='What is AI transformation?\nAI transformation is a strategic initiative whereby a business adopts and integrates artificial intelligence (AI) into its operations, products and services to drive innovation, efficiency and growth. AI transformation optimizes organizational workflows by using a range of AI models and other technologies to create a continuously evolving and agile business.\n\nAI transformations employ machine learning and deep learning modelsfor example, computer vision, natural language processing (NLP), and generative AItogether with other technologies to create systems that can:\n\nAutomate manual tasks and repetitive administrative work.\nModernize apps and IT with code generation.\nProvide data-driven insights and decision-making support by using advanced analytics.\n"Learn" from data to improve accuracy and performance over time.\nImprove the customer experience with personalization and chatbots.\nAs advanc

In [3]:
## RecursiveCharacterTextSplitter will split test into small chunks due to llm has less context size.

from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter( chunk_size=512,chunk_overlap=50)
chunk_text = text_splitter.split_documents(text)
chunk_text[:5]

[Document(metadata={'source': 'docs\\AI.txt'}, page_content='What is AI transformation?\nAI transformation is a strategic initiative whereby a business adopts and integrates artificial intelligence (AI) into its operations, products and services to drive innovation, efficiency and growth. AI transformation optimizes organizational workflows by using a range of AI models and other technologies to create a continuously evolving and agile business.'),
 Document(metadata={'source': 'docs\\AI.txt'}, page_content='AI transformations employ machine learning and deep learning modelsfor example, computer vision, natural language processing (NLP), and generative AItogether with other technologies to create systems that can:'),
 Document(metadata={'source': 'docs\\AI.txt'}, page_content='Automate manual tasks and repetitive administrative work.\nModernize apps and IT with code generation.\nProvide data-driven insights and decision-making support by using advanced analytics.\n"Learn" from data to 

In [4]:
len(chunk_text)

39

In [5]:
## we have chunks, we need to ADD those chunct into vector database so we need to have vector embedding first then we can insert the record into cromadb.

if not os.getenv("COHERE_API_KEY"):
    os.environ["COHERE_API_KEY"] = getpass.getpass()

In [6]:
from langchain_cohere import CohereEmbeddings

embeddings = CohereEmbeddings(
    model="embed-english-v3.0",
)

sagemaker.config INFO - Not applying SDK defaults from location: C:\ProgramData\sagemaker\sagemaker\config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: C:\Users\GAURAV\AppData\Local\sagemaker\sagemaker\config.yaml


In [7]:
from langchain_chroma import Chroma

vector_store = Chroma(
    collection_name="FirstCromaCollection",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_db",  # Where to save data locally, remove if not necessary
)

In [8]:
from uuid import uuid4

uuids = [str(uuid4()) for _ in range(len(chunk_text))]

vector_store.add_documents(documents=chunk_text, ids=uuids)



['14e7c1dd-0c63-40a7-b037-5f0d68520dfe',
 'bb316cf6-b22f-410e-87cd-403ab57ca9c1',
 '2a178f72-1f08-4604-b425-23a6691b69dc',
 'f264fc4f-6ae6-4268-91c9-23987dee6fe2',
 '2b82dfc1-37bd-4d32-a5e7-ef531967dbeb',
 'd2840407-80ff-4040-a14d-7048503692f1',
 'ddbaecb6-7076-4c87-b98b-686d20665898',
 '3b40a7cd-dc00-45bd-add9-6a1525513397',
 '17a16382-ac40-4ee1-adc5-173652f9b677',
 'b64d7621-ebdc-42a0-8f33-360edfa4ed50',
 'e24ef55b-9410-4197-a721-322cfd990c6e',
 '7ac97b9e-c9af-43e9-a93d-5e0be8c15c92',
 'e83f1bea-5b91-4c01-8c77-0d3b8a40c463',
 'ca9a9be5-9609-4006-81be-da38589ae842',
 '4cf66e67-278c-4c79-9243-f29773d9b045',
 '4bf22f3b-db7b-4b15-806c-b50c932c3bef',
 'a4768edb-bd44-4b86-86ee-61f0d57d1bb8',
 '978d1b1c-43dc-4b69-8f61-455081e40c53',
 '62a7e5c4-fcaa-4bb9-a115-afe72099abc3',
 '20a32819-364d-4806-80db-1574c3d0f33a',
 '40662cd0-6c27-4f11-b909-95b581c184be',
 'c67e003a-5f01-4f20-960d-8f3ca5fd24a3',
 '17733433-34dd-471c-a76f-80f58805ff61',
 'e59d9c53-42db-4bb7-a968-6500c23d8b0c',
 'e26a464c-ec3a-

In [9]:
results = vector_store.similarity_search(
    "What is AI transformation?",
    k=2
)
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

* What is AI transformation?
AI transformation is a strategic initiative whereby a business adopts and integrates artificial intelligence (AI) into its operations, products and services to drive innovation, efficiency and growth. AI transformation optimizes organizational workflows by using a range of AI models and other technologies to create a continuously evolving and agile business. [{'source': 'docs\\AI.txt'}]
* Typically, an AI transformation is a more holistic endeavor than the simple replication of existing business processes with new technologies. A well-crafted AI transformation strategy has the capacity to create entirely new ways of doing business, increase productivity and facilitate sustainable growth. To realize and scale the technology, AI transformations often require businesses to change their strategies and cultures. [{'source': 'docs\\AI.txt'}]


In [10]:
## added an example of vector store

from langchain_core.vectorstores import InMemoryVectorStore


vectorstore = InMemoryVectorStore.from_documents(
    chunk_text,
    embedding=embeddings,
)

# Use the vectorstore as a retriever
retriever = vectorstore.as_retriever()

# Retrieve the most similar text
retrieved_documents = retriever.invoke("What is AI transformation?")

# show the retrieved document's content
retrieved_documents[0].page_content

'What is AI transformation?\nAI transformation is a strategic initiative whereby a business adopts and integrates artificial intelligence (AI) into its operations, products and services to drive innovation, efficiency and growth. AI transformation optimizes organizational workflows by using a range of AI models and other technologies to create a continuously evolving and agile business.'