In [None]:
import os
from dotenv import load_dotenv
load_dotenv()

import warnings


def configure_env(env=os.environ):
    """Check the API credentials this notebook uses and enable LangSmith tracing.

    `os.getenv` reads from `os.environ`, so re-assigning a variable to its own
    value is a no-op when it exists and raises an opaque
    `TypeError: str expected, not NoneType` when it does not. This helper
    reports what is missing instead of crashing.

    Args:
        env: Mutable mapping to inspect and update; defaults to `os.environ`.

    Returns:
        The names of the expected variables that are absent from `env`,
        in the order OPENAI_API_KEY, LANGCHAIN_API_KEY, LANGCHAIN_PROJECT.
    """
    expected = ("OPENAI_API_KEY", "LANGCHAIN_API_KEY", "LANGCHAIN_PROJECT")
    missing = [name for name in expected if name not in env]

    # Only switch tracing on when there is a key to authenticate with;
    # when every variable is present this matches the previous behaviour.
    if "LANGCHAIN_API_KEY" in env:
        env["LANGCHAIN_TRACING_V2"] = "true"

    return missing


missing_env_vars = configure_env()
if missing_env_vars:
    # Surface the problem here rather than as a confusing failure in a later cell.
    warnings.warn(
        "Missing environment variables: "
        + ", ".join(missing_env_vars)
        + ". Add them to your .env file; cells that need them will fail.",
        stacklevel=2,
    )


In [None]:
# Simple GenAI app (RAG ingestion):
# load the page -> split it into chunks -> embed the chunks -> store them in a vector DB.
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS

url = "https://towardsdatascience.com/ai-agents-processing-timeseries-and-large-dataframes/"

# Scrape the article from the website.
loader = WebBaseLoader(url)
docs = loader.load()

# Split into overlapping chunks so each piece is small enough to embed and
# neighbouring chunks share some context.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
documents = text_splitter.split_documents(docs)

# Fail with a clear message if the scrape produced no text (blocked request,
# empty page, ...); an empty list is expected to make the FAISS index build
# fail with an unhelpful low-level error instead.
if not documents:
    raise ValueError(
        f"No text could be extracted from {url}; nothing to embed or index."
    )

# The chunks are converted to vectors and stored in the vector DB, because RAG
# fetches relevant content by comparing the query vector against these vectors.
embeddings = OpenAIEmbeddings()
vectorstore_db = FAISS.from_documents(documents, embeddings)

vectorstore_db

In [None]:
# Sanity-check the index: fetch the three chunks closest to the query
# and show the text of the best match.
query = "How LLMs work with smaller datasets?"
result = vectorstore_db.similarity_search(query, k=3)
best_match = result[0]
print(best_match.page_content)

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.documents import Document
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_openai import ChatOpenAI

# The prompt must expose both placeholders:
#   {context} - filled with the retrieved documents by the stuff-documents chain
#   {input}   - the user's question; without it the model is told to "answer the
#               following question" but never actually sees the question.
prompt = ChatPromptTemplate.from_messages([
    (
        "user",
        """
    Answer the following question based on the provided context:
    <context>
    {context}
    </context>

    Question: {input}
    """,
    ),
])

llm = ChatOpenAI(model="gpt-4o")

# Build the chain that formats the documents into {context} and sends the
# filled-in prompt to the LLM.
document_chain = create_stuff_documents_chain(llm, prompt)
print(document_chain)

# Smoke-test the chain with a hand-written document in place of retrieved ones.
document_chain.invoke({
    "input": "How LLMs work with smaller datasets?",
    "context": [Document(page_content="LLMs are trained on large datasets, but they can also work with smaller datasets by leveraging transfer learning and fine-tuning techniques. This allows them to adapt to specific tasks or domains without requiring massive amounts of data. Additionally, LLMs can generate synthetic data to augment smaller datasets, improving their performance on tasks with limited data availability.")],
})

In [None]:
from langchain.chains import create_retrieval_chain

# Flow: input -> retriever -> vector DB.
# The retriever is the interface used to fetch relevant documents from the vector DB.
retriever = vectorstore_db.as_retriever()

# Wire the retriever in front of the document chain built earlier.
retrieval_chain = create_retrieval_chain(
    retriever,
    document_chain,
)
retrieval_chain


In [None]:
# Run the retrieval chain on a question and print only the generated answer.
retrieval_payload = {"input": "what does Langchain do, some info about it?"}
response = retrieval_chain.invoke(retrieval_payload)
print(response["answer"])

In [None]:
from langchain_openai import ChatOpenAI

# Create the chat model client and print its configuration.
llm = ChatOpenAI(
    model="gpt-4o",
)
print(llm)

In [None]:
# Send a plain string straight to the model and print the returned object.
capital_question = "What is the capital of France?"
result = llm.invoke(capital_question)
print(result)

In [None]:
from langchain_core.prompts import ChatPromptTemplate

# Two-message chat prompt: a fixed system instruction followed by the
# user's question, supplied through the {input} placeholder.
system_message = ("system", "You're an expert AI Engineer. Please provide me answers based on the following questions")
user_message = ("user", "{input}")

prompt = ChatPromptTemplate.from_messages([system_message, user_message])
print(prompt)

In [None]:
# Pipe the prompt into the model, run it once, and display the raw response.
chain = prompt | llm
chain_payload = {"input": "What is the capital of France?"}
response = chain.invoke(chain_payload)
response

In [None]:
from langchain_core.output_parsers import StrOutputParser

# Append a string output parser to the prompt -> model pipeline,
# then run the same question through it and print the result.
output_parser = StrOutputParser()
chain = prompt | llm | output_parser

parser_payload = {"input": "What is the capital of France?"}
response = chain.invoke(parser_payload)
print(response)