## Loading Env

In [None]:
from dotenv import load_dotenv

# Load key/value pairs from a .env file into the process environment so the
# os.getenv(...) lookups in the cells below can find the provider API keys.
load_dotenv()

## LLM APIs

### OpenAI Env, Model, and Embedding

In [None]:
import os
import warnings

# Writing os.getenv(...) straight back into os.environ is a no-op when the key
# exists and raises TypeError ("str expected, not NoneType") when it does not,
# because os.environ only accepts string values. Check for the key instead and
# emit a readable warning so the missing configuration is obvious.
if not os.getenv("OPENAI_API_KEY"):
    warnings.warn(
        "OPENAI_API_KEY is not set; add it to your .env file before running the OpenAI cells."
    )

In [None]:
from langchain_openai import ChatOpenAI

# No model name is passed, so the client uses the library's default chat model.
llm = ChatOpenAI()

# Smoke test: the returned message is shown as the cell output.
llm.invoke("hello how are you my firend?")

In [None]:
from langchain_openai import OpenAIEmbeddings

# Embedding client pinned to OpenAI's "text-embedding-3-large" model.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Display the length of the vector this model returns for a single query.
len(embeddings.embed_query("hello how are you my firend?"))

### Groq KEY and Model

In [None]:
import os
import warnings

# Writing os.getenv(...) straight back into os.environ is a no-op when the key
# exists and raises TypeError ("str expected, not NoneType") when it does not,
# because os.environ only accepts string values. Check for the key instead and
# emit a readable warning so the missing configuration is obvious.
if not os.getenv("GROQ_API_KEY"):
    warnings.warn(
        "GROQ_API_KEY is not set; add it to your .env file before running the Groq cells."
    )

In [None]:
from langchain_groq import ChatGroq

# Groq-hosted chat model; temperature is fixed at 0 for this example.
llm = ChatGroq(model_name="deepseek-r1-distill-llama-70b", temperature=0)

# The reply is kept in `response`; nothing is displayed by this cell.
response = llm.invoke("what is length of wall of china?")

### Google Gemini Env, Model, and Embedding

In [None]:
import os
import warnings

# Writing os.getenv(...) straight back into os.environ is a no-op when the key
# exists and raises TypeError ("str expected, not NoneType") when it does not,
# because os.environ only accepts string values. Check for the key instead and
# emit a readable warning so the missing configuration is obvious.
if not os.getenv("GOOGLE_API_KEY"):
    warnings.warn(
        "GOOGLE_API_KEY is not set; add it to your .env file before running the Gemini cells."
    )

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini chat client; bound to `model` (not `llm`), so the earlier `llm` stays untouched.
model = ChatGoogleGenerativeAI(model='gemini-1.5-flash')

# Smoke test: print only the text content of the reply.
output = model.invoke("hi")
print(output.content)

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Rebinds the shared `embeddings` name to the Google embedding model.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

# Displays the full embedding vector for a short query.
embeddings.embed_query("Hello AI")

## Hugging Face Embedding Models

In [None]:
import os
import warnings

# Writing os.getenv(...) straight back into os.environ is a no-op when the key
# exists and raises TypeError ("str expected, not NoneType") when it does not,
# because os.environ only accepts string values. Check for the key instead and
# emit a readable warning so the missing configuration is obvious.
if not os.getenv("HF_TOKEN"):
    warnings.warn(
        "HF_TOKEN is not set; add it to your .env file before running the Hugging Face cells."
    )

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

# Rebinds the shared `embeddings` name to the BAAI/bge-small-en model.
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en")

# Display the length of the vector this model returns for a single query.
len(embeddings.embed_query("hi"))

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

# Rebinds the shared `embeddings` name to the all-MiniLM-L6-v2 model.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Embed one sample sentence and display the resulting vector.
text = "this is atest documents"
query_result = embeddings.embed_query(text)
query_result

In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

# Last assignment to `embeddings` in this notebook: when cells run top to
# bottom, this is the model the vector-store cells below will use.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

## Data Loaders

### WebBaseLoader

In [None]:
from langchain_community.document_loaders import WebBaseLoader

# Single page: fill in `url` before running.
url = ''
web_loader = WebBaseLoader(url)
data = web_loader.load()

# Several pages: fill in `urls` before running.
# `docs` holds one list of documents per URL; `docs_list` is the flattened result.
urls = ['', '']
docs = list(map(lambda page_url: WebBaseLoader(page_url).load(), urls))
docs_list = [document for page_docs in docs for document in page_docs]

### TextLoader and DirectoryLoader

In [None]:
from langchain_community.document_loaders import TextLoader, DirectoryLoader

# Load every file in ../data matching the *.txt pattern, reading each with TextLoader.
loader = DirectoryLoader(
    "../data",
    glob="./*.txt",
    loader_cls=TextLoader,
)
docs = loader.load()

### PDF Loader

In [None]:
from langchain_community.document_loaders import PyPDFLoader

# Path is relative to the notebook's working directory.
loader=PyPDFLoader('syllabus.pdf')
# Rebinds the shared `docs` name to the documents read from the PDF.
docs=loader.load()

### ArXiv Loader

In [None]:
from langchain_community.document_loaders import ArxivLoader

# Query arXiv (here by paper identifier) and keep at most 2 documents; rebinds `docs`.
docs = ArxivLoader(query="1706.03762", load_max_docs=2).load()

### Wikipedia Loader

In [None]:
from langchain_community.document_loaders import WikipediaLoader

# Search Wikipedia for the query and keep at most 4 documents; rebinds `docs`.
docs = WikipediaLoader(query="Generative AI", load_max_docs=4).load()

## Chunking

### RecursiveCharacterTextSplitter

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Option A - sizes measured in characters (generic embedding models).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=50,
)

# Option B - sizes measured in tiktoken tokens (OpenAI embedding models).
# The opening parenthesis has to be on the same line as the method name:
# with it on the next line, the name is bound to the method itself and the
# parenthesised keyword arguments are a SyntaxError.
# Being the later assignment, Option B is the splitter used below; delete
# whichever option you do not need.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=100,
    chunk_overlap=25,
)

# Common code: split the documents loaded by the WebBaseLoader cell.
doc_splits = text_splitter.split_documents(docs_list)

# If only the page content is needed
doc_string = [doc.page_content for doc in doc_splits]

# If metadata must be preserved: parallel lists, one entry per chunk
texts = [doc.page_content for doc in doc_splits]
metadatas = [doc.metadata for doc in doc_splits]

### CharacterTextSplitter

In [None]:
from langchain_text_splitters import CharacterTextSplitter

# Split on blank lines, with chunk size 100 and overlap 20.
text_splitter = CharacterTextSplitter(
    separator="\n\n",
    chunk_size=100,
    chunk_overlap=20,
)

# Display the chunks produced from the most recently loaded `docs`.
text_splitter.split_documents(docs)

### HTMLHeaderTextSplitter

In [None]:
from langchain_text_splitters import HTMLHeaderTextSplitter

# Sample document with three heading levels to split on.
html_string = """
<!DOCTYPE html>
<html>
<body>
    <div>
        <h1>Foo</h1>
        <p>Some intro text about Foo.</p>
        <div>
            <h2>Bar main section</h2>
            <p>Some intro text about Bar.</p>
            <h3>Bar subsection 1</h3>
            <p>Some text about the first subtopic of Bar.</p>
            <h3>Bar subsection 2</h3>
            <p>Some text about the second subtopic of Bar.</p>
        </div>
        <div>
            <h2>Baz</h2>
            <p>Some text about Baz</p>
        </div>
        <br>
        <p>Some concluding text about Foo</p>
    </div>
</body>
</html>
"""

# (HTML tag, label) pairs handed to the splitter.
headers_to_split_on = [
    ("h1", "Header 1"),
    ("h2", "Header 2"),
    ("h3", "Header 3"),
]

html_splitter = HTMLHeaderTextSplitter(headers_to_split_on)
html_header_splits = html_splitter.split_text(html_string)

# Display the resulting splits.
html_header_splits

### RecursiveJsonSplitter

In [None]:
from langchain_text_splitters import RecursiveJsonSplitter

# `json_data` was never defined in this notebook, so the cell raised NameError
# on a fresh kernel. A small nested sample keeps the cell runnable; replace it
# with your own parsed JSON (e.g. the result of json.load / response.json()).
json_data = {
    "course": {
        "title": "Agentic AI",
        "modules": [
            {"name": "Data loaders", "topics": ["web", "pdf", "arxiv", "wikipedia"]},
            {"name": "Chunking", "topics": ["recursive", "character", "html", "json"]},
            {"name": "Vector stores", "topics": ["faiss", "chroma", "pinecone"]},
        ],
    },
    "instructor": {"name": "TBD", "contact": {"email": "tbd@example.com"}},
}

# Split the nested structure into pieces bounded by max_chunk_size.
json_splitter = RecursiveJsonSplitter(max_chunk_size=300)
json_chunks = json_splitter.split_json(json_data)

## Vector Embedding

### FAISS

In [None]:
import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore

# A FAISS index is created with a fixed vector width that must equal the
# length of the vectors produced by the active `embeddings` object. The
# previous hard-coded widths (3072 and 384) only match specific models and
# fail on add_texts with any other one, so measure the width once instead.
embedding_dim = len(embeddings.embed_query("dimension probe"))

# Variant 1: inner-product index
index = faiss.IndexFlatIP(embedding_dim)

db = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

# Variant 2: Euclidean (L2) distance index.
# Being the later assignment, this is the store that receives the texts below.
index = faiss.IndexFlatL2(embedding_dim)

db = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

# NOTE(review): the two add_texts calls below are alternatives. Running both
# stores every chunk twice; keep only the one you need.

# If we just need the chunk texts
db.add_texts(doc_string)

# If we need to store the metadata as well
db.add_texts(texts, metadatas=metadatas)

# Note: add_texts expects a list of strings, not Document objects.

### Chroma

In [None]:
from langchain_community.vectorstores import Chroma

# Build a Chroma collection directly from the chunked documents.
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chrome",  # any name works
    embedding=embeddings,
)

### Pinecone

In [None]:
import os
# Read the Pinecone key from the environment; this is None when the variable is unset.
pinecone_api_key=os.getenv("PINECONE_API_KEY")

In [None]:
from pinecone import Pinecone
from pinecone import ServerlessSpec  # serverless: the server is managed by the cloud provider

pc = Pinecone(api_key=pinecone_api_key)

# Index creation and loading
index_name = "agentic-ai"

# Create the index only when it does not exist yet.
# NOTE(review): dimension=768 presumably has to match the embedding model's
# vector length - confirm against the `embeddings` in use.
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

# Load the index (rebinds the shared `index` name).
index = pc.Index(index_name)

In [None]:
from langchain_pinecone import PineconeVectorStore

# Wrap the Pinecone index as a LangChain vector store using the active embeddings.
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

# Similarity search; the matches are displayed as the cell output.
results = vector_store.similarity_search("what is a langchain?")
results

In [None]:
# Vector Store Retriever

# Expose the vector store as a retriever that filters results by a score threshold.
retriever=vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.7} # tunable hyperparameter
)
# Display the documents retrieved for the query.
retriever.invoke("langchain")

## LangChain Built-in Tools

### Wikipedia

In [None]:
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

# Configure the Wikipedia wrapper with top_k_results=5 and doc_content_chars_max=500.
api_wrapper = WikipediaAPIWrapper(top_k_results=5, doc_content_chars_max=500)
wiki_tool = WikipediaQueryRun(api_wrapper=api_wrapper)

# A notebook cell only displays its final expression, so bare attribute
# accesses in the middle of a cell are silently discarded. Print them so the
# tool metadata is actually visible.
print("name:", wiki_tool.name)
print("description:", wiki_tool.description)
print("args:", wiki_tool.args)

# Running
wiki_tool.run({"query": "elon musk"})

### Youtube Search

In [None]:
from langchain_community.tools import YouTubeSearchTool

tool = YouTubeSearchTool()

# A notebook cell only displays its final expression, so bare attribute
# accesses in the middle of a cell are silently discarded. Print them so the
# tool metadata is actually visible.
print("name:", tool.name)
print("description:", tool.description)
print("args:", tool.args)

# Running
tool.run("Emergency Awesome")

### Tavily (Search Engine)

In [None]:
import os
# Read the Tavily key from the environment; this is None when the variable is unset.
TAVILY_API_KEY=os.getenv("TAVILY_API_KEY")

In [None]:
from langchain_community.tools.tavily_search import TavilySearchResults

# Rebinds the shared `tool` name to the Tavily search tool.
tool=TavilySearchResults(tavily_api_key=TAVILY_API_KEY)

# Running - 1: dict input. This is not the last expression of the cell, so its
# result is not displayed.
tool.invoke({"query":"what happend between Trump and Musk today?"})

# Running - 2: plain-string input, with an instruction prefix added to the question.
question = "what happend between Trump and Musk today?"
complete_query = "Anwer the follow question by searching the internet and getting best response. Following is the user question: " + question

# Last expression of the cell: this result is displayed.
tool.invoke(complete_query)

In [None]:
from langchain_tavily import TavilySearch

# Same search as the previous cell, using the TavilySearch tool from langchain_tavily.
tavily_tool=TavilySearch(tavily_api_key=TAVILY_API_KEY)

# Build the query by adding an instruction prefix to the user question.
question = "what happend between Trump and Musk today?"
complete_query = "Anwer the follow question by searching the internet and getting best response. Following is the user question: " + question

# Display the search result.
tavily_tool.invoke(complete_query)

### DuckDuckGo

In [None]:
from langchain_community.tools import DuckDuckGoSearchRun

# Constructed with no arguments; no API key is passed.
search = DuckDuckGoSearchRun()

# Display the search result for the query.
search.invoke("what is the latest update on iphone17 release?")

## Custom Tools

### Addition

In [None]:
from langchain.tools import tool

@tool
def add(a: int, b: int) -> int:
    """Return the sum of two integers.

    Args:
        a (int): The first integer.
        b (int): The second integer.

    Returns:
        int: The sum of a and b.
    """
    total = a + b
    return total

### Subtract

In [None]:
from langchain.tools import tool

@tool
def subtract(a: int, b: int) -> int:
    """Return the first integer minus the second.

    Args:
        a (int): The integer to subtract from.
        b (int): The integer to subtract.

    Returns:
        int: The difference a - b.
    """
    difference = a - b
    return difference

### Absolute Difference

In [None]:
from langchain.tools import tool

@tool
def abs_diff(a: int, b: int) -> int:
    """
    Compute the absolute difference between two integers.

    The result is never negative and does not depend on argument order.

    Args:
        a (int): The first integer
        b (int): The second integer

    Returns:
        int: The absolute difference of a and b
    """
    # The summary line used to read "Subtract two integers." (copied from
    # `subtract`). The @tool decorator exposes this docstring as the tool
    # description, so it has to describe what this tool actually does.
    return abs(a - b)

### Multiplication

In [None]:
from langchain.tools import tool

@tool
def multiple(a: int, b: int) -> int:
    """
    Multiply two integers.

    Args:
        a (int): The first integer
        b (int): The second integer

    Returns:
        int: The product of a and b
    """
    # The summary line used to read "Multiple two integers."; it is corrected
    # because @tool exposes this docstring as the tool description. The
    # function name is left unchanged so existing references keep working.
    return a * b

### Divide

In [None]:
from langchain.tools import tool

@tool
def divide(a: int, b: int) -> float:
    """
    Divide one integer by another using true division.

    Args:
        a (int): The numerator
        b (int): The denominator; must not be zero

    Returns:
        float: The quotient a / b

    Raises:
        ValueError: If b is zero
    """
    if b == 0:
        raise ValueError("Denominator cannot be zero.")
    # `/` is true division and always yields a float (e.g. 4 / 2 == 2.0), so
    # the return annotation and docstring say float rather than int.
    return a / b

### Length of Word

In [None]:
@tool
def get_word_length(word: str) -> int:
    """Return the number of characters in a word.

    Args:
        word (str): The word to measure.

    Returns:
        int: The length of the word.
    """
    length = len(word)
    return length