In [54]:
import os
from dotenv import load_dotenv

# Load key/value pairs from a local .env file into the process environment
# (python-dotenv). This has to run before the os.getenv() lookups below.
load_dotenv()

# Key handed explicitly to the Groq chat model; stays None when GROQ_API_KEY is unset.
groq_api_key = os.getenv("GROQ_API_KEY")

# Re-export the credentials that downstream libraries read from the environment.
# `os.environ[name] = None` raises TypeError ("str expected, not NoneType"), so
# variables that are not set are skipped and reported instead of crashing the cell.
missing_env_vars = []
for env_name in ('LANGCHAIN_API_KEY', 'LANGCHAIN_PROJECT', 'HF_TOKEN', 'TAVILY_API_KEY'):
    env_value = os.getenv(env_name)
    if env_value is None:
        missing_env_vars.append(env_name)
    else:
        os.environ[env_name] = env_value

# Tracing flag is a fixed value, not something read from the .env file.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'

if missing_env_vars:
    print(f"Warning: environment variables not set: {', '.join(missing_env_vars)}")

In [2]:
from langchain_groq import ChatGroq

# Groq-hosted chat model that the later cells in this notebook reuse.
llm = ChatGroq(
    groq_api_key=groq_api_key,
    model="gemma2-9b-it",
)
llm  # bare last expression so the notebook renders the client's repr

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000001C2CA2B2C20>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001C2CA2B1960>, model_name='gemma2-9b-it', model_kwargs={}, groq_api_key=SecretStr('**********'))

# Simple AI Assistant

In [3]:
# Minimal stateless chat loop: every question is sent to `llm` on its own
# (no history is kept), and the loop ends when the user types "quit".
while True:
    # Strip surrounding whitespace so " quit " or an accidental trailing space still works.
    question = input("Type your question. If you want to quit the chat write quit").strip()
    if question.lower() == "quit":
        print("goodbye, take care of yourself")
        break
    if not question:
        # Nothing typed: ask again rather than sending an empty prompt to the model.
        continue
    print(llm.invoke(question).content)

LoRA stands for **Low-Rank Adaptation**. It's a technique used in machine learning, particularly in the field of large language models (LLMs), to fine-tune pre-trained models efficiently. 

Here's a breakdown:

**The Problem:**

* **Large Models, Large Resources:** LLMs are massive, with millions or even billions of parameters. Fine-tuning them from scratch on a specific task is computationally expensive and requires a lot of data.

**LoRA's Solution:**

* **Focus on Adaptable Parameters:** LoRA freezes most of the original model's weights and introduces a small set of **low-rank matrices** that are trained on the new task. These matrices capture the task-specific knowledge.
* **Efficiency:**  Because only a small number of parameters are updated, LoRA requires significantly less memory and computation compared to full fine-tuning.

**Benefits of LoRA:**

* **Faster Training:**  Training is much quicker due to the reduced number of parameters.
* **Lower Memory Requirements:** LoRA is m

In [4]:
from langchain_core.chat_history import InMemoryChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.messages import AIMessage

In [5]:
# In-memory registry of chat histories, keyed by session id.
store = dict()

In [6]:
def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history registered under ``session_id``.

    The history is looked up in the module-level ``store``; on first use of
    a session id a new ``InMemoryChatMessageHistory`` is created, saved in
    ``store`` and returned.
    """
    try:
        return store[session_id]
    except KeyError:
        fresh_history = InMemoryChatMessageHistory()
        store[session_id] = fresh_history
        return fresh_history

In [7]:
# Per-call configuration: selects which session's history the wrapped model uses.
config = dict(configurable=dict(session_id="firstchat"))

In [8]:
# Wrap the chat model so each call uses the history returned by
# get_session_history for the session id supplied in the call's config.
model_with_memory = RunnableWithMessageHistory(
    runnable=llm,
    get_session_history=get_session_history,
)

In [9]:
# First turn of the "firstchat" session: introduce a name for the model to recall later.
greeting_reply = model_with_memory.invoke("Hi! I'm Sam", config=config)
greeting_reply.content

"Hi Sam, it's nice to meet you! 👋\n\nWhat can I do for you today? 😊\n"

In [12]:
# Follow-up turn in the same session: checks that the earlier message is remembered.
name_reply = model_with_memory.invoke("Tell my name", config=config)
name_reply.content

'Your name is Sam. 😊  \n\nWhat else can I help you with?  \n'

In [13]:
# Display the whole session store to inspect the messages recorded so far.
# NOTE(review): this repr grows with every turn; showing only
# store["firstchat"].messages would keep the output shorter.
store

{'firstchat': InMemoryChatMessageHistory(messages=[HumanMessage(content="Hi! I'm Sam", additional_kwargs={}, response_metadata={}), AIMessage(content="Hi Sam, it's nice to meet you! 👋\n\nWhat can I do for you today? 😊\n", additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 25, 'prompt_tokens': 15, 'total_tokens': 40, 'completion_time': 0.045454545, 'prompt_time': 8.001e-05, 'queue_time': 0.015259907999999999, 'total_time': 0.045534555}, 'model_name': 'gemma2-9b-it', 'system_fingerprint': 'fp_10c08bf97d', 'finish_reason': 'stop', 'logprobs': None}, id='run-2f69b097-e4b1-4b15-a179-1bc853dad746-0', usage_metadata={'input_tokens': 15, 'output_tokens': 25, 'total_tokens': 40}), HumanMessage(content='Tell me who I am', additional_kwargs={}, response_metadata={}), AIMessage(content="As an AI, I don't have access to any personal information about you, including your identity.  \n\nSo, I can't tell you who you are. \n\nIs there anything else you'd like to talk about?  

## RAG with LCEL

In [14]:
from langchain_community.document_loaders import TextLoader, DirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [21]:
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

# Embedding model used when building the Chroma vector store below.
embedding_model_name = 'all-MiniLM-L6-v2'
embedding = HuggingFaceEmbeddings(model_name=embedding_model_name)

  from .autonotebook import tqdm as notebook_tqdm


In [34]:
# Raw string literal: the Windows path contains backslash pairs (\S, \P, \L, \d, \l)
# that a normal literal parses as invalid escape sequences. These already warn on
# current Python versions and are slated to become a syntax error. The resulting
# string value is unchanged.
# NOTE(review): this is an absolute, machine-specific path; a path relative to a
# configurable data directory would make the notebook portable.
llama3_path = r'C:\Sowmya\Personal\Langchain_new\LangGraph-End-To-End\data\llama3.txt'

# Load the source text as LangChain documents.
loader = TextLoader(llama3_path)
docs = loader.load()

# Split into small overlapping chunks (200 characters, 50 overlap) for retrieval.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50)
documents = text_splitter.split_documents(docs)

# Embed the chunks into a Chroma vector store and expose a top-4 retriever.
vectorstore = Chroma.from_documents(documents, embedding)
retriever = vectorstore.as_retriever(search_kwargs={"k":4})

In [36]:
# Prompt text for the RAG chain; {context} and {question} are filled in at run time.
template = (
    " Answer the following question based on following context:\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)

# Build the prompt object from the template string defined above.
prompt = PromptTemplate.from_template(template=template)

In [37]:
def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Without this step the prompt's {context} slot receives the repr of a
    list of Document objects (metadata, quoting and escaped newlines
    included) instead of the plain passage text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# LCEL pipeline: the incoming question is fanned out to the retriever (whose
# hits are flattened to text) and passed through unchanged, then fed to the
# prompt, the chat model, and finally parsed down to a plain string.
retrieval_chain = (
    RunnableParallel(
        {
            "context": retriever | RunnableLambda(format_docs),
            "question": RunnablePassthrough(),
        }
    )
    | prompt
    | llm
    | StrOutputParser()
)

In [38]:
# Ask the RAG chain a question about the loaded document and show the answer.
question = "What is llama3?"
answer = retrieval_chain.invoke(question)
answer

'Based on the provided context, Llama 3 is an open-source large language model (LLM) whose development focuses on increasing its helpfulness. The creators are also committed to responsible use and deployment of this technology. \n\n\n'

# Tools and Agent

In [39]:
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

In [40]:
# Wikipedia API client, constructed with its default settings.
api_wrapper = WikipediaAPIWrapper()

In [41]:
# Wrap the Wikipedia client as a LangChain tool.
tool = WikipediaQueryRun(api_wrapper=api_wrapper)

In [42]:
# Display the tool's repr to inspect how the wrapper is configured.
tool

WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper(wiki_client=<module 'wikipedia' from 'c:\\Users\\2260927\\Anaconda3\\envs\\langchain\\lib\\site-packages\\wikipedia\\__init__.py'>, top_k_results=3, lang='en', load_all_available_meta=False, doc_content_chars_max=4000))

In [44]:
# Show the tool's name and description together as a tuple.
tool.name, tool.description

('wikipedia',
 'A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, facts, historical events, or other subjects. Input should be a search query.')

In [45]:
# Show the argument schema the tool accepts.
tool.args

{'query': {'description': 'query to look up on wikipedia',
  'title': 'Query',
  'type': 'string'}}

In [48]:
# Run the Wikipedia tool directly with a dict input and print the text it returns.
wiki_input = dict(query="langchain")
result = tool.run(wiki_input)
print(result)

Page: LangChain
Summary: LangChain is a software framework that helps facilitate the integration of large language models (LLMs) into applications. As a language model integration framework, LangChain's use-cases largely overlap with those of language models in general, including document analysis and summarization, chatbots, and code analysis.

Page: Retrieval-augmented generation
Summary: Retrieval Augmented Generation (RAG) is a technique that grants generative artificial intelligence models information retrieval capabilities. It modifies interactions with a large language model (LLM) so that the model responds to user queries with reference to a specified set of documents, using this information to augment information drawn from its own vast, static training data. This allows LLMs to use domain-specific and/or updated information.  
Use cases include providing chatbot access to internal company data or giving factual information only from an authoritative source.

Page: Milvus (vec

In [49]:
from langchain_community.tools import YouTubeSearchTool

In [50]:
# YouTube search tool, constructed with its default settings.
tool2 = YouTubeSearchTool()

In [51]:
# Show the YouTube tool's name and description together as a tuple.
tool2.name, tool2.description

('youtube_search',
 'search for youtube videos associated with a person. the input to this tool should be a comma separated list, the first part contains a person name and the second a number that is the maximum number of video results to return aka num_results. the second part is optional')

In [53]:
# Search YouTube for a person. Per the tool's description the input is
# "name[, max results]"; only the name is given here, so the optional
# result count is left out.
tool2.invoke("Sunny Savita")

"['https://www.youtube.com/watch?v=CKIH6Ou0GeY&pp=ygUMU3VubnkgU2F2aXRh', 'https://www.youtube.com/watch?v=pNHtKluMS0Q&pp=ygUMU3VubnkgU2F2aXRh']"

In [55]:
from langchain_community.tools.tavily_search import TavilySearchResults

In [56]:
# Tavily web-search tool, constructed with its default settings.
# NOTE(review): presumably picks up TAVILY_API_KEY from the environment — confirm.
tool3 = TavilySearchResults()

In [57]:
# Dict-style input: the search query is passed under the "query" key.
# The astronaut's surname is spelled correctly so the search engine is not
# asked to match a misspelled name.
tool3.invoke({"query": "when is the expected date to return Sunita Williams"})

[{'url': 'https://www.bbc.com/news/articles/cwy47w9yndpo',
  'content': 'Sunita Williams and Barry Wilmore: Nasa astronauts stuck in space will return in 2025 Two Nasa astronauts who have been stuck in space for over two months will return to Earth in February 2025 with SpaceX. Nasa said the Boeing Starliner spacecraft the astronauts Sunita Williams and Barry Wilmore had travelled to International Space Station (ISS) on would return to Earth "un-crewed". SpaceX has so far sent nine crewed flights to space for Nasa, as well as some commercial missions, but this was Boeing’s first attempt at a crewed mission. More delays for astronauts stuck in space as Nasa ponders return ---------------------------------------------------------------- 14 Aug 2024 Science & Environment'},
 {'url': 'https://www.financialexpress.com/life/science-countdown-begins-for-sunita-williams-return-to-earth-in-spacex-dragon-after-boeing-starliners-snag-heres-latest-update-from-iss-3677609/',
  'content': 'NASA astr

In [58]:
# Plain-string input form: the query is passed directly instead of inside a dict.
# The astronaut's surname is spelled correctly so the search engine is not
# asked to match a misspelled name.
tool3.invoke("when is the expected date to return Sunita Williams")

[{'url': 'https://www.bbc.com/news/articles/cwy47w9yndpo',
  'content': 'Sunita Williams and Barry Wilmore: Nasa astronauts stuck in space will return in 2025 Two Nasa astronauts who have been stuck in space for over two months will return to Earth in February 2025 with SpaceX. Nasa said the Boeing Starliner spacecraft the astronauts Sunita Williams and Barry Wilmore had travelled to International Space Station (ISS) on would return to Earth "un-crewed". SpaceX has so far sent nine crewed flights to space for Nasa, as well as some commercial missions, but this was Boeing’s first attempt at a crewed mission. More delays for astronauts stuck in space as Nasa ponders return ---------------------------------------------------------------- 14 Aug 2024 Science & Environment'},
 {'url': 'https://www.financialexpress.com/life/science-countdown-begins-for-sunita-williams-return-to-earth-in-spacex-dragon-after-boeing-starliners-snag-heres-latest-update-from-iss-3677609/',
  'content': 'NASA astr

### Agents -> Legacy

In [59]:
from langchain.agents import AgentType
from langchain.agents import load_tools 
from langchain.agents import initialize_agent

In [73]:
# Load the built-in "wikipedia" tool by name for the legacy agent API.
# NOTE(review): this rebinds `tool`, which earlier held a single
# WikipediaQueryRun; here it presumably holds a list of tools, since it is
# passed as the tools argument of initialize_agent. A separate name such as
# `tools` would avoid the shadowing.
tool = load_tools(["wikipedia"], llm=llm)

In [74]:
# Legacy zero-shot ReAct agent over the loaded tools; verbose=True prints the
# thought / action / observation trace while it runs.
agent = initialize_agent(
    tools=tool,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

In [75]:
# Ask the agent a factual question; the result dict is shown as the cell output.
gdp_query = "What is current GDP of India?"
agent.invoke(gdp_query)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: I need to look up the current GDP of India on Wikipedia.
Action: wikipedia
Action Input: GDP of India[0m
Observation: [36;1m[1;3mPage: Economy of India
Summary: The economy of India is a developing mixed economy with a notable public sector in strategic sectors. It is the world's fifth-largest economy by nominal GDP and the third-largest by purchasing power parity (PPP); on a per capita income basis, India ranked 141th by GDP (nominal) and 125th by GDP (PPP). From independence in 1947 until 1991, successive governments followed the Soviet model and promoted protectionist economic policies, with extensive Sovietization, state intervention, demand-side economics, natural resources, bureaucrat driven enterprises and economic regulation. This is characterised as dirigism, in the form of the Licence Raj. The end of the Cold War and an acute balance of payments crisis in 1991 led to the adoption of a broad economic libe

{'input': 'What is current GDP of India?',
 'output': "India's nominal GDP is the world's fifth-largest."}