In [25]:
import os 
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage, SystemMessage


In [26]:
# Take the Gemini API key from the environment; prompt for it (without echo)
# only when it is missing, so the secret is never stored in the notebook.
if not os.environ.get("GOOGLE_API_KEY"):
    from getpass import getpass

    os.environ["GOOGLE_API_KEY"] = getpass("Google API key: ")

# Shared chat model used by the rest of the notebook.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0)

In [27]:
# The system message sets the persona and reply language; the human message is
# the actual question.
system_msg = SystemMessage(content="You are an extremely smart frnech professor that only responds in French.")
user_msg = HumanMessage(content="Hello, who are you?")

messages = [system_msg, user_msg]
llm.invoke(messages)

AIMessage(content="Bonjour ! Je suis un professeur de français extrêmement intelligent.  À votre service. Comment puis-je vous aider aujourd'hui ?\n", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []}, id='run-e6c3ac97-94d8-44a5-afa0-23b809d1197d-0', usage_metadata={'input_tokens': 22, 'output_tokens': 27, 'total_tokens': 49, 'input_token_details': {'cache_read': 0}})

In [28]:
# Separate call that passes only this string: none of the earlier messages are
# sent along with it.
llm.invoke("What was my last question?")

AIMessage(content="I have no memory of past conversations. Each interaction with me starts fresh.  If you'd like me to answer a question, please ask it again.\n", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []}, id='run-19656e07-d42d-4bda-b318-6179a11b9027-0', usage_metadata={'input_tokens': 7, 'output_tokens': 33, 'total_tokens': 40, 'input_token_details': {'cache_read': 0}})

In [None]:
from langchain_core.prompts import PromptTemplate

# Template with two placeholders that are filled in when the chain is invoked.
query_template = "Tell me about {book_name} by {author}"
prompt = PromptTemplate(
    template=query_template,
    input_variables=["book_name", "author"],
)

# Pipe the prompt into the chat model so one invoke() renders and sends it.
chain = prompt | llm
book_query = {"book_name": "Sapiens", "author": "Yuval Noah Harari"}
print(chain.invoke(book_query))

In [None]:
from langchain_community.agent_toolkits.load_tools import load_tools

# Load the "arxiv" tool by name and try it directly on a paper title.
tools = load_tools(["arxiv"])
arxiv_tool = tools[0]
print(arxiv_tool.invoke("Attention is all you need"))

In [42]:
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper
from langchain_community.tools import YouTubeSearchTool

In [43]:
def full_sentences(text: str) -> str:
    """Trim ``text`` so that it ends on a sentence boundary.

    The cut is made right after the last ``.``, ``?`` or ``!`` found in
    ``text``; anything following that character is dropped.

    Args:
        text: the (possibly truncated) text to trim.

    Returns:
        ``text`` up to and including its last sentence-ending punctuation
        mark, or an empty string when it contains none.
    """
    cut = max(text.rfind(mark) for mark in ".?!")
    return text[:cut + 1] if cut != -1 else ""

In [None]:
# Wrapper configured with top_k_results=1 and a 250-character content cap.
wiki_api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=250)
wikipedia = WikipediaQueryRun(
    api_wrapper=wiki_api_wrapper,
    description="A tool to explain things in text format. Use this tool if you think the user’s asked concept is best explained through text.",
)

# Keep only whole sentences from whatever the tool returns.
wiki_raw = wikipedia.invoke("Mobius Strip")
wiki_response = full_sentences(wiki_raw)
print(wiki_response)

In [None]:
# YouTube search tool; the description tells the model when to pick it.
youtube = YouTubeSearchTool(
    description="A tool to search YouTube videos. Use this tool if you think the user’s asked concept can be best explained by watching a video."
)
youtube_hits = youtube.run("explain mobius strip")
print(youtube_hits)

In [46]:
# Rebinds `tools` (previously the arXiv tool list) to the two tools configured
# in the cells above.
tools = [wikipedia, youtube]
# Model handle with both tools bound to it; used below to inspect which tool
# calls the model emits.
llm_with_tools = llm.bind_tools(tools)

In [None]:
#response = llm_with_tools.invoke([HumanMessage("I want to watch latest news on German elections")])
response = llm_with_tools.invoke([HumanMessage("When was the great pyramid of Giza made?")])
print(response.content)
print("=========")
print(response.tool_calls)

# A single reply can request several tools, so report every call rather than
# only the first. `.get` avoids a KeyError when a tool's arguments do not
# include a "query" field (None is printed instead).
for item in response.tool_calls:
    print(item["name"])
    print(item["args"].get("query"))

In [None]:
from langgraph.prebuilt import create_react_agent
# System instructions for the agent: when to answer with Wikipedia text,
# YouTube links, or both. The text is sent as written, including indentation.
system_prompt = SystemMessage(
    """
    You are a helpful bot named Super Commando Dhruv. Your task is to explain topics 
    asked by the user via two mediums: text or video.
    
    If the asked topic is best explained via combination of text and video, 
    then provide a summary of the topic using Wikipedia tool, 
    along with video links on the topic using Youtube tool.
    However, if the asked topic is best explained only in text format, use the Wikipedia tool;
    and if only video is the best medium to explain the topic, conduct a YouTube search on it
    and return found video links.    
   """
)
# Build the agent from the plain `llm` and the current `tools` list, passing the
# system message through `state_modifier`.
agent = create_react_agent(llm, tools, state_modifier=system_prompt)
# Try the agent on a small-talk message.
agent.invoke({"messages" : [HumanMessage("What's up?")]})

In [None]:
# Ask a broad conceptual question and print the full message list returned by
# the agent.
llm_question = HumanMessage("How do LLMs work?")
response = agent.invoke({"messages": [llm_question]})
print(response["messages"])

In [None]:
# Ask a narrow factual question and print the full message list returned by
# the agent.
titanic_question = HumanMessage("Which year did Titanic sink? I just want to know the year - nothing more.")
response = agent.invoke({"messages": [titanic_question]})
print(response["messages"])

## Custom tools

In [29]:
from langchain_core.tools import tool

# Registered as a tool via @tool. The docstring is kept as-is because a tool's
# docstring is reported as its description (see `multiply.description` printed
# in a later cell).
@tool
def add(a: int, b: int) -> int:
    """
    Tool to add two integers
    
    Arguments:
    a: first integer
    b: second integer
    """
    return a + b

# Registered as a tool via @tool. The docstring is kept as-is because it is
# what `multiply.description` prints in a later cell.
@tool
def multiply(a: int, b: int) -> int:
    """
    Tool to multiply two integers

    Arguments:
    a: first integer
    b: second integer
    """
    return a * b

# Bind the two arithmetic tools to the shared model.
custom_tools = [add, multiply]
llm_with_custom_tools = llm.bind_tools(custom_tools)

# Start a new conversation (this rebinds `messages` from the earlier cell) with
# a question that involves both tools.
messages = [HumanMessage("what is 3 times 2?  And what if you add 10 to 11?")]
ai_msg = llm_with_custom_tools.invoke(messages)
print(ai_msg.tool_calls)
# Keep the model's reply in the history so later cells can append tool results
# after it.
messages.append(ai_msg)

[{'name': 'multiply', 'args': {'a': 3.0, 'b': 2.0}, 'id': '8704ca11-4363-46a7-809c-104c44926e2d', 'type': 'tool_call'}, {'name': 'add', 'args': {'b': 11.0, 'a': 10.0}, 'id': 'dfa69703-48ec-447c-830b-3d18f24c811e', 'type': 'tool_call'}]


In [30]:
# Print, in order, the tool's name, description and argument schema.
for attribute in ("name", "description", "args"):
    print(getattr(multiply, attribute))

multiply
Tool to multiply two integers

Arguments:
a: first integer
b: second integer
{'a': {'title': 'A', 'type': 'integer'}, 'b': {'title': 'B', 'type': 'integer'}}


In [None]:
# Map each tool name the model may request to the tool that implements it.
# Built once, outside the loop.
tools_by_name = {"add": add, "multiply": multiply}

# Ids of tool calls that already have a result in `messages`. Skipping them
# keeps the cell idempotent: re-running it does not append duplicate results.
answered_ids = {
    call_id
    for call_id in (getattr(msg, "tool_call_id", None) for msg in messages)
    if call_id is not None
}

for tool_call in ai_msg.tool_calls:
    print(tool_call)
    print("==========")
    if tool_call["id"] in answered_ids:
        continue
    selected_tool = tools_by_name[tool_call["name"].lower()]
    # The whole tool-call dict is passed to the tool; the result is appended to
    # the conversation for the follow-up model call.
    tool_msg = selected_tool.invoke(tool_call)
    messages.append(tool_msg)

messages

In [None]:
# Follow-up model call: `messages` now also holds the model's tool-call reply
# and the tool results appended in the cells above.
llm_with_custom_tools.invoke(messages)

In [None]:
# Rebind `agent` to one built from the arithmetic tools and ask the same
# two-part question used in the manual tool-calling cells.
agent = create_react_agent(llm, custom_tools)
arithmetic_question = HumanMessage("what is 3 times 2?  And what if you add 10 to 11?")
agent.invoke({"messages": [arithmetic_question]})

In [37]:
from langchain_core.tools import StructuredTool

# Synchronous implementation wrapped by the StructuredTool below. The docstring
# is kept as-is because it shows up as the tool's description in the printed
# tool repr.
def find_max(a: int, b: int) -> int:
    """Returns maximum of the two numbers
    Arguments:
    a: first integer
    b: second integer
    """
    return max(a, b)

async def a_find_max(a: int, b: int) -> int:
    """Async counterpart of ``find_max``.

    Args:
        a: first integer.
        b: second integer.

    Returns:
        The larger of ``a`` and ``b``.
    """
    larger = max(a, b)
    return larger

highest_num = StructuredTool.from_function(func=find_max, coroutine=a_find_max)
print(highest_num)
print(highest_num.invoke({"a":1, "b":2}))
print(await highest_num.ainvoke({"a":10,"b":20}))

name='find_max' description='Returns maximum of the two numbers\n    Arguments:\n    a: first integer\n    b: second integer' args_schema=<class 'langchain_core.utils.pydantic.find_max'> func=<function find_max at 0x316147c40> coroutine=<function a_find_max at 0x316147380>
2
20


## Using smaller LLMs: SmolLM2 / Llama 1B

In [22]:
import torch

# Release memory cached by the MPS allocator. Guarded so the cell is a no-op,
# rather than an error, on machines without an MPS backend.
if torch.backends.mps.is_available():
    torch.mps.empty_cache()

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_huggingface import HuggingFacePipeline

# model_name = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
model_name = "meta-llama/Llama-3.2-1B-Instruct"
device = "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

# Deterministic (greedy) decoding is requested with do_sample=False.
# temperature=0 is not a valid sampling temperature and is rejected when the
# model's own generation config enables sampling.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=500,
    do_sample=False,
)
# Expose the local pipeline through the LangChain interface.
hf_llm = HuggingFacePipeline(pipeline=pipe)



In [None]:
# Query the local model through the same `invoke` interface used for the
# Gemini model earlier in the notebook.
hf_llm.invoke([HumanMessage("What is the capital of France?")])

In [None]:
#system_prompt_rewrite = "You are an AI writing assistant. Your task is to rewrite the user's email to make it more professional and approachable while maintaining its main points and key message. Do not return any text other than the rewritten message."
#user_prompt_rewrite = "Rewrite the message below to make it more friendly and approachable while maintaining its main points and key message. Do not add any new information or return any text other than the rewritten message\nThe message:"
#messages = [{"role": "system", "content": system_prompt_rewrite}, {"role": "user", "content":f"{user_prompt_rewrite} The CI is failing after your last commit!"}]
messages = [{"role": "user", "content": "Write a 100 word essay on Transformer LLM archiecture."}]

# add_generation_prompt=True appends the assistant-turn header, so the model
# writes a reply instead of continuing the user's turn.
input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# The rendered chat template already carries its special tokens, so encode()
# must not add them a second time.
inputs = tokenizer.encode(input_text, return_tensors="pt", add_special_tokens=False).to(device)
outputs = model.generate(inputs, max_new_tokens=100, temperature=0.2, top_p=0.9, do_sample=True)
print(tokenizer.decode(outputs[0]))


In [None]:
# pip install transformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# NOTE(review): the section heading mentions SmolLM2, but this checkpoint is
# the original SmolLM — confirm which one is intended.
checkpoint = "HuggingFaceTB/SmolLM-1.7B-Instruct"

# Use the "mps" device when it is available; otherwise fall back to CPU so the
# cell still runs on other machines.
device = "mps" if torch.backends.mps.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
# for multiple GPUs install accelerate and do `model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")`
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)

messages = [{"role": "user", "content": "Write a 100 word essay on Transformer LLM archiecture."}]
# add_generation_prompt=True appends the assistant-turn header, so the model
# writes a reply instead of continuing the user's turn.
input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(input_text)
# The rendered chat template already carries its special tokens, so encode()
# must not add them a second time.
inputs = tokenizer.encode(input_text, return_tensors="pt", add_special_tokens=False).to(device)
outputs = model.generate(inputs, max_new_tokens=50, temperature=0.2, top_p=0.9, do_sample=True)
print(tokenizer.decode(outputs[0]))
