In [20]:
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv,find_dotenv
import os

In [21]:
# Locate the password.env file (usecwd=True anchors the lookup at the current
# working directory) and load its variables into the process environment.
# The load_dotenv call stays last so its boolean result is the cell output.
env_path = find_dotenv("password.env", usecwd=True)
load_dotenv(dotenv_path=env_path)

True

In [22]:
# OpenAI API key read from the environment; None when the variable is unset.
key = os.environ.get("OPENAI_API_KEY")

### Simple workflow to prompt llm.
> This uses the `llm.invoke` method, which is more general than the `client.chat.completions.create` call used with the OpenAI SDK. The OpenAI method is specific to OpenAI, while `llm.invoke` works with all model providers.

In [9]:
# Chat model handle used by the cells below; temperature=0 keeps sampling
# randomness to a minimum so the recorded outputs are easier to reproduce.
llm = ChatOpenAI(
    openai_api_key=key,
    model="gpt-4o-mini",
    temperature=0,
)

In [10]:
# Send a plain string prompt straight to the chat model; the returned message
# object is the cell's displayed value.
math_question = "Why is math useful? Give me 3 bullet points."
llm.invoke(math_question)

AIMessage(content='- **Problem-Solving Skills**: Math enhances critical thinking and analytical skills, enabling individuals to approach and solve complex problems in various fields, from science and engineering to finance and everyday life.\n\n- **Real-World Applications**: Math is essential in numerous practical applications, such as budgeting, cooking, construction, and technology, helping people make informed decisions and optimize resources.\n\n- **Foundation for Advanced Learning**: A strong understanding of math is crucial for pursuing advanced studies in disciplines like physics, computer science, economics, and medicine, as it provides the necessary tools for understanding and modeling real-world phenomena.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 121, 'prompt_tokens': 19, 'total_tokens': 140, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens

### Prompt templates
* Typically used for 
    * Examples (few shot prompts)
    * Additional context
    * Structured questions

In [14]:
# Build a reusable prompt with a {concept} placeholder, then render it once
# to inspect the text that would be sent to the model.
from langchain_core.prompts import PromptTemplate

template = "Explain the concept of {concept} in simple terms using 3 bullet points"
prompt_template = PromptTemplate.from_template(template=template)

# Fill the placeholder; the result is a prompt value, not yet a model call.
prompt = prompt_template.invoke(dict(concept="data science"))
print(prompt)

text='Explain the concept of data science in simple terms using 3 bullet points'


In [16]:
# Full workflow: prompt template -> chat model.
from langchain_core.prompts import PromptTemplate

# Write the template string and create a PromptTemplate object.
template = "Explain the following concept concisely in 3 bullet points: {concept}"
template = PromptTemplate.from_template(template)

# Create an appropriate llm.
llm = ChatOpenAI( model="gpt-4o-mini",
                 temperature = 0,
                 openai_api_key=key
                 )

# Define the llm chain from the template built in this cell. The chain used
# to pipe `prompt_template`, a leftover from the previous cell, so the
# "concisely" template above was silently ignored.
llm_chain = template | llm
concept = "machine learning"
response = llm_chain.invoke({"concept": concept})

print(response)



content='- **Learning from Data**: Machine learning is a way for computers to learn patterns and make decisions based on data, rather than being explicitly programmed for every task.\n\n- **Improvement Over Time**: As more data is fed into the system, the machine learning model can improve its accuracy and performance, adapting to new information and changing conditions.\n\n- **Applications Everywhere**: Machine learning is used in various everyday applications, such as recommending movies, recognizing speech, and detecting spam emails, making technology smarter and more user-friendly.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 105, 'prompt_tokens': 21, 'total_tokens': 126, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-mini-2024-0

### ChatPrompt template
> This kind of prompt template allows us to use chat roles in the template. We can use roles to pass example conversations and system messages along with every prompt that is created with this template. This helps structure responses in the correct format (using example conversations).

> The template is structured as a list of tuples. Each tuple has a role and the prompt associated with that role. The last tuple is typically the prompt where the user asks the question.

In [5]:
from langchain_core.prompts import ChatPromptTemplate

# Reference answer used as the "ai" turn of the worked example below.
dt_def = """A decision tree is a supervised learning algorithm that uses a tree-like structure 
to make predictions or classifications based on a series of questions about a dataset. 
It starts with a root node, branches out through internal nodes (which represent tests on features), 
and ends at leaf nodes (which are the final decisions or outcomes). Decision trees can be used for both regression and classification tasks 
and are known for being easy to interpret, as they mimic a flowchart of human decision-making. """

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, openai_api_key=key)

# Conversation layout: a system instruction, one worked question/answer pair,
# then the real question supplied through the {dt_question} placeholder.
template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful datascience explainer"),
        ("human", "answer this question: what is a descion tree?"),
        ("ai", dt_def),
        ("human", "answer this question:{dt_question}"),
    ]
)

# Pipe the rendered chat messages into the model and ask the follow-up question.
llm_chain = template | llm
dt_question = "How are decision trees used in real-world applications?"
response = llm_chain.invoke({"dt_question": dt_question})
     

In [6]:
# Print the message object returned by the chat-prompt chain in the previous
# cell (the printed form includes the content plus additional fields).
print(response)

content='Decision trees are widely used in various real-world applications due to their simplicity, interpretability, and effectiveness. Here are some common applications:\n\n1. **Healthcare**: Decision trees can help in diagnosing diseases by analyzing patient symptoms and medical history. For example, they can be used to predict whether a patient has a certain condition based on various health indicators.\n\n2. **Finance**: In credit scoring, decision trees can assess the risk of lending to an individual by evaluating factors such as credit history, income, and debt levels. They help in making decisions about loan approvals.\n\n3. **Marketing**: Businesses use decision trees to segment customers based on purchasing behavior, demographics, and preferences. This helps in targeting marketing campaigns more effectively.\n\n4. **Fraud Detection**: Financial institutions employ decision trees to identify potentially fraudulent transactions by analyzing patterns and anomalies in transaction

### Few shot prompt template
> The *ChatPromptTemplate* class can handle a few examples. If you need to provide more examples, a few-shot prompt template is better. You can set up your examples as a pandas DataFrame and convert it to a list of dictionaries using *df.to_dict(orient='records')*

> Need to provide 2 things
* A list of examples
* A prompt template that shows how each example will be structured

In [26]:
import pandas as pd
from langchain_core.prompts import PromptTemplate,FewShotPromptTemplate
from langchain_openai import ChatOpenAI
# Load the labelled reviews; every CSV row becomes one example dict whose keys
# are the column names referenced by the example template below.
df = pd.read_csv('Reviews.csv')
examples = df.to_dict(orient='records')

# How a single example is rendered inside the few-shot prompt.
example_template = PromptTemplate.from_template(
    "Review: {review},Sentiment: {rating}\n"
)

# All rendered examples come first, followed by the suffix holding the new review.
prompt_template = FewShotPromptTemplate(
    example_prompt=example_template,
    examples=examples,
    input_variables=["input"],
    suffix="Review:{input}",
)

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, openai_api_key=key)

llm_chain = prompt_template | llm

# Classify an unseen review; the message content is the cell's displayed value.
response = llm_chain.invoke({"input": "This movie is the greatest ever"})
response.content


'Sentiment: 5'

### Sequential chains
* Some things can only be done sequentially.
* Consider a chatbot that plans an itinerary
    * Receives destination
    * Compiles a list of sites
    * Chooses top 3 sites and creates an itinerary

In [30]:
# Two-step chain: describe a destination, then turn that description into activities.
from langchain_core.output_parsers import StrOutputParser

# Step 1 prompt: overview of the place plus three attractions.
destination_prompt = PromptTemplate(
    template="I am planning a trip to {destination}. Give me a brief overview of the place and suggest 3 must-visit attractions.",
    input_variables=["destination"],
)

# Step 2 prompt: consumes the text produced by step 1 through {attractions}.
attractions_prompt = PromptTemplate(
    template="Based on the following attractions: {attractions}, suggest some activities to do.",
    input_variables=["attractions"],
)

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, openai_api_key=key)

# The first sub-chain ends in StrOutputParser so that plain text, not a message
# object, is substituted into the second prompt.
attractions_chain = destination_prompt | llm | StrOutputParser()
seq_chain = {'attractions': attractions_chain} | attractions_prompt | llm

response = seq_chain.invoke({"destination": "Paris"})
print(response)

content="Absolutely! Here are some activities you can enjoy while visiting the must-see attractions in Paris:\n\n### Activities Around the Eiffel Tower:\n1. **Picnic at Champ de Mars**: Grab some fresh baguettes, cheese, and wine from a local market and enjoy a picnic on the lawns of Champ de Mars with a stunning view of the Eiffel Tower.\n2. **Seine River Cruise**: Take a scenic boat cruise along the Seine River, especially at sunset, to see the Eiffel Tower and other landmarks illuminated at night.\n3. **Dinner at a Nearby Restaurant**: Enjoy a meal at one of the many restaurants with views of the Eiffel Tower, such as Le Café de l'Homme or Les Ombres.\n\n### Activities at the Louvre Museum:\n1. **Guided Tour**: Consider joining a guided tour to learn more about the history and significance of the artworks, including the Mona Lisa and the Winged Victory of Samothrace.\n2. **Explore the Tuileries Garden**: After your visit to the Louvre, take a leisurely stroll through the beautiful T

In [33]:
from langchain_core.prompts import PromptTemplate

# Step 1 prompt: compare the user's current skills with what a data scientist needs.
# (A stray, unbalanced double quote before "Keep" was removed from the prompt text.)
skill_gap_template = PromptTemplate(input_variables=["current_skills"],
                                    template="""Given the skills required for a good data scientist 
                                    and the current skills {current_skills}, identify the skill gaps. \
                                    Keep your response brief and to the point"""
                                    )

# Step 2 prompt: turn the identified gaps into project suggestions.
# (Same stray-quote fix before "Provide".)
project_recco_template = PromptTemplate(input_variables=["skill_gaps"],
                                    template="""Based on the following skill gaps: {skill_gaps}, 
                                    suggest 2 project ideas that can help in acquiring these skills. 
                                    Provide a brief description for each project idea."""
                                    )
llm = ChatOpenAI( model="gpt-4o-mini",
                    temperature = 0,
                    openai_api_key=key)

# StrOutputParser converts the first model reply to plain text so it can be
# substituted into {skill_gaps} of the second prompt.
seq_chain = {'skill_gaps': skill_gap_template | llm | StrOutputParser()} | project_recco_template | llm
response = seq_chain.invoke({"current_skills": "Python, SQL, Basic Statistics"})

# Write with an explicit UTF-8 encoding: the platform default (e.g. cp1252 on
# Windows) can raise UnicodeEncodeError on non-ASCII characters in model output.
with open("project_ideas.txt", "w", encoding="utf-8") as f:
    f.write(response.content)

### React agents
* Agents use LLMs to take actions
* Agents can have functions or tools to interact with the environment
* ReAct stands for reasoning and acting. Observe --> think --> act
* Use langgraph to design agents

In [40]:
from langgraph.prebuilt import create_react_agent
from langchain_community.agent_toolkits.load_tools import load_tools

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, openai_api_key=key)

# Load the prebuilt "llm-math" tool, which is itself backed by the same chat model.
tool = load_tools(["llm-math"], llm=llm)

agent = create_react_agent(llm, tools=tool)

# System instruction that pushes every arithmetic question through the tool.
system_instruction = ('When the user asks any question involving '
'math or arithmetic, always convert the question into a clear mathematical expression and '
'call the calculator tool. Do not answer math questions without using the tool.')

conversation = [
    ('system', system_instruction),
    ('human', "What is the square root of 256 plus 100?"),
]

# The agent returns the whole message history; the last entry is the final answer.
fnl_message = agent.invoke({'messages': conversation})
print(fnl_message['messages'][-1].content)

C:\Users\gaura\AppData\Local\Temp\ipykernel_29416\1218472377.py:11: LangGraphDeprecatedSinceV10: create_react_agent has been moved to `langchain.agents`. Please update your import to `from langchain.agents import create_agent`. Deprecated in LangGraph V1.0 to be removed in V2.0.
  agent = create_react_agent(llm,tools=tool)


The square root of 256 plus 100 is 116.0.


In [56]:
import wikipedia
from langgraph.prebuilt import create_react_agent
from langchain_community.agent_toolkits.load_tools import load_tools

# Agent equipped with the prebuilt Wikipedia lookup tool; it reuses the `llm`
# created in an earlier cell.
tool = load_tools(["wikipedia"])
agent = create_react_agent(llm, tools=tool)

population_question = [('human','what is the population of Nepal?')]
response = agent.invoke({'messages': population_question})

# The last message in the returned history holds the agent's final answer.
final_message = response['messages'][-1]
final_message.content


C:\Users\gaura\AppData\Local\Temp\ipykernel_29416\3443211687.py:7: LangGraphDeprecatedSinceV10: create_react_agent has been moved to `langchain.agents`. Please update your import to `from langchain.agents import create_agent`. Deprecated in LangGraph V1.0 to be removed in V2.0.
  agent = create_react_agent(llm,


'The current population of Nepal is approximately 29,164,578, according to the 2021 census. The population growth rate is about 0.92% per year.'

#### Design custom tools
> Till now we have only loaded already available tools. The real power is when you can write your own custom tools for agents. Tools are generally built from Python functions and use a tool decorator

In [None]:
from langchain_core.tools import tool
from langgraph.prebuilt import create_react_agent

@tool
def create_financial_report(revenue: float, expenses: float, company_name: str) -> str:
    """Generate a simple income statement for a company given its revenue and expenses.

    Args:
        revenue: Total revenue of the company.
        expenses: Total expenses of the company.
        company_name: Name of the company shown in the report header.

    Returns:
        A plain-text report listing total revenue, total expenses and net income.
    """
    # Net income is simply revenue minus expenses.
    net_income = revenue - expenses
    report = f"Financial report for {company_name}:\n"
    report += f"Total revenue: ${revenue}\n"
    report += f"Total expenses: ${expenses}\n"
    report += f"Net income: ${net_income}\n"
    return report

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0, openai_api_key=key)

# Give the agent the custom tool defined above as its only tool.
agent = create_react_agent(llm, tools=[create_financial_report])

report_request = [
    ('human', 'Create a financial report for ABC Corp with revenue of 1,000,000 and expenses of 750,000'),
]
response = agent.invoke({'messages': report_request})

# The final message of the returned history is the agent's answer.
print(response['messages'][-1].content)

C:\Users\gaura\AppData\Local\Temp\ipykernel_29416\567845599.py:18: LangGraphDeprecatedSinceV10: create_react_agent has been moved to `langchain.agents`. Please update your import to `from langchain.agents import create_agent`. Deprecated in LangGraph V1.0 to be removed in V2.0.
  agent = create_react_agent(llm,tools=[create_financial_report])


Here is the financial report for ABC Corp:

- **Total Revenue:** $1,000,000.00
- **Total Expenses:** $750,000.00
- **Net Income:** $250,000.00


: 

## RAG (Retrieval Augmented generation)

### Document loaders
> Pre-trained LLMs do not have access to external data. Their understanding comes purely from their training data. RAG uses embeddings to retrieve the most relevant documents from the database in answer to the user's queries, which are themselves converted into embeddings
* Load the documents using document loaders
* Split the documents into chunks
* Encode and store the chunks for retrieval

> Langchain provides document loader classes for common file types such as pdfs and csvs

> After loading documents we split them into chunks. Chunking is particularly useful because chunks can be fit into an LLM's context window

##### Chunking strategies
* You can chunk by line. This is very simple but sentences can be split into multiple lines. Creating a chunk overlap can retain context between chunks. If the model performance on queries related to the external data source is poor then you might need to increase the chunk overlap


In [50]:
from langchain_community.document_loaders import CSVLoader
# Load the reviews CSV as documents and display the first one to inspect the
# page_content / metadata layout produced by the loader.
loader = CSVLoader('Reviews.csv', encoding='utf-8')
data = loader.load()
data[0]

Document(metadata={'source': 'Reviews.csv', 'row': 0}, page_content='review: Great movie\nrating: 5')

In [53]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Sample passage used to demonstrate character-based chunking.
quote = """ Life is like a box of chocolates. You never know what you're gonna get. This is what
Forest Gump said in the movie Forest Gump. The quote means that life is full of surprises and uncertainties, 
just like a box of chocolates where you can't see what's inside until you take a bite. 
It encourages us to embrace the unpredictability of life and be open to new experiences."""

# Split on full stops and newlines only.
# NOTE(review): the recorded output shows chunks far longer than chunk_size,
# presumably because no finer-grained separator is listed - confirm.
text_splitter = RecursiveCharacterTextSplitter(
    separators=['.', '\n'],
    chunk_size=10,
    chunk_overlap=5,
)

chunks = text_splitter.split_text(quote)

# Length of every chunk, displayed as the cell output.
list(map(len, chunks))

[33, 38, 14, 42, 68, 86, 1, 88, 1]

#### Storage and retrieval using Vector Databases
* The workflow
    > Load Document --> Split Text --> Storage and retrieval for user queries
* We use Vector databases to store and retrieve information
    > User prompt --> embedding model --> Vector Database --> Retrieve similar documents --> Insert similar docs into prompt template --> feed the filled prompt to the LLM
* For choosing Vector databases the key considerations are
    * Open source vs a closed ecosystem. Open source is much more flexible but might have security challenges
    * Cloud vs an on-prem solution
    * Lightweight vs powerful

In [59]:
# Set up
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_community.document_loaders import CSVLoader

# Create an OpenAIEmbeddings object using the text-embedding-3-small model.
# It is handed to the vector store below to embed the stored chunks.
embedding_function = OpenAIEmbeddings(model="text-embedding-3-small",
                              openai_api_key=key)


loader = CSVLoader(file_path='Reviews.csv',encoding='utf-8')
data = loader.load()

# Split the documents into chunks.
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Each CSV row is loaded as "review: <text>\nrating: <n>". The previous
# chunk_size of 20 was smaller than a single row, so splitting on '\n' put the
# review text and its rating into different chunks and the retriever returned
# reviews without their ratings. A chunk size comfortably larger than one row
# keeps every review together with its rating.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap = 50,
    separators= ['.','\n']
)

docs = text_splitter.split_documents(data)

# Create the Chroma vector store.
# NOTE(review): the collection is persisted under ./chroma_db; re-running this
# cell presumably adds the same chunks again (and chunks from earlier runs
# remain) - confirm, and clear the directory when re-indexing.
vector_store = Chroma.from_documents(documents=docs,
                                        embedding=embedding_function,
                                        collection_name="reviews_collection",
                                        persist_directory="./chroma_db"
                                        )

# Create a retriever that returns the 2 most similar chunks for a query.
retriever = vector_store.as_retriever(search_type="similarity",search_kwargs={"k":2})


In [65]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
# Prompt that shows the retrieved reviews as examples and asks for a rating
# of the new review.
message = """ Given the folloawing reviews as examples
             {reviews}
             Give me the rating of the review: {input}
             rating:
             """

template = ChatPromptTemplate.from_messages([('human',message)])

llm = ChatOpenAI( model="gpt-4o-mini",
                    temperature = 0,
                    openai_api_key=key)


def format_reviews(docs):
    """Join the text of the retrieved documents into one prompt-ready string.

    Args:
        docs: Documents returned by the retriever; only `page_content` is used.

    Returns:
        The documents' text separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# The retriever yields a list of Document objects. Piping it through
# format_reviews inserts only the review text into {reviews}, instead of the
# list's Python repr (metadata included). RunnablePassthrough forwards the
# user's review unchanged as {input}.
ragchain = ({'reviews': retriever | format_reviews, 'input': RunnablePassthrough()} | template | llm)

In [67]:
# Ask the RAG chain to rate an unseen review and print only the reply text.
new_review = "This was a movie with good acting and a good storyline"
response = ragchain.invoke(new_review)
print(response.content)

Based on the review "This was a movie with good acting and a good storyline," it seems to convey a positive sentiment. While the exact rating isn't provided, a reasonable assumption could be a rating of 4 or 5 out of 5, given the praise for both acting and storyline. 

If you need a specific rating, please provide a scale or criteria for rating.
