# Package Requirements

In [None]:
!pip3 -q install openai langchain huggingface_hub --quiet
!pip3 install cohere --quiet
!pip3 install yfinance --quiet
!pip3 install -U langchain-openai --quiet
!pip3 install pytesseract  --quiet
!pip3 install Pillow --quiet
!pip3 install tesseract --quiet
!pip3 install tesseract-ocr --quiet
!pip3 install libtesseract-dev --quiet
!pip3 install langchain_cohere
!pip3 install langchain_community

In [None]:
from langchain.llms import OpenAI
from langchain.llms import HuggingFaceHub
from langchain.llms import Cohere
from langchain import PromptTemplate
from langchain import LLMChain
from langchain.chains import SequentialChain
import yfinance as yf
import pytesseract
from PIL import Image
from IPython.display import Image as display_image

In [None]:
import os

from google.colab import userdata

# Copy each API key from the Colab secret store (userdata) into the process
# environment under the same name.
for secret_name in ("OPENAI_API_KEY", "HUGGINGFACEHUB_API_TOKEN", "COHERE_API_KEY"):
    os.environ[secret_name] = userdata.get(secret_name)

# LLMs

## OpenAI model - Paid

In [None]:
from langchain.llms import OpenAI

# temperature (0.9 here) controls the randomness of the output: a higher value
# gives more varied and unpredictable text, a lower value gives more
# deterministic and conservative text.
# max_tokens (256 here) caps the number of tokens (words or pieces of words)
# the model may generate in a single response.
llm_openai = OpenAI(temperature=0.9, max_tokens=256)

response = llm_openai.invoke("Write a 4 line poem on AI")
print(response)


## Cohere - Opensource Alternative to OpenAI

In [None]:
from langchain.llms import Cohere

# Same poem request as the other providers, sent to a Cohere model.
poem_request = "Write a 4 line poem on AI"

llm = Cohere(model="command-xlarge-nightly")
response = llm.invoke(poem_request)
print(response)

## Hugging face model - Free

In [None]:
from langchain.llms import HuggingFaceHub

# Alternative model to try: "openai-community/gpt2"
repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
generation_kwargs = {"temperature": 0.9, "max_length": 256}

llm = HuggingFaceHub(repo_id=repo_id, model_kwargs=generation_kwargs)

response = llm.invoke("Write a 4 line poem on AI")
print(response)

# Where LLMs fail

In [None]:
# Ask the model for live market data. Swap in the Cohere line to compare.
# llm = Cohere(model="command-xlarge-nightly")
llm = OpenAI(temperature=0, max_tokens=256)

stock_question = "What is current market price of the Apple Stock?"
response = llm.invoke(stock_question)
print(response)

In [None]:
import yfinance as yf

# Look up the AAPL ticker and read "currentPrice" from its info mapping.
apple_stock = yf.Ticker("AAPL")
apple_info = apple_stock.info
apple_cmp = apple_info["currentPrice"]
print(apple_cmp)

## Prompt Templates

In [None]:
# Five differently worded requests for the same kind of output.
poem_requests = (
    "Write a 4 line poem on AI",
    "Craft a quartet of verses celebrating the marvels of artificial intelligence.",
    "Compose a brief, ode to the wonders of AI.",
    "Pen a short poem that captures the essence of artificial intelligence.",
    "Create a succinct tribute to the advancements in AI.",
)

# Query the model for every wording before printing anything.
response, response1, response2, response3, response4 = [
    llm.invoke(request) for request in poem_requests
]

labelled_responses = {
    "response": response,
    "response1": response1,
    "response2": response2,
    "response3": response3,
    "response4": response4,
}
for label, answer in labelled_responses.items():
    print(f"\n======= {label} =======\n", answer)

In [None]:
from langchain import PromptTemplate

# A reusable prompt with a single placeholder, {subject_name}.
template = "Write a 4 line poem on the subject {subject_name}"
prompt = PromptTemplate(template=template, input_variables=["subject_name"])

# Render the template for a few subjects to show the final prompt text.
for subject in ("Data Science", "Fathers Day", "Solar System"):
    print(prompt.format(subject_name=subject))

# LLM Chain

In [None]:
from langchain.llms import OpenAI, Cohere
from langchain import PromptTemplate
from langchain import LLMChain

# Alternative: llm = OpenAI(temperature=0.1)
llm = Cohere(temperature=0.1)

template = "List down the historically significant steps in the field of {filed_name}"
prompt = PromptTemplate(template=template, input_variables=["filed_name"])

# Alternative (pipe syntax): chain = prompt | llm
chain = LLMChain(llm=llm, prompt=prompt)

# The bare string is passed as the value for the prompt's only input variable.
result = chain.invoke("Machine Learning")
print(result['text'])

### LAB: Example of an LLMChain

In [None]:
# Alternative: llm = OpenAI(temperature=0.9)
llm = Cohere(temperature=0.9)

template = "The topic name is {topic}. Explain this topic to a 10 years old kid"
prompt = PromptTemplate(template=template, input_variables=["topic"])

# Alternative (pipe syntax): chain = prompt | llm
chain = LLMChain(llm=llm, prompt=prompt)

# The bare string is passed as the value for the prompt's only input variable.
result = chain.invoke("Logistic Regression")
print(result['text'])


# Sequential Chains

## Chain1 : Finds the top 10 books
Find out the top ten books on any subject with this dedicated Chain.

In [None]:
from langchain_core.output_parsers import StrOutputParser

# Alternative: llm = OpenAI(temperature=0.5)
llm = Cohere(temperature=0.5)

# Prompt asking for ten book titles on a given {theme}.
book_list_template = """Please provide a simple list of ten well-known
                books that center around the theme of {theme}.
                Do not include book description"""
book_name_prompt_template = PromptTemplate(
    template=book_list_template,
    input_variables=["theme"],
)

# The chain stores the model's answer under the "book_names_list" key.
# Alternative (pipe syntax):
# book_name_chain1 = prompt | llm | {"book_names_list": StrOutputParser()}
book_name_chain = LLMChain(
    llm=llm,
    prompt=book_name_prompt_template,
    output_key="book_names_list",
)

books_list = book_name_chain.invoke(input="personality development")
print(books_list["book_names_list"])

## Chain2 : Gives the summary

This delivers a detailed summary for any specified book title.

In [None]:
# Alternative: llm = OpenAI(temperature=0.9, max_tokens=3000)
llm = Cohere(temperature=0.9, max_tokens=3000)

# Prompt asking for a structured summary of one book out of {book_names_list}.
book_summary_template = """Please take any one book from the books list {book_names_list}.
                Mention the book title.
                Please provide a comprehensive summary of the book,in three sections
                and each section with three summary points"""
book_summary_prompt_template = PromptTemplate(
    template=book_summary_template,
    input_variables=["book_names_list"],
)

# The chain stores the model's answer under the "book_summary" key.
book_summary_chain = LLMChain(
    llm=llm,
    prompt=book_summary_prompt_template,
    output_key="book_summary",
)

# Stand-alone run: a single title is supplied in place of a list of books.
book_summary = book_summary_chain.invoke(input="The Catcher in the Rye by J.D. Salinger")

# Print the summary text.
print(book_summary['book_summary'])


## SequentialChain

Takes a theme as input. It first gets the top 10 books for the given theme, then provides a summary of any one of those books without requiring a specific book as input.

In [None]:
from langchain.chains import SequentialChain

# Run the two book chains back to back: the "book_names_list" output of the
# first chain matches the input variable of the second.
book_pipeline_steps = [book_name_chain, book_summary_chain]
book_chain = SequentialChain(
    chains=book_pipeline_steps,
    input_variables=["theme"],
    output_variables=["book_names_list", "book_summary"],
)

# Only the theme is supplied; the book to summarise is chosen by the model.
theme_input = {"theme": "Personal Finance"}
book_summary = book_chain.invoke(input=theme_input)

#print(book_summary)
print(book_summary["book_summary"])


# LAB : Sequential Chain

In [None]:
# Free-text fundamentals for State Bank of India (NSE: SBIN). The cells that
# follow pass this string to the chains as their input.
SBIN_Stock_Analysis = """

Company name is State Bank of India
NSE Symbol is SBIN
MARKET CAP - ₹ 6,69,078.16 Cr.
Company has a good Return on Equity (ROE) track record: 3 Years ROE 13.46%.
CASA stands at 42.67% of total deposits.
The company has delivered good Profit growth of 51.35% over the past 3 years.
Company has delivered good profit growth of 76.1% CAGR over last 5 years.
Company has been maintaining a healthy dividend payout of 17.3%.
Company's working capital requirements have reduced from 152 days to 118 days
The bank has a very low ROA track record. Average ROA of 3 years is 0.70%.
Low other Income proportion of 11.03%.High Cost to income ratio of 53.87%.
Company has low interest coverage ratio.
The company has delivered a poor sales growth of 8.91% over past five years.
Company has a low return on equity of 12.8% over last 3 years.
Contingent liabilities of Rs.19,00,096 Cr.
Company might be capitalizing the interest cost.
Earnings include an other income of Rs.1,39,611 Cr.

"""
print(SBIN_Stock_Analysis)

## Chain1 : Positives and Negatives

In [None]:
# Alternative: llm = OpenAI(temperature=0, max_tokens=256)
llm = Cohere(temperature=0, max_tokens=256)

# Prompt that condenses the raw analysis text into short pros and cons.
# ("market capitalization" was previously misspelled in the text sent to the model.)
template ="""Read the text data from {stock_analysis_input}.
              Mention the company name and market capitalization.
              Write top3 positive and top3 negative points.
              keep the points short"""

information_extraction_prompt = PromptTemplate(
    input_variables=["stock_analysis_input"],
    template=template,
)

#print(information_extraction_prompt.format(stock_analysis_input=SBIN_Stock_Analysis))

# The chain stores the model's answer under the "Pros_and_Cons" key.
information_extraction_chain=LLMChain( llm=llm,
                                       prompt=information_extraction_prompt,
                                       output_key="Pros_and_Cons")

result=information_extraction_chain.invoke(SBIN_Stock_Analysis)
#print(result.keys())
print(result['Pros_and_Cons'])

## Chain2 : Investor Report

In [None]:
# Alternative: llm = OpenAI(temperature=0, max_tokens=256)
llm = Cohere(temperature=0, max_tokens=256)

# Prompt that turns the pros and cons into a buy / don't-buy opinion.
template ="""
Imagine you've been analyzing stocks for over 15 years.
Look at the good and bad points, and see if the company can grow.
Right now, is buying shares of this company a smart move?
take the data from {Pros_and_Cons}
"""

stock_decision_prompt = PromptTemplate(template=template, input_variables=["Pros_and_Cons"])
#print(stock_decision_prompt.format(Pros_and_Cons=result['Pros_and_Cons']))

# The chain stores the model's answer under the "Investor_Report" key.
stock_decision_chain = LLMChain(
    llm=llm,
    prompt=stock_decision_prompt,
    output_key="Investor_Report",
)

# NOTE(review): this stand-alone run fills {Pros_and_Cons} with the raw analysis
# text, not with the previous chain's output -- confirm that this is intended.
result = stock_decision_chain.invoke(SBIN_Stock_Analysis)
print(result['Investor_Report'])

## Final Sequential Chain


In [None]:
# Chain the two stock steps: extraction writes "Pros_and_Cons", which is the
# input variable of the decision prompt.
stock_pipeline_steps = [information_extraction_chain, stock_decision_chain]
full_chain = SequentialChain(
    chains=stock_pipeline_steps,
    input_variables=["stock_analysis_input"],
    output_variables=["Pros_and_Cons", "Investor_Report"],
)

result = full_chain.invoke(SBIN_Stock_Analysis)
print(result["Investor_Report"])

# LangChain + IDP (Intelligent Document Processing)

In [None]:
#Get the Images, try Invoice_1.png, Invoice_2.png, Invoice_3.png, Invoice_4.png
#Try different images in this example
!wget https://raw.githubusercontent.com/giridhar276/Datasets/master/IDM_Datasets/Invoices/Invoice_1.png
!wget https://raw.githubusercontent.com/giridhar276/Datasets/master/IDM_Datasets/Invoices/Invoice_2.png
!wget https://raw.githubusercontent.com/giridhar276/Datasets/master/IDM_Datasets/Invoices/Invoice_3.png
!wget https://raw.githubusercontent.com/giridhar276/Datasets/master/IDM_Datasets/Invoices/Invoice_4.png


image_path=image_path = '/content/Invoice_4.png'
display_image(filename=image_path)

## IDP without LLM

In [None]:
import pytesseract
import re

def extract_email_addresses(image_path):
    """Run OCR on an image and print every email address found in its text.

    Args:
        image_path: Image to read; passed unchanged to
            ``pytesseract.image_to_string``.

    Returns:
        None. The matches are printed as a list, not returned.
    """
    text = pytesseract.image_to_string(image_path)
    # Pattern: local part, "@", domain, then a dot and at least two letters.
    email_addresses = re.findall(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}", text)
    print("Email Address: ", email_addresses)

In [None]:
# Run the regex-based extractor on the invoice image referenced by image_path.
extract_email_addresses(image_path)

## IDP with LLMs

In [None]:
#Extract Text from Image
img = Image.open(image_path)
invoice_text = pytesseract.image_to_string(img)
#print(invoice_text)

#llm=OpenAI(temperature=0)
llm=Cohere(temperature=0)

template="""
Take the information from {invoice_text} and print the itemwise price and quantity.
"""

invoice_prompt = PromptTemplate(
    input_variables=["invoice_text"],
    template=template,
)

invoice_chain=LLMChain(llm=llm, prompt=invoice_prompt, output_key="itemwise_price_and_quantity")
result=invoice_chain.invoke(invoice_text)
print(result['itemwise_price_and_quantity'])

In [None]:
from IPython.display import display_markdown

# Show the model's itemwise answer through the notebook's Markdown display
# instead of as plain printed text.
items_key = 'itemwise_price_and_quantity'
result_values = result[items_key]
display_markdown(result_values, raw=True)

In [None]:
# Prompt asking for the client's contact details and the invoice total.
# ("amount" was previously misspelled in the text sent to the model.)
template="""
Take the information from {invoice_text} and print the client name, phone number, email and total amount
"""

invoice_prompt = PromptTemplate(
    input_variables=["invoice_text"],
    template=template,
)

# The chain stores the model's answer under the "contact_details" key.
invoice_chain=LLMChain(llm=llm, prompt=invoice_prompt, output_key="contact_details")
result=invoice_chain.invoke(invoice_text)
print(result['contact_details'])

In [None]:
# Prompt asking for the bank account number and the payment conditions.
template="""
Take the information from {invoice_text} and print the bank account number and payment conditions
"""

invoice_prompt = PromptTemplate(template=template, input_variables=["invoice_text"])

# The chain stores the model's answer under the "bank_details" key.
invoice_chain = LLMChain(
    llm=llm,
    prompt=invoice_prompt,
    output_key="bank_details",
)
result = invoice_chain.invoke(invoice_text)
print(result['bank_details'])

# Assignment - Book Summary App