In [1]:
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from dotenv import load_dotenv
import google.generativeai as genai
from langchain_google_genai import GoogleGenerativeAI
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
import os

In [17]:
# Load variables from a local .env file (if one exists) into the process
# environment before reading the key. `load_dotenv` was imported but never
# called, so a key stored only in .env was invisible to os.getenv().
# By default load_dotenv() does not override variables that are already set.
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

In [5]:
# Safety settings that use the "BLOCK_NONE" threshold for each of the four
# harm categories listed below (one dict per category).
safety_settings_NONE = [
    {"category": harm_category, "threshold": "BLOCK_NONE"}
    for harm_category in (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
]

In [6]:
# Web pages that can be handed to the loader, one URL per line.
urls = [
    "https://www.hindustantimes.com/india-news/external-affairs-minister-s-jaishankar-on-israel-hamas-war-palestinians-have-been-denied-their-homeland-101711592719874.html",
    "https://www.livemint.com/news/india/arvind-kejriwal-arrest-news-live-updates-delhi-cm-bjp-high-court-hc-ed-arrest-atishi-aap-liquor-policy-money-laundering-11711528865907.html",
]

In [7]:
# Echo the first URL: a bare last expression is displayed as the cell output.
urls[0]

'https://www.hindustantimes.com/india-news/external-affairs-minister-s-jaishankar-on-israel-hamas-war-palestinians-have-been-denied-their-homeland-101711592719874.html'

In [8]:
def split_docs(documents, chunk_size=1000, chunk_overlap=100):
    """Split documents into chunks with a RecursiveCharacterTextSplitter.

    Args:
        documents: Documents to split; handed to ``split_documents`` unchanged.
        chunk_size: ``chunk_size`` given to the splitter (default 1000).
        chunk_overlap: ``chunk_overlap`` given to the splitter (default 100).

    Returns:
        Whatever the splitter's ``split_documents`` returns for ``documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)

In [9]:
def preprocess(urls, chunk_size=1000, chunk_overlap=100):
    """Load web page(s) with WebBaseLoader and split them into chunks.

    The previous version also built a second splitter with
    ``chunk_size=2000, chunk_overlap=2000`` and computed a result that was
    overwritten on the very next line; that dead work has been removed.

    Args:
        urls: Passed straight to ``WebBaseLoader`` (the notebook passes a
            single URL string).
        chunk_size: Forwarded to ``split_docs``; the default matches the
            value that was effectively used before.
        chunk_overlap: Forwarded to ``split_docs``; the default matches the
            value that was effectively used before.

    Returns:
        The chunks produced by ``split_docs`` for the loaded documents.
    """
    loader = WebBaseLoader(urls)
    documents = loader.load()
    # Single splitting pass; split_docs is the one place that owns the splitter.
    return split_docs(documents, chunk_size=chunk_size, chunk_overlap=chunk_overlap)

In [10]:
# preprocess() already returns split chunks, so they are used directly rather
# than being pushed through split_docs() a second time.
docs = preprocess(urls[0])
# Alias kept so any cell that still refers to `documents` keeps working.
documents = docs
print(len(docs))

15


In [11]:
# LangChain wrapper around the "gemini-pro" model.
llm = GoogleGenerativeAI(
    model="gemini-pro",
    convert_system_message_to_human=True,
)
# Replace the wrapper's client with a GenerativeModel that carries the
# safety_settings_NONE thresholds defined earlier in the notebook.
llm.client = genai.GenerativeModel(
    model_name="gemini-pro",
    safety_settings=safety_settings_NONE,
)

In [13]:

def user_input(user_question, documents=None):
    """Answer a question from document chunks with a "stuff" QA chain and print it.

    Args:
        user_question: Question text placed in the prompt's ``{question}`` slot.
        documents: Chunks to use as context. Defaults to the notebook-level
            ``docs`` so existing ``user_input(question)`` calls behave as before.

    Returns:
        None. The chain's output dict is printed.
    """
    # Fall back to the notebook-level chunks only when the caller supplies
    # none, so the function no longer depends exclusively on hidden state.
    if documents is None:
        documents = docs

    prompt_template = """
    Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
    provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
    Context:\n {context}?\n
    Question: \n{question}\n

    Answer:
    """
    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)
    # `invoke` replaces calling the chain object directly, which LangChain
    # deprecated in 0.1.0; the inputs and the outputs-only flag are unchanged.
    response = chain.invoke(
        {"input_documents": documents, "question": user_question},
        return_only_outputs=True,
    )

    print(response)

In [16]:
# Query the loaded chunks for news about Jaishankar; the answer dict is printed.
user_input("News about jaishankar")

{'output_text': 'According to the context provided, External affairs minister S Jaishankar has expressed concern over the death of civilians in the ongoing Israel-Hamas war stating that Palestinians have been denied their homeland. He also urged Israel to cater to international humanitarian law in its ongoing retaliation after the deadly October 7 attack by Hamas. However, S Jaishankar acknowledged that the October 7 attack was “terrorism”.'}


In [15]:
# Ask for a summary of the loaded article; the answer dict is printed.
user_input("summarize the News")

{'output_text': "External affairs minister S Jaishankar expressed concern over the death of civilians in the ongoing Israel-Hamas war. He also urged Israel to cater to international humanitarian law in its ongoing retaliation after the deadly October 7 attack by Hamas. However, S Jaishankar acknowledged that the October 7 attack was “terrorism”. India has continued to support the 'two-state solution' to the long-running Israel-Palestine conflict."}
