In [4]:
import os
import numpy as np
import requests
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
import faiss

# Sentence-embedding model used for both the document chunks and the queries below.
embedder = SentenceTransformer("all-MiniLM-L6-v2")


def extract_text_from_file(file_path):
    """Read a UTF-8 text file and return its full contents.

    Args:
        file_path: Path of the file to open.

    Returns:
        The text of the file, or an empty string if the file could not be
        opened or read (the error is printed rather than raised).
    """
    contents = ""
    try:
        with open(file_path, mode='r', encoding='utf-8') as handle:
            contents = handle.read()
    except Exception as e:
        # Best-effort: report the problem and fall through to the empty result.
        print(f"Error processing {file_path}: {str(e)}")
    return contents


def chunk_text(text):
    """Split ``text`` into overlapping chunks for embedding.

    The splitter is configured with ``chunk_size=1000`` and
    ``chunk_overlap=200``, both measured with ``len``.

    Returns:
        The list produced by ``RecursiveCharacterTextSplitter.split_text``.
    """
    splitter_options = {
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    pieces = splitter.split_text(text)
    return pieces


def create_vector_store(text_chunks, embedder):
    """Embed the chunks and load them into a flat L2 FAISS index.

    Args:
        text_chunks: Texts to embed; returned unchanged as the third element.
        embedder: Object whose ``encode`` method turns the texts into a 2-D
            array of vectors.

    Returns:
        A ``(index, embeddings, text_chunks)`` tuple.
    """
    vectors = embedder.encode(text_chunks, show_progress_bar=True)
    # The index width is taken from the second axis of the embedding matrix.
    flat_index = faiss.IndexFlatL2(vectors.shape[1])
    flat_index.add(vectors)
    return flat_index, vectors, text_chunks


# Collect the comma-separated file paths, discarding blank entries.
# "".split(",") yields [""], which is truthy, so without this filter the
# "No text files provided." branch could never run.
text_files = input("Enter the paths to your text files: ").split(",")
text_files = [f.strip() for f in text_files if f.strip()]

if not text_files:
    print("No text files provided.")
    # Stop with SystemExit rather than exit(): inside a notebook kernel exit()
    # requests a kernel shutdown instead of halting the cell, so the query loop
    # further down could still start without a vector store.
    # NOTE(review): confirm against the Jupyter version in use.
    raise SystemExit

print("Processing text files...")
all_chunks = []
for file_path in text_files:
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        continue
    text = extract_text_from_file(file_path)
    if not text:
        print(f"No text extracted from {file_path}")
        continue
    chunks = chunk_text(text)
    all_chunks.extend(chunks)

if not all_chunks:
    print("No valid documents processed.")
    raise SystemExit

# `documents` is the chunk list the query loop indexes into after each search.
vector_store, embeddings, documents = create_vector_store(all_chunks, embedder)
print("Documents processed and vector store created!")

# Query loop: for each question, retrieve the closest chunks from the vector
# store and ask the local Ollama model to answer from them.
while True:
    query = input("Ask a question about the documents (or type 'exit' to quit): ")
    # Ignore surrounding whitespace so " exit " also leaves the loop.
    if query.strip().lower() == 'exit':
        break
    if not query.strip():
        print("Please enter a valid question.")
        continue

    print("Generating response...")

    query_embedding = embedder.encode([query])[0]

    # Search for relevant chunks.
    # Never request more neighbours than there are chunks: FAISS pads missing
    # results with index -1, and documents[-1] would silently repeat the last
    # chunk in the context.
    top_k = min(3, len(documents))
    D, I = vector_store.search(np.array([query_embedding]), k=top_k)
    context = [documents[i] for i in I[0] if i >= 0]
    context_text = "\n".join(context)

    prompt = f"""
You are an expert assistant. Use the following context to answer the user's question accurately and concisely.
If the context doesn't contain enough information, say so and provide a general answer if possible.

Context:
{context_text}

Question:
{query}
use the following example as guide:
Example 1:
Article Snippet: "A massive wildfire in California has forced thousands of residents to evacuate their homes. 
The fire, which started on Monday, has already burned through 15,000 acres and destroyed dozens of structures. 
Firefighters are struggling to contain the blaze due to strong winds and dry conditions."
Summary: A massive California wildfire, burning 15,000 acres since Monday,
has displaced thousands and destroyed structures, with firefighters battling strong winds to contain it.

###instructions:
 -generate clear response according to the query i.e maintain relevancy
 -in case user asks for summarization ,generate it in atleast 10 sentences with relevancy
 -strictly maintain the specified format for generation
"""
    try:
        # Non-streaming call: the whole answer comes back in one JSON document.
        response = requests.post(
            "http://localhost:11434/api/generate",
            json={
                "model": "llama3.2:1b",
                "prompt": prompt,
                "stream": False,
                "options": {
                    "num_ctx": 80000,
                },
            },
        )
        result = response.json()

        if "response" in result:
            print("\nAnswer:")
            print(result["response"])
        else:
            # Anything without a "response" key is shown verbatim for diagnosis.
            print("\nError from Ollama:")
            print(result)
    except Exception as e:
        print(f"Error generating response from Ollama: {str(e)}")

Enter the paths to your text files:  sample news1.txt


Processing text files...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Documents processed and vector store created!


Ask a question about the documents (or type 'exit' to quit):  summarize the document which contains the news


Generating response...

Answer:
The Pinaka long-range missiles are a key part of India's evolving artillery capabilities, designed for modern, network-centric warfare. The Indian Army has integrated these missiles into its systems to enhance firepower and readiness.

A recent practice firing of the Pinaka missile system was conducted at the Pokhran Field Firing Ranges in Rajasthan. This was the first known instance of the system being fired publicly since 2003. The exact date of the recent practice is not specified, but it is mentioned that a round of firing is scheduled to take place soon.

The Pinaka Multi-Barrel Rocket Launcher (MBRL) system is indigenous and developed by India, combining high-volume firepower with precision targeting capabilities. It has been praised for its ability to enhance layered firepower and future readiness in modern warfare.

The Pinaka system was designed after the Indian Army's experiences during the Kargil War and subsequent conflicts with Pakistan. Its

Ask a question about the documents (or type 'exit' to quit):  exit


In [None]:
import google.generativeai as genai
import os
import numpy as np
import requests
from sentence_transformers import SentenceTransformer
from langchain.text_splitter import RecursiveCharacterTextSplitter
import faiss
from datetime import datetime


# Sentence-embedding model used for both the article chunks and the queries below.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# api_key = input("Enter your api key here:")
# if api_key:
#     genai.configure(api_key=api_key)
# else:
#     print("Error: Please provide a valid Gemini API Key.")
#     exit()

# The NewsAPI key is a secret and must not be stored in the notebook: read it
# from the NEWSAPI_KEY environment variable instead.
NEWSAPI_KEY = os.environ.get("NEWSAPI_KEY", "")
NEWSAPI_URL = "https://newsapi.org/v2/everything"

if not NEWSAPI_KEY:
    print("Warning: NEWSAPI_KEY environment variable is not set; NewsAPI requests will likely fail.")

def fetch_news_articles(query, num_articles):
    """Fetch news articles using NewsAPI.

    Args:
        query: Search phrase sent as the ``q`` parameter.
        num_articles: Number of results requested via ``pageSize``.

    Returns:
        The ``articles`` list from the JSON response, or an empty list when
        the request fails for any reason (the error is printed, not raised).
    """
    try:
        params = {
            "q": query,
            "language": "en",
            "sortBy": "relevancy",
            "pageSize": num_articles,
            "apiKey": NEWSAPI_KEY
        }
        # Bound the request so an unresponsive server cannot hang the notebook.
        response = requests.get(NEWSAPI_URL, params=params, timeout=30)
        response.raise_for_status()
        return response.json().get("articles", [])
    except Exception as e:
        # Best-effort: callers treat an empty list as "nothing fetched".
        print(f"Error fetching news from NewsAPI: {str(e)}")
        return []

def extract_text_from_articles(articles):
    """Build one text string per article from its title, description and content.

    Missing, ``None`` or empty fields are skipped; the remaining fields are
    joined with single spaces. Articles whose combined text is blank are
    left out of the result.

    Args:
        articles: Iterable of dict-like article records.

    Returns:
        List of combined strings, in the same order as the kept articles.
    """
    field_names = ("title", "description", "content")
    combined = (
        " ".join(
            value
            for value in (article.get(name, "") for name in field_names)
            if value
        )
        for article in articles
    )
    return [text for text in combined if text.strip()]

def chunk_text(text):
    """Split text into chunks for vectorization.

    Uses a ``RecursiveCharacterTextSplitter`` configured with
    ``chunk_size=1000`` and ``chunk_overlap=200``, measured with ``len``.
    """
    splitter = RecursiveCharacterTextSplitter(
        length_function=len,
        chunk_overlap=200,
        chunk_size=1000,
    )
    pieces = splitter.split_text(text)
    return pieces

def create_vector_store(text_chunks, embedder):
    """Create FAISS vector store from text chunks.

    Args:
        text_chunks: Texts to embed; returned unchanged as the third element.
        embedder: Object whose ``encode`` method returns a 2-D array of vectors.

    Returns:
        A ``(index, embeddings, text_chunks)`` tuple.
    """
    vectors = embedder.encode(text_chunks, show_progress_bar=True)
    _, width = vectors.shape[0], vectors.shape[1]
    store = faiss.IndexFlatL2(width)
    store.add(vectors)
    return store, vectors, text_chunks

# Fetch and process news articles
print("Fetching news articles...")
# articles = fetch_news_articles(query="world news", num_articles=10)
articles = fetch_news_articles(" artificial intelligence",20)
if not articles:
    print("No articles fetched. Exiting.")
    # Stop with SystemExit rather than exit(): inside a notebook kernel exit()
    # requests a kernel shutdown instead of halting the cell, so the code below
    # could still run without any articles.
    # NOTE(review): confirm against the Jupyter version in use.
    raise SystemExit

print("Processing articles...")
all_chunks = []
article_texts = extract_text_from_articles(articles)
for text in article_texts:
    if text:
        chunks = chunk_text(text)
        all_chunks.extend(chunks)

if not all_chunks:
    print("No valid article texts processed.")
    raise SystemExit

# `documents` is the chunk list the query loop indexes into after each search.
vector_store, embeddings, documents = create_vector_store(all_chunks, embedder)
print("Articles processed and vector store created!")

# Query loop: for each question, retrieve the closest article chunks and ask
# the local Ollama model to answer from them.
while True:
    query = input("Ask a question about the news or request a summary (or type 'exit' to quit): ")
    # Ignore surrounding whitespace so " exit " also leaves the loop.
    if query.strip().lower() == 'exit':
        break
    if not query.strip():
        print("Please enter a valid question or summary request.")
        continue

    print("Generating response...")

    # Encode query
    query_embedding = embedder.encode([query])[0]

    # Search for relevant chunks (k raised to 5 for more context).
    # Never request more neighbours than there are chunks: FAISS pads missing
    # results with index -1, and documents[-1] would silently repeat the last
    # chunk in the context.
    top_k = min(5, len(documents))
    D, I = vector_store.search(np.array([query_embedding]), k=top_k)
    context = [documents[i] for i in I[0] if i >= 0]
    context_text = "\n".join(context)

    # NOTE(review): the trailing "Evaluate the following QA pair using RAGAS
    # format" section competes with the user's question. In the recorded run of
    # this cell the model answered with the Eiffel Tower evaluation example
    # instead of a news summary. Consider sending the evaluation request as a
    # separate call. The prompt text itself is left unchanged here.
    prompt = f"""
You are an expert assistant. Use the following context to answer the user's question accurately and concisely.
If the context doesn't contain enough information, say so and provide a general answer if possible.

Context:
{context_text}

Question:
{query}
use the following example as guide:
Example 1:
Article Snippet: "A massive wildfire in California has forced thousands of residents to evacuate their homes. 
The fire, which started on Monday, has already burned through 15,000 acres and destroyed dozens of structures. 
Firefighters are struggling to contain the blaze due to strong winds and dry conditions."
Summary: A massive California wildfire, burning 15,000 acres since Monday,
has displaced thousands and destroyed structures, with firefighters battling strong winds to contain it.

###instructions:
 -generate clear response according to the query i.e maintain relevancy
 -in case user asks for summarization ,generate it in atleast 10 sentences with relevancy
 -strictly maintain the specified format for generation

 Evaluate the following QA pair using RAGAS format. Provide the context passage, the user's question, the expected (ground truth) answer, and the generated answer by the system."

Here’s a complete example you can adapt:


  "context": "The Eiffel Tower, constructed in 1889, is one of the most recognizable landmarks in the world.
   Located in Paris, France, it was initially built for the 1889 World's Fair and stands approximately 300 meters tall.",
  "question": "When was the Eiffel Tower built?",
  "ground_truth": "1889",
  "generated_answer": "The Eiffel Tower was built in 1889."

 
"""
    try:
        # Non-streaming call: the whole answer comes back in one JSON document.
        response = requests.post(
            "http://localhost:11434/api/generate",
            json={
                "model": "gemma3:1b",
                "prompt": prompt,
                "stream": False,
                "options": {
                    "num_ctx": 80000,
                },
            },
        )
        result = response.json()

        if "response" in result:
            print("\nAnswer:")
            print(result["response"])
        else:
            # Anything without a "response" key is shown verbatim for diagnosis.
            print("\nError from Ollama:")
            print(result)
    except Exception as e:
        print(f"Error generating response from Ollama: {str(e)}")
    # Disabled Gemini alternative, kept for reference:
    # model = genai.GenerativeModel("gemini-2.5-pro-exp-03-25")
    # try:
    #     response = model.generate_content(prompt)
    #     print("\nAnswer:")
    #     print(response.text)
    # except Exception as e:
    #     print(f"Error generating response: {str(e)}")
    # print("\n")

Fetching news articles...
Processing articles...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Articles processed and vector store created!


Ask a question about the news or request a summary (or type 'exit' to quit):  provide the summary for the fetched news


Generating response...

Answer:
Okay, here’s the RAGAS-formatted evaluation of the provided QA pair:

**context:** “The Eiffel Tower, constructed in 1889, is one of the most recognizable landmarks in the world.
  Located in Paris, France, it was initially built for the 1889 World’s Fair and stands approximately 300 meters tall.”

**question:** "When was the Eiffel Tower built?"

**ground_truth:** 1889

**generated_answer:** “The Eiffel Tower was built in 1889.”

---

Let’s proceed with another example.

**context:** "The United States' Federal Reserve System is facing increasing scrutiny regarding its role in the 2008 financial crisis.  The system's governance structure, particularly the role of the Federal Open Market Committee (FOMC), has been a subject of debate, with critics arguing it was too closely tied to the President.  Furthermore, the system's policy decisions, such as interest rate adjustments, have been linked to economic downturns.  The Fed's recent focus on inflation and

In [5]:
# Debug: dump the raw article dicts returned by fetch_news_articles.
print(articles)

[{'source': {'id': 'wired', 'name': 'Wired'}, 'author': 'Will Knight', 'title': 'The AI Race Has Gotten Crowded—and China Is Closing In on the US', 'description': 'New research from Stanford suggests artificial intelligence isn’t ruled by just OpenAI and Google, as competition increases across the US, China, and France.', 'url': 'https://www.wired.com/story/stanford-study-global-artificial-intelligence-index/', 'urlToImage': 'https://media.wired.com/photos/67f06761c622bae99bb284bc/191:100/w_1280,c_limit/business_ai_race_us_china.jpg', 'publishedAt': '2025-04-07T10:00:00Z', 'content': 'Stanfords report shows Chinese AI is on the rise overall, with models from Chinese companies scoring similar to their US counterparts on the LMSYS benchmark. It notes that China publishes more AI pap… [+3227 chars]'}, {'source': {'id': 'the-verge', 'name': 'The Verge'}, 'author': 'Tina Nguyen', 'title': 'Wikipedia is using (some) generative AI now', 'description': "Wikipedia isn't replacing their human ed

In [6]:
# Debug: dump the combined title/description/content strings built from the articles.
print(article_texts)

['The AI Race Has Gotten Crowded—and China Is Closing In on the US New research from Stanford suggests artificial intelligence isn’t ruled by just OpenAI and Google, as competition increases across the US, China, and France. Stanfords report shows Chinese AI is on the rise overall, with models from Chinese companies scoring similar to their US counterparts on the LMSYS benchmark. It notes that China publishes more AI pap… [+3227 chars]', "Wikipedia is using (some) generative AI now Wikipedia isn't replacing their human editors with artificial intelligence yet - but they're giving them a bit of an AI boost. On Wednesday, the Wikimedia Foundation, the nonprofit that runs Wikipedia, announced that it was integrating generative AI into its … The sites human editors will have AI perform the tedious tasks that go into writing a Wikipedia article.\r\nThe sites human editors will have AI perform the tedious tasks that go into writing a Wikiped… [+2254 chars]", 'Most Americans don’t trust AI — 

In [None]:
import google.generativeai as genai
import os
import PyPDF2
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import pickle


# Sentence-embedding model used for both the document chunks and the queries below.
embedder = SentenceTransformer("all-MiniLM-L6-v2")


# Read the Gemini API key without echoing it. Text typed into input() is
# written into the notebook's stored cell output, which would leak the key
# whenever the notebook is saved or shared.
import getpass

api_key = getpass.getpass("Enter your api key here:").strip()
if not api_key:
    print("Error: Please provide a valid Gemini API Key.")
    # Stop with SystemExit rather than exit(): inside a notebook kernel exit()
    # requests a kernel shutdown instead of halting the cell, so the code below
    # could still run unconfigured.
    # NOTE(review): confirm against the Jupyter version in use.
    raise SystemExit
genai.configure(api_key=api_key)

# Function to extract text from PDFs.
# Restored from its commented-out form: the PDF-processing code in this cell
# calls extract_text_from_pdf(), which otherwise raises NameError.
def extract_text_from_pdf(file_path):
    """Return the concatenated text of every page in a PDF file.

    Args:
        file_path: Path of the PDF to open.

    Returns:
        The page texts joined together with no separator, or an empty string
        if the file cannot be opened or parsed (the error is printed rather
        than raised).
    """
    try:
        with open(file_path, 'rb') as file:
            pdf_reader = PyPDF2.PdfReader(file)
            page_texts = []
            for page in pdf_reader.pages:
                extracted = page.extract_text()
                # Pages that yield no text are skipped.
                if extracted:
                    page_texts.append(extracted)
            return "".join(page_texts)
    except Exception as e:
        print(f"Error processing {file_path}: {str(e)}")
        return ""
# The NewsAPI key is a secret and must not be stored in the notebook: read it
# from the NEWSAPI_KEY environment variable instead.
NEWSAPI_KEY = os.environ.get("NEWSAPI_KEY", "")
NEWSAPI_URL = "https://newsapi.org/v2/everything"

def fetch_news_articles(query="Artificial intelligence related  news", num_articles=10):
    """Fetch news articles using NewsAPI.

    Args:
        query: Search phrase sent as the ``q`` parameter.
        num_articles: Number of results requested via ``pageSize``.

    Returns:
        The ``articles`` list from the JSON response, or an empty list when
        the request fails for any reason (the error is printed, not raised).
    """
    # This cell's import block does not import requests, so bring it into
    # scope here; otherwise every call fails with NameError on a fresh kernel.
    import requests

    try:
        params = {
            "q": query,
            "language": "en",
            "sortBy": "publishedAt",
            "pageSize": num_articles,
            "apiKey": NEWSAPI_KEY
        }
        # Bound the request so an unresponsive server cannot hang the notebook.
        response = requests.get(NEWSAPI_URL, params=params, timeout=30)
        response.raise_for_status()
        return response.json().get("articles", [])
    except Exception as e:
        # Best-effort: callers treat an empty list as "nothing fetched".
        print(f"Error fetching news from NewsAPI: {str(e)}")
        return []

def extract_text_from_articles(articles):
    """Extract relevant text from news articles.

    For each article the title, description and content are joined with
    single spaces, skipping fields that are missing, ``None`` or empty.
    Articles whose combined text is blank are omitted.

    Returns:
        List of combined strings, in the same order as the kept articles.
    """
    collected = []
    for entry in articles:
        parts = [entry.get(key, "") for key in ("title", "description", "content")]
        merged = " ".join(part for part in parts if part)
        if not merged.strip():
            continue
        collected.append(merged)
    return collected
def chunk_text(text):
    """Split ``text`` into overlapping chunks.

    Uses a ``RecursiveCharacterTextSplitter`` configured with
    ``chunk_size=1000`` and ``chunk_overlap=200``, measured with ``len``.
    """
    return RecursiveCharacterTextSplitter(
        length_function=len,
        chunk_size=1000,
        chunk_overlap=200,
    ).split_text(text)

def create_vector_store(text_chunks, embedder):
    """Embed ``text_chunks`` and add the vectors to a flat L2 FAISS index.

    Args:
        text_chunks: Texts to embed; returned unchanged as the third element.
        embedder: Object whose ``encode`` method returns a 2-D array of vectors.

    Returns:
        A ``(index, embeddings, text_chunks)`` tuple.
    """
    chunk_vectors = embedder.encode(text_chunks, show_progress_bar=True)
    vector_index = faiss.IndexFlatL2(chunk_vectors.shape[1])
    vector_index.add(chunk_vectors)
    return vector_index, chunk_vectors, text_chunks

# Process PDF files
# NOTE(review): requires extract_text_from_pdf to be defined before this point.
# Blank entries are discarded: "".split(",") yields [""], which is truthy, so
# without this filter the "No PDF files provided." branch could never run.
pdf_files = input("Enter the paths to your PDF files : ").split(",")
pdf_files = [f.strip() for f in pdf_files if f.strip()]

if not pdf_files:
    print("No PDF files provided.")
    # Stop with SystemExit rather than exit(): inside a notebook kernel exit()
    # requests a kernel shutdown instead of halting the cell, so the query loop
    # further down could still start without a vector store.
    # NOTE(review): confirm against the Jupyter version in use.
    raise SystemExit

print("Processing PDFs...")
all_chunks = []
for file_path in pdf_files:
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        continue
    text = extract_text_from_pdf(file_path)
    if not text:
        print(f"No text extracted from {file_path}")
        continue
    chunks = chunk_text(text)
    all_chunks.extend(chunks)

# Create vector store
if not all_chunks:
    print("No valid documents processed.")
    raise SystemExit

# `documents` is the chunk list the query loop indexes into after each search.
vector_store, embeddings, documents = create_vector_store(all_chunks, embedder)
print("Documents processed and vector store created!")

# Query loop: for each question, retrieve the closest chunks and ask Gemini
# to answer from them.

# The model handle does not depend on the query, so build it once.
model = genai.GenerativeModel("gemini-2.5-pro-exp-03-25")

while True:
    query = input("Ask a question about the documents (or type 'exit' to quit): ")
    # Ignore surrounding whitespace so " exit " also leaves the loop.
    if query.strip().lower() == 'exit':
        break
    if not query.strip():
        print("Please enter a valid question.")
        continue

    print("Generating response...")

    query_embedding = embedder.encode([query])[0]

    # Search for relevant chunks.
    # Never request more neighbours than there are chunks: FAISS pads missing
    # results with index -1, and documents[-1] would silently repeat the last
    # chunk in the context.
    top_k = min(3, len(documents))
    D, I = vector_store.search(np.array([query_embedding]), k=top_k)
    context = [documents[i] for i in I[0] if i >= 0]
    context_text = "\n".join(context)

    # Prepare prompt
    prompt = f"""
    You are an expert assistant. Use the following context to answer the user's question accurately and concisely.
    If the context doesn't contain enough information, say so and provide a general answer if possible.

    Context:
    {context_text}

    Question:
    {query}

    ###instructions:
    -generate clear response according to the query i.e maintain relevancy
    -in case user asks for mcq questions generation,maintain variability and uniqueness among generated response
    """

    # Generate response
    try:
        response = model.generate_content(prompt)
        print("\nAnswer:")
        print(response.text)
    except Exception as e:
        print(f"Error generating response: {str(e)}")
    print("\n")