In [1]:
import json
import os

import torch
from transformers import AutoTokenizer, AutoModel
from langchain.docstore.document import Document
from langchain_community.llms import Ollama
from langchain.chains import RetrievalQA
from langchain.indexes.vectorstore import VectorstoreIndexCreator
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import FAISS  # Using FAISS for vector storage
from langchain_groq import ChatGroq

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the article dump from disk; `data` is iterated article-by-article below.
with open('news_article.json', encoding='utf-8') as json_file:
    data = json.load(json_file)

# Helper: split text into word-count-limited pieces
def chunk_text(text, max_length=512):
    """Lazily yield consecutive chunks of ``text``.

    The text is split on whitespace and regrouped into runs of at most
    ``max_length`` words; each run is re-joined with single spaces, so the
    original whitespace (newlines, repeated spaces) is not preserved.

    Args:
        text: The string to split.
        max_length: Maximum number of words per chunk.

    Yields:
        One string per chunk, in order. Nothing is yielded when ``text``
        contains no words.
    """
    tokens = text.split()
    offsets = range(0, len(tokens), max_length)
    yield from (' '.join(tokens[lo:lo + max_length]) for lo in offsets)

# Build the documents to index: one Document per chunk of each article.
#
# Previously every article was stored as a single Document and `chunk_text`
# was never called. Embedding a whole article as one vector means any text
# beyond the embedding model's input limit cannot influence retrieval, and the
# "stuff" chain then has to fit entire articles into the LLM prompt.
# Chunking keeps each indexed piece small.
#
# NOTE(review): 300 words per chunk is chosen to stay under the 512-token input
# limit that BAAI/bge-small-en-v1.5 is understood to have -- confirm against the
# model card and tune as needed.
CHUNK_WORDS = 300

documents = []
for article in data:
    title = article.get('title', 'No Title')
    # `or` (rather than a .get default) also covers an explicit null/empty body,
    # which would otherwise be passed to Document as page_content.
    content = article.get('articleBody') or 'No Content'
    # chunk_text re-joins words with single spaces, so paragraph breaks are
    # flattened inside each chunk; a whitespace-only body produces no chunks.
    for chunk_index, chunk in enumerate(chunk_text(content, max_length=CHUNK_WORDS)):
        documents.append(
            Document(
                page_content=chunk,
                # `chunk` records the position of this piece within its article.
                metadata={'title': title, 'chunk': chunk_index},
            )
        )

# Embedding model used to vectorise the documents.
# Alternative model name: sentence-transformers/all-MiniLM-l6-v2
huggingface_embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",
    model_kwargs=dict(device='cpu'),                # load the model on the CPU
    encode_kwargs=dict(normalize_embeddings=True),  # request normalised vectors
)



In [3]:
# Embed every document and build a FAISS index over the resulting vectors.
# Nothing is written to disk in this cell, so the index is rebuilt on each run.
vector_store = FAISS.from_documents(
    documents=documents,
    embedding=huggingface_embeddings,
)

In [4]:
# Initialize the language model.
# Local alternative: llm = Ollama(model='llama3')
#
# The Groq API key is read from the GROQ_API_KEY environment variable so that no
# credential is stored in the notebook; a KeyError here means it is not set.
# SECURITY: a key was previously committed in this cell -- treat it as leaked
# and revoke it in the Groq console.
llm = ChatGroq(
    groq_api_key=os.environ["GROQ_API_KEY"],
    model_name="Llama3-8b-8192",
)

In [5]:
# Wire the LLM to the vector store through a "stuff"-type RetrievalQA chain.
retriever = vector_store.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)

In [6]:
# Ask a question.
# `Chain.run` is deprecated (it emits a deprecation warning); `invoke` is the
# supported entry point. RetrievalQA takes its input under the "query" key and
# returns the answer under the "result" key.
query = "What happened at the Al-Shifa Hospital?"
result = qa.invoke({"query": query})["result"]
print(result)

  warn_deprecated(


According to the provided context, the Al-Shifa Hospital in Gaza City was raided by Israeli forces on March 18, 2024. The raid was part of a larger military operation aimed at dismantling Hamas' command structure in northern Gaza.

According to reports, the Israeli military said they were conducting "precise operational activities against terrorists" located at Al-Shifa. The hospital was heavily damaged, and thousands of people were sheltering inside the complex at the time of the raid.

Eyewitnesses reported heavy firing in the vicinity, and some residents claimed that their homes were shelled, with children buried under the rubble. The hospital's medical staff and patients were trapped inside, with limited access to food, water, and medical supplies.

Some patients and medical personnel were detained by Israeli troops, and some reported being beaten and mistreated. A Palestinian paramedic who was detained for three days alleged that he was stripped naked, beaten, and left outside in 

In [8]:
# Ask a question.
# Uses `invoke` instead of the deprecated `Chain.run`; RetrievalQA reads the
# question from the "query" key and returns the answer under "result".
query = "Tell me something about Yemen air strikes in 2024"
result = qa.invoke({"query": query})["result"]
print(result)

Based on the provided text, here are some key points about Yemen air strikes in 2024:

1. **US-British aggression**: The US and UK launched airstrikes in Yemen, targeting the provinces of Hodeidah and Saada.
2. **Multiple airstrikes**: The airstrikes were carried out in multiple locations, including the coastal province of Hodeidah and the Saada Province.
3. **Red Sea strikes**: The US-British aggression aircraft targeted the al-Irj area in the Bajil District and the Ras Issa area in the al-Salif District, both northwest of Hodeidah, with 13 airstrikes.
4. **Saada Province strikes**: The airstrikes also hit the Takhya area in the Majz District in the Saada Province, north of the country, with 5 airstrikes.
5. **Yemeni Armed Forces response**: The Yemeni Armed Forces responded to the US-British aggression with naval missiles, targeting the American ship "Pinocchio" in the Red Sea, according to Brigadier General Yahya Saree, the spokesperson for the Yemeni Armed Forces.
6. **Escalation**