In [2]:
import os
import chromadb
import json

import shutil  # ✅ Properly handles folder deletion

import requests
import time


In [3]:
# Location of the persisted ChromaDB store and the collection holding the articles
CHROMA_DB_PATH = "./chroma_db"
COLLECTION_NAME = "news_articles"

# Open the on-disk store and look up the (already existing) collection
client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
collection = client.get_collection(COLLECTION_NAME)

# Pull every stored record in one call
data = collection.get()

# Split the payload into article bodies and their per-article metadata;
# titles and urls are read out of each metadata entry
documents = data["documents"]
metadata = data["metadatas"]
titles = [entry["title"] for entry in metadata]
urls = [entry["url"] for entry in metadata]

print(f"📊 Loaded {len(documents)} articles from ChromaDB.")


📊 Loaded 50 articles from ChromaDB.


## Simple query to retrieve similar articles

In [4]:
def retrieve_similar_articles(query, n_results=3):
    """Query the ChromaDB collection for the articles closest to ``query``.

    Args:
        query: Text to search for.
        n_results: Maximum number of hits to request (default 3).

    Returns:
        A list of dicts with "title", "url" and "content" keys, one per hit,
        in the order the collection returned them.
    """
    hits = collection.query(query_texts=[query], n_results=n_results)

    def _as_article(position, text):
        # Results are nested per query text; a single query is sent, so slot 0 is used.
        meta = hits["metadatas"][0][position]
        return {"title": meta["title"], "url": meta["url"], "content": text}

    return [_as_article(position, text) for position, text in enumerate(hits["documents"][0])]

# Try the retriever on an example question
query = "Is inflation increasing?"
retrieved_context = retrieve_similar_articles(query)

# Show a numbered list of hits with the first 300 characters of each article
for rank, article in enumerate(retrieved_context, start=1):
    print(f"🔹 {rank}. {article['title']} ({article['url']})\n{article['content'][:300]}...\n")


🔹 1. Keppel Ltd. (OTCMKTS:KPELY) Short Interest Down 8.3% in January (https://www.marketbeat.com/instant-alerts/keppel-ltd-otcmktskpely-short-interest-down-83-in-january-2025-02-17/)
Keppel Ltd. (OTCMKTS:KPELY) Short Interest Down 8.3% in January Keppel Ltd. (OTCMKTS:KPELY - Get Free Report) was the recipient of a large decline in short interest in the month of January. As of January 31st, there was short interest totalling 7,700 shares, a decline of 8.3% from the January 15th t...

🔹 2. Presidents or dictators? Battle for power raises alarm as Trump turns to the Supreme Court (https://www.alternet.org/trump-executive-power/)
Presidents or dictators? Battle for power raises alarm as Trump turns to the Supreme Court It is well to remind ourselves that today is President’s Day, not Dictator’s Day.

Of all the things the framers of the Constitution worried about, their biggest worry was that a president would become as powe...

🔹 3. Keppel REIT (OTCMKTS:KREVF) Sees Significant Decline in 

### Expand Content via APIs

- Fact-checking via external APIs
- Google Fact Check Tools API

In [7]:
def get_fact_check_results(query):
    """Retrieve fact-check results from the Google Fact Check API.

    The API key is read from the ``GOOGLE_FACT_CHECK_API_KEY`` environment
    variable rather than being embedded in the notebook, so it is not saved
    with the file. NOTE(review): the key that used to be hardcoded here should
    be treated as exposed and rotated.

    Args:
        query: Free-text claim or question to look up.

    Returns:
        The list stored under "claims" in the JSON response, or an empty list
        when the key is not configured, the request fails, the API answers
        with a non-200 status, the body is not valid JSON, or no claims are
        present.
    """
    api_key = os.environ.get("GOOGLE_FACT_CHECK_API_KEY")
    if not api_key:
        # Bail out before any network traffic when no credential is available.
        print("⚠️ GOOGLE_FACT_CHECK_API_KEY is not set; skipping Google Fact Check lookup")
        return []

    params = {
        "query": query,
        "key": api_key
    }

    try:
        # FACT_CHECK_APIS is not defined in this cell; it must already exist in
        # the notebook namespace. The timeout keeps a stalled connection from
        # hanging the cell indefinitely.
        response = requests.get(FACT_CHECK_APIS[0], params=params, timeout=10)
    except requests.RequestException as exc:
        print(f"⚠️ Google Fact Check API request failed: {exc}")
        return []

    if response.status_code != 200:
        print(f"⚠️ Google Fact Check API returned status {response.status_code}")
        return []

    try:
        payload = response.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not JSON.
        print("⚠️ Google Fact Check API returned a non-JSON response")
        return []

    return payload.get("claims", [])


### Pre-Process Retrieved Context

In [9]:
def _normalize_fact(fact):
    """Map one fact-check entry onto flat title/rating/claim/url keys.

    Flat keys win when present. Otherwise values are taken from the raw claim
    layout of the Google Fact Check Tools API ("text" plus the first entry of
    "claimReview" with "title", "textualRating" and "url"). Anything still
    missing becomes "N/A".
    """
    reviews = fact.get("claimReview") or [{}]
    review = reviews[0]  # only the first review of a claim is used
    return {
        "title": fact.get("title", review.get("title", "N/A")),
        "rating": fact.get("rating", review.get("textualRating", "N/A")),
        "claim": fact.get("claim", fact.get("text", "N/A")),
        "url": fact.get("url", review.get("url", "N/A")),
    }


def merge_context(query, retrieved_articles, fact_check_data):
    """Combine ChromaDB results + external fact-checking data into structured context.

    Args:
        query: The original user query; placed at the top of the context.
        retrieved_articles: Iterable of dicts with "title", "url" and "content"
            keys. Each article contributes its first 500 characters.
        fact_check_data: Optional list of fact-check dicts. Entries may use the
            flat "title"/"rating"/"claim"/"url" keys or the raw claim layout
            returned by ``get_fact_check_results``; both are accepted. A falsy
            value (empty list or None) omits the fact-checking section.

    Returns:
        The assembled context as a single string.
    """
    parts = [f"🔎 Query: {query}\n\n", "📌 Similar Articles:\n"]

    # Retrieved articles from ChromaDB
    for article in retrieved_articles:
        parts.append(f"🔹 {article['title']} ({article['url']})\n")
        parts.append(f"{article['content'][:500]}...\n\n")

    # Fact-checking results, normalized so either input shape renders the same
    if fact_check_data:
        parts.append("🛑 Fact-Checking Sources:\n")
        for fact in map(_normalize_fact, fact_check_data):
            parts.append(f"🔹 {fact['title']} ({fact['rating']})\n")
            parts.append(f"Claim: {fact['claim']}\n")
            parts.append(f"🔗 {fact['url']}\n\n")

    # Join once instead of growing a string with repeated +=
    return "".join(parts)

# Assemble the combined context from the query, the retrieved articles and the fact-check data.
# NOTE(review): fact_check_data is not assigned in any cell visible here — confirm it is
# defined before this cell runs.
final_context = merge_context(query, retrieved_context, fact_check_data)

# Preview only the first 1000 characters
context_preview = final_context[:1000]
print(context_preview)


🔎 Query: Is inflation increasing?

📌 Similar Articles:
🔹 Keppel Ltd. (OTCMKTS:KPELY) Short Interest Down 8.3% in January (https://www.marketbeat.com/instant-alerts/keppel-ltd-otcmktskpely-short-interest-down-83-in-january-2025-02-17/)
Keppel Ltd. (OTCMKTS:KPELY) Short Interest Down 8.3% in January Keppel Ltd. (OTCMKTS:KPELY - Get Free Report) was the recipient of a large decline in short interest in the month of January. As of January 31st, there was short interest totalling 7,700 shares, a decline of 8.3% from the January 15th total of 8,400 shares. Based on an average daily volume of 2,800 shares, the short-interest ratio is presently 2.8 days.

Get Keppel alerts: Sign Up

Keppel Price Performance

Shares of KPELY opened at...

🔹 Presidents or dictators? Battle for power raises alarm as Trump turns to the Supreme Court (https://www.alternet.org/trump-executive-power/)
Presidents or dictators? Battle for power raises alarm as Trump turns to the Supreme Court It is well to remind ourse

### Send RAG-Enhanced Context to LLM

In [None]:
import ollama
import json
import random
import chromadb

# ✅ Connect to ChromaDB
CHROMA_DB_PATH = "./chroma_db"
COLLECTION_NAME = "news_articles"

client = chromadb.PersistentClient(path=CHROMA_DB_PATH)
collection = client.get_collection(COLLECTION_NAME)

# ✅ Toggle Settings
NUM_TEXTS = 5  # Adjust how many texts to analyze
RANDOMIZE = True  # Set to False to take the first N entries
RANDOM_SEED = None  # Set to an int to make the random selection reproducible
SAVE_RESULTS = True  # Set to False to disable saving

# ✅ Retrieve articles from ChromaDB
data = collection.get()
documents = data["documents"]  # List of all stored article contents

# ✅ Ensure we have articles available
# Raise instead of calling exit(): inside a notebook, exit() asks the kernel to
# shut down (discarding all state) rather than simply stopping this cell.
if len(documents) == 0:
    raise RuntimeError("⚠️ No articles found in ChromaDB!")

# ✅ Select NUM_TEXTS articles
if RANDOMIZE:
    # With no seed configured, keep using the module-level generator as before;
    # with a seed, use a dedicated generator so the selection is repeatable.
    sampler = random if RANDOM_SEED is None else random.Random(RANDOM_SEED)
    selected_texts = sampler.sample(documents, min(NUM_TEXTS, len(documents)))
else:
    selected_texts = documents[:NUM_TEXTS]

print(f"🔹 Running analysis on {len(selected_texts)} texts (Randomized: {RANDOMIZE})\n")

# ✅ LLM Analysis Function
def analyze_with_llm(context):
    """Ask the Deepseek-R1 model, via Ollama, to assess the given context.

    Args:
        context: Text inserted into the fact-checking prompt.

    Returns:
        The content of the message in the model's chat response.
    """
    instruction = f"""
    You are a fact-checking AI. Given the following expanded context, determine if the original query is true, false, or misleading.
    
    {context}

    Provide a credibility score from 0 (completely false) to 100 (completely true).
    """

    # Single-turn conversation: the whole prompt is sent as one user message
    chat_messages = [{"role": "user", "content": instruction}]
    reply = ollama.chat(model="deepseek-r1", messages=chat_messages)

    return reply["message"]["content"]

# Send each selected article to the model and collect the responses
results = []
total_texts = len(selected_texts)
for position, context in enumerate(selected_texts, start=1):
    print(f"🔎 **Processing Article {position}/{total_texts}**...")
    llm_output = analyze_with_llm(context)
    print(f"📢 **LLM Response:**\n{llm_output}\n")

    # Keep only the first 100 characters of the article alongside the response
    snippet = context[:100] + "..."
    results.append({"query": snippet, "response": llm_output})

# Write the collected results to disk unless saving is switched off
if not SAVE_RESULTS:
    print("🔹 Results not saved (SAVE_RESULTS = False)")
else:
    with open("llm_results.json", "w", encoding="utf-8") as f:
        json.dump(results, f, indent=4)

    print(f"✅ Results saved to `llm_results.json`")


🔹 Running analysis on 5 texts (Randomized: True)

🔎 **Processing Article 1/5**...
