# NEWS AGGREGATOR

Installations

In [6]:
%pip install --upgrade --quiet  langchain langchain-community langchain-openai langchain-experimental neo4j wikipedia tiktoken yfiles_jupyter_graphs

In [7]:
!pip install --upgrade --quiet newsapi-python requests

Imports

In [8]:
import os
from google.colab import userdata
from typing import Tuple, List, Optional
from yfiles_jupyter_graphs import GraphWidget
from neo4j import GraphDatabase
from langchain_community.vectorstores import Neo4jVector
from langchain_community.graphs import Neo4jGraph

In [9]:
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import ConfigurableField
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts.prompt import PromptTemplate

In [10]:
from langchain_core.runnables import (
    RunnableBranch,
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)

In [11]:
try:
  import google.colab
  from google.colab import output
  output.enable_custom_widget_manager()
except:
  pass

API Keys

In [12]:
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
NEWSAPI_KEY = userdata.get('NEWSAPI_KEY')

In [13]:
NEWSAPI_KEY

'7fc3b87a369f46e5922f0df9182dbfb7'

Credentials

In [14]:
NEO4J_URI="neo4j+s://87dc4a97.databases.neo4j.io"
NEO4J_USERNAME="neo4j"
NEO4J_PASSWORD="2_kgZguSlSe8VXM5fmFa4eRaPMAdykeBoKPdz_D6SGc"

Updating environment

In [15]:
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
os.environ["NEO4J_URI"] = NEO4J_URI
os.environ["NEO4J_USERNAME"] = NEO4J_USERNAME
os.environ["NEO4J_PASSWORD"] = NEO4J_PASSWORD
os.environ["NEWSAPI_KEY"] = NEWSAPI_KEY

Graphs   


Future Error:
<ipython-input-13-6e7385464d00>:1: LangChainDeprecationWarning: The class `Neo4jGraph` was deprecated in LangChain 0.3.8 and will be removed in 1.0. An updated version of the class exists in the :class:`~langchain-neo4j package and should be used instead. To use it run `pip install -U :class:`~langchain-neo4j` and import as `from :class:`~langchain_neo4j import Neo4jGraph``.
  graph = Neo4jGraph()

In [35]:
graph = Neo4jGraph()

In [38]:
# import requests
# import json

# def fetch_articles(api_key, query, filename):
#     url = f"https://newsapi.org/v2/everything?q={query}&from=2025-01-02&sortBy=popularity&apiKey={api_key}"
#     response = requests.get(url)
#     articles = response.json().get('articles', [])

#     # Store articles in a text file
#     with open(filename, 'w') as file:
#         for article in articles:
#             title = article.get('title')
#             description = article.get('description')
#             url = article.get('url')
#             file.write(f"Title: {title}\nDescription: {description}\nURL: {url}\n\n")

# # Example usage
# api_key = 'YOUR_API_KEY'
# fetch_articles(api_key, 'Apple', 'apple_articles.txt')

In [48]:
import requests
import time
import os
from typing import List, Dict
from datetime import datetime, timedelta

def count_words(text: str) -> int:
    """Count words in a text string."""
    if not text:
        return 0
    return len(text.split())

def create_safe_filename(topic: str, title: str) -> str:
    """Create a safe filename from topic and title."""
    # Remove or replace invalid filename characters
    invalid_chars = '<>:"/\\|?*'
    safe_title = ''.join(c if c not in invalid_chars else '_' for c in title)
    safe_title = safe_title[:100]  # Limit length
    return f"{topic}_{safe_title}.txt"

def fetch_multiple_topics(api_key: str, topics: List[str], database_folder: str = "database") -> Dict[str, int]:
    """
    Fetch articles for multiple topics and save each article to a separate file.

    Args:
        api_key: NewsAPI key
        topics: List of topics to fetch articles for
        database_folder: Folder to store article files
        days_from: Number of days from today to fetch articles

    Returns:
        Dictionary with topics and their saved article counts
    """
    # Ensure database folder exists
    os.makedirs(database_folder, exist_ok=True)
    article_counts = {topic: 0 for topic in topics}

    for topic in topics:
        try:
            url = (
                f"https://newsapi.org/v2/everything"
                f"?q={topic}"
                f"&sortBy=popularity"
                f"&pageSize=100"
                f"&apiKey={api_key}"
            )

            response = requests.get(url)
            response.raise_for_status()

            articles = response.json().get('articles', [])

            for article in articles:
                title = article.get('title', 'No title')
                content = article.get('content', '')
                description = article.get('description', '')

                # Combine content and description for word count
                full_text = f"{content}\n{description}".strip()
                word_count = count_words(full_text)

                # Skip if content is too short
                if word_count < 10:
                    continue

                # Create filename using topic and title
                filename = create_safe_filename(topic, title)
                filepath = os.path.join(database_folder, filename)

                # Write article to file
                with open(filepath, 'w', encoding='utf-8') as file:
                    # Write metadata header
                    file.write("=" * 50 + "\n")
                    file.write(f"Topic: {topic}\n")
                    file.write(f"Title: {title}\n")
                    file.write(f"Published: {article.get('publishedAt', 'No date')}\n")
                    file.write(f"Source: {article.get('source', {}).get('name', 'Unknown')}\n")
                    file.write(f"URL: {article.get('url', 'No URL')}\n")
                    file.write(f"Word Count: {word_count}\n")
                    file.write("=" * 50 + "\n\n")

                    # Write content
                    file.write(full_text)

                article_counts[topic] += 1

            # Sleep to respect API rate limits
            time.sleep(1)

        except requests.exceptions.RequestException as e:
            print(f"Error fetching articles for {topic}: {str(e)}")
            continue

    return article_counts

# Example usage
if __name__ == "__main__":
    API_KEY = NEWSAPI_KEY  # Get API key from environment variable

    topics = [
        "technology",
        "artificial intelligence",
        "economics",
        "politics",
        "climate change"
    ]

    results = fetch_multiple_topics(
        api_key=API_KEY,
        topics=topics,
        database_folder="database"
    )

    # Print summary
    print("\nArticles saved (400+ words only):")
    for topic, count in results.items():
        print(f"{topic}: {count} articles")


Articles saved (400+ words only):
technology: 85 articles
artificial intelligence: 96 articles
economics: 100 articles
politics: 97 articles
climate change: 96 articles


In [None]:
# import requests

# def fetch_article_metadata(api_key, query):
#     url = f"https://newsapi.org/v2/everything?q={query}&sortBy=popularity&apiKey={api_key}"
#     response = requests.get(url)
#     articles = response.json().get('articles', [])
#     return articles

# # Example usage
# api_key = NEWSAPI_KEY
# articles = fetch_article_metadata(api_key, 'Apple')
# for article in articles:
#     print(article['title'], article['url'])

In [None]:
# import requests
# from bs4 import BeautifulSoup

# def scrape_article_content(article_url):
#     response = requests.get(article_url)
#     soup = BeautifulSoup(response.content, 'html.parser')

#     # This is a simple example; you may need to adjust selectors based on the website structure.
#     paragraphs = soup.find_all('p')
#     article_content = '\n'.join([para.get_text() for para in paragraphs])

#     return article_content

# # Example usage
# for article in articles:
#     url = article['url']
#     content = scrape_article_content(url)
#     print(content)  # You can also save this content to a file


In [None]:
# def save_article_to_file(title, content):
#     filename = f"{title}.txt".replace('/', '-')  # Replace slashes for valid filename
#     with open(filename, 'w', encoding='utf-8') as file:
#         file.write(content)

# # Example usage
# for article in articles:
#     title = article['title']
#     url = article['url']
#     content = scrape_article_content(url)
#     save_article_to_file(title, content)


Database Headaches

In [None]:
!pip install pinecone-client openai langchain-core tqdm

In [51]:
import pinecone
from openai import OpenAI
import os
from typing import List, Dict
import json
from tqdm import tqdm
from langchain.text_splitter import RecursiveCharacterTextSplitter
import time

In [53]:
PINECONE_API_KEY = userdata.get('PINECONE_API_KEY')

In [54]:
from pinecone import Pinecone, ServerlessSpec



In [None]:
# class ArticleRAG:
#     def __init__(self, database_folder: str = "database", index_name: str = "articles-embeddings"):
#         """
#         Initialize RAG system with Pinecone

#         Args:
#             database_folder: Folder containing article files
#             index_name: Name for the Pinecone index
#         """
#         # Initialize OpenAI client
#         self.openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

#         # Initialize Pinecone
#         self.pc = Pinecone(PINECONE_API_KEY)
#         self.index_name = index_name

#         # Create index if it doesn't exist
#         existing_indexes = [index.name for index in self.pc.list_indexes()]
#         if index_name not in existing_indexes:
#             self.pc.create_index(
#                 name=index_name,
#                 dimension=1536,  # dimension for text-embedding-3-small
#                 metric="cosine",
#                 spec=ServerlessSpec(
#                         cloud="aws",
#                         region="us-east-1"
#                      )
#             )
#             print(f"Created new Pinecone index: {index_name}")
#             # Wait for index to be ready
#             time.sleep(20)

#         self.database_folder = database_folder

#         # Initialize text splitter
#         self.text_splitter = RecursiveCharacterTextSplitter(
#             chunk_size=1000,
#             chunk_overlap=200,
#             length_function=len
#         )

#     def get_embedding(self, text: str) -> List[float]:
#         """Get embeddings using OpenAI API."""
#         response = self.openai_client.embeddings.create(
#             input=text,
#             model="text-embedding-3-small"
#         )
#         return response.data[0].embedding

#     def read_article_file(self, filepath: str) -> Dict:
#         """Read and parse article file with metadata."""
#         with open(filepath, 'r', encoding='utf-8') as file:
#             content = file.read()

#             # Split metadata and content
#             parts = content.split('='*50)
#             metadata_text = parts[1].strip()
#             article_content = parts[2].strip()

#             # Parse metadata
#             metadata = {}
#             for line in metadata_text.split('\n'):
#                 if ':' in line:
#                     key, value = line.split(':', 1)
#                     metadata[key.strip()] = value.strip()

#             return {
#                 'metadata': metadata,
#                 'content': article_content
#             }

#     def process_and_embed_articles(self, batch_size: int = 100):
#         """Process all articles and add them to Pinecone."""
#         print("Processing and embedding articles...")

#         vectors_to_upsert = []

#         for filename in tqdm(os.listdir(self.database_folder)):
#             if not filename.endswith('.txt'):
#                 continue

#             filepath = os.path.join(self.database_folder, filename)
#             article_data = self.read_article_file(filepath)

#             # Split content into chunks
#             chunks = self.text_splitter.split_text(article_data['content'])

#             # Prepare metadata for each chunk
#             metadata = article_data['metadata']

#             # Process chunks
#             for i, chunk in enumerate(chunks):
#                 chunk_metadata = {
#                     **metadata,
#                     'chunk_index': i,
#                     'total_chunks': len(chunks),
#                     'content': chunk  # Store content in metadata for retrieval
#                 }

#                 # Create unique ID for each chunk
#                 vector_id = f"{filename}_{i}"

#                 try:
#                     embedding = self.get_embedding(chunk)
#                     vectors_to_upsert.append((vector_id, embedding, chunk_metadata))

#                     # Batch upsert when we reach batch_size
#                     if len(vectors_to_upsert) >= batch_size:
#                         self._upsert_batch(vectors_to_upsert)
#                         vectors_to_upsert = []

#                 except Exception as e:
#                     print(f"Error processing chunk {vector_id}: {str(e)}")

#         # Upsert any remaining vectors
#         if vectors_to_upsert:
#             self._upsert_batch(vectors_to_upsert)

#     def _upsert_batch(self, vectors):
#         """Upsert a batch of vectors to Pinecone."""
#         try:
#             self.pc.Index(self.index_name).upsert(
#                 vectors=[(id, embedding, metadata) for id, embedding, metadata in vectors]
#             )
#         except Exception as e:
#             print(f"Error upserting batch: {str(e)}")

#     def query(self, query_text: str, n_results: int = 5) -> List[Dict]:
#         """
#         Query the vector database

#         Args:
#             query_text: Text to search for
#             n_results: Number of results to return

#         Returns:
#             List of results with content and metadata
#         """
#         query_embedding = self.get_embedding(query_text)

#         results = self.pc.Index(self.index_name).query(
#             vector=query_embedding,
#             top_k=n_results,
#             include_metadata=True
#         )

#         # Format results
#         formatted_results = []
#         for match in results.matches:
#             formatted_results.append({
#                 'content': match.metadata['content'],
#                 'metadata': {k:v for k,v in match.metadata.items() if k != 'content'},
#                 'score': match.score
#             })

#         return formatted_results

#     def get_collection_stats(self) -> Dict:
#         """Get statistics about the collection."""
#         stats = self.pc.Index(self.index_name).describe_index_stats()
#         return {
#             'total_vectors': stats.total_vector_count,
#             'dimension': stats.dimension
#         }

# # Example usage
# if __name__ == "__main__":
#     # Initialize RAG system
#     rag = ArticleRAG()

#     # Process and embed all articles
#     rag.process_and_embed_articles()

#     # Print collection statistics
#     stats = rag.get_collection_stats()
#     print("\nCollection Statistics:")
#     print(f"Total Vectors: {stats['total_vectors']}")
#     print(f"Embedding Dimension: {stats['dimension']}")

#     # Example query
#     query = "What are the latest developments in artificial intelligence?"
#     results = rag.query(query)

#     print(f"\nQuery: {query}")
#     print("\nTop Results:")
#     for i, result in enumerate(results, 1):
#         print(f"\n{i}. {result['metadata']['Title']}")
#         print(f"Topic: {result['metadata']['Topic']}")
#         print(f"Source: {result['metadata']['Source']}")
#         print(f"Relevance Score: {result['score']:.2f}")
#         print(f"Content Preview: {result['content'][:200]}...")

In [71]:
import hashlib

In [61]:
from pinecone import Pinecone
from sentence_transformers import SentenceTransformer
import os
from typing import List, Dict
import json
from tqdm import tqdm
from langchain.text_splitter import RecursiveCharacterTextSplitter
import time
import torch

In [76]:
class ArticleRAG:
    def __init__(self, database_folder: str = "database", index_name: str = "articles-embeddings", force_recreate: bool = False):
        """
        Initialize RAG system with Pinecone and HuggingFace model

        Args:
            database_folder: Folder containing article files
            index_name: Name for the Pinecone index
            force_recreate: If True, deletes existing index and creates a new one
        """
        # Initialize embedding model
        print("Loading embedding model...")
        self.model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
        self.embedding_dimension = self.model.get_sentence_embedding_dimension()

        # Set device (GPU if available)
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.model = self.model.to(self.device)

        # Initialize Pinecone
        self.pc = Pinecone(PINECONE_API_KEY)
        self.index_name = index_name

        # Handle existing index
        existing_indexes = [index.name for index in self.pc.list_indexes()]

        if index_name in existing_indexes:
            if force_recreate:
                print(f"Deleting existing index {index_name}...")
                self.pc.delete_index(index_name)
                time.sleep(20)  # Wait for deletion
            else:
                # Check dimension of existing index
                index_stats = self.pc.Index(index_name).describe_index_stats()
                if index_stats.dimension != self.embedding_dimension:
                    raise ValueError(
                        f"Existing index dimension ({index_stats.dimension}) doesn't match model dimension "
                        f"({self.embedding_dimension}). Use force_recreate=True to recreate the index."
                    )

        # Create index if it doesn't exist
        if index_name not in existing_indexes or force_recreate:
            print(f"Creating new Pinecone index: {index_name}")
            self.pc.create_index(
                name=index_name,
                dimension=self.embedding_dimension,
                metric="cosine",
                spec=ServerlessSpec(
                        cloud="aws",
                        region="us-east-1"
                     )
            )
            print("Waiting for index to be ready...")
            time.sleep(20)

        self.database_folder = database_folder

        # Initialize text splitter
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=100,
            length_function=len
        )

    def get_embedding(self, text: str) -> List[float]:
        """Get embeddings using SentenceTransformer model."""
        with torch.no_grad():
            embedding = self.model.encode(text, convert_to_numpy=True)
            return embedding.tolist()

    def read_article_file(self, filepath: str) -> Dict:
        """Read and parse article file with metadata."""
        with open(filepath, 'r', encoding='utf-8') as file:
            content = file.read()

            # Split metadata and content
            parts = content.split('='*50)
            metadata_text = parts[1].strip()
            article_content = parts[2].strip()

            # Parse metadata
            metadata = {}
            for line in metadata_text.split('\n'):
                if ':' in line:
                    key, value = line.split(':', 1)
                    metadata[key.strip()] = value.strip()

            return {
                'metadata': metadata,
                'content': article_content
            }

    def generate_vector_id(self, filename: str, chunk_index: int) -> str:
        """
        Generate a unique, ASCII-compliant vector ID.

        Args:
            filename: Original filename (may contain non-English characters).
            chunk_index: Index of the chunk.

        Returns:
            A unique vector ID containing only English alphabets and numbers.
        """
        # Use only alphanumeric characters in the filename
        sanitized_filename = ''.join(c for c in filename if c.isalnum())

        # Hash the sanitized filename to ensure uniqueness
        unique_hash = hashlib.md5(sanitized_filename[:7].encode('utf-8')).hexdigest()

        # Combine with chunk index to create final vector ID
        return f"{unique_hash}_{chunk_index}"


    def process_and_embed_articles(self, batch_size: int = 50):
        """Process all articles and add them to Pinecone."""
        print("Processing and embedding articles...")

        vectors_to_upsert = []

        for filename in tqdm(os.listdir(self.database_folder)):
            if not filename.endswith('.txt'):
                continue

            filepath = os.path.join(self.database_folder, filename)
            article_data = self.read_article_file(filepath)

            # Split content into chunks
            chunks = self.text_splitter.split_text(article_data['content'])

            # Prepare metadata for each chunk
            metadata = article_data['metadata']

            # Process chunks
            for i, chunk in enumerate(chunks):
                chunk_metadata = {
                    **metadata,
                    'chunk_index': i,
                    'total_chunks': len(chunks),
                    'content': chunk  # Store content in metadata for retrieval
                }

                # Generate valid ASCII vector ID
                sanitized_filename = ''.join(c if c.isalnum() or c in ['_', '-'] else '_' for c in filename)
                vector_id = self.generate_vector_id(filename, i)

                try:
                    embedding = self.get_embedding(chunk)
                    vectors_to_upsert.append((vector_id, embedding, chunk_metadata))

                    # Batch upsert when we reach batch_size
                    if len(vectors_to_upsert) >= batch_size:
                        self._upsert_batch(vectors_to_upsert)
                        vectors_to_upsert = []

                except Exception as e:
                    print(f"Error processing chunk {vector_id}: {str(e)}")

        # Upsert any remaining vectors
        if vectors_to_upsert:
            self._upsert_batch(vectors_to_upsert)


    def _upsert_batch(self, vectors):
        """Upsert a batch of vectors to Pinecone."""
        try:
            self.pc.Index(self.index_name).upsert(
                vectors=[(id, embedding, metadata) for id, embedding, metadata in vectors]
            )
        except Exception as e:
            print(f"Error upserting batch: {str(e)}")

    def query(self, query_text: str, n_results: int = 5) -> List[Dict]:
        """
        Query the vector database

        Args:
            query_text: Text to search for
            n_results: Number of results to return

        Returns:
            List of results with content and metadata
        """
        query_embedding = self.get_embedding(query_text)

        results = self.pc.Index(self.index_name).query(
            vector=query_embedding,
            top_k=n_results,
            include_metadata=True
        )

        # Format results
        formatted_results = []
        for match in results.matches:
            formatted_results.append({
                'content': match.metadata['content'],
                'metadata': {k:v for k,v in match.metadata.items() if k != 'content'},
                'score': match.score
            })

        return formatted_results

    def get_collection_stats(self) -> Dict:
        """Get statistics about the collection."""
        stats = self.pc.Index(self.index_name).describe_index_stats()
        return {
            'total_vectors': stats.total_vector_count,
            'dimension': stats.dimension
        }


In [77]:
if __name__ == "__main__":
    # Initialize RAG system with force_recreate=True to ensure correct dimensions
    rag = ArticleRAG(
        index_name="articles-embeddings",
        force_recreate=True  # This will delete existing index and create new one
    )

    # Process and embed all articles
    rag.process_and_embed_articles()

    # Print collection statistics
    stats = rag.get_collection_stats()
    print("\nCollection Statistics:")
    print(f"Total Vectors: {stats['total_vectors']}")
    print(f"Embedding Dimension: {stats['dimension']}")

    # Example query
    query = "What are the latest developments in artificial intelligence?"
    results = rag.query(query)

    print(f"\nQuery: {query}")
    print("\nTop Results:")
    for i, result in enumerate(results, 1):
        print(f"\n{i}. {result['metadata']['Title']}")
        print(f"Topic: {result['metadata']['Topic']}")
        print(f"Source: {result['metadata']['Source']}")
        print(f"Relevance Score: {result['score']:.2f}")
        print(f"Content Preview: {result['content'][:200]}...")

Loading embedding model...
Deleting existing index articles-embeddings...
Creating new Pinecone index: articles-embeddings
Waiting for index to be ready...
Processing and embedding articles...


100%|██████████| 477/477 [03:17<00:00,  2.41it/s]



Collection Statistics:
Total Vectors: 5
Embedding Dimension: 768

Query: What are the latest developments in artificial intelligence?

Top Results:

1. I Tried to Build a Website Using AI in 3 Hours -- and It Only Took 30 Minutes
Topic: artificial intelligence
Source: CNET
Relevance Score: 0.25
Content Preview: There are many reasons you might need to build a website in 3 hours, and some people are particularly driven to do so -- a creative person who loves the idea of experimenting with artificial intellig…...

2. Exclusive: Read the 9-page memo hyperscaler startup Nscale used to raise a $155 million Series A
Topic: economics
Source: Business Insider
Relevance Score: 0.16
Content Preview: Josh Payne, founder and CEO of Nscale.Nscale
<ul><li>Nscale has raised $155 million in Series A funding for its hyperscaler platform.</li><li>The startup offers everything from access to data center… ...

3. A palm oil company, a group of US financiers, and the destruction of Peru's rainforest
Topic

In [None]:
!pip install fastapi nest-asyncio pyngrok uvicorn

In [80]:
!pip install py2neo

Collecting py2neo
  Downloading py2neo-2021.2.4-py2.py3-none-any.whl.metadata (9.9 kB)
Collecting interchange~=2021.0.4 (from py2neo)
  Downloading interchange-2021.0.4-py2.py3-none-any.whl.metadata (1.9 kB)
Collecting monotonic (from py2neo)
  Downloading monotonic-1.6-py2.py3-none-any.whl.metadata (1.5 kB)
Collecting pansi>=2020.7.3 (from py2neo)
  Downloading pansi-2024.11.0-py2.py3-none-any.whl.metadata (3.1 kB)
Downloading py2neo-2021.2.4-py2.py3-none-any.whl (177 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/177.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m177.2/177.2 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading interchange-2021.0.4-py2.py3-none-any.whl (28 kB)
Downloading pansi-2024.11.0-py2.py3-none-any.whl (26 kB)
Downloading monotonic-1.6-py2.py3-none-any.whl (8.2 kB)
Installing collected packages: monotonic, pansi, interchange, py2neo
Successfully installed interchange-202

In [17]:
# Backend and Neo4j Integration in FastAPI
from fastapi import FastAPI, HTTPException
from py2neo import Graph, Node, Relationship
from pydantic import BaseModel
import uvicorn
import threading
# FastAPI News Endpoint
from typing import List, Dict

In [18]:
app = FastAPI()

# Neo4j Connection
# graph = Graph("bolt://localhost:7687", auth=("neo4j", "password"))
graph = Neo4jGraph()

class FetchNewsRequest(BaseModel):
    user_id: str
    category: str
    custom_category: str

@app.post("/fetch_news/")
def fetch_news(data: FetchNewsRequest):

    news = [
        {"title": f"Latest in {data.category}", "content": "News content about this category..."},
        {"title": f"More on {data.custom_category}", "content": "Custom category-related news..."}
    ]
    return {"news": news}

# Run FastAPI server in a thread
def run_fastapi():
    uvicorn.run(app, host="0.0.0.0", port=8000)

threading.Thread(target=run_fastapi, daemon=True).start()

  graph = Neo4jGraph()
INFO:     Started server process [80372]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


In [None]:
# # FastAPI News Endpoint
# from typing import List, Dict

# class FetchNewsRequest(BaseModel):
#     user_id: str
#     category: str
#     custom_category: str

# @app.post("/fetch_news/")
# def fetch_news(data: FetchNewsRequest):
#     # Example: Fetch news based on user preferences
#     # This could be dynamically generated from a news API or database
#     news = [
#         {"title": f"Latest in {data.category}", "content": "News content about this category..."},
#         {"title": f"More on {data.custom_category}", "content": "Custom category-related news..."}
#     ]
#     return {"news": news}

In [None]:
!pip install gradio

In [19]:
import gradio as gr
import requests

# Backend URL
backend_url = "http://0.0.0.0:8000/"

# Function to send data to backend and fetch news
def fetch_news(user_id, category, custom_category):
    payload = {
        "user_id": user_id,
        "category": category,
        "custom_category": custom_category,
    }
    # Backend endpoint to fetch news (implement this in FastAPI)
    response = requests.post(f"{backend_url}/fetch_news/", json=payload)

    if response.status_code == 200:
        news = response.json().get("news", [])
        cards = "\n\n".join([f"**{n['title']}**\n{n['content']}" for n in news])
        return cards
    else:
        return "Error fetching news!"

# Gradio UI
users = ["1", "2", "3"]

with gr.Blocks() as ui:
    # User selection
    user_dropdown = gr.Dropdown(users, label="Select User", value="1")
    # Category selection
    category_dropdown = gr.Dropdown(["Tech", "Sports", "Finance", "Health"], label="Select Category")
    # Custom category input
    custom_category_input = gr.Textbox(label="Enter Custom Category (optional)")
    # Button to fetch news
    fetch_button = gr.Button("Fetch News")
    # News display area
    news_display = gr.Textbox(label="News", interactive=False, placeholder="News will appear here")

    # Button click event
    fetch_button.click(
        fn=fetch_news,
        inputs=[user_dropdown, category_dropdown, custom_category_input],
        outputs=news_display
    )

ui.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://50549cd626136aae2b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


