<a href="https://colab.research.google.com/github/aatika-hakim/news-ai-agent/blob/main/News_Agent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install langchain langgraph langchain_core

Collecting langchain
  Downloading langchain-0.3.4-py3-none-any.whl.metadata (7.1 kB)
Collecting langgraph
  Downloading langgraph-0.2.39-py3-none-any.whl.metadata (13 kB)
Collecting langchain_core
  Downloading langchain_core-0.3.12-py3-none-any.whl.metadata (6.3 kB)
Collecting langchain-text-splitters<0.4.0,>=0.3.0 (from langchain)
  Downloading langchain_text_splitters-0.3.0-py3-none-any.whl.metadata (2.3 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.137-py3-none-any.whl.metadata (13 kB)
Collecting langgraph-checkpoint<3.0.0,>=2.0.0 (from langgraph)
  Downloading langgraph_checkpoint-2.0.2-py3-none-any.whl.metadata (4.6 kB)
Collecting langgraph-sdk<0.2.0,>=0.1.32 (from langgraph)
  Downloading langgraph_sdk-0.1.33-py3-none-any.whl.metadata (1.8 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain_core)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting jsonpointer>=1.9 (from jsonpatch<2.0,>=1.33->langchain_core)
  Dow

In [3]:
!pip install langchain_groq
from langchain_groq import ChatGroq

Collecting langchain_groq
  Downloading langchain_groq-0.2.0-py3-none-any.whl.metadata (2.9 kB)
Collecting groq<1,>=0.4.1 (from langchain_groq)
  Downloading groq-0.11.0-py3-none-any.whl.metadata (13 kB)
Downloading langchain_groq-0.2.0-py3-none-any.whl (14 kB)
Downloading groq-0.11.0-py3-none-any.whl (106 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m106.5/106.5 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: groq, langchain_groq
Successfully installed groq-0.11.0 langchain_groq-0.2.0


In [6]:
from google.colab import userdata
# Read the Groq API key through Colab's `userdata` helper rather than hardcoding it.
GROQ_API_KEY = userdata.get('GROQ_API_KEY')

# Chat model used for summarization; temperature=0 asks for the least random sampling.
llm = ChatGroq(model= "llama-3.1-70b-versatile", temperature=0, api_key=GROQ_API_KEY)

In [7]:
from google.colab import userdata
# NewsAPI key, read through Colab's `userdata` helper; fetch_news_articles() sends it to newsapi.org.
NEWS_API_KEY = userdata.get('NEWS_API_KEY')

In [8]:
import requests
from typing import List, Dict

def fetch_news_articles(topic: str) -> List[Dict]:
    """Fetch news articles matching ``topic`` from NewsAPI's ``/v2/everything`` endpoint.

    The query is passed through ``params`` so ``requests`` percent-encodes it;
    a topic containing spaces, ``&`` or ``#`` can no longer corrupt the query
    string. A timeout is set so an unresponsive server cannot hang the cell.

    Args:
        topic: Free-text search query.

    Returns:
        The ``articles`` list from the JSON response, or ``[]`` when the
        response has no such key.

    Raises:
        Exception: If the response status code is not 200. Connection errors
            and timeouts raised by ``requests`` propagate unchanged.
    """
    response = requests.get(
        "https://newsapi.org/v2/everything",
        params={"q": topic, "apiKey": NEWS_API_KEY},
        timeout=10,  # seconds
    )
    if response.status_code != 200:
        raise Exception("Error fetching news articles")
    return response.json().get("articles", [])


In [27]:
# prompt: Real-Time News Summarization and Correction Agent
# Build an AI agent that retrieves the latest news articles on a topic, summarizes them, and corrects inaccuracies by cross-referencing other news sources. The system should grade the reliability of the sources and update the summary with more reliable information when needed.
# Add a dynamic retrieval mechanism that adapts based on the recency of events

from langchain_groq import ChatGroq
from google.colab import userdata
import requests
from typing import List, Dict
from datetime import datetime, timedelta

# NOTE(review): these installs run after the imports at the top of this cell and
# repeat the installs from the first cells — presumably redundant; confirm before removing.
!pip install langchain langgraph langchain_core
!pip install langchain_groq

# Groq API key read through Colab's `userdata` helper.
GROQ_API_KEY = userdata.get('GROQ_API_KEY')

# Module-level chat model; summarize_news() below calls it directly.
llm = ChatGroq(model= "llama-3.1-70b-versatile", temperature=0, api_key=GROQ_API_KEY)
# NewsAPI key read by fetch_news_articles().
NEWS_API_KEY = userdata.get('NEWS_API_KEY')

from langgraph.graph import END, START, StateGraph, MessagesState
from typing import TypedDict

class NewsState(TypedDict):
    """Schema for the state of the news summarization agent.

    Passed to ``StateGraph`` as ``state_schema`` by ``NewsSummarizationAgent``.
    """
    # Topic string the agent searches news for.
    topic: str
    # Article records; presumably the dicts returned by fetch_news_articles — confirm.
    articles: list
    # Summary text produced for the topic.
    summary: str
    # Reliability score per source; presumably keyed by source name — confirm.
    sources_reliability: dict

class NewsSummarizationAgent(StateGraph):
    """Sequential news agent: fetch -> summarize -> cross-reference -> update.

    ``StateGraph`` offers ``add_node``/``add_edge`` but no ``add_state`` or
    ``add_transition``, so calling those from ``__init__`` raised
    ``AttributeError``. The workflow is therefore kept in two plain lookup
    tables (``states`` and ``transitions``) and driven by :meth:`run`.

    Every step method receives a ``messages_state`` object exposing
    ``add_message(str)`` and returns the name of the step to run next.
    """

    def __init__(self, topic: str, llm):
        """Store the topic and model and declare the workflow tables.

        Args:
            topic: Search query handed to ``fetch_news_articles``.
            llm: Chat model used to write the summary.
        """
        # The NewsState schema is passed here to define the state structure.
        super().__init__(state_schema=NewsState)
        self.topic = topic
        self.llm = llm
        self.articles = []
        self.summary = ""
        self.sources_reliability = {}

        # Step name -> handler. END has no handler; reaching it stops run().
        self.states = {
            START: self.fetch_news,
            "Summarize": self.summarize_news,
            "CrossReference": self.cross_reference_sources,
            "UpdateSummary": self.update_summary,
            END: None,
        }

        # Allowed (from, to) moves, each guarded by a zero-argument predicate.
        self.transitions = {
            (START, "Summarize"): lambda: True,
            ("Summarize", "CrossReference"): lambda: True,
            ("CrossReference", "UpdateSummary"): lambda: True,
            ("UpdateSummary", END): lambda: True,
        }

    def run(self, messages_state):
        """Execute steps from START until END or until no declared transition applies.

        Args:
            messages_state: Object with an ``add_message(str)`` method that
                collects the progress messages of each step.
        """
        current_state = START
        while current_state != END:
            handler = self.states.get(current_state)
            if handler is None:
                break
            next_state = handler(messages_state)
            guard = self.transitions.get((current_state, next_state))
            if guard is None or not guard():
                # Undeclared move (e.g. an early exit to END): stop here.
                break
            current_state = next_state

    def fetch_news(self, messages_state):
        """Fetch articles for ``self.topic``; returns "Summarize", or END on failure."""
        try:
            self.articles = fetch_news_articles(self.topic)
        except Exception as e:
            messages_state.add_message(f"Error fetching news articles: {e}")
            return END  # Stop the process if there's an error
        messages_state.add_message(
            f"Fetched {len(self.articles)} articles related to {self.topic}."
        )
        return "Summarize"  # Move to the next state

    def summarize_news(self, messages_state):
        """Summarize the fetched articles; returns "CrossReference", or END if there are none."""
        if not self.articles:
            messages_state.add_message("No articles to summarize.")
            return END
        article_texts = [article.get("content", "") for article in self.articles]
        prompt = f"""
        Please provide a concise summary of the following news articles related to {self.topic}:
        {article_texts}
        """

        # `invoke` replaces the deprecated `predict`. Chat models return a
        # message object whose text is in `.content`; fall back to the raw
        # value for models that return a plain string.
        response = self.llm.invoke(prompt)
        self.summary = getattr(response, "content", response)

        messages_state.add_message(f"Generated initial summary: {self.summary}")

        return "CrossReference"

    def cross_reference_sources(self, messages_state):
        """Record a placeholder reliability score for every named source."""
        # This part can be expanded to use more sophisticated techniques for verifying information.
        for article in self.articles:
            # `source` can be present but null, so normalise before reading `name`.
            source = (article.get("source") or {}).get("name")
            if source:
                self.sources_reliability[source] = 0.7  # Placeholder for a more robust reliability score

        messages_state.add_message("Cross-referenced sources for reliability.")

        return "UpdateSummary"

    def update_summary(self, messages_state):
        """Final step; currently only logs a message and returns END."""
        # Implement logic to identify inaccuracies and update the summary using the reliability scores.
        # (This would require analyzing the summary in comparison to other sources)
        messages_state.add_message("Updated summary with reliable information.")

        return END

def fetch_news_articles(topic: str, days_back: int = 1) -> List[Dict]:
    """Fetch news articles on ``topic`` published within the last ``days_back`` days.

    Query values are passed through ``params`` so ``requests`` percent-encodes
    them; a topic containing spaces, ``&`` or ``#`` can no longer corrupt the
    query string. A timeout is set so an unresponsive server cannot hang the cell.

    Args:
        topic: Free-text search query.
        days_back: How many days before today (local clock) the ``from`` date
            is set to.

    Returns:
        The ``articles`` list from the JSON response, or ``[]`` when the
        response has no such key.

    Raises:
        Exception: If the response status code is not 200. Connection errors
            and timeouts raised by ``requests`` propagate unchanged.
    """
    from_date = (datetime.now() - timedelta(days=days_back)).strftime('%Y-%m-%d')
    response = requests.get(
        "https://newsapi.org/v2/everything",
        params={"q": topic, "from": from_date, "apiKey": NEWS_API_KEY},
        timeout=10,  # seconds
    )
    if response.status_code != 200:
        raise Exception("Error fetching news articles")
    return response.json().get("articles", [])

def summarize_news(articles: List[Dict]) -> str:
    """Summarize the given news articles with the module-level ``llm``.

    Articles whose ``content`` is missing, ``None`` or empty are skipped.
    ``article.get("content", "")`` still yields ``None`` when the key is
    present with a null value, which made ``str.join`` raise ``TypeError``.
    The remaining texts are separated by a blank line so adjacent articles do
    not run together in the prompt.

    Args:
        articles: Article dicts; only the ``content`` field is read.

    Returns:
        The model's summary text, or ``""`` when no article has usable content
        (the model is not called in that case).
    """
    article_texts = [
        text
        for text in (article.get("content") for article in articles)
        if isinstance(text, str) and text
    ]
    if not article_texts:
        return ""
    joined_texts = "\n\n".join(article_texts)
    prompt = f"Summarize the following news articles:\n{joined_texts}"
    # `invoke` replaces the deprecated `predict`. Chat models return a message
    # object whose text is in `.content`; fall back to the raw value otherwise.
    response = llm.invoke(prompt)
    return getattr(response, "content", response)

def verify_accuracy(summary: str, articles: List[Dict]) -> str:
    """Return the summary after accuracy verification (currently a pass-through).

    Intended future work: cross-reference the summary against other sources
    (a knowledge graph, another news API, Wikipedia or similar) and flag
    conflicting claims or bias. ``articles`` is accepted for that purpose but
    is not read yet.
    """
    # Placeholder: no verification is performed, so the input is handed back as-is.
    verified_summary = summary
    return verified_summary

def grade_source_reliability(article: Dict) -> int:
    """Score an article's source from 2 (low) to 5 (high) by substring match on its id.

    Only ``article["source"]["id"]`` is inspected. Anything that is not a
    non-empty dict with a string ``id`` gets the default low score.
    A fuller implementation could use a curated source list or an external
    rating service instead of this substring check.
    """
    low_reliability = 2

    source = article.get("source")
    # Missing, empty or non-dict source information: default to low.
    if not (source and isinstance(source, dict)):
        return low_reliability

    source_id = source.get("id", "")
    # The id must be a string before substrings can be searched in it.
    if not isinstance(source_id, str):
        return low_reliability

    # Tiers are checked from highest to lowest; the first match wins.
    tiers = (
        (5, ("bbc", "reuters")),      # High reliability
        (4, ("nytimes", "apnews")),   # Medium-high reliability
    )
    for score, markers in tiers:
        if any(marker in source_id for marker in markers):
            return score
    return low_reliability

def update_summary_with_reliable_info(summary: str, articles: List[Dict]) -> str:
    """Return the summary, to be refined with the most reliable sources (placeholder).

    The articles are ranked by ``grade_source_reliability`` (highest first),
    but the ranking is not consumed yet: the summary is returned unchanged.
    A later implementation can use the ranking to replace conflicting claims
    or add missing details from the more reliable sources.
    """
    # Highest-scoring sources first.
    ranked_articles = sorted(articles, key=grade_source_reliability, reverse=True)
    # For now the initial summary is handed back untouched.
    return summary

# Example usage: run the function-based pipeline end to end for one topic.
# This performs a live NewsAPI request and an LLM call.
topic = "AI"
articles = fetch_news_articles(topic, days_back=2)  # `from` date is set two days before today
summary = summarize_news(articles)
# Both steps below are placeholders that currently return the summary they are given.
verified_summary = verify_accuracy(summary, articles)
updated_summary = update_summary_with_reliable_info(verified_summary, articles)

# Last expression of the cell, so the notebook renders it as the cell output.
updated_summary





In [41]:
from langchain_groq import ChatGroq
from google.colab import userdata
import requests
from typing import List, Dict
from datetime import datetime, timedelta
from langgraph.graph import StateGraph
from typing import TypedDict

# NOTE(review): these installs run after the imports at the top of this cell and
# repeat the installs from the first cells — presumably redundant; confirm before removing.
!pip install langchain langgraph langchain_core
!pip install langchain_groq

# API keys, read through Colab's `userdata` helper rather than hardcoded.
GROQ_API_KEY = userdata.get('GROQ_API_KEY')
NEWS_API_KEY = userdata.get('NEWS_API_KEY')


# Instantiate the LLM; it is handed to NewsSummarizationAgent in the example at the end of this cell.
llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0, api_key=GROQ_API_KEY)

class NewsState(TypedDict):
    """Schema for the state of the news summarization agent.

    Passed to ``StateGraph`` as ``state_schema`` by ``NewsSummarizationAgent``.
    """
    # Topic string the agent searches news for.
    topic: str
    # Article records; presumably the dicts returned by fetch_news_articles — confirm.
    articles: list
    # Summary text produced for the topic.
    summary: str
    # Reliability score per source; presumably keyed by source name — confirm.
    sources_reliability: dict

class SimpleMessagesState:
    """Minimal append-only message log used by the agent's workflow steps."""

    def __init__(self):
        # Messages in the order they were recorded.
        self.messages = list()

    def add_message(self, message: str):
        """Record ``message`` at the end of the log."""
        log = self.messages
        log.append(message)

class NewsSummarizationAgent(StateGraph):  # Inheriting from StateGraph
    """Sequential news agent: fetch -> summarize -> cross-reference -> update.

    The workflow lives in two lookup tables, ``states`` and ``transitions``,
    keyed by the names ``'__start__'`` and ``'__end__'`` plus the step names.
    :meth:`run` walks them. Every step method receives a message log and
    returns the name of the step to run next.
    """

    def __init__(self, topic: str, llm):
        """Store the topic and model and declare the workflow tables.

        Args:
            topic: Search query handed to ``fetch_news_articles``.
            llm: Chat model used to write the summary.
        """
        super().__init__(state_schema=NewsState)
        self.topic = topic
        self.llm = llm
        self.articles = []
        self.summary = ""
        self.sources_reliability = {}

        # Step name -> handler. '__end__' has no handler; reaching it stops run().
        self.states = {
            '__start__': self.fetch_news,
            "Summarize": self.summarize_news,
            "CrossReference": self.cross_reference_sources,
            "UpdateSummary": self.update_summary,
            '__end__': None,
        }

        # Allowed (from, to) moves, each guarded by a zero-argument predicate.
        self.transitions = {
            ('__start__', "Summarize"): lambda: True,
            ("Summarize", "CrossReference"): lambda: True,
            ("CrossReference", "UpdateSummary"): lambda: True,
            ("UpdateSummary", '__end__'): lambda: True,
        }

    def run(self, messages_state: SimpleMessagesState):
        """Execute steps from '__start__' until '__end__' or until no declared transition applies."""
        current_state = '__start__'
        while current_state != '__end__':
            handler = self.states.get(current_state)
            if handler is None:
                break
            next_state = handler(messages_state)
            guard = self.transitions.get((current_state, next_state))
            if guard is None or not guard():
                # Undeclared move (e.g. an early exit to '__end__'): stop here.
                break
            current_state = next_state

    def fetch_news(self, messages_state: SimpleMessagesState):
        """Fetch articles for ``self.topic``; returns "Summarize", or '__end__' on failure."""
        try:
            self.articles = fetch_news_articles(self.topic)
        except Exception as e:
            messages_state.add_message(f"Error fetching news articles: {e}")
            return '__end__'
        messages_state.add_message(
            f"Fetched {len(self.articles)} articles related to {self.topic}."
        )
        return "Summarize"

    def summarize_news(self, messages_state: SimpleMessagesState):
        """Summarize the fetched articles; returns "CrossReference", or '__end__' if there are none."""
        if not self.articles:
            messages_state.add_message("No articles to summarize.")
            return '__end__'

        article_texts = [article.get("content", "") for article in self.articles]
        prompt = f"""
        Please provide a concise summary of the following news articles related to {self.topic}:
        {article_texts}
        """

        # `invoke` replaces the deprecated `predict`. Chat models return a
        # message object whose text is in `.content`; fall back to the raw
        # value for models that return a plain string.
        response = self.llm.invoke(prompt)
        self.summary = getattr(response, "content", response)
        messages_state.add_message(f"Generated initial summary: {self.summary}")

        return "CrossReference"

    def cross_reference_sources(self, messages_state: SimpleMessagesState):
        """Record a placeholder reliability score for every named source."""
        for article in self.articles:
            # `source` can be present but null, so normalise before reading `name`.
            source = (article.get("source") or {}).get("name")
            if source:
                self.sources_reliability[source] = 0.7  # Placeholder for reliability score

        messages_state.add_message("Cross-referenced sources for reliability.")
        return "UpdateSummary"

    def update_summary(self, messages_state: SimpleMessagesState):
        """Final step; currently only logs a message and returns '__end__'."""
        messages_state.add_message("Updated summary with reliable information.")
        # `StateGraph` has no `END` attribute (END is a module-level constant of
        # langgraph.graph), so return the same '__end__' name the tables use.
        return '__end__'

def fetch_news_articles(topic: str, days_back: int = 1) -> List[Dict]:
    """Fetch news articles on ``topic`` published within the last ``days_back`` days.

    Query values are passed through ``params`` so ``requests`` percent-encodes
    them; a topic containing spaces, ``&`` or ``#`` can no longer corrupt the
    query string. A timeout is set so an unresponsive server cannot hang the cell.

    Args:
        topic: Free-text search query.
        days_back: How many days before today (local clock) the ``from`` date
            is set to.

    Returns:
        The ``articles`` list from the JSON response, or ``[]`` when the
        response has no such key.

    Raises:
        Exception: If the response status code is not 200. Connection errors
            and timeouts raised by ``requests`` propagate unchanged.
    """
    from_date = (datetime.now() - timedelta(days=days_back)).strftime('%Y-%m-%d')
    response = requests.get(
        "https://newsapi.org/v2/everything",
        params={"q": topic, "from": from_date, "apiKey": NEWS_API_KEY},
        timeout=10,  # seconds
    )
    if response.status_code != 200:
        raise Exception("Error fetching news articles")
    return response.json().get("articles", [])

def grade_source_reliability(article: Dict) -> int:
    """Score an article's source from 2 (low) to 5 (high) by substring match on its id.

    Only ``article["source"]["id"]`` is inspected. Anything that is not a
    non-empty dict with a string ``id`` gets the default score of 2.
    """
    default_score = 2

    source = article.get("source")
    if not (source and isinstance(source, dict)):
        return default_score

    source_id = source.get("id", "")
    if not isinstance(source_id, str):
        return default_score

    def mentions(*markers):
        # True when any marker occurs somewhere in the source id.
        return any(marker in source_id for marker in markers)

    if mentions("bbc", "reuters"):
        return 5  # High reliability
    if mentions("nytimes", "apnews"):
        return 4  # Medium-high reliability
    return default_score  # Low reliability

def update_summary_with_reliable_info(summary: str, articles: List[Dict]) -> str:
    """Return the summary, to be refined with the most reliable sources (placeholder).

    The articles are ranked by ``grade_source_reliability`` (highest first),
    but the ranking is not consumed yet: the summary is returned unchanged.
    """
    # Highest-scoring sources first; kept for the future update logic.
    ranked_articles = sorted(articles, key=grade_source_reliability, reverse=True)
    return summary

# Example usage: build the agent and an empty message log.
topic = "AI"
agent = NewsSummarizationAgent(topic, llm)
messages_state = SimpleMessagesState()
# No workflow step is invoked in this cell, so the log displayed here is still empty.
messages_state.messages




[]

In [44]:
from langchain_groq import ChatGroq
from google.colab import userdata
import requests
from typing import List, Dict
from datetime import datetime, timedelta
from langgraph.graph import StateGraph
from typing import TypedDict

# NOTE(review): these installs run after the imports at the top of this cell and
# repeat the installs from the first cells — presumably redundant; confirm before removing.
!pip install langchain langgraph langchain_core
!pip install langchain_groq

# API keys, read through Colab's `userdata` helper rather than hardcoded.
GROQ_API_KEY = userdata.get('GROQ_API_KEY')
NEWS_API_KEY = userdata.get('NEWS_API_KEY')

# Instantiate the LLM; it is handed to NewsSummarizationAgent in the example at the end of this cell.
llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0, api_key=GROQ_API_KEY)

class NewsState(TypedDict):
    """Schema for the state of the news summarization agent.

    Passed to ``StateGraph`` as ``state_schema`` by ``NewsSummarizationAgent``.
    """
    # Topic string the agent searches news for.
    topic: str
    # Article records; presumably the dicts returned by fetch_news_articles — confirm.
    articles: list
    # Summary text produced for the topic.
    summary: str
    # Reliability score per source; presumably keyed by source name — confirm.
    sources_reliability: dict

class SimpleMessagesState:
    """Minimal append-only message log filled by the agent's workflow steps."""

    def __init__(self):
        # Messages in the order they were recorded.
        self.messages = list()

    def add_message(self, message: str):
        """Record ``message`` at the end of the log."""
        log = self.messages
        log.append(message)

class NewsSummarizationAgent(StateGraph):  # Inheriting from StateGraph
    """Sequential news agent: fetch -> summarize -> cross-reference -> update.

    The workflow lives in two lookup tables, ``states`` and ``transitions``,
    walked by :meth:`run`. Every step method receives a message log and
    returns the name of the step to run next.
    """

    def __init__(self, topic: str, llm):
        """Store the topic and model and declare the workflow tables.

        Args:
            topic: Search query handed to ``fetch_news_articles``.
            llm: Chat model used to write the summary.
        """
        super().__init__(state_schema=NewsState)
        self.topic = topic
        self.llm = llm
        self.articles = []
        self.summary = ""
        self.sources_reliability = {}

        # Step name -> handler. "End" has no handler; reaching it stops run().
        self.states = {
            "Start": self.fetch_news,
            "Summarize": self.summarize_news,
            "CrossReference": self.cross_reference_sources,
            "UpdateSummary": self.update_summary,
            "End": None,
        }

        # Allowed (from, to) moves, each guarded by a zero-argument predicate.
        self.transitions = {
            ("Start", "Summarize"): lambda: True,
            ("Summarize", "CrossReference"): lambda: True,
            ("CrossReference", "UpdateSummary"): lambda: True,
            ("UpdateSummary", "End"): lambda: True,
        }

    def run(self, messages_state: SimpleMessagesState):
        """Run the agent from "Start" until "End" or until no declared transition applies."""
        current_state = "Start"
        while current_state != "End":
            state_func = self.states.get(current_state)
            if not state_func:
                break
            next_state = state_func(messages_state)
            guard = self.transitions.get((current_state, next_state))
            if guard is None or not guard():
                # Undeclared move (e.g. an early exit to "End"): stop here.
                break
            current_state = next_state

    def fetch_news(self, messages_state: SimpleMessagesState):
        """Fetch articles for ``self.topic``; returns "Summarize", or "End" on failure."""
        try:
            self.articles = fetch_news_articles(self.topic)
        except Exception as e:
            messages_state.add_message(f"Error fetching news articles: {e}")
            return "End"
        messages_state.add_message(
            f"Fetched {len(self.articles)} articles related to {self.topic}."
        )
        return "Summarize"

    def summarize_news(self, messages_state: SimpleMessagesState):
        """Summarize the fetched articles; returns "CrossReference", or "End" if there are none."""
        if not self.articles:
            messages_state.add_message("No articles to summarize.")
            return "End"

        article_texts = [article.get("content", "") for article in self.articles]
        prompt = f"""
        Please provide a concise summary of the following news articles related to {self.topic}:
        {article_texts}
        """

        # `invoke` replaces the deprecated `predict`. Chat models return a
        # message object whose text is in `.content`; fall back to the raw
        # value for models that return a plain string.
        response = self.llm.invoke(prompt)
        self.summary = getattr(response, "content", response)
        messages_state.add_message(f"Generated initial summary: {self.summary}")

        return "CrossReference"

    def cross_reference_sources(self, messages_state: SimpleMessagesState):
        """Record a placeholder reliability score for every named source."""
        for article in self.articles:
            # `source` can be present but null, so normalise before reading `name`.
            source = (article.get("source") or {}).get("name")
            if source:
                self.sources_reliability[source] = 0.7  # Placeholder for reliability score

        messages_state.add_message("Cross-referenced sources for reliability.")
        return "UpdateSummary"

    def update_summary(self, messages_state: SimpleMessagesState):
        """Final step; currently only logs a message and returns "End"."""
        messages_state.add_message("Updated summary with reliable information.")
        return "End"

def fetch_news_articles(topic: str, days_back: int = 1) -> List[Dict]:
    """Fetch news articles on ``topic`` published within the last ``days_back`` days.

    Query values are passed through ``params`` so ``requests`` percent-encodes
    them; a topic containing spaces, ``&`` or ``#`` can no longer corrupt the
    query string. A timeout is set so an unresponsive server cannot hang the cell.

    Args:
        topic: Free-text search query.
        days_back: How many days before today (local clock) the ``from`` date
            is set to.

    Returns:
        The ``articles`` list from the JSON response, or ``[]`` when the
        response has no such key.

    Raises:
        Exception: If the response status code is not 200. Connection errors
            and timeouts raised by ``requests`` propagate unchanged.
    """
    from_date = (datetime.now() - timedelta(days=days_back)).strftime('%Y-%m-%d')
    response = requests.get(
        "https://newsapi.org/v2/everything",
        params={"q": topic, "from": from_date, "apiKey": NEWS_API_KEY},
        timeout=10,  # seconds
    )
    if response.status_code != 200:
        raise Exception("Error fetching news articles")
    return response.json().get("articles", [])

# Define the function to display formatted output
def display_pretty_output(messages):
    """Print the agent's log messages grouped under readable section headers.

    Messages are recognised by their leading text. Anything that is not one of
    the four known progress messages (for example "Error fetching news
    articles: ..." or "No articles to summarize.") is printed as-is. Such
    messages used to be dropped, so a failed run printed nothing at all.

    Args:
        messages: Iterable of message strings, in the order they were logged.
    """
    for message in messages:
        if message.startswith("Generated initial summary"):
            print("\n--- AI News Summary ---\n")
            # Everything after the first ": " is the summary text; partition()
            # yields "" instead of raising when the separator is absent.
            summary = message.partition(": ")[2]
            print(summary.replace('\n', '\n    '))  # Indent each line
        elif message.startswith("Fetched"):
            print(f"\n{message}\n")  # Print the fetched articles message
        elif message.startswith("Cross-referenced"):
            print("\n--- Source Reliability ---")
            print(f"\n{message}\n")
        elif message.startswith("Updated summary"):
            print("\n--- Finalized Summary ---")
            print(f"\n{message}\n")
        else:
            # Errors and early-exit notices must stay visible to the reader.
            print(f"\n{message}\n")

# Example usage of the NewsSummarizationAgent and displaying the output
topic = "AI"  # Define your topic
agent = NewsSummarizationAgent(topic, llm)  # Instantiate the agent with topic and llm
messages_state = SimpleMessagesState()  # Create an instance of messages state

# Each workflow step appends its progress message to messages_state.
agent.run(messages_state)  # Run the agent workflow

# Now display the formatted output
display_pretty_output(messages_state.messages)


Fetched 100 articles related to AI.


--- AI News Summary ---

Here's a concise summary of the news articles related to AI:
    
    **Google and AI Search**
    
    * Google's search results have become less reliable, with spammy answers and AI-generated content dominating the first few pages.
    * Google has released a new tool called SynthID Text to watermark and detect AI-generated text.
    
    **AI in Education**
    
    * A student warns about the potential consequences of using AI tools to cheat in university.
    * AI tools can be useful for teaching and learning, but they cannot replace human learning.
    
    **AI in Business**
    
    * Jensen Huang, CEO of Nvidia, believes that AI can enhance jobs but won't replace humans entirely.
    * AI can be used to automate tasks, but it's essential to consider the potential consequences of automation.
    
    **AI in Healthcare**
    
    * Healthcare data is messy, and AI can be used to make sense of it.
    * AI can be us