#Project: Monitoring of Relevant Financial News
April 2025


### Task Description:
 Develop a simple AI agent system using
CrewAIto summarize financial news. The agent
should extract essential information, and accurately
recognize and tag entities, specifically focusing on
companies and countries. The final deliverable
demonstrates the agent’s ability to filter and organize
financial news effectively, highlighting critical insights
related to the identified entities.

The final output is presented in a clearly structured JSON format, with the following fields:
-  date: Date of the article.
-  entity: Identified entity (must be a company or country).
-  summary: Relevant summary of the entity-related content.
-  sentiment: positive | neutral | negative


# Setting up the environment

## Install all necessary packages

In [85]:
!pip install crewai           # used for Crew AI
!pip install langchain_openai # used for Crew AI
!pip install crewai_tools     # used for Crew AI
!pip install requests         # used to retrieve articles with NewsData
!pip install newspaper3k      # used to retrieve articles with NewsData
!pip install beautifulsoup4   # used to retrieve articles with NewsData
!pip install lxml_html_clean  # used to retrieve articles with NewsData



## Import necessary modules

In [86]:
# Imports for Crew AI
from crewai import Agent, Task, Crew, Process, LLM
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource
from crewai.project import CrewBase, agent, crew
from langchain_openai import ChatOpenAI
from crewai_tools import TXTSearchTool, FileReadTool

from google.colab import userdata   # necesssary to retrieve the secret keys
import os                 # necesssary to includ secret keys in the environmnt

# Imports for output formats
import json
import csv
from datetime import datetime

import time # Used to measure execution time in the perfomrance evaluation

# Imports for the bonus2 retrieve NewsData articles
import requests
import re
from bs4 import BeautifulSoup
from newspaper import Article

## Import environment variables (MODIFY WITH YOUR KEYS)

I commented out the Gemini, Groq and Newsdata API keys so that you can run the notebook even without those keys

In [87]:
#Set API Keys as environment variables
os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')
os.environ["CHROMA_OPENAI_API_KEY"]= userdata.get('OPENAI_API_KEY')
# os.environ["GEMINI_API_KEY"] = userdata.get('GOOGLE_API_KEY')
# os.environ["GROQ_API_KEY"] = userdata.get('GROQ_API_KEY')
# os.environ["NEWSDATA_API_KEY"] = userdata.get('NEWSDATA_API_KEY')

## To keep track of performance/usage data

In [88]:
# In a first version, I saved the perfomrance data in a file in my Google Drive
 # (for persistance)

# Instead in this final version I will save all performance data
# in the following dataframe
import pandas as pd

# Create an empty DataFrame with the desired columns
results_df_empty = pd.DataFrame(columns=['article_file', 'article_url', 'article_size',
                                   'llm_used', 'tokens_used', 'execution_time',
                                   'nber_entities', 'first_entity',
                                   'first_sentiment', 'comment'])
my_results_df = results_df_empty

In [89]:
def append_to_result_df(article_file, article_url, article_size,
                        llm_used, tokens_used, execution_time,
                        crew_json_output,
                        comment = ""):
    """
    Appends a new row to the results_df DataFrame.

    Args:
        article_file: The path to the article file.
        article_url: The URL of the article (if available).
        article_size: The size of the article in characters.
        llm_used: The name of the LLM used.
        tokens_used: The number of tokens used during analysis.
        execution_time: The execution time in seconds.
        crew_json_output: The JSON output from the crew.
    """
    print("Extracting the results before appending them ")
    global my_results_df  # Access the global results_df DataFrame

    # Extract information from crew_json_output
    try:
        data = json.loads(crew_json_output)
        nber_entities = len(data)
        if nber_entities > 0:
            first_entity = data[0]['entity']
            first_sentiment = data[0]['sentiment']
        else:
            first_entity = None
            first_sentiment = None
    except (KeyError, IndexError):
        nber_entities = None
        first_entity = None
        first_sentiment = None



    new_row = pd.DataFrame([[article_file, article_url, article_size,
                             llm_used, tokens_used, execution_time,
                             nber_entities, first_entity, first_sentiment,
                             comment]],
                          columns=my_results_df.columns)
    my_results_df = pd.concat([my_results_df, new_row], ignore_index=True)
    print(f"Data appended to my_result_df:{new_row}")

In [90]:
def get_file_size(file_path):
    """
    Calculates the size of a file in characters (including spaces and newlines).

    Args:
        file_path: The path to the file.

    Returns:
        The size of the file in characters.
    """

    with open(file_path, 'r') as file:
        content = file.read()
        file_size = len(content)

    return file_size

# Text source(s)

# Incorporate the crew in a function

I have fine-tuned the agents and taks through trial and error that I removed from the notebook. The result of my work is incorporated in the following single function, taking into input the article and the LLM, and providing the result of the work and metrics.

In [91]:
openai_llm = LLM(
    model="gpt-4.1-nano", #Required GPT Model for the project
    temperature=0
) #Creativity of the response

In [92]:
# incorporate into a single function article_analysis the 4 agents and tasks
# modified to force the output in JSON under the same format.

from pydantic import BaseModel
from typing import List
from datetime import date

class EntitySummary(BaseModel):
    date: str
    entity: str
    summary: str
    sentiment: str

class EntitySummaryList(BaseModel):
    items: List[EntitySummary]

def article_analysis(article, llm):
    """
    Analyzes a financial news article using a CrewAI-based system.

    Args:
        article: The article text as a string.
        llm: The LLM to use for the analysis.

    Returns:
        A tuple containing:
            - The analysis result as a JSON string.
            - A dictionary of AI metrics.
    """

    # --- Agent Definitions (same as before) ---
    date_identifier = Agent(
        role='Date Identifier',
        goal='Accurately identify publication dates in texts',
        backstory= 'Expert in extracting and validating dates from various text formats',
        llm=llm,
        Memory=False
    )
    entity_identifier = Agent(
        role='Entity Identifier',
        goal='Identify and classify all companies and countries mentioned in texts',
        backstory='Specialist in entity recognition, to identify only companies and countries',
        llm = llm,
        Memory = False
    )
    entity_summarizer = Agent(
        role='Entity Context Summarizer',
        goal='Create focused summaries about specific entities from texts',
        backstory='Expert in extracting and summarizing relevant information about specific entities',
        llm = llm,
        Memory = False
    )
    sentiment_identifier = Agent(
        role='Sentiment Identifier',
        goal='Identify the general sentiment towards entities mentioned in texts',
        backstory='Specialist in understanding sentiments and nuances in texts',
        llm = llm,
    Memory = False
    )

    # --- Task Definitions (same as before) ---
    date_task = Task(
        description="Identify the publication date in the following article: \n" + article,
        agent=date_identifier,
        expected_output="a string with the date in YYYY-MM-DD format",
    )
    entity_identification_task = Task(
        description="""Identify the main companies and countries mentioned     in the initial article. List them separately by order of significance in the         article. Do not consider people as entities and do not select the         article's news organization as an entity. Here is the text:\n   """ ,
        agent=entity_identifier,
        context= [date_task],  # Access to date task output if needed
        expected_output="""a list of string with the name of all entities         including their ticker code if they are a traded company""",
    )
    entity_summary_task = Task(
        description="""
        For each entity identified in the previous task, based on the article:
        1. Create a focused summary of what the text says about it
        2. Include relevant context and relationships""" ,
        agent=entity_summarizer,
        #context=[date_task, entity_identification_task],  # Access to date and identified entities
        expected_output="A short summary (less than 600 characters) of what the article says about the entity.",
    )
    sentiment_identification_task = Task(
        description="Identify the sentiment regarding each of the entities mentionned in the article as either positive, neutral or negative. Here is the text:\n " + article,
        agent=sentiment_identifier,
        context= [date_task, entity_identification_task, entity_summary_task],  # Access to date task output if needed
        expected_output="A list of JSON objects, containing a 'date' in 'YYYY-MM-DD' string format, 'entity', 'summary' and 'sentiment' for each analyzed entity.",
        output_json=EntitySummaryList
    )

    # --- Crew Definition (same as before) ---
    crew = Crew(
        agents=[date_identifier, entity_identifier, entity_summarizer, sentiment_identifier],
        tasks=[date_task, entity_identification_task, entity_summary_task, sentiment_identification_task],
        process=Process.sequential,
        verbose=1,
    )
    crew.reset_memories(command_type='all')

    start_time = time.time()
    crew_output = crew.kickoff()
    end_time = time.time()

 #   analysis_result = json.dumps(crew_output.json_dict, indent=2) if crew_output.json_dict else crew_output.raw
    analysis_result = json.dumps(crew_output.json_dict["items"])

    ai_metrics = {
        "llm_used": llm.model,
        "tokens_used": crew.usage_metrics,
        "execution_time": end_time - start_time
    }

    return analysis_result, ai_metrics


## Looping through a list of articles (ADD YOUR ARTICLES IN THE file_paths LIST)

In [93]:
# Loop through all my test articles.


file_paths = ['knowledge/Yahoo Finance 2.txt',
              'knowledge/Yahoo Finance 3.txt',
              'knowledge/Yahoo Finance.txt',
              'knowledge/Reuters.txt'
              ]
urls = ["url1", "url2", "url3",
        "url4", "url5"
        ]
my_results_df = results_df_empty
for file_path, url in zip(file_paths, urls):
  try:
    with open(file_path, 'r') as file:
      article = file.read()
   # Clean the \r character that is an issue with Google AI
    article = article.replace('\r', '').replace('\n', ' ')

    article_size = get_file_size(file_path)
    analysis_result, ai_metrics = article_analysis(article, openai_llm)
    print("#################### End of crew work. Here is the result : ")
    print(analysis_result)

    print("#################### Let s append those results : ")
    append_to_result_df(
        article_file=file_path,
        article_url=url,
        article_size=article_size,
        llm_used=ai_metrics["llm_used"],
        tokens_used=ai_metrics["tokens_used"],
        execution_time=ai_metrics["execution_time"],
        crew_json_output=analysis_result
    )
    print("#################### result appended to my_result ")

  except FileNotFoundError:
    print(f"Error: File not found at {file_path}")
  except Exception as e:
    print(f"An error occurred while processing {file_path}: {e}")


[1m[93m 
[2025-04-28 11:30:39][INFO]: all memory has been reset[00m


[1m[95m# Agent:[00m [1m[92mDate Identifier[00m
[95m## Task:[00m [92mIdentify the publication date in the following article: 
Yahoo Finance Google stock rises after it beats on earnings, raises dividend, and authorizes $70 billion in buybacks  Daniel Howley Daniel Howley · Technology Editor Updated Fri, April 25, 2025 at 3:36 PM GMT+2 2 min read  In This Article: StockStory Top Pick MSFT +0.06%  GOOG +2.70%  AMZN +0.16%  Alphabet (GOOG, GOOGL) stock rose over 3% Friday after the company reported strong fiscal first quarter earnings, beating expectations. It also announced a 5% dividend increase and $70 billion in stock buybacks.  For Q1, Alphabet reported earnings per share (EPS) of $2.81 on revenue of $90.2 billion. Analysts were expecting EPS of $2.01 on revenue of $89.1 billion, according to Bloomberg consensus estimates.  NasdaqGS - Nasdaq Real Time Price • USD Alphabet Inc. (GOOG)  Follow View Quote Details 165.84 +4.69 +(2.70%) As of 10:02:02 AM EDT. Market Open. Advanced

[1m[95m# Agent:[00m [1m[92mEntity Identifier[00m
[95m## Task:[00m [92mIdentify the main companies and countries mentioned     in the initial article. List them separately by order of significance in the         article. Do not consider people as entities and do not select the         article's news organization as an entity. Here is the text:
   [00m


[1m[95m# Agent:[00m [1m[92mEntity Identifier[00m
[95m## Final Answer:[00m [92m
Companies: 
- No companies explicitly mentioned in the provided text.

Countries: 
- No countries explicitly mentioned in the provided text.[00m




[1m[95m# Agent:[00m [1m[92mEntity Context Summarizer[00m
[95m## Task:[00m [92m
        For each entity identified in the previous task, based on the article:
        1. Create a focused summary of what the text says about it
        2. Include relevant context and relationships[00m


[1m[95m# Agent:[00m [1m[92mEntity Context Summarizer[00m
[95m## Final Answer:[00m [92m
The provided text does not mention any specific companies or countries, so there are no entities to summarize or establish relationships for.[00m




[1m[95m# Agent:[00m [1m[92mSentiment Identifier[00m
[95m## Task:[00m [92mIdentify the sentiment regarding each of the entities mentionned in the article as either positive, neutral or negative. Here is the text:
 Yahoo Finance Google stock rises after it beats on earnings, raises dividend, and authorizes $70 billion in buybacks  Daniel Howley Daniel Howley · Technology Editor Updated Fri, April 25, 2025 at 3:36 PM GMT+2 2 min read  In This Article: StockStory Top Pick MSFT +0.06%  GOOG +2.70%  AMZN +0.16%  Alphabet (GOOG, GOOGL) stock rose over 3% Friday after the company reported strong fiscal first quarter earnings, beating expectations. It also announced a 5% dividend increase and $70 billion in stock buybacks.  For Q1, Alphabet reported earnings per share (EPS) of $2.81 on revenue of $90.2 billion. Analysts were expecting EPS of $2.01 on revenue of $89.1 billion, according to Bloomberg consensus estimates.  NasdaqGS - Nasdaq Real Time Price • USD Alphabet Inc. (GOOG)  Foll

#################### End of crew work. Here is the result : 
[{"date": "2025-04-25", "entity": "Google (Alphabet Inc.)", "summary": "Google's stock rose over 3% after reporting strong fiscal first quarter earnings, beating expectations, and announcing a 5% dividend increase along with $70 billion in stock buybacks. The company also reported higher earnings per share and revenue compared to last year, with advertising revenue exceeding expectations. However, it faces legal challenges including antitrust losses and a lawsuit in the UK, which could impact its business operations.", "sentiment": "positive"}]
#################### Let s append those results : 
Extracting the results before appending them 
Data appended to my_result_df:                    article_file article_url  article_size      llm_used  \
0  knowledge/Yahoo Finance 2.txt        url1          2627  gpt-4.1-nano   

                                         tokens_used  execution_time  \
0  total_tokens=2550 prompt_tokens=2

  my_results_df = pd.concat([my_results_df, new_row], ignore_index=True)


[1m[95m# Agent:[00m [1m[92mDate Identifier[00m
[95m## Task:[00m [92mIdentify the publication date in the following article: 
Intel beats on Q1 expectations, but poor Q2 forecast sends stock sliding  Daniel Howley Daniel Howley · Technology Editor Updated Fri, April 25, 2025 at 3:39 PM GMT+2 2 min read  In This Article: INTC -7.82%  Intel's (INTC) stock fell over 7% Friday after a disappointing outlook overshadowed an earnings beat.  In the coming quarter, Intel says it expects second quarter revenue of between $11.2 billion and $12.4 billion. Wall Street was looking for $12.8 billion.  “The current macro environment is creating elevated uncertainty across the industry, which is reflected in our outlook. We are taking a disciplined and prudent approach to support continued investment in our core products and foundry businesses while maximizing operational cost savings and capital efficiency,” Intel CFO David Zinsner said in a statement.  Shares are off over 40% in the past 12 m

[1m[95m# Agent:[00m [1m[92mEntity Identifier[00m
[95m## Task:[00m [92mIdentify the main companies and countries mentioned     in the initial article. List them separately by order of significance in the         article. Do not consider people as entities and do not select the         article's news organization as an entity. Here is the text:
   [00m


[1m[95m# Agent:[00m [1m[92mEntity Identifier[00m
[95m## Final Answer:[00m [92m
Companies: 
- None explicitly mentioned in the provided text.

Countries: 
- None explicitly mentioned in the provided text.[00m




[1m[95m# Agent:[00m [1m[92mEntity Context Summarizer[00m
[95m## Task:[00m [92m
        For each entity identified in the previous task, based on the article:
        1. Create a focused summary of what the text says about it
        2. Include relevant context and relationships[00m


[1m[95m# Agent:[00m [1m[92mEntity Context Summarizer[00m
[95m## Final Answer:[00m [92m
The provided text does not mention any specific companies or countries, so there are no entities to summarize or establish relationships for.[00m




[1m[95m# Agent:[00m [1m[92mSentiment Identifier[00m
[95m## Task:[00m [92mIdentify the sentiment regarding each of the entities mentionned in the article as either positive, neutral or negative. Here is the text:
 Intel beats on Q1 expectations, but poor Q2 forecast sends stock sliding  Daniel Howley Daniel Howley · Technology Editor Updated Fri, April 25, 2025 at 3:39 PM GMT+2 2 min read  In This Article: INTC -7.82%  Intel's (INTC) stock fell over 7% Friday after a disappointing outlook overshadowed an earnings beat.  In the coming quarter, Intel says it expects second quarter revenue of between $11.2 billion and $12.4 billion. Wall Street was looking for $12.8 billion.  “The current macro environment is creating elevated uncertainty across the industry, which is reflected in our outlook. We are taking a disciplined and prudent approach to support continued investment in our core products and foundry businesses while maximizing operational cost savings and capital efficiency,

#################### End of crew work. Here is the result : 
[{"date": "2025-04-25", "entity": "Intel", "summary": "Intel reported better-than-expected earnings for Q1 but issued a poor forecast for Q2, leading to a significant stock decline of over 7%. The company is navigating macroeconomic uncertainties, trade tensions, and strategic challenges, including potential changes in its manufacturing and foundry operations.", "sentiment": "negative"}, {"date": "2025-04-25", "entity": "Wall Street", "summary": "Wall Street analysts had expectations for Intel's revenue and EPS, which were mostly exceeded in Q1, but the overall market sentiment turned negative due to the disappointing Q2 forecast and broader economic concerns.", "sentiment": "neutral"}, {"date": "2025-04-25", "entity": "President Trump", "summary": "President Trump\u2019s trade policies, including potential tariffs on semiconductors and systems built in China, pose risks to Intel\u2019s operations and supply chain, creating u

[1m[95m# Agent:[00m [1m[92mDate Identifier[00m
[95m## Task:[00m [92mIdentify the publication date in the following article: 
Yahoo Finance Slate: Introducing the Bezos-backed EV pickup for the masses With only one form factor and color, the $25K cheap EV is aimed at regular Americans but embraces DIY culture with a myriad of aftermarket options.  Pras Subramanian Pras Subramanian · Senior Reporter Fri, April 25, 2025 at 4:21 AM GMT+2 4 min read  In This Article: RIVN +0.70%  AMZN +3.29%  “We built it, you make it.”  That’s the motto for Slate, a new American electric vehicle company, which just revealed its launch model and is backed by ex-Amazon (AMZN) executives, including, reportedly, Jeff Bezos’s family office  Slate said its EV pickup, a bare-bones truck with a footprint smaller than a Ford (F) Maverick, will cost somewhere in the mid-$20K range, and it will come in any color you want — as long as it’s slate gray, that is. The option-challenged vehicle offers a choice of 

[1m[95m# Agent:[00m [1m[92mEntity Identifier[00m
[95m## Task:[00m [92mIdentify the main companies and countries mentioned     in the initial article. List them separately by order of significance in the         article. Do not consider people as entities and do not select the         article's news organization as an entity. Here is the text:
   [00m


[1m[95m# Agent:[00m [1m[92mEntity Identifier[00m
[95m## Final Answer:[00m [92m
Companies: 
- None explicitly mentioned in the provided text.

Countries: 
- None explicitly mentioned in the provided text.[00m




[1m[95m# Agent:[00m [1m[92mEntity Context Summarizer[00m
[95m## Task:[00m [92m
        For each entity identified in the previous task, based on the article:
        1. Create a focused summary of what the text says about it
        2. Include relevant context and relationships[00m


[1m[95m# Agent:[00m [1m[92mEntity Context Summarizer[00m
[95m## Final Answer:[00m [92m
The provided text does not mention any specific companies or countries, so there are no entities to summarize or establish relationships for.[00m




[1m[95m# Agent:[00m [1m[92mSentiment Identifier[00m
[95m## Task:[00m [92mIdentify the sentiment regarding each of the entities mentionned in the article as either positive, neutral or negative. Here is the text:
 Yahoo Finance Slate: Introducing the Bezos-backed EV pickup for the masses With only one form factor and color, the $25K cheap EV is aimed at regular Americans but embraces DIY culture with a myriad of aftermarket options.  Pras Subramanian Pras Subramanian · Senior Reporter Fri, April 25, 2025 at 4:21 AM GMT+2 4 min read  In This Article: RIVN +0.70%  AMZN +3.29%  “We built it, you make it.”  That’s the motto for Slate, a new American electric vehicle company, which just revealed its launch model and is backed by ex-Amazon (AMZN) executives, including, reportedly, Jeff Bezos’s family office  Slate said its EV pickup, a bare-bones truck with a footprint smaller than a Ford (F) Maverick, will cost somewhere in the mid-$20K range, and it will come in any color you want 

#################### End of crew work. Here is the result : 
[{"date": "2025-04-25", "entity": "Slate", "summary": "Slate is a new American electric vehicle company backed by ex-Amazon executives and Jeff Bezos's family office, aiming to produce affordable, customizable EV pickups for the working class in America, emphasizing DIY culture and cost-efficiency.", "sentiment": "positive"}, {"date": "2025-04-25", "entity": "Jeff Bezos", "summary": "Jeff Bezos is involved as a passive investor through his family office, supporting Slate's mission to serve working-class Americans with affordable EVs, though he is not directly involved in daily operations.", "sentiment": "neutral"}, {"date": "2025-04-25", "entity": "Amazon", "summary": "Amazon is mentioned as the former employer of Slate's founders and as an investor through Jeff Bezos's family office, with no direct involvement in Slate's operations, but its EV investments like Rivian are contrasted with Slate's approach.", "sentiment": "neut

[1m[95m# Agent:[00m [1m[92mDate Identifier[00m
[95m## Task:[00m [92mIdentify the publication date in the following article: 
Reuters STMicro forecasts better quarterly sales, says no tariff panic yet  FILE PHOTO: The logo of STMIcroelectronics is seen outside a company building in Montrouge · Reuters Nathan Vifflin Thu, April 24, 2025 at 7:16 AM GMT+2 2 min read  In This Article: STMPA.PA +1.09%  By Nathan Vifflin  (Reuters) -STMicroelectronics said U.S. tariffs had so far not changed its conversations with customers, after the chipmaker forecast rising second-quarter sales from the yearly low seen in the first three months of 2025.  "We have not seen any panic from customers or immediate reaction," President and CEO Jean-Marc Chery told analysts on Thursday.  But while STMicro is well equipped to face the potential tariffs, it is wary of their impact on global automotive production, he said.  "It's urgent to wait and see, because adaptation of the supply chain or decision to 

[1m[95m# Agent:[00m [1m[92mEntity Identifier[00m
[95m## Task:[00m [92mIdentify the main companies and countries mentioned     in the initial article. List them separately by order of significance in the         article. Do not consider people as entities and do not select the         article's news organization as an entity. Here is the text:
   [00m


[1m[95m# Agent:[00m [1m[92mEntity Identifier[00m
[95m## Final Answer:[00m [92m
Companies: None explicitly mentioned in the provided text.
Countries: None explicitly mentioned in the provided text.[00m




[1m[95m# Agent:[00m [1m[92mEntity Context Summarizer[00m
[95m## Task:[00m [92m
        For each entity identified in the previous task, based on the article:
        1. Create a focused summary of what the text says about it
        2. Include relevant context and relationships[00m


[1m[95m# Agent:[00m [1m[92mEntity Context Summarizer[00m
[95m## Final Answer:[00m [92m
The provided text does not mention any specific companies or countries, so there are no entities to summarize or establish relationships for.[00m




[1m[95m# Agent:[00m [1m[92mSentiment Identifier[00m
[95m## Task:[00m [92mIdentify the sentiment regarding each of the entities mentionned in the article as either positive, neutral or negative. Here is the text:
 Reuters STMicro forecasts better quarterly sales, says no tariff panic yet  FILE PHOTO: The logo of STMIcroelectronics is seen outside a company building in Montrouge · Reuters Nathan Vifflin Thu, April 24, 2025 at 7:16 AM GMT+2 2 min read  In This Article: STMPA.PA +1.09%  By Nathan Vifflin  (Reuters) -STMicroelectronics said U.S. tariffs had so far not changed its conversations with customers, after the chipmaker forecast rising second-quarter sales from the yearly low seen in the first three months of 2025.  "We have not seen any panic from customers or immediate reaction," President and CEO Jean-Marc Chery told analysts on Thursday.  But while STMicro is well equipped to face the potential tariffs, it is wary of their impact on global automotive production, he sai

#################### End of crew work. Here is the result : 
[{"date": "2025-04-24", "entity": "STMicroelectronics", "summary": "STMicroelectronics forecasted rising second-quarter sales, indicated no panic from customers regarding tariffs, and expressed confidence in facing potential trade impacts, while also noting the possibility of benefiting from the U.S.-China tariff dispute due to market share shifts.", "sentiment": "positive"}, {"date": "2025-04-24", "entity": "Jean-Marc Chery", "summary": "Jean-Marc Chery, President and CEO of STMicroelectronics, conveyed a cautious yet optimistic outlook, emphasizing the company's preparedness for tariffs and potential benefits from geopolitical trade tensions.", "sentiment": "neutral"}, {"date": "2025-04-24", "entity": "Texas Instruments", "summary": "Texas Instruments forecasted higher second-quarter revenue above Wall Street estimates, suggesting a positive outlook and hope for cyclical recovery, which alleviates tariff concerns for now.",

In [94]:
display(my_results_df.style.set_properties(**{'text-align': 'left'}) \
        .set_table_styles([dict(selector='th', props=[('text-align', 'left')])]))

Unnamed: 0,article_file,article_url,article_size,llm_used,tokens_used,execution_time,nber_entities,first_entity,first_sentiment,comment
0,knowledge/Yahoo Finance 2.txt,url1,2627,gpt-4.1-nano,total_tokens=2550 prompt_tokens=2348 cached_prompt_tokens=0 completion_tokens=202 successful_requests=4,3.971891,1,Google (Alphabet Inc.),positive,
1,knowledge/Yahoo Finance 3.txt,url2,3217,gpt-4.1-nano,total_tokens=2974 prompt_tokens=2516 cached_prompt_tokens=0 completion_tokens=458 successful_requests=4,4.698109,5,Intel,negative,
2,knowledge/Yahoo Finance.txt,url3,4848,gpt-4.1-nano,total_tokens=3643 prompt_tokens=3182 cached_prompt_tokens=1152 completion_tokens=461 successful_requests=4,4.096837,5,Slate,positive,
3,knowledge/Reuters.txt,url4,2420,gpt-4.1-nano,total_tokens=2588 prompt_tokens=2162 cached_prompt_tokens=0 completion_tokens=426 successful_requests=4,4.198361,5,STMicroelectronics,positive,


# Bonus : Trying different LLMs


In [95]:
# google_llm = LLM(
#       model = 'gemini/gemini-2.5-pro-exp-03-25',
#       temperature=0
# )

In [96]:
# groq_llm = LLM(
#     model="groq/llama-3.1-8b-instant",
#     temperature=0
# )

In [97]:
#LLM tested locally on my computer
# local_llm = ChatOpenAI(
#     model = "ollama/llama3.2",
#     base_url = "http://localhost:11434/v1",
#     temperature=0
# )

In [98]:
# # Loop through all my llm .

# # We initialise with 1 article and reset the result_df
# my_results_df = results_df_empty
# file_path = 'knowledge/Yahoo Finance 3.txt'
# url = "url"

# with open(file_path, 'r') as file:
#   article = file.read()

# #Removing Carriage Returns and New Lines from article variable
# # as the Google AI gets confused by these characters
# article = article.replace('\r', '').replace('\n', ' ')
# article_size = get_file_size(file_path)

# # Lets test OpenAI first
# analysis_result, ai_metrics = article_analysis(article, openai_llm)
# print("#################### End of crew work. Here is the result : ")
# print(analysis_result)

# append_to_result_df(
#     article_file=file_path,
#     article_url=url,
#     article_size=article_size,
#     llm_used=ai_metrics["llm_used"],
#     tokens_used=ai_metrics["tokens_used"],
#     execution_time=ai_metrics["execution_time"],
#     crew_json_output=analysis_result
# )

# #Then we try gemini
# analysis_result, ai_metrics = article_analysis(article, google_llm)
# print("#################### End of crew work. Here is the result : ")
# print(analysis_result)

# append_to_result_df(
#     article_file=file_path,
#     article_url=url,
#     article_size=article_size,
#     llm_used=ai_metrics["llm_used"],
#     tokens_used=ai_metrics["tokens_used"],
#     execution_time=ai_metrics["execution_time"],
#     crew_json_output=analysis_result
# )

# #Finally with Groq
# analysis_result, ai_metrics = article_analysis(article, groq_llm)
# print("#################### End of crew work. Here is the result : ")
# print(analysis_result)

# append_to_result_df(
#     article_file=file_path,
#     article_url=url,
#     article_size=article_size,
#     llm_used=ai_metrics["llm_used"],
#     tokens_used=ai_metrics["tokens_used"],
#     execution_time=ai_metrics["execution_time"],
#     crew_json_output=analysis_result
# )



In [99]:
# display(my_results_df.style.set_properties(**{'text-align': 'left'}) \
#         .set_table_styles([dict(selector='th', props=[('text-align', 'left')])]))

# Bonus : Retrieve articles using NewsData.io

In [100]:
# # Function to sanitize filenames
# def sanitize_filename(title):
#     return re.sub(r'[\\/*?:"<>|]', "", title)

# # Function to extract full article text from a URL
# def get_full_article_text(url):
#     try:
#         article = Article(url)
#         article.download()
#         article.parse()
#         return article.text
#     except Exception as e:
#         print(f"Failed to extract article from {url}: {e}")
#         return None

# #Initialize the results table
# my_results_df = results_df_empty

# # Query parameters
# query_params = {
#     "q": "Intel",
#     "language": "en"
# }

# # Make GET request
# response = requests.get(
#     "https://newsdata.io/api/1/news",
#     params={
#         "apikey": os.environ["NEWSDATA_API_KEY"],
#         **query_params
#     }
# )

# print(response)
# # Check response
# if response.status_code == 200:
#     data = response.json()

#     for article in data.get("results", []):
#         title = article.get("title", "Untitled")
#         link = article.get("link", "")

#         if not link:
#             continue  # Skip if no link

#         # Try to get full text from the link
#         full_text = get_full_article_text(link)

#         if full_text:
#             filename = sanitize_filename(title)[:100]

#             # Save the full article text
#             with open(f"knowledge/{filename}.txt", "w", encoding="utf-8") as f:
#                 f.write(full_text)

#             print(f"Saved full article: {title}")
#         else:
#             print(f"Skipping article: {title} (couldn't fetch full text)")

#         article_size = get_file_size(f"knowledge/{filename}.txt")
#         analysis_result, ai_metrics = article_analysis(full_text, openai_llm)
#         print("#################### End of crew work. Here is the result : ")
#         print(analysis_result)

#         print("#################### Let s append those results : ")
#         append_to_result_df(
#             article_file=f"knowledge/{filename}.txt",
#             article_url=link,
#             article_size=article_size,
#             llm_used=ai_metrics["llm_used"],
#             tokens_used=ai_metrics["tokens_used"],
#             execution_time=ai_metrics["execution_time"],
#             crew_json_output=analysis_result
#         )
#         print("#################### result appended to my_result ")


# else:
#     print(f"Failed to retrieve news articles ({response.status_code}): {response.text}")


In [101]:
# display(my_results_df.style.set_properties(**{'text-align': 'left'}) \
#         .set_table_styles([dict(selector='th', props=[('text-align', 'left')])]))