# 📊 Comparativa de APIs de Noticias

| API             | Límite Gratuito             | Cobertura                  | Datos Proporcionados                                     | Análisis de Sentimiento | Ordenación Disponible            | Idiomas |
|----------------|----------------------------|----------------------------|----------------------------------------------------------|--------------------------|----------------------------------|---------|
| **GNews.io**   | 100 solicitudes/día, 10 artículos máx. por solicitud | Noticias generales y tecnológicas | Título, URL, fuente, descripción, imagen                 | ❌ No                     | Relevancia, Fecha                | ✅ Sí |
| **TheNewsAPI** | 100 solicitudes/día, 3 artículos máx. por solicitud  | Noticias generales            | Título, URL, fuente, resumen                             | ❌ No                     | Relevancia, Fecha                | ✅ Sí |
| **Finlight.me**| 10,000 solicitudes/mes      | Noticias financieras        | Título, URL, fuente, contenido completo, sentimiento     | ✅ Sí (Positivo, Neutro, Negativo) | Fecha                            | ✅ Sí |



In [1]:
from datetime import datetime, timedelta
import time
import requests
import pandas as pd
from bs4 import BeautifulSoup
from finlight_client import FinlightApi

In [2]:
# API keys
# SECURITY: credentials are read from environment variables instead of being
# hard-coded in the notebook. Any key that was previously committed in this
# file should be treated as compromised and rotated with the provider.
# An unset variable yields an empty string, so the corresponding API calls
# fail with an authentication error rather than at import time.
import os

GNEWS_API_KEY = os.environ.get("GNEWS_API_KEY", "")
THENEWS_API_KEY = os.environ.get("THENEWS_API_KEY", "")
FINLIGHT_API_KEY = os.environ.get("FINLIGHT_API_KEY", "")

In [5]:
# Utility functions
def format_duration(td):
    """Render a ``timedelta`` as a zero-padded ``HH:MM:SS`` string.

    Fractional seconds are truncated. Hours are not wrapped at 24, so a
    duration longer than a day shows up as e.g. ``25:00:00``.
    """
    whole_seconds = int(td.total_seconds())
    hours, remainder = divmod(whole_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return ":".join(f"{part:02}" for part in (hours, minutes, seconds))

def get_clean_article_content(url):
    """Download *url* and return the joined text of its ``<p>`` elements.

    Boilerplate tags (scripts, styles, navigation, forms, ...) are removed
    from the parsed page before the paragraphs are collected. The function
    does not raise for ordinary failures: a non-200 HTTP status, any
    ``Exception``, or extracted text of 100 characters or fewer is reported
    through the returned string instead.
    """
    boilerplate_tags = ["script", "style", "aside", "nav", "footer", "header", "form", "button"]
    try:
        response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, timeout=10)
        if response.status_code != 200:
            return f"‚ùå Error {response.status_code}"

        soup = BeautifulSoup(response.text, 'html.parser')
        # Remove non-article markup so paragraphs nested inside it are not collected.
        for tag_name in boilerplate_tags:
            for node in soup.find_all(tag_name):
                node.decompose()

        chunks = []
        for paragraph in soup.find_all("p"):
            text = paragraph.get_text().strip()
            if text:
                chunks.append(text)
        article_text = "\n".join(chunks)

        if len(article_text) > 100:
            return article_text
        return "‚ö†Ô∏è Article too short or not relevant"
    except Exception as e:
        return f"‚ùå Exception: {str(e)}"

def scrape_article_contents(df, url_column="urls", content_column="article_contents", delay=2):
    """Scrape the text of every URL listed in *df* and store it row by row.

    Args:
        df: DataFrame with one row per day. Each cell of ``url_column`` is
            expected to hold a list of URLs; a ``date`` column, when
            present, is used only in progress messages.
        url_column: Name of the column holding the URL lists.
        content_column: Name of the column that receives the scraped texts
            (one list per row, in the same order as the URLs). It is created
            when missing and cast to object dtype when necessary.
        delay: Seconds to sleep after each scraped URL.

    Returns:
        The same DataFrame object, modified in place.
    """
    print(f"üîé Starting article scraping for {len(df)} days...")
    start_time = datetime.now()

    # Storing a list inside a single cell via ``df.at`` requires the target
    # column to exist with object dtype; do not rely on the caller for that.
    if content_column not in df.columns:
        df[content_column] = None
    elif df[content_column].dtype != object:
        df[content_column] = df[content_column].astype(object)

    for index, row in df.iterrows():
        date_info = row.get("date", f"index {index}")
        print(f"\nüìÜ Scraping articles for {date_info}...")

        urls = row.get(url_column, [])

        # Rows without a non-empty list of URLs get an empty result list.
        if not isinstance(urls, list) or not urls:
            print(f"‚ö†Ô∏è No valid URLs to scrape for {date_info}. Skipping.")
            df.at[index, content_column] = []
            continue

        article_texts = []
        total_urls = len(urls)
        for i, url in enumerate(urls, 1):
            print(f"üîç Scraping URL {i}/{total_urls} for {date_info}")
            try:
                content = get_clean_article_content(url)
                article_texts.append(content)
            except Exception as e:
                # Keep one entry per URL so texts stay aligned with the URL list.
                article_texts.append(f"‚ùå Error scraping {url}: {str(e)}")
            # Pause between requests to avoid hammering the target sites.
            time.sleep(delay)

        df.at[index, content_column] = article_texts

    end_time = datetime.now()
    duration = end_time - start_time
    print(f"\n‚úÖ Finished scraping. Duration: {format_duration(duration)}")
    return df

def fetch_gnews_articles(start_date, end_date, query, language="en", max_articles=10, sort_by="publishedAt", delay=2, timeout=10):
    """Collect GNews search results day by day between two dates (inclusive).

    One request is issued per calendar day. Each day that gets an HTTP
    response produces one row, with empty lists when the status is not 200
    or no articles are returned. A day whose request raises is reported on
    stdout and produces no row.

    Args:
        start_date: First day to query (``datetime``).
        end_date: Last day to query (``datetime``), inclusive.
        query: Search expression sent as the ``q`` parameter.
        language: Value of the ``lang`` parameter.
        max_articles: Value of the ``max`` parameter.
        sort_by: Value of the ``sortby`` parameter.
        delay: Seconds to sleep after each day's request.
        timeout: Seconds before an HTTP request is abandoned.

    Returns:
        DataFrame with columns ``date``, ``titles``, ``urls``, ``sources``,
        ``descriptions`` (lists per day) and an ``article_contents`` column
        initialised to ``None``.
    """
    print("üöÄ Starting GNews article collection...")
    news_data = []
    start_time = datetime.now()

    current_date = start_date
    while current_date <= end_date:
        formatted_date = current_date.strftime("%Y-%m-%d")

        print(f"üìÖ Fetching articles for {formatted_date}...")

        # Let requests build and percent-encode the query string instead of
        # interpolating raw values (the query contains spaces) into the URL.
        params = {
            "q": query,
            "lang": language,
            "max": max_articles,
            "sortby": sort_by,
            "from": current_date.strftime("%Y-%m-%dT00:00:00Z"),
            "to": current_date.strftime("%Y-%m-%dT23:59:59Z"),
            "category": "business,technology",
            "apikey": GNEWS_API_KEY,
        }
        try:
            # A timeout keeps one stalled connection from blocking the whole run.
            response = requests.get("https://gnews.io/api/v4/search", params=params, timeout=timeout)
            titles, urls, sources, descriptions = [], [], [], []

            if response.status_code == 200:
                articles = response.json().get("articles")
                if articles:
                    print(f"‚úÖ {len(articles)} articles found for {formatted_date}")
                    for article in articles:
                        # .get keeps the four lists aligned and avoids losing
                        # the whole day when one article lacks a field.
                        titles.append(article.get("title", ""))
                        urls.append(article.get("url", ""))
                        sources.append((article.get("source") or {}).get("name", ""))
                        descriptions.append(article.get("description", ""))
                else:
                    print(f"‚ö†Ô∏è No articles found for {formatted_date}")
            else:
                print(f"‚ùå Error {response.status_code} on {formatted_date}: {response.text}")

            news_data.append({
                "date": formatted_date,
                "titles": titles,
                "urls": urls,
                "sources": sources,
                "descriptions": descriptions
            })

        except Exception as e:
            # Best-effort collection: report the failure and move on to the next day.
            print(f"‚ùå Exception while fetching {formatted_date}: {str(e)}")

        current_date += timedelta(days=1)
        time.sleep(delay)

    end_time = datetime.now()
    print("‚úÖ Finished article collection from GNews.")
    print(f"‚è±Ô∏è Time taken for API article fetching: {format_duration(end_time - start_time)}")

    # Explicit columns keep the schema stable even when no day produced a row.
    df = pd.DataFrame(news_data, columns=["date", "titles", "urls", "sources", "descriptions"])
    df["article_contents"] = None
    return df

def fetch_thenewsapi_articles(start_date, end_date, query, language="en", max_articles=3, sort_by="relevance_score,published_at", delay=2, timeout=10):
    """Collect TheNewsAPI results day by day between two dates (inclusive).

    One request is issued per calendar day. Each day that gets an HTTP
    response produces one row, with empty lists when the status is not 200
    or no articles are returned. A day whose request raises is reported on
    stdout and produces no row.

    Args:
        start_date: First day to query (``datetime``).
        end_date: Last day to query (``datetime``), inclusive.
        query: Search expression sent as the ``search`` parameter.
        language: Value of the ``language`` parameter.
        max_articles: Value of the ``limit`` parameter.
        sort_by: Value of the ``sort`` parameter.
        delay: Seconds to sleep after each day's request.
        timeout: Seconds before an HTTP request is abandoned.

    Returns:
        DataFrame with columns ``date``, ``titles``, ``urls``, ``sources``,
        ``descriptions`` (lists per day) and an ``article_contents`` column
        initialised to ``None``.
    """
    print("üöÄ Starting TheNewsAPI article collection...")
    news_data = []
    start_time = datetime.now()

    current_date = start_date
    while current_date <= end_date:
        formatted_date = current_date.strftime("%Y-%m-%d")
        print(f"üìÖ Fetching articles for {formatted_date}...")

        # Let requests build and percent-encode the query string instead of
        # interpolating raw values into the URL.
        params = {
            "search": query,
            "language": language,
            "limit": max_articles,
            "sort": sort_by,
            "published_on": formatted_date,
            "api_token": THENEWS_API_KEY,
        }
        try:
            # A timeout keeps one stalled connection from blocking the whole run.
            response = requests.get("https://api.thenewsapi.com/v1/news/all", params=params, timeout=timeout)
            titles, urls, sources, descriptions = [], [], [], []

            if response.status_code == 200:
                articles = response.json().get("data")
                if articles:
                    print(f"‚úÖ {len(articles)} articles found for {formatted_date}")
                    for article in articles:
                        titles.append(article.get("title", ""))
                        urls.append(article.get("url", ""))
                        sources.append(article.get("source", ""))
                        descriptions.append(article.get("description", ""))
                else:
                    print(f"‚ö†Ô∏è No articles found for {formatted_date}")
            else:
                print(f"‚ùå Error {response.status_code} on {formatted_date}: {response.text}")

            news_data.append({
                "date": formatted_date,
                "titles": titles,
                "urls": urls,
                "sources": sources,
                "descriptions": descriptions
            })

        except Exception as e:
            # Best-effort collection: report the failure and move on to the next day.
            print(f"‚ùå Exception while fetching {formatted_date}: {str(e)}")

        current_date += timedelta(days=1)
        time.sleep(delay)

    end_time = datetime.now()
    print("‚úÖ Finished article collection from TheNewsAPI.")
    print(f"‚è±Ô∏è Time taken for API article fetching: {format_duration(end_time - start_time)}")

    # Explicit columns keep the schema stable even when no day produced a row.
    df = pd.DataFrame(news_data, columns=["date", "titles", "urls", "sources", "descriptions"])
    df["article_contents"] = None
    return df

In [6]:
# Shared config: date window used by the fetchers and pause (in seconds)
# between consecutive requests.
start_date, end_date = datetime(2024, 1, 1), datetime(2024, 1, 31)
delay_seconds = 2

# GNews - gnews.io

In [7]:
# GNews execution: fetch per-day article metadata, then scrape each article's text.
query_gnews = "Bitcoin OR crypto OR blockchain OR cryptocurrency OR BTC"
df_gnews = fetch_gnews_articles(
    start_date=start_date,
    end_date=end_date,
    query=query_gnews,
    delay=delay_seconds,
)
df_gnews = scrape_article_contents(df=df_gnews, delay=delay_seconds)

üöÄ Starting GNews article collection...
üìÖ Fetching articles for 2024-01-01...
‚úÖ 6 articles found for 2024-01-01
üìÖ Fetching articles for 2024-01-02...
‚úÖ 10 articles found for 2024-01-02
üìÖ Fetching articles for 2024-01-03...
‚úÖ 10 articles found for 2024-01-03
üìÖ Fetching articles for 2024-01-04...
‚úÖ 10 articles found for 2024-01-04
üìÖ Fetching articles for 2024-01-05...
‚úÖ 9 articles found for 2024-01-05
üìÖ Fetching articles for 2024-01-06...
‚úÖ 3 articles found for 2024-01-06
üìÖ Fetching articles for 2024-01-07...
‚úÖ 1 articles found for 2024-01-07
üìÖ Fetching articles for 2024-01-08...
‚úÖ 10 articles found for 2024-01-08
üìÖ Fetching articles for 2024-01-09...
‚úÖ 10 articles found for 2024-01-09
üìÖ Fetching articles for 2024-01-10...
‚úÖ 10 articles found for 2024-01-10
üìÖ Fetching articles for 2024-01-11...
‚úÖ 10 articles found for 2024-01-11
üìÖ Fetching articles for 2024-01-12...
‚úÖ 10 articles found for 2024-01-12
üìÖ Fetching articles for

In [8]:
# Display the GNews DataFrame (metadata lists plus scraped article contents).
df_gnews

Unnamed: 0,date,titles,urls,sources,descriptions,article_contents
0,2024-01-01,[A new Netflix film about the wild early days ...,[https://bgr.com/entertainment/bitconned-is-th...,"[BGR, Livemint, Livemint, Investing.com, Seeki...",[Bitconned is a new Netflix documentary about ...,"[If you buy through a BGR link, we may earn an..."
1,2024-01-02,[MicroStrategy‚Äôs Saylor is selling $216 millio...,[https://finance.yahoo.com/news/microstrategy-...,"[Yahoo Finance, Yahoo Canada Finance, Yahoo Fi...",[MicroStrategy has benefited from a rally in b...,"[‚ùå Error 404, Oops, something went wrong\nBitc..."
2,2024-01-03,[Manchester United will be restricted to 'a ha...,[https://www.skysports.com/football/transfer-p...,"[Sky Sports, Sky Sports, Sky Sports, Sky Sport...",[Plus: Arsenal have complained to the PGMOL ab...,[Plus: Arsenal have complained to the PGMOL ab...
3,2024-01-04,[Logan Paul announces buyback program for fail...,[https://www.mmafighting.com/2024/1/4/24025513...,"[MMA Fighting, The Globe and Mail, Yahoo Canad...",[Logan Paul released a lengthy statement on Th...,[Filed under:\nLogan Paul will attempt to righ...
4,2024-01-05,[Elon Musk drops price of X gold checks amid r...,[https://arstechnica.com/tech-policy/2024/01/c...,"[Ars Technica, Livemint, Seeking Alpha, Kotaku...",[Reports come the same week X reduced the cost...,[There's currently a surge in cryptocurrency a...
5,2024-01-06,[Mint Explainer: Why India‚Äôs targeting Binance...,[https://www.livemint.com/companies/mint-expla...,"[Livemint, Devdiscourse, Firstpost]",[Millions of Indian users were transacting wit...,[This is a Mint Premium article gifted to you....
6,2024-01-07,"[Tubridy‚Äôs new radio show drives more than 8,0...",[https://www.irishtimes.com/business/2024/01/0...,[The Irish Times],[Seen and Heard: Q102 and Virgin Radio UK down...,"[More than 8,000 people downloaded Q102 and th..."
7,2024-01-08,"[ASX to rally, Nvidia bolsters techs, oil drop...",[https://www.afr.com/markets/equity-markets/as...,"[The Australian Financial Review, The Australi...",[Australian shares are set to open higher. Nvi...,"[ASX rebounds on tech rally, strong retail sal..."
8,2024-01-09,[SEC chair denies a bitcoin ETF has been appro...,[https://www.guelphtoday.com/national-business...,"[GuelphToday, ElliotLakeToday.com, BradfordTod...",[NEW YORK (AP) ‚Äî The Securities and Exchange C...,[NEW YORK (AP) ‚Äî The Securities and Exchange C...
9,2024-01-10,"[Bitcoin's badge of honor, Bitcoin exchange-tr...",[https://www.cnbc.com/2024/01/11/stock-markets...,"[CNBC, CBC.ca, POLITICO, Devdiscourse, CNBC, Y...",[Bitcoin just received its biggest stamp of ap...,[In this article\nThis report is from today's ...


# TheNewsAPI - thenewsapi.com

In [9]:
# TheNewsAPI execution: fetch per-day article metadata, then scrape each article's text.
query_thenews = "Bitcoin"
df_thenews = fetch_thenewsapi_articles(
    start_date=start_date,
    end_date=end_date,
    query=query_thenews,
    delay=delay_seconds,
)
df_thenews = scrape_article_contents(df=df_thenews, delay=delay_seconds)

üöÄ Starting TheNewsAPI article collection...
üìÖ Fetching articles for 2024-01-01...
‚úÖ 3 articles found for 2024-01-01
üìÖ Fetching articles for 2024-01-02...
‚úÖ 3 articles found for 2024-01-02
üìÖ Fetching articles for 2024-01-03...
‚úÖ 3 articles found for 2024-01-03
üìÖ Fetching articles for 2024-01-04...
‚úÖ 3 articles found for 2024-01-04
üìÖ Fetching articles for 2024-01-05...
‚úÖ 3 articles found for 2024-01-05
üìÖ Fetching articles for 2024-01-06...
‚úÖ 3 articles found for 2024-01-06
üìÖ Fetching articles for 2024-01-07...
‚úÖ 3 articles found for 2024-01-07
üìÖ Fetching articles for 2024-01-08...
‚úÖ 3 articles found for 2024-01-08
üìÖ Fetching articles for 2024-01-09...
‚úÖ 3 articles found for 2024-01-09
üìÖ Fetching articles for 2024-01-10...
‚úÖ 3 articles found for 2024-01-10
üìÖ Fetching articles for 2024-01-11...
‚úÖ 3 articles found for 2024-01-11
üìÖ Fetching articles for 2024-01-12...
‚úÖ 3 articles found for 2024-01-12
üìÖ Fetching articles for 20

In [10]:
# Display the TheNewsAPI DataFrame (metadata lists plus scraped article contents).
df_thenews

Unnamed: 0,date,titles,urls,sources,descriptions,article_contents
0,2024-01-01,"[Risk Assessments, Ads: Last-Minute Preparatio...",[https://www.financemagnates.com/cryptocurrenc...,"[financemagnates.com, ibtimes.com, americanban...",[US crypto companies have made last-minute cha...,[US crypto companies have made last-minute cha...
1,2024-01-02,"[Bitcoin: Bitcoin topped $45,000 for first tim...",[https://timesofindia.indiatimes.com/business/...,"[timesofindia.indiatimes.com, finance.yahoo.co...","[Cryptocurrency News: Bitcoin surged past $45,...",[10 Most Affordable Cities in India to Buy a H...
2,2024-01-03,"[How To Get Bitcoin $BTC ‚Äî Complete Guide, Cla...",[https://medium.com/@PharaohOdinOrbitWealth/ho...,"[medium.com, medium.com, zerohedge.com]",[Dive into the Bitcoin $BTC Airdrop: A Distinc...,"[‚ùå Error 410, ‚ùå Error 410, ‚ö†Ô∏è Article too shor..."
3,2024-01-04,"[Bitcoin ETF Dilemma: Spot Markets, Futures Fa...",[https://www.benzinga.com/markets/cryptocurren...,"[benzinga.com, forbes.com, livemint.com]","[As¬†Spot Bitcoin ETFs¬†loom, the market is watc...","[As¬†Spot Bitcoin ETFs¬†loom, the market is watc..."
4,2024-01-05,"[Bitcoin Spot ETF Delayed till May?, Grayscale...",[https://medium.datadriveninvestor.com/bitcoin...,"[medium.com, seekingalpha.com, finextra.com]",[I‚Äôve been hearing a lot of bearishness on Eth...,"[‚ùå Error 410, ‚ùå Error 403, ‚ö†Ô∏è Article too shor..."
5,2024-01-06,"[How To Get Bitcoin $BTC ‚Äî Complete Guide, The...",[https://medium.com/@OsirisVolcanoInvest/how-t...,"[medium.com, dailyhodl.com, benzinga.com]","[In the world of cryptocurrencies, where innov...","[‚ùå Error 410, The usual financial suspects wan..."
6,2024-01-07,[What‚Äôs about the electricity consumption of B...,[https://medium.com/@ecuyer.duchevalier/whats-...,"[medium.com, investing.com, investing.com]","[Unfortunately, finding unbiased answers to th...",[Sign in\nSign in\nHome\nLibrary\nStories\nSta...
7,2024-01-08,[Bitcoin Surpasses $45K As Issuers Submit Fina...,[https://www.ibtimes.com/bitcoin-surpasses-45k...,"[ibtimes.com, financemagnates.com, techcentral...",[Bitcoin showed signs of recovery this week by...,"[Bitcoin reclaimed the $45,000 price point on ..."
8,2024-01-09,[Bitcoin briefly spiked on false report that S...,[https://seekingalpha.com/news/4053640-sec-app...,"[seekingalpha.com, ibtimes.com, benzinga.com]",[The U.S. Securities and Exchange Commission g...,"[‚ùå Error 403, Bitcoin experienced a substantia..."
9,2024-01-10,[Grayscale Investments¬Æ Receives SEC Approval ...,[https://www.benzinga.com/pressreleases/24/01/...,"[benzinga.com, cnbc.com, wealthmanagement.com]",[Grayscale Bitcoin Trust will become world's s...,[Benzinga Rankings give you vital metrics on a...


# News API - finlight.me

In [4]:
# Test parameters
query = "BTC"
language = "en"  # Language code: 'en' for English, 'es' for Spanish, etc.
pageSize = 20

# Initialise the API client
client = FinlightApi(config={"api_key": FINLIGHT_API_KEY})

# Send the request for the 2024-01-01 .. 2024-01-31 window
response = client.articles.get_extended_articles(params={
    "query": query,
    "language": language,
    "pageSize": pageSize,
    "from": "2024-01-01",
    "to": "2024-01-31",
    "order": "DESC",
})

# Show the response
response

{'status': 'ok', 'page': 1, 'pageSize': 20, 'articles': []}

In [None]:
def obtener_noticias(fecha):
    """Return the articles the Finlight client reports for a single day.

    ``fecha`` is sent as both the ``from`` and the ``to`` bound of the
    request. The result is the response's ``"articles"`` entry, or an empty
    list when that key is absent.
    """
    request_params = {
        "query": "Bitcoin OR crypto OR stocks OR finance",
        "language": "en",
        "from": fecha,
        "to": fecha,
        "order": "DESC",
        "pageSize": 50,  # ask for several articles per day
    }
    result = client.articles.get_extended_articles(params=request_params)
    return result.get("articles", [])

In [None]:
def calcular_sentimiento_promedio(noticias):
    """Return the confidence-weighted mean sentiment of *noticias*.

    Each article's ``"sentiment"`` label is mapped to +1 (``"positive"``),
    -1 (``"negative"``) or 0 (any other or missing label) and weighted by
    its ``"confidence"``. A missing or ``None`` confidence counts as 0.

    Args:
        noticias: Iterable of article dicts.

    Returns:
        The weighted mean, or 0 (neutral) when there are no articles or the
        total confidence is zero.
    """
    # Numeric value assigned to each sentiment label; everything else is neutral.
    polarity = {"positive": 1, "negative": -1}

    weighted_total = 0.0
    confidence_total = 0.0
    for noticia in noticias:
        # "or 0" also covers an explicit None, which float() would reject.
        confianza = float(noticia.get("confidence") or 0)
        valor = polarity.get(noticia.get("sentiment", "neutral"), 0)
        weighted_total += valor * confianza  # sentiment weighted by confidence
        confidence_total += confianza

    # Guard the division: an empty input or all-zero confidences would
    # otherwise yield a division by zero.
    if confidence_total == 0:
        return 0
    return weighted_total / confidence_total

In [None]:
# Request Bitcoin articles for June 2024. June has 30 days, so the window
# ends on 2024-06-30 (the previous "2024-06-31" was not a valid date).
response = client.articles.get_extended_articles(
    params={
        "query": "Bitcoin",  # can be replaced by a more general search term
        "language": "en",
        "from": "2024-06-01",
        "to": "2024-06-30",
        "order": "DESC",
        "pageSize": 5,  # request at most 5 articles
    }
)


In [None]:
# Display the raw response returned by the Finlight client.
response