# Descarga de datos con APIs

In [2]:
# === 1. IMPORTS Y CONFIGURACIÓN ===

from newsapi import NewsApiClient
import requests
import feedparser
import pandas as pd
from datetime import datetime, timedelta
import time

# Claves API
NEWSAPI_KEY = 'd7c0734c87d04ed69ea623c77e7406b3'
GNEWS_API_KEY = '8b50c1681d21553b25406f228aca6938'
CRYPTOPANIC_API_KEY = 'b4124ef89cfd6fdce685b3218b21a6378dc6ed01'

# Inicializar NewsAPI
newsapi = NewsApiClient(api_key=NEWSAPI_KEY)

RSS_FEEDS = {
    'BTC': [
        "https://feeds.feedburner.com/CoinDesk",
        "https://cointelegraph.com/rss",
        "https://www.newsbtc.com/feed/",
        "https://bitcoinmagazine.com/.rss/full/",
        "https://cryptopotato.com/feed/",
        "https://decrypt.co/feed",
        "https://www.theblock.co/feed/rss",
        "https://cryptobriefing.com/feed/",
        "https://blockworks.co/rss.xml",
        "https://www.dlnews.com/rss/",
        "https://u.today/rss",
        "https://beincrypto.com/feed/",
        "https://www.bitcoin.com/feed/",
        "https://cryptoslate.com/feed/",
        "https://ambcrypto.com/feed/"
    ],
    'TECH': [
        "https://techcrunch.com/feed/",
        "https://www.theverge.com/rss/index.xml",
        "https://arstechnica.com/feed/",
        "https://www.wired.com/feed/rss",
        "https://gizmodo.com/rss",
        "https://www.engadget.com/rss.xml",
        "https://venturebeat.com/feed/",
        "https://www.zdnet.com/topic/artificial-intelligence/rss/",
        "https://www.technologyreview.com/feed/"
    ],
    'MACRO': [
        "https://feeds.bbci.co.uk/news/business/rss.xml",
        "https://www.cnbc.com/id/100003114/device/rss/rss.html",
        "https://www.ft.com/global-economy?format=rss",
        "https://www.bloomberg.com/feeds/markets.rss",
        "https://www.wsj.com/xml/rss/3_7085.xml",
        "https://www.economist.com/finance-and-economics/rss.xml",
        "https://www.reuters.com/rssFeed/businessNews",
        "https://www.marketwatch.com/rss/economy-politics",
        "https://www.investopedia.com/rss/articles.xml",
        "https://seekingalpha.com/api/v2/news.rss?tag=market-news"
    ]
}

print("✅ Configuración lista. APIs y feeds definidos.")

✅ Configuración lista. APIs y feeds definidos.


In [3]:
# === 2. DEFINICIÓN DE PALABRAS CLAVE POR EJE ===

KEYWORDS_BTC = (
    "(bitcoin OR btc OR cryptocurrency OR 'crypto price' OR 'digital gold' OR halving) "
    "NOT (fraud OR scam OR hack OR theft)"
)

KEYWORDS_TECH = (
    "(blockchain OR web3 OR 'RAM memory' OR 'semiconductor shortage' OR 'AI investment' OR 'DeFi' OR 'mining rig')"
)

KEYWORDS_MACRO = (
    "(FED OR 'interest rate' OR inflation OR recession OR 'treasury bond' OR 'stock market crash' OR 'quantitative easing')"
)

keyword_map = {
    'BTC': KEYWORDS_BTC,
    'TECH': KEYWORDS_TECH,
    'MACRO': KEYWORDS_MACRO
}

# Rango de fechas (cámbialo fácil)
FECHA_INICIO = '2025-12-15'
FECHA_FIN = '2025-12-17'

print("✅ Palabras clave definidas para los 3 ejes.")

✅ Palabras clave definidas para los 3 ejes.


In [4]:
# === 3. FUNCIONES PARA CADA FUENTE (CON DEBUG) ===

def fetch_newsapi(query, from_date, to_date, axis):
    articles = []
    try:
        response = newsapi.get_everything(
            q=query,
            language='en',
            from_param=from_date,
            to=to_date,
            sort_by='relevancy',
            page_size=100
        )
        if response['status'] == 'ok':
            for article in response['articles']:
                articles.append({
                    'publishedAt': article['publishedAt'],
                    'title': article['title'],
                    'description': article['description'],
                    'source': article['source']['name'],
                    'axis': axis
                })
    except Exception as e:
        print(f"❌ NewsAPI error ({axis}): {e}")
    return articles

def fetch_gnews(query, from_date, to_date, axis):
    articles = []
    params = {
        "q": query,
        "from": from_date,
        "to": to_date,
        "lang": "en",
        "max": 100,
        "token": GNEWS_API_KEY
    }
    try:
        response = requests.get("https://gnews.io/api/v4/search", params=params, timeout=15)
        data = response.json()
        print(f"Debug GNews ({axis}): Status {response.status_code}")
        if 'information' in data:
            print(f"  Aviso GNews: {data['information']}")
        raw_articles = data.get('articles', [])
        for article in raw_articles:
            articles.append({
                'publishedAt': article['publishedAt'],
                'title': article['title'],
                'description': article.get('description'),
                'source': article['source']['name'],
                'axis': axis
            })
        print(f"  - {axis} GNews: {len(articles)} artículos (con delay 12h en free)")
    except Exception as e:
        print(f"❌ GNews error ({axis}): {e}")
    return articles

def fetch_cryptopanic():
    articles = []
    params = {
        "auth_token": CRYPTOPANIC_API_KEY,
        "public": "true",
        "kind": "news"
    }
    try:
        response = requests.get("https://cryptopanic.com/api/v1/posts/", params=params, timeout=15)
        print(f"Debug CryptoPanic: Status {response.status_code}")
        print(f"  Respuesta cruda (primeros 200 chars): {response.text[:200]}")
        data = response.json()
        for post in data.get("results", []):
            articles.append({
                'publishedAt': post['published_at'],
                'title': post['title'],
                'description': None,
                'source': post.get('source', {}).get('title', 'CryptoPanic'),
                'axis': 'BTC'
            })
    except Exception as e:
        print(f"❌ CryptoPanic error detallado: {e}")
    return articles

def fetch_rss(axis):
    articles = []
    for feed_url in RSS_FEEDS.get(axis, []):
        feed = feedparser.parse(feed_url)
        print(f"Debug RSS ({axis}, {feed_url}): {len(feed.entries)} entradas")
        for entry in feed.entries:
            articles.append({
                'publishedAt': entry.get('published'),
                'title': entry.title,
                'description': entry.get('summary'),
                'source': feed.feed.get('title', 'RSS'),
                'axis': axis
            })
    return articles

print("✅ Funciones de fetch listas (con debug).")

✅ Funciones de fetch listas (con debug).


In [5]:
# === 4. BUCLE PRINCIPAL DE EXTRACCIÓN ===

start_date = datetime.strptime(FECHA_INICIO, '%Y-%m-%d')
end_date = datetime.strptime(FECHA_FIN, '%Y-%m-%d') + timedelta(days=1)

all_news_data = {'BTC': [], 'TECH': [], 'MACRO': []}

# CryptoPanic y RSS una vez
print("Buscando CryptoPanic...")
all_news_data['BTC'].extend(fetch_cryptopanic())

print("Buscando RSS feeds...")
for axis in ['BTC', 'TECH', 'MACRO']:
    rss_articles = fetch_rss(axis)
    all_news_data[axis].extend(rss_articles)

current_date = start_date
print(f"--- INICIANDO EXTRACCIÓN DÍA A DÍA ---\nRANGO: {FECHA_INICIO} a {FECHA_FIN}")

while current_date < end_date:
    day_str = current_date.strftime('%Y-%m-%d')
    print(f"\nProcesando día: {day_str}")
    
    for axis_name, keyword_query in keyword_map.items():
        newsapi_articles = fetch_newsapi(keyword_query, day_str, day_str, axis_name)
        all_news_data[axis_name].extend(newsapi_articles)
        print(f"  - {axis_name} NewsAPI: {len(newsapi_articles)} artículos")
        
        gnews_articles = fetch_gnews(keyword_query, day_str, day_str, axis_name)
        all_news_data[axis_name].extend(gnews_articles)
        
        time.sleep(0.6)  # Pausa segura
    
    current_date += timedelta(days=1)

print("\n--- EXTRACCIÓN FINALIZADA ---")
for axis, data in all_news_data.items():
    print(f"Total {axis}: {len(data)} artículos")

Buscando CryptoPanic...
Debug CryptoPanic: Status 404
  Respuesta cruda (primeros 200 chars): <!DOCTYPE html>
<html lang="en">
<head>
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
    <meta name="viewport" cont
❌ CryptoPanic error detallado: Expecting value: line 1 column 1 (char 0)
Buscando RSS feeds...
Debug RSS (BTC, https://feeds.feedburner.com/CoinDesk): 25 entradas
Debug RSS (BTC, https://cointelegraph.com/rss): 30 entradas
Debug RSS (BTC, https://www.newsbtc.com/feed/): 10 entradas
Debug RSS (BTC, https://bitcoinmagazine.com/.rss/full/): 10 entradas
Debug RSS (BTC, https://cryptopotato.com/feed/): 36 entradas
Debug RSS (BTC, https://decrypt.co/feed): 54 entradas
Debug RSS (BTC, https://www.theblock.co/feed/rss): 0 entradas
Debug RSS (BTC, https://cryptobriefing.com/feed/): 30 entradas
Debug RSS (BTC, https://blockworks.co/rss.xml): 0 entradas
Debug RSS (BTC, https://www.dlnews.com/rss/): 0 entrad

In [6]:
# === 5. CONSOLIDAR Y GUARDAR CSV ===

full_articles = all_news_data['BTC'] + all_news_data['TECH'] + all_news_data['MACRO']
df_raw_news = pd.DataFrame(full_articles)

fecha_actual = datetime.now().strftime('%Y.%m.%d')
filename = f'{fecha_actual}.noticias_raw_sentimiento.csv'

df_raw_news.to_csv(filename, index=False)

print(f"\n✅ Datos guardados en '{filename}'")
print("Total artículos:", len(df_raw_news))
print("\nEjemplo:")
display(df_raw_news.head(10))


✅ Datos guardados en '2025.12.17.noticias_raw_sentimiento.csv'
Total artículos: 921

Ejemplo:


Unnamed: 0,publishedAt,title,description,source,axis
0,"Wed, 17 Dec 2025 15:19:02 +0000","Bitcoin re-takes $90,000 as price spikes early...",,"CoinDesk: Bitcoin, Ethereum, Crypto News and P...",BTC
1,"Wed, 17 Dec 2025 15:00:00 +0000",Most Influential: Vlad Tenev,,"CoinDesk: Bitcoin, Ethereum, Crypto News and P...",BTC
2,"Wed, 17 Dec 2025 15:00:00 +0000",Most Influential: Paul Atkins,,"CoinDesk: Bitcoin, Ethereum, Crypto News and P...",BTC
3,"Wed, 17 Dec 2025 15:00:00 +0000",Most Influential: Caroline Pham,,"CoinDesk: Bitcoin, Ethereum, Crypto News and P...",BTC
4,"Wed, 17 Dec 2025 15:00:00 +0000",Most Influential: Carlos Domingo,,"CoinDesk: Bitcoin, Ethereum, Crypto News and P...",BTC
5,"Wed, 17 Dec 2025 15:00:00 +0000",Most Influential: Jeremy Allaire,,"CoinDesk: Bitcoin, Ethereum, Crypto News and P...",BTC
6,"Wed, 17 Dec 2025 14:18:43 +0000",CoinDesk 20 Performance Update: NEAR Protocol ...,,"CoinDesk: Bitcoin, Ethereum, Crypto News and P...",BTC
7,"Wed, 17 Dec 2025 14:00:00 +0000",The Deepfake Reckoning: Why Crypto’s Next Secu...,,"CoinDesk: Bitcoin, Ethereum, Crypto News and P...",BTC
8,"Wed, 17 Dec 2025 13:56:03 +0000",Hut 8 stock surges 20% on Fluidstack AI data c...,,"CoinDesk: Bitcoin, Ethereum, Crypto News and P...",BTC
9,"Wed, 17 Dec 2025 13:47:30 +0000",Don't call it QE — the Fed's $40 billion bill ...,,"CoinDesk: Bitcoin, Ethereum, Crypto News and P...",BTC


In [8]:
display(df_raw_news.tail(10))

Unnamed: 0,publishedAt,title,description,source,axis
911,"Sun, 16 Mar 2025 11:20:47 +0000",Why rents are rising too fast,"Rich-world tenants are angry, and have reason ...",Finance & economics,MACRO
912,"Thu, 13 Mar 2025 11:37:50 +0000",Can Europe cope with a free-spending Germany?,Pity the continent’s exporters,Finance & economics,MACRO
913,"Thu, 13 Mar 2025 11:15:13 +0000",More testosterone means higher pay—for some men,A changing appetite for status games could pla...,Finance & economics,MACRO
914,"Thu, 13 Mar 2025 10:51:15 +0000",Why “labour shortages” don’t really exist,"Use the term, and you are almost always a bad ...",Finance & economics,MACRO
915,"Thu, 13 Mar 2025 10:47:20 +0000",Your guide to the new anti-immigration argument,Nativists say that migrants raise house prices...,Finance & economics,MACRO
916,"Wed, 12 Mar 2025 19:25:53 +0000",What sparks an investing revolution?,Ideas that emerged from the University of Chic...,Finance & economics,MACRO
917,"Tue, 11 Mar 2025 18:50:13 +0000",Will America’s stockmarket convulsions spread?,Investors are hurrying to find alternatives—bu...,Finance & economics,MACRO
918,"Mon, 10 Mar 2025 22:08:43 +0000",How Trump provoked a stockmarket sell-off,Will the president win back investors? Does he...,Finance & economics,MACRO
919,"Sun, 09 Mar 2025 15:41:03 +0000",Does Trump really want a weaker dollar?,Overturning three decades of American policy w...,Finance & economics,MACRO
920,"Sun, 09 Mar 2025 15:36:58 +0000",Investors think the Russia-Ukraine war will en...,"The prospect of peace is reshaping markets, in...",Finance & economics,MACRO
