In [10]:
import requests
import os

NEWS_API_KEY = os.getenv('NEWS_API_KEY')

[REDACTED: API key removed from cell output; rotate this key and clear outputs before sharing]


In [5]:


def get_news(query: str, endpoint: str, from_date: str, to_date: str, sort_by='popularity', page_size=100, page_number=1, **kwargs) -> dict: 
    """Query the NewsAPI v2 REST API and return the decoded JSON body.

    Args:
        query: Search phrase, sent as the ``q`` parameter.
        endpoint: ``"top-headlines"`` or ``"everything"``; selects the URL path.
        from_date: Lower date bound, sent as ``from`` to the ``everything``
            endpoint only. ``None`` omits the parameter.
        to_date: Upper date bound, sent as ``to`` to the ``everything``
            endpoint only. ``None`` omits the parameter.
        sort_by: ``"relevancy"``, ``"popularity"`` or ``"publishedAt"``; sent as
            ``sortBy`` to the ``everything`` endpoint only.
        page_size: Number of results per page (``pageSize``).
        page_number: 1-based page index (``page``).
        **kwargs: Extra query parameters forwarded verbatim (for example
            ``sources`` or ``country``). They take precedence over the
            defaults built here, so ``language="de"`` replaces ``"en"``.

    Returns:
        The response body parsed from JSON. HTTP error statuses are not
        raised; the parsed body is returned as-is.

    Raises:
        ValueError: If ``endpoint`` or ``sort_by`` is not one of the accepted
            values.
    """
    if endpoint not in ['top-headlines', 'everything']:
        raise ValueError('Endpoint must be either "top-headlines" or "everything"')

    if sort_by not in ['relevancy', 'popularity', 'publishedAt']:
        raise ValueError('Sort by must be either "relevancy", "popularity", or "publishedAt"')

    params = {
        'q': query,
        'pageSize': page_size,
        'page': page_number,
        'language': 'en',
    }
    # The date range and sort order are only documented for /v2/everything,
    # so they are not sent to /v2/top-headlines.
    if endpoint == 'everything':
        params.update({'from': from_date, 'to': to_date, 'sortBy': sort_by})
    params.update(kwargs)
    # Drop unset values so that e.g. to_date=None is omitted instead of being
    # serialised as the literal string "None".
    params = {name: value for name, value in params.items() if value is not None}

    # Passing `params` lets requests URL-encode every value, and the key goes
    # in a header so it does not end up in logged or displayed URLs.
    response = requests.get(
        f'https://newsapi.org/v2/{endpoint}',
        params=params,
        headers={'X-Api-Key': NEWS_API_KEY},
        timeout=30,
    )
    return response.json()

# Example request: query "bitcoin" on the "everything" endpoint from 2024-05-15,
# with to_date=None, sorted by popularity, page size '5', page '1'.
# NOTE(review): page_size and page_number are passed as strings here, while the
# signature defaults for them are ints.
get_news('bitcoin', 'everything', '2024-05-15', None, 'popularity', '5', '1')

{'status': 'ok',
 'totalResults': 839,
 'articles': [{'source': {'id': 'business-insider',
    'name': 'Business Insider'},
   'author': 'fdemott@insider.com (Filip De Mott)',
   'title': 'Bitcoin could see a wave of forced selling as miners face the reality of lower rewards post-halving, research firm says',
   'description': '"If miners were forced to sell even a fraction of their holdings over the coming month this would have a negative impact on markets," Kaiki Research wrote.',
   'url': 'https://markets.businessinsider.com/news/currencies/bitcoin-price-supply-crypto-currency-miners-btc-sell-off-decline-2024-5',
   'urlToImage': 'https://i.insider.com/6643af41b4abc992e8c96de5?width=1200&format=jpeg',
   'publishedAt': '2024-05-15T12:17:10Z',
   'content': "Markets have long considered bitcoin's recent halving as a major price bolster, but it could bring a wave of selling from one corner of the sector,\xa0according to Kaiko Research.\r\nThe April halving is … [+1708 chars]"},
  {'s

In [6]:
from plugins.operators.NewsAPIOperator import NewsAPIToDataframeOperator

# Configure the project-local NewsAPI operator for a single page of results.
operator = NewsAPIToDataframeOperator(
    task_id="extract_api_data",
    news_topic="bitcoin",
    endpoint="top-headlines",
    from_date="2024-04-18",
    to_date=None,
    sort_by="popularity",
    page_size=100,
    page_number=1,
)

# Run the operator directly; execute() is called with no arguments here.
# NOTE(review): presumably returns a pandas DataFrame, as the name suggests; confirm.
df = operator.execute()



In [7]:
import pandas as pd
def bronze_preprocessing(df: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of a raw articles frame.

    The ``source`` column is flattened from a dict to the value under its
    ``'name'`` key, and ``publishedAt`` is parsed with ``pd.to_datetime``.
    The input frame is left untouched, so the cell can be re-run safely and
    the function can be applied to its own output.

    Args:
        df: Frame with at least ``source`` and ``publishedAt`` columns.

    Returns:
        A new DataFrame with the two columns replaced; all other columns are
        carried over unchanged.
    """
    def _source_name(value):
        # Values that are not dicts (already-flattened names, missing values)
        # pass through unchanged instead of raising on value['name'].
        return value['name'] if isinstance(value, dict) else value

    # assign() builds a new frame, so the caller's frame is not mutated.
    return df.assign(
        source=df['source'].map(_source_name),
        publishedAt=pd.to_datetime(df['publishedAt']),
    )
    

# Display the frame assigned from operator.execute().
# NOTE(review): bronze_preprocessing is defined in this cell but not called
# here, so this shows the frame as extracted (source is still a dict).
df

Unnamed: 0,source,author,title,description,url,urlToImage,publishedAt,content
0,"{'id': None, 'name': 'Yahoo Entertainment'}",Will Shanklin,Block reportedly greenlit transactions involvi...,Block appears to be squarely in the government...,https://consent.yahoo.com/v2/collectConsent?se...,,2024-05-01T18:12:23Z,"If you click 'Accept all', we and our partners..."
1,"{'id': 'wired', 'name': 'Wired'}",Joel Khalili,FTX Creditors Say Payout Deal Is 'an Insult'—a...,FTX has a plan to repay its former crypto cust...,https://www.wired.com/story/ftx-creditors-cryp...,https://media.wired.com/photos/663ba309e675545...,2024-05-08T17:00:02Z,Some creditors of the bankrupt crypto exchange...
2,"{'id': 'wired', 'name': 'Wired'}",Paul Ford,Generative AI Is Totally Shameless. I Want to ...,The best thing about brain-melting software li...,https://www.wired.com/story/generative-ai-tota...,https://media.wired.com/photos/662a84cc8d7b95b...,2024-05-14T12:00:00Z,AI has a lot of problems. It helps itself to t...
3,"{'id': 'wired', 'name': 'Wired'}",Andy Greenberg,A Vast New Dataset Could Supercharge the AI Hu...,"Blockchain analysis firm Elliptic, MIT, and IB...",https://www.wired.com/story/ai-crypto-tracing-...,https://media.wired.com/photos/6631a1936dc0c77...,2024-05-01T13:00:00Z,"As a test of their resulting AI tool, the rese..."
4,"{'id': 'business-insider', 'name': 'Business I...",fdemott@insider.com (Filip De Mott),Bitcoin could see a wave of forced selling as ...,"""If miners were forced to sell even a fraction...",https://markets.businessinsider.com/news/curre...,https://i.insider.com/6643af41b4abc992e8c96de5...,2024-05-15T12:17:10Z,Markets have long considered bitcoin's recent ...
...,...,...,...,...,...,...,...,...
95,"{'id': None, 'name': 'Techmeme.com'}",,"Kaiko: in the past year, Binance's bitcoin tra...",Suvashree Ghosh / Bloomberg:\nKaiko: in the pa...,https://www.techmeme.com/240423/p8,https://assets.bwbx.io/images/users/iqjWHBFdfx...,2024-04-23T09:55:01Z,About This Page\r\nThis is a Techmeme archive ...
96,"{'id': None, 'name': 'Internet'}",info@thehackernews.com (The Hacker News),Russian Operator of BTC-e Crypto Exchange Plea...,A Russian operator of a now-dismantled BTC-e c...,https://thehackernews.com/2024/05/russian-oper...,https://blogger.googleusercontent.com/img/b/R2...,2024-05-07T09:32:00Z,A Russian operator of a now-dismantled BTC-e c...
97,"{'id': None, 'name': 'IndieWire'}",Christian Zilko,Sundance Institute Unveils 2024 Fellows for Di...,The developmental program has helped develop n...,https://www.indiewire.com/news/general-news/su...,https://www.indiewire.com/wp-content/uploads/2...,2024-04-29T16:00:00Z,The Sundance Institute has announced its fello...
98,"{'id': None, 'name': 'Quartz India'}",Vinamrata Chaturvedi,"Bitcoin recovers to $64,000 as spot Bitcoin ET...","Bitcoin rebounded to $64,000 on Monday morning...",https://qz.com/bitcoin-jumps-to-64-000-as-spot...,"https://i.kinja-img.com/image/upload/c_fill,h_...",2024-05-06T14:18:00Z,"Bitcoin rebounded to $64,000 on Monday morning..."


In [12]:
# Show the first row's URL in full; the table display abbreviates long
# strings with "...".
df.iloc[0]['url']

'https://consent.yahoo.com/v2/collectConsent?sessionId=1_cc-session_42a6092c-1262-4c95-9f24-7d3681ec2288'