In [None]:
import json
import logging
import os
from typing import List, Optional, Union
from websocket import create_connection

from datetime import timedelta
import json
from typing import Dict

from pandas import read_json
import requests
from bytewax import operators as op
from bytewax.connectors.stdio import StdOutSink
from bytewax.connectors.files import FileSink
from bytewax.dataflow import Dataflow
from bytewax.inputs import SimplePollingSource, DynamicSource
# from bytewax.connectors.kafka import operators as kop
# from bytewax.connectors.kafka import KafkaSinkMessage

# Configure root logging once for the notebook so INFO-and-above records are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after the current module (``__main__`` inside a notebook kernel).
logger = logging.getLogger(__name__)

In [2]:
# Credentials are read from the environment instead of being committed with the
# notebook. NOTE(review): the key pair that used to be hard-coded in this cell has
# been exposed and should be revoked/rotated.
api_key = os.environ.get("APCA_API_KEY_ID")
api_secret = os.environ.get("APCA_API_SECRET_KEY")

if not api_key or not api_secret:
    # Warn rather than raise so the rest of the notebook can still be imported/inspected.
    logging.getLogger(__name__).warning(
        "Alpaca credentials not found; set APCA_API_KEY_ID and APCA_API_SECRET_KEY."
    )

In [95]:
from typing import List, Optional
import datetime

class AlpacaNewsBatchClient:
    """
    Alpaca News API client that fetches news data page by page over REST.

    Each call to :meth:`list` performs one HTTP GET and remembers the
    ``next_page_token`` from the response so the following call continues
    where the previous one stopped.

    Attributes:
        NEWS_URL (str): The URL for the Alpaca News API.
        _from_datetime (datetime.datetime): The start datetime for the news data.
        _to_datetime (datetime.datetime): The end datetime for the news data.
        _api_key (str): The API key for the Alpaca News API.
        _api_secret (str): The API secret for the Alpaca News API.
        _tickers (List[str]): A list of tickers to filter the news data.
        _limit (int): Number of news items requested per page.
        _timeout (float): Timeout in seconds passed to ``requests.get``.
        _page_token (str): The page token for the next page of news data.
        _first_request (bool): A flag indicating whether this is the first request for news data.
    """

    NEWS_URL = "https://data.alpaca.markets/v1beta1/news"

    def __init__(
        self,
        from_datetime: datetime.datetime,
        to_datetime: datetime.datetime,
        api_key: str,
        api_secret: str,
        tickers: List[str],
        limit: int = 5,
        timeout: float = 10.0,
    ):
        """
        Initializes a new instance of the AlpacaNewsBatchClient class.

        Args:
            from_datetime (datetime.datetime): The start datetime for the news data.
                Timezone-aware values are converted to UTC; naive values are
                sent as-is and therefore interpreted as UTC.
            to_datetime (datetime.datetime): The end datetime for the news data
                (same timezone handling as ``from_datetime``).
            api_key (str): The API key for the Alpaca News API.
            api_secret (str): The API secret for the Alpaca News API.
            tickers (List[str]): A list of tickers to filter the news data.
                An empty list, ``None`` or a list containing ``"*"`` means
                "no filter".
            limit (int): Number of news items requested per page. Defaults to 5.
            timeout (float): Seconds to wait for the HTTP response. Defaults to 10.
        """

        self._from_datetime = from_datetime
        self._to_datetime = to_datetime
        self._api_key = api_key
        self._api_secret = api_secret
        self._tickers = tickers
        self._limit = limit
        self._timeout = timeout

        self._page_token = None
        self._first_request = True

    @property
    def try_request(self) -> bool:
        """
        A property indicating whether a request should be attempted.

        Returns:
            bool: True if no request has been made yet or a next-page token
            is pending, False otherwise.
        """

        return self._first_request or self._page_token is not None

    @staticmethod
    def _to_utc_timestamp(moment: datetime.datetime) -> str:
        """Format ``moment`` as ``YYYY-MM-DDTHH:MM:SSZ``, converting aware values to UTC first."""

        # The trailing "Z" asserts UTC, so an aware datetime in another zone must be
        # shifted before formatting. Naive datetimes carry no offset and are left
        # untouched (i.e. treated as already being in UTC).
        if moment.utcoffset() is not None:
            moment = moment.astimezone(datetime.timezone.utc)
        return moment.strftime("%Y-%m-%dT%H:%M:%SZ")

    def _build_params(self) -> dict:
        """Assemble the query-string parameters for the next page request."""

        # Look at all the parameters here: https://alpaca.markets/docs/api-references/market-data-api/news-data/historical/
        # or here: https://github.com/alpacahq/alpaca-py/blob/master/alpaca/data/requests.py#L357
        params = {
            "start": self._to_utc_timestamp(self._from_datetime),
            "end": self._to_utc_timestamp(self._to_datetime),
            "limit": self._limit,
            "include_content": True,
            "sort": "ASC",
        }

        # Only send a symbol filter when concrete tickers were supplied; "*" is
        # treated as a wildcard meaning "all symbols".
        tickers = [ticker for ticker in (self._tickers or []) if ticker]
        if tickers and "*" not in tickers:
            params["symbols"] = ",".join(tickers)

        if self._page_token is not None:
            params["page_token"] = self._page_token

        return params

    def list(self) -> Optional[List[dict]]:
        """
        Convenience function to fetch a batch of news from Alpaca API

        Returns:
            Optional[List[dict]]: The news items of the next page (possibly an
            empty list), or None when there is nothing left to request or the
            request returned a non-200 status code.
        """

        if not self.try_request:
            return None

        # NOTE(review): the flag is cleared before the HTTP call, so a failed
        # first request leaves try_request False (no automatic retry).
        self._first_request = False

        headers = {
            "Apca-Api-Key-Id": self._api_key,
            "Apca-Api-Secret-Key": self._api_secret,
        }

        # A timeout keeps an unresponsive server from blocking the caller forever.
        response = requests.get(
            self.NEWS_URL,
            headers=headers,
            params=self._build_params(),
            timeout=self._timeout,
        )

        if response.status_code != 200:
            logging.getLogger(__name__).error(
                "Request failed with status code: %s", response.status_code
            )
            return None

        news_json = response.json()

        # Remember where to continue; None means this was the last page.
        self._page_token = news_json.get("next_page_token", None)

        # Tolerate a payload without a "news" key instead of raising KeyError.
        return news_json.get("news", [])


In [96]:
# Build the 24-hour window from a single timezone-aware UTC instant. The client
# formats its timestamps with a literal "Z" suffix, so a naive local
# ``datetime.now()`` would be mislabelled as UTC; taking the instant once also
# keeps both ends of the window consistent.
window_end = datetime.datetime.now(datetime.timezone.utc)

batch = AlpacaNewsBatchClient(
    from_datetime=window_end - datetime.timedelta(days=1),
    to_datetime=window_end,
    api_key=api_key,
    api_secret=api_secret,
    tickers=["*"],
)

In [97]:
# ``list()`` returns None when the request fails or nothing is left to fetch;
# fall back to an empty list so the following cells still receive an iterable.
news_list = batch.list() or []

In [None]:
# Sanity check: number of news items returned by the first page request.
len(news_list)

5

In [110]:
# Create the dataflow; "alpaca_news" is its id and prefixes every step id below.
flow = Dataflow("alpaca_news")

In [111]:
from bytewax.testing import TestingSource
# Feed the already-fetched news items into the dataflow as the input step "inp1".
stream = op.input("inp1", flow, TestingSource(news_list))

In [112]:
# Debug step: prints every item passing through `stream`, prefixed with the step id.
op.inspect("stream", stream)

Stream(stream_id='alpaca_news.stream.inspect_debug.down', _scope=_Scope(parent_id='alpaca_news'))

In [113]:
def times_two(inp):
    """Return the ``'created_at'`` value of a news item.

    Despite its name, this function does not double anything: it only looks
    up the ``'created_at'`` key of the mapping it receives.

    Args:
        inp: A mapping that contains a ``'created_at'`` key.

    Returns:
        The value stored under ``'created_at'``.

    Raises:
        KeyError: If ``inp`` has no ``'created_at'`` key.
    """
    created_at = inp['created_at']
    return created_at


# Map step "double": applies `times_two` to each item of `stream`
# (which extracts the item's 'created_at' value rather than doubling anything).
double = op.map("double", stream, times_two)

In [114]:
# Output step "out": write each mapped value to standard output.
op.output("out", double, StdOutSink())

In [115]:
from bytewax.testing import run_main

# Execute the dataflow built above; the inspect and stdout output appear below this cell.
run_main(flow)

alpaca_news.stream: {'author': 'Benzinga Newsdesk', 'content': '', 'created_at': '2024-11-15T16:01:47Z', 'headline': 'Altimmune Presents New Data On The Effect Of Pemvidutide On Inflammatory Lipids In Subjects With Metabolic Dysfunction-Associated Steatotic Liver Disease At The Liver Meeting 2024', 'id': 42012472, 'images': [], 'source': 'benzinga', 'summary': '', 'symbols': ['ALT'], 'updated_at': '2024-11-15T16:01:48Z', 'url': 'https://www.benzinga.com/general/biotech/24/11/42012472/altimmune-presents-new-data-on-the-effect-of-pemvidutide-on-inflammatory-lipids-in-subjects-with-'}
2024-11-15T16:01:47Z
alpaca_news.stream: {'author': 'Benzinga Newsdesk', 'content': '', 'created_at': '2024-11-15T16:02:12Z', 'headline': "Atea Pharmaceuticals Unveils New Data Supporting Bemnifosbuvir And Ruzasvir Combination For Hepatitis C Treatment At AASLD's Liver Meeting 2024; Phase 2 Findings Highlight Potential Efficacy", 'id': 42012515, 'images': [], 'source': 'benzinga', 'summary': '', 'symbols': [

In [None]:
# NOTE(review): `filings_stream` and `parse_atom` are not defined in any cell visible
# above, and this cell has no execution count — it looks like a leftover from another
# notebook. Confirm whether it should be removed or wired to the `stream` defined earlier.
processed_stream = op.flat_map("parse_atom", filings_stream, parse_atom)
# Debug step: inspect every item emitted by the flat-map step.
op.inspect("processed_stream", processed_stream)