In [69]:
from ibapi.client import EClient
from ibapi.wrapper import EWrapper
from ibapi.contract import Contract
from ibapi.common import *  # noqa
from threading import Thread
import time
import nest_asyncio
nest_asyncio.apply()

import re
from datetime import datetime, timedelta

In [71]:
class IBApi(EWrapper, EClient):
    """IB client/wrapper pair that records callback payloads on the instance.

    Each overridden callback only stores what it receives in an attribute;
    no callback signals completion, so readers of these attributes have to
    decide for themselves when enough data has arrived.
    """

    def __init__(self):
        EClient.__init__(self, self)
        self.conId = None         # conId taken from the latest contractDetails callback
        self.news_providers = []  # provider codes from the latest newsProviders callback
        self.price_data = []      # one dict per bar delivered to historicalData
        self.headlines = []       # one dict per headline delivered to historicalNews
        self.articles = {}        # article text keyed by the reqId given to newsArticle

    def contractDetails(self, reqId, contractDetails):
        """Keep the conId of the contract carried by ``contractDetails``."""
        resolved = contractDetails.contract
        self.conId = resolved.conId

    def newsProviders(self, newsProviders):
        """Replace the stored provider codes with those in ``newsProviders``."""
        codes = []
        for entry in newsProviders:
            codes.append(entry.code)
        # Assign once at the end so readers never observe a half-built list.
        self.news_providers = codes

    def historicalData(self, reqId: int, bar):
        """Append one price bar as a plain dict to ``price_data``."""
        record = {
            "time": bar.date,
            "open": bar.open,
            "high": bar.high,
            "low": bar.low,
            "close": bar.close,
            "volume": bar.volume,
        }
        self.price_data.append(record)

    def historicalNews(self, reqId: int, time: str, providerCode: str, articleId: str, headline: str):
        """Append one headline as a plain dict to ``headlines``."""
        # NOTE: the ``time`` parameter shadows the ``time`` module inside this method only.
        entry = {
            "time": time,
            "providerCode": providerCode,
            "articleId": articleId,
            "headline": headline,
        }
        self.headlines.append(entry)

    def newsArticle(self, reqId: int, articleType: int, articleText: str):
        """Store the article text under the request id that asked for it."""
        # ``articleType`` is received but not stored.
        self.articles[reqId] = articleText

def run_loop(api):
    """Thread target: call ``api.run()`` and discard whatever it returns."""
    api.run()

def clean_news_headlines(news_headlines):
    """Return one cleaned string per headline record.

    For every record, each ``{...}`` group is removed from ``record['headline']``
    (non-greedy match) and ``'- '`` followed by ``record['time']`` is appended.

    Args:
        news_headlines: Iterable of dicts with string ``'headline'`` and ``'time'`` entries.

    Returns:
        List of cleaned strings, in input order.
    """
    return [
        re.sub(r'\{.*?\}', '', item['headline']) + '- ' + item['time']
        for item in news_headlines
    ]

In [73]:
# Create the client object whose callbacks collect the results
app = IBApi()

# Open the connection to a locally running TWS or IB Gateway
app.connect(host="127.0.0.1", port=7497, clientId=1)

# Run the client's message loop on a background daemon thread
api_thread = Thread(target=run_loop, args=(app,), daemon=True)
api_thread.start()

In [91]:
def get_Data(contract, endDate, historicalDataParams, historicalNewsParam):
    """Fetch price bars, news headlines and article bodies via the global ``app``.

    Each step sends a request and then sleeps for a fixed interval while the
    API thread's callbacks fill the collections on ``app``. Nothing waits for
    an explicit completion signal, so responses slower than the sleeps are
    missed (or, for articles and bars, show up later in the returned objects).

    The result collections on ``app`` are replaced with fresh empty ones at the
    start of every call, so repeated calls no longer accumulate the headlines,
    bars and articles of earlier calls, and a conId left over from an earlier
    contract cannot mask a failed lookup.

    Args:
        contract: Contract sent to ``reqContractDetails`` to resolve the conId
            used for the news request.
        endDate: ``datetime``; formatted with a ``US/Eastern`` suffix as the end
            of the price-bar window, and passed as the first date argument of
            ``reqHistoricalNews`` (the second date argument is left empty).
            NOTE(review): how the news date range is interpreted was an open
            question for the original author - confirm against the IB docs and
            filter the headlines by time afterwards if needed.
        historicalDataParams: Mapping with keys ``'contract'``,
            ``'historicalDataDuration'`` and ``'frequency'`` (bar size).
        historicalNewsParam: Mapping with key ``'numberOfHeadlines'``.

    Returns:
        Tuple ``(prices, news_headlines, news_articles, article_request_dictionary)``:
        ``prices`` is ``app.price_data``; ``news_headlines`` is a snapshot list of
        the headline dicts received; ``news_articles`` is ``app.articles`` (article
        text keyed by request id); ``article_request_dictionary`` maps each
        article request id to the article id it asked for.

    Raises:
        RuntimeError: If no conId or no news providers arrived within the wait.
    """
    # One id per request type so a still-pending request cannot collide with
    # the next one; article requests are numbered from 1001 upwards.
    contract_req_id, bars_req_id, news_req_id = 1, 2, 3
    first_article_req_id = 1001

    # Fixed waits (seconds) that give the API thread time to deliver callbacks.
    short_wait = 2
    long_wait = 5
    article_pacing = 1

    # Start from a clean slate. Rebinding (rather than clearing) leaves the
    # objects returned by earlier calls untouched.
    app.conId = None
    app.price_data = []
    app.headlines = []
    app.articles = {}

    historical_data_format = endDate.strftime("%Y%m%d %H:%M:%S") + " US/Eastern"
    historical_news_format = endDate.strftime("%Y-%m-%d %H:%M:%S") + ".0"

    app.reqContractDetails(contract_req_id, contract)
    time.sleep(short_wait)  # Wait for the conId to be retrieved
    if app.conId is None:
        raise RuntimeError("Failed to retrieve conId for the contract")

    app.reqNewsProviders()
    time.sleep(short_wait)  # Wait for news providers to be retrieved
    if not app.news_providers:
        raise RuntimeError("Failed to retrieve news providers")

    # Provider codes are sent as a single '+'-joined string.
    news_providers = "+".join(app.news_providers)

    app.reqHistoricalData(
        reqId=bars_req_id,
        contract=historicalDataParams['contract'],
        endDateTime=historical_data_format,                    # endDate, tagged US/Eastern
        durationStr=historicalDataParams['historicalDataDuration'],
        barSizeSetting=historicalDataParams['frequency'],      # bar size chosen by the caller
        whatToShow="MIDPOINT",                                 # Midpoint prices
        useRTH=1,                                              # Regular Trading Hours only
        formatDate=1,
        keepUpToDate=False,
        chartOptions=None                                      # No special chart options
    )

    # Give the bar request a head start, then ask for the headlines.
    time.sleep(short_wait)
    app.reqHistoricalNews(
        news_req_id,
        app.conId,
        news_providers,
        historical_news_format,
        "",
        historicalNewsParam['numberOfHeadlines'],
        None,
    )

    # Wait for the headlines to arrive
    time.sleep(long_wait)

    # Snapshot the headlines so callbacks arriving late on the API thread
    # cannot change the list while the article requests are being built.
    news_headlines = list(app.headlines)
    print(len(news_headlines))

    article_request_dictionary = {
        request_ID: article['articleId']
        for request_ID, article in enumerate(news_headlines, start=first_article_req_id)
    }

    for request_ID, article_id in article_request_dictionary.items():
        try:
            # Article ids look like '<provider>$<id>'; the provider code is the prefix.
            provider_code = article_id.split('$')[0]

            # Request the news article
            print(f"Requesting article: Request ID = {request_ID}, Provider = {provider_code}, Article ID = {article_id}")
            app.reqNewsArticle(request_ID, provider_code, article_id, [])

            # Throttle requests to avoid pacing violations
            time.sleep(article_pacing)

        except Exception as e:
            # Best effort: report the failure and carry on with the next article
            print(f"Error fetching article with Request ID = {request_ID}: {e}")

    # Wait for the last article bodies to arrive
    time.sleep(long_wait)

    news_articles = app.articles
    prices = app.price_data

    return prices, news_headlines, news_articles, article_request_dictionary


In [93]:
# Stock contract for symbol AMD, quoted in USD on the SMART exchange
contract = Contract()
contract.secType = "STK"
contract.symbol = "AMD"
contract.currency = "USD"
contract.exchange = "SMART"

# Reference timestamp handed to get_Data as endDate
endDate = datetime(year=2024, month=10, day=29, hour=21, minute=44, second=0)

In [95]:
# Settings for the price-bar request: one week of 2-hour bars
historicalDataParams = dict(
    contract=contract,
    historicalDataDuration='1 W',
    frequency='2 hours',
)

# Settings for the headline request
historicalNewsParam = dict(numberOfHeadlines=170)

In [97]:
# a = price bars, b = headline records, c = article texts keyed by request id,
# d = request id -> article id
a, b, c, d = get_Data(
    contract=contract,
    endDate=endDate,
    historicalDataParams=historicalDataParams,
    historicalNewsParam=historicalNewsParam,
)

372


ERROR 1098 10172 Failed to request news article:No data available
ERROR 1204 10172 Failed to request news article:No data available
ERROR 1310 10172 Failed to request news article:No data available


In [105]:
# Close the connection to TWS / IB Gateway that was opened with app.connect(...)
app.disconnect()

In [65]:
# Inspect one headline record (index 97) from the headlines returned by get_Data
b[97]

{'time': '2024-10-29 04:30:00.0',
 'providerCode': 'DJ-N',
 'articleId': 'DJ-N$190e8c0b',
 'headline': "{A:800015:L:en:K:n/a:C:0.9388824105262756}AMD's Guidance for AI Will Be Key for Earnings -- Barrons.com"}

In [102]:
# Number of article requests issued (d maps request id -> article id)
len(d)

372

In [108]:
import pandas as pd

In [118]:
# Load the CSV export and keep at most its first 1000 rows
df = pd.read_csv('russell-1000-index-12-19-2024.csv').iloc[:1000]

In [120]:
# Display the loaded frame (pandas abbreviates long frames when rendering)
df

Unnamed: 0,Symbol,Name,Last,Change,%Chg,Open,High,Low,Volume,Time
0,A,Agilent Technologies,133.47,0.46,+0.35%,133.14,133.7000,131.6200,551568.0,13:05 ET
1,AA,Alcoa Corp,36.56,-0.75,-2.01%,37.75,38.1500,36.5900,1646866.0,13:06 ET
2,AAL,American Airlines Gp,16.50,0.03,+0.18%,16.65,16.7950,16.5500,9322014.0,13:07 ET
3,AAON,Aaon Inc,124.22,1.65,+1.35%,124.35,126.7900,123.5000,120662.0,13:04 ET
4,AAP,Advance Auto Parts Inc,41.92,-1.05,-2.44%,43.45,44.1600,41.1700,770119.0,13:05 ET
...,...,...,...,...,...,...,...,...,...,...
995,YETI,Yeti Holdings Inc,39.60,-1.17,-2.87%,41.82,42.0498,39.5285,630620.0,13:06 ET
996,YUM,Yum! Brands,131.41,-0.05,-0.04%,131.24,132.6600,130.5500,538468.0,13:07 ET
997,Z,Zillow Group Cl C,75.00,-0.47,-0.62%,76.37,77.2100,74.8000,1227355.0,13:06 ET
998,ZBH,Zimmer Biomet Holdings,105.48,-0.38,-0.36%,105.12,106.2500,104.4700,509804.0,13:07 ET
