In [1]:
import sqlite3
import pandas as pd
from itertools import permutations

import blpapi
from blpapi import Session, SessionOptions
import datetime
from tqdm import tqdm

In [2]:
import blpapi
from blpapi import Session, SessionOptions
import pandas as pd
import datetime
from datetime import timedelta
import numpy as np
from tqdm import tqdm

def init_session():
    """Start a Bloomberg API session and open the reference-data service.

    The session is configured for host ``localhost`` and port ``8194``.

    Returns:
        Session: a started session on which ``//blp/refdata`` has been opened.

    Raises:
        RuntimeError: if the session cannot be started, or if the
            ``//blp/refdata`` service cannot be opened.
    """
    options = SessionOptions()
    options.setServerHost("localhost")
    options.setServerPort(8194)
    session = Session(options)
    if not session.start():
        raise RuntimeError("Failed to start session.")
    if not session.openService("//blp/refdata"):
        # The session is already running here and the caller never receives a
        # handle to it, so stop it before reporting the failure.
        session.stop()
        raise RuntimeError("Failed to open //blp/refdata.")
    return session

def get_turnover(ticker, session):
    """Fetch about ten years of ``HS020`` history for one security.

    Sends a ``HistoricalDataRequest`` on ``//blp/refdata`` covering the last
    ``365 * 10`` days up to now and drains events until the final RESPONSE
    event. Periodicity is not set, so the service default applies.

    NOTE(review): ``HS020`` is the only field requested. The function name
    suggests it holds turnover, while an earlier inline comment labelled it
    "Earnings announcement dates" -- confirm the field's meaning.

    Args:
        ticker: security identifier as expected by the service, e.g.
            ``"MMM US EQUITY"``.
        session: a started session with ``//blp/refdata`` already opened.
            It is left running when the function returns.

    Returns:
        dict: maps each returned row's ``date`` value to its ``HS020`` value.
        Rows that do not carry the field are skipped. The dict is empty when
        no rows are returned.
    """
    field = "HS020"

    refDataService = session.getService("//blp/refdata")
    request = refDataService.createRequest("HistoricalDataRequest")
    request.append("securities", ticker)
    request.append("fields", field)

    # Sample the clock once so both ends of the window come from the same instant.
    now = datetime.datetime.now()
    start_date = (now - timedelta(365 * 10)).strftime('%Y%m%d')
    end_date = now.strftime('%Y%m%d')

    request.set("startDate", start_date)
    request.set("endDate", end_date)

    session.sendRequest(request)

    values = {}
    while True:
        event = session.nextEvent()
        for msg in event:
            if not msg.hasElement("securityData"):
                continue
            fieldData = msg.getElement("securityData").getElement("fieldData")
            for i in range(fieldData.numValues()):
                row = fieldData.getValue(i)
                # Skip rows without the field instead of raising mid-stream:
                # an exception here would leave this request's remaining
                # events unread on the shared session.
                if row.hasElement(field):
                    values[row['date']] = row[field]

        # Earlier events may carry partial data; only RESPONSE ends the request.
        if event.eventType() == blpapi.Event.RESPONSE:
            break

    return values

from tqdm import tqdm
if __name__ == "__main__":
    # NOTE(review): the session is left running after this block, exactly as
    # before; stop it explicitly once nothing else needs it.
    session = init_session()

    # Load tickers from CSV (no header row; tickers are read from the first column)
    ticker_df = pd.read_csv("data/sp500_tickers.csv", header=None)
    tickers = ticker_df[0].tolist()

    all_data = {}
    # Only the first five tickers are requested. Iterating the list directly
    # (rather than wrapping enumerate) lets tqdm read its length and show a
    # real progress bar with a total.
    for ticker in tqdm(tickers[:5]):
        try:
            all_data[ticker] = get_turnover(f'{ticker} US EQUITY', session)
        except Exception as e:
            # Best effort: report the failure and keep an empty mapping so the
            # ticker still appears as an (empty) column in the output.
            print(f"Failed to get data for {ticker}: {e}")
            all_data[ticker] = {}

    # Convert to DataFrame (columns = tickers, rows = dates returned by get_turnover)
    df = pd.DataFrame({k: pd.Series(v) for k, v in all_data.items()})
    df.to_csv('data/turnover.csv')
    print('Data saved to data/turnover.csv')


5it [00:01,  3.00it/s]

Data saved to data/turnover.csv





In [19]:
def load_tickers(filepath="data/sp500_tickers.csv"):
    """Read ticker symbols from a text file, one per line.

    Args:
        filepath: path of the file to read.

    Returns:
        list[str]: each line with surrounding whitespace stripped, in file
        order; lines that are empty after stripping are left out.
    """
    symbols = []
    with open(filepath) as handle:
        for raw_line in handle:
            cleaned = raw_line.strip()
            if cleaned:
                symbols.append(cleaned)
    return symbols

# Ticker list read once from load_tickers()'s default path; later cells build pairs from it.
TICKS = load_tickers()

In [20]:
# All ordered 2-tuples drawn from TICKS: permutations() yields both (A, B) and
# (B, A), and the whole result is materialised as a list.
pairs = list(permutations(TICKS,2))

In [21]:
# Preview the first five pairs.
pairs[:5]

[('MMM', 'AOS'),
 ('MMM', 'ABT'),
 ('MMM', 'ABBV'),
 ('MMM', 'ACN'),
 ('MMM', 'ADBE')]

In [22]:
# Connect to database
conn = sqlite3.connect('data/pairs_database.db')

try:
    # Read specific pair into DataFrame, indexed by the parsed 'Date' column
    df = pd.read_sql('SELECT * FROM pair_MMM_ACN', conn, index_col='Date', parse_dates=['Date'])
finally:
    # Close connection even if the query fails (e.g. the table is missing),
    # so a failed read does not leave the database handle open.
    conn.close()

# Display
df

Unnamed: 0_level_0,MMM_price,ACN_price,MMM_sector,ACN_sector,MMM_ln_price,ACN_ln_price,coint_p_value,slope,y_intercept,r_squared,y_implied,curr_residual,z_residual,ratio,logRatio,avg_ratio,std_dev,z_ratio
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2014-01-02,79.708466,66.436462,Industrials,Information Technology,4.378376,4.196246,,,,,,,,1.199770,1.043403,,,
2014-01-03,79.893150,66.657585,Industrials,Information Technology,4.380690,4.199569,,,,,,,,1.198561,1.043129,,,
2014-01-06,79.419945,65.953339,Industrials,Information Technology,4.374750,4.188948,,,,,,,,1.204184,1.044355,,,
2014-01-07,79.431503,66.755852,Industrials,Information Technology,4.374895,4.201042,,,,,,,,1.189881,1.041383,,,
2014-01-08,78.842896,67.271759,Industrials,Information Technology,4.367457,4.208741,,,,,,,,1.172006,1.037711,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-06-06,145.500000,317.649994,Industrials,Information Technology,4.980176,5.760950,0.324590,0.715177,2.173168,0.804713,5.734876,0.026074,1.332102,0.458051,0.864471,0.468011,0.004750,-0.577359
2025-06-09,144.229996,316.619995,Industrials,Information Technology,4.971409,5.757702,0.325385,0.711106,2.193478,0.798904,5.728675,0.029027,1.472228,0.455530,0.863436,0.467923,0.004774,-0.779643
2025-06-10,145.059998,320.920013,Industrials,Information Technology,4.977147,5.771192,0.411241,0.712417,2.187275,0.792083,5.733081,0.038111,1.889748,0.452013,0.862412,0.467593,0.004808,-0.977187
2025-06-11,147.179993,319.220001,Industrials,Information Technology,4.991656,5.765881,0.109759,0.715030,2.174610,0.788901,5.743794,0.022086,1.084554,0.461061,0.865723,0.467437,0.004800,-0.275847


In [None]:
def init_session():
    """Start a Bloomberg API session with default options and open ``//blp/refdata``.

    This definition reuses the name of the earlier ``init_session`` in this
    notebook and replaces it once this cell runs. Unlike that version, it
    reports failure by printing a message and returning ``None`` rather than
    raising, so callers must check the result before using it.

    Returns:
        Session | None: the started session with ``//blp/refdata`` opened, or
        ``None`` if starting the session or opening the service failed.
    """
    session = Session()
    if not session.start():
        print("Failed to start session")
        return None
    if not session.openService("//blp/refdata"):
        print("Failed to open service.")
        # The session is already running and is about to be discarded; stop
        # it so the connection is not left open.
        session.stop()
        return None
    return session


In [None]:
def get_price_data(tick, session):
    """Fetch about ten years of daily ``PX_LAST`` values for one security.

    Sends a ``HistoricalDataRequest`` on ``//blp/refdata`` covering the last
    ``365 * 10`` days up to today with ``periodicitySelection`` set to
    ``DAILY``, then drains events until the final RESPONSE event.

    NOTE(review): the session passed in is stopped before returning, so it
    cannot be reused for another request. This is kept for backward
    compatibility -- confirm whether callers rely on it.

    Args:
        tick: security identifier as expected by the service.
        session: a started session with ``//blp/refdata`` already opened.

    Returns:
        pandas.Series: float64 ``PX_LAST`` values indexed by date (as a
        ``DatetimeIndex``) and named ``tick``. Rows without ``PX_LAST`` are
        skipped. The series is empty when nothing is returned.
    """
    field = "PX_LAST"

    refDataService = session.getService("//blp/refdata")
    request = refDataService.createRequest("HistoricalDataRequest")

    # Sample the clock once so both ends of the window come from the same instant.
    today = datetime.datetime.today()
    start_date = (today - datetime.timedelta(days=365 * 10)).strftime("%Y%m%d")
    end_date = today.strftime("%Y%m%d")

    request.getElement("securities").appendValue(tick)
    # Request the price field. The ticker used to be appended here, so the
    # response never contained the PX_LAST values parsed below.
    request.getElement("fields").appendValue(field)
    request.set("startDate", start_date)
    request.set("endDate", end_date)
    request.set("periodicitySelection", "DAILY")

    session.sendRequest(request)

    dates = []
    values = []

    while True:
        event = session.nextEvent()
        for msg in event:
            if not msg.hasElement("securityData"):
                continue
            data = msg.getElement("securityData").getElement("fieldData")
            for i in range(data.numValues()):
                item = data.getValueAsElement(i)
                if not item.hasElement(field):
                    continue
                stamp = item.getElementAsDatetime("date")
                # Presumably the API can return either a datetime or a plain
                # date here (a plain date has no .date() method) -- accept
                # both and keep only the calendar date.
                if isinstance(stamp, datetime.datetime):
                    stamp = stamp.date()
                dates.append(stamp)
                values.append(item.getElementAsFloat(field))
        # Earlier events may carry partial data; only RESPONSE ends the request.
        if event.eventType() == blpapi.Event.RESPONSE:
            break

    session.stop()
    # An explicit dtype keeps an empty result float-typed like a populated one.
    return pd.Series(values, index=pd.to_datetime(dates), name=tick, dtype="float64")

