# Get Point in time datasets
Find all of the dates when companies first published their quarterly reports

In [113]:
import bql
import pandas as pd
import helper

In [115]:
import importlib
importlib.reload(helper)

<module 'helper' from '/project/helper.py'>

In [2]:
bq = bql.Service()

In [3]:
# Index to use for point in time firms
index = 'SPX Index'

# rebalance dates for the index
rebalance_dates = ['2023-12-31',
        '2023-09-30',
        '2023-06-30',
        '2023-03-31',
        '2022-12-31',
        '2022-09-30',
        '2022-06-30',
        '2022-03-31',
        '2021-12-31',
        '2021-09-30',
        '2021-06-30',
        '2021-03-31',
        '2020-12-31',
        '2020-09-30',
        '2020-06-30',
        '2020-03-31',
        '2019-12-31',
        '2019-09-30',
        '2019-06-30',
        '2019-03-31',]


# Calculate the Rebalance dates

For each rebalance date, get the members of the index at that time and look up their reporting dates

In [5]:
all_data = []

def get_reporting_dates_per_rebalance(date):
    univ = bq.univ.members(index, dates=date)
    field = bq.data.sales_rev_turn(dates=bq.func.range('-5Y','0D'), fa_period_type='Q')
    req = bql.Request(univ, field)
    data = bq.execute(req)
    df = data[0].df().dropna()
    return df.sort_values('PERIOD_END_DATE', ascending=True).reset_index().drop_duplicates(subset=['ID','PERIOD_END_DATE'], keep='first')

In [19]:
def get_rebalance_dates():
    for date in rebalance_dates:
        all_data.append(get_reporting_dates_per_rebalance(date))
        print("Complete for ", date)
    df = pd.concat(all_data)
    df_concat = df[['ID','AS_OF_DATE','PERIOD_END_DATE']].sort_values('PERIOD_END_DATE', ascending=True).drop_duplicates(subset=['ID','PERIOD_END_DATE'], keep='first')
    return df_concat.set_index(['AS_OF_DATE','ID']).sort_values(['AS_OF_DATE'])

In [20]:
df_rebalance_dates = get_rebalance_dates()

Complete for  2023-12-31
Complete for  2023-09-30
Complete for  2023-06-30
Complete for  2023-03-31
Complete for  2022-12-31
Complete for  2022-09-30
Complete for  2022-06-30
Complete for  2022-03-31
Complete for  2021-12-31
Complete for  2021-09-30
Complete for  2021-06-30
Complete for  2021-03-31
Complete for  2020-12-31
Complete for  2020-09-30
Complete for  2020-06-30
Complete for  2020-03-31
Complete for  2019-12-31
Complete for  2019-09-30
Complete for  2019-06-30
Complete for  2019-03-31


In [24]:
# pull out the list of as of dates - will use this to loop through the securities
dates = df_rebalance_dates.reset_index()['AS_OF_DATE'].unique()

### Get each as of date and request the securities

In [63]:
as_of_date = str(dates[3])[0:10]
securities = list(df_rebalance_dates.loc[as_of_date].index)
as_of_date

'2020-01-28'

In [49]:
securities

['PNR UN Equity',
 'MKC UN Equity',
 'CHRW UW Equity',
 'PHM UN Equity',
 'EQR UN Equity',
 'HCA UN Equity',
 'RTX UN Equity',
 'CCL UN Equity',
 'SBUX UW Equity',
 'MXIM UW Equity',
 'XRX UN Equity',
 'XLNX UW Equity',
 'MMM UN Equity',
 'EBAY UW Equity',
 'ALK UN Equity']

In [9]:
# calculate the announcment dates
announce_dates = df_rebalance_dates[['AS_OF_DATE']].drop_duplicates().sort_values('AS_OF_DATE')

In [116]:
univ, is_fields, bs_fields, price = helper.setup_request(securities, as_of_date)

In [53]:
req = bql.Request(securities,is_fields)
data = bq.execute(req)

In [102]:
# Convert the request into correct format for data frame
def format_request_to_df(data, fields):
    fields = list(fields.keys())
    df_all = [data[index].df()[data[index].df()['PERIOD_END_DATE'] != 0]
                  .pivot(columns='PERIOD_END_DATE', values=[fields[index]])
                  .fillna(0) 
                  for index in range(0,len(fields))]
    df2 = pd.concat(df_all, axis=1)
    df3 = df2.stack().transpose().stack().unstack(level=0).transpose().fillna(0)
    df4 = df3.loc[:, (df3 != 0).any(axis=0)]
    # Reformat the columns to remove dates
    if len(df4.columns) == 6:
        df5 = df4.set_axis(['t-5','t-4','t-3','t-2','t-1', 't'], axis='columns')
    else:
        df5 = df4.drop(columns=df4.columns[0:(len(df4.columns)-6)])
        df5 = df5.set_axis(['t-5','t-4','t-3','t-2','t-1', 't', 't0' ], axis='columns')
    # Reverse the direction of the dataset
    df6 = df5[df5.columns[::-1]]
    return df6.loc[(df6!=0).any(axis=1)]

def convert_to_dict(securities, df_is, df_bs, df_price):
    date = {}
    data = {}
    for security in securities:
        data['is'] = df_is.loc[security].to_string()
        data['bs'] = df_bs.loc[security].to_string()
        date[security] = data
    return date
        

In [85]:
# test run
req = bql.Request(securities, is_fields)
data_is = bq.execute(req)
req = bql.Request(securities, bs_fields)
data_bs = bq.execute(req)
df_is = format_request_to_df(data_is, is_fields)
df_bs = format_request_to_df(data_bs, bs_fields)

In [104]:
df_is.loc['ALK UN Equity']

Unnamed: 0,t,t-1,t-2,t-3,t-4,t-5
01 Revenue,2228000000.0,2389000000.0,2288000000.0,1876000000.0,2064000000.0,2212000000.0
02 Sales and Services Revenues,2228000000.0,2389000000.0,2288000000.0,1876000000.0,2064000000.0,2212000000.0
10 Operating Expenses,1976000000.0,1967000000.0,1924000000.0,1851000000.0,2018000000.0,1915000000.0
13 Other Operating Expenses,1793000000.0,1784000000.0,1732000000.0,1673000000.0,1829000000.0,1737000000.0
14 Operating Income or Losses,252000000.0,422000000.0,364000000.0,25000000.0,46000000.0,297000000.0
15 Non-Operating (Income) Loss,9000000.0,6000000.0,13000000.0,19000000.0,10000000.0,13000000.0
16 Net Interest Expense,3000000.0,3000000.0,6000000.0,9000000.0,7000000.0,6000000.0
17 Interest Expense,14000000.0,14000000.0,17000000.0,18000000.0,16000000.0,17000000.0
18 Interest Income,11000000.0,11000000.0,11000000.0,9000000.0,9000000.0,11000000.0
20 Other Non-Operating (Income) Loss,6000000.0,3000000.0,7000000.0,10000000.0,3000000.0,7000000.0


In [97]:
t = convert_to_dict(securities, df_is, df_bs, df_is)

In [101]:
t['PNR UN Equity']['is']

'                                                        t           t-1           t-2           t-3           t-4           t-5\n01 Revenue                                   2.228000e+09  2.389000e+09  2.288000e+09  1.876000e+09  2.064000e+09  2.212000e+09\n02 Sales and Services Revenues               2.228000e+09  2.389000e+09  2.288000e+09  1.876000e+09  2.064000e+09  2.212000e+09\n10 Operating Expenses                        1.976000e+09  1.967000e+09  1.924000e+09  1.851000e+09  2.018000e+09  1.915000e+09\n13 Other Operating Expenses                  1.793000e+09  1.784000e+09  1.732000e+09  1.673000e+09  1.829000e+09  1.737000e+09\n14 Operating Income or Losses                2.520000e+08  4.220000e+08  3.640000e+08  2.500000e+07  4.600000e+07  2.970000e+08\n15 Non-Operating (Income) Loss               9.000000e+06  6.000000e+06  1.300000e+07  1.900000e+07  1.000000e+07  1.300000e+07\n16 Net Interest Expense                      3.000000e+06  3.000000e+06  6.000000e+06  9.000000e

In [117]:
univ = securities
field = bq.data.px_last(dates=bq.func.range('-12M', as_of_date), currency='USD', fill='prev')

req = bql.Request(univ, field)
data = bq.execute(req)

In [118]:
data[0].df()

Unnamed: 0_level_0,DATE,CURRENCY,"PX_LAST(dates=RANGE(-12M,2020-01-28),currency='USD',fill='prev')"
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
PNR UN Equity,2019-01-28,USD,40.54
PNR UN Equity,2019-02-28,USD,42.54
PNR UN Equity,2019-03-28,USD,43.93
PNR UN Equity,2019-04-28,USD,38.41
PNR UN Equity,2019-05-28,USD,35.35
...,...,...,...
ALK UN Equity,2019-09-28,USD,64.18
ALK UN Equity,2019-10-28,USD,70.08
ALK UN Equity,2019-11-28,USD,69.17
ALK UN Equity,2019-12-28,USD,68.29
