# FDNote6W.ipynb: Bloomberg API

Prepared by Inmoo Lee for the Financial Databases class at KAIST

inmool@kaist.ac.kr


In [None]:
import os
import numpy as np
import pandas as pd
from pandasql import sqldf

def pysqldf(q):
    """Run the SQL query string ``q`` with pandasql's ``sqldf``.

    The notebook's global namespace is handed to ``sqldf`` as its environment,
    so the query is evaluated against the names defined at the top level of
    this notebook.

    Returns whatever ``sqldf`` returns for the query.
    """
    notebook_namespace = globals()
    return sqldf(q, notebook_namespace)

In [None]:
import os #import a package called os

# Query the current working directory. The value is not assigned or printed,
# and this is not the last expression of the cell, so nothing is displayed.
os.getcwd()  #get the current working directory
# Placeholder path: os.chdir() below is called with this value as-is, so it
# must be replaced with a real directory before running the cell.
path='C:\\####' #Replace this with your directory to work with
os.chdir(path) # change the working directory

### To use Bloomberg API, you have to install two packages, blpapi and xbbg

!pip install --index-url=https://bcms.bloomberg.com/pip/simple blpapi

## Bloomberg DAPI

First, install blpapi using the following command

!pip install --index-url=https://bcms.bloomberg.com/pip/simple blpapi


And install xbbg package (for reference, https://pypi.org/project/xbbg/)


!pip install xbbg

Using this package, you can call the Bloomberg API functions from Python in much the same way as you use them in Excel


When there are problems with the following modules, try downloading the latest version of blpapi_cpp_3.14.31-windows and copying blpapi3_32.dll and blpapi3_64.dll from its lib folder to c/blp/DAPI, as explained here: https://stackoverflow.com/questions/52897576/install-error-for-blpapi-in-python-for-bloomberg-api



In [None]:
!pip install --index-url=https://blpapi.bloomberg.com/repository/releases/python/simple/ blpapi


In [None]:
!pip install xbbg

In [None]:
import blpapi

In [None]:
from xbbg import blp

### Basic examples

Get the historical data using **bdh**

You can find the fields information by typing **FLDS** after choosing the instrument of your choice

### Use Help page for DAPI

In Bloomberg, Choose **"DAPI"**/**"Additional Resources"**/**"Help Page for DAPI"**.  You can find how to use various functions and the meanings of optional parameters used in each function.

#### For the historical data, use **bdh**

In [None]:
# Get help information for the blp module:
# first list every name the module exposes, then show the documentation of bdh.

print(dir(blp))
help(blp.bdh)#for historical data, use bdh

In [None]:
# To get the last prices (Px_Last) of Apple between Jan. 2, 2025 and Jan. 30, 2025.
# The arguments are, in order: ticker, field, start date and end date (YYYYMMDD).

blp.bdh(
    'AAPL US Equity', 'Px_Last', '20250102', '20250130'
)

In [None]:
## The optional arguments accepted by the different functions are described
## on the Help page for DAPI.

## Request the same AAPL price window twice and print each result:
## first with CapChg=False (no adjustment for stock splits),
## then with CapChg=True (with adjustment for stock splits).
for split_adjusted in (False, True):
    print(blp.bdh('AAPL US Equity', 'px_last', '20200825', '20200831',
                  CapChg=split_adjusted))

#### For the current data, use **bdp**

In [None]:
# Show the documentation of bdp before using it in the next cell.
help(blp.bdp)#for the current value, use bdp

In [None]:
#To find the name of security and its current GICS sector name for Nvidia
# One ticker and a list of two fields are passed; the result is printed.
print(blp.bdp(tickers='NVDA US Equity', flds=['Security_Name', 'GICS_Sector_Name']))

#### For the block data, use **bds**

In [None]:
### Show the documentation of bds (used for block data in the next cells)
help(blp.bds)

In [None]:
# Request the EQY_DVD_HIST_SPLITS block for Apple; as the last expression of
# the cell, the result is displayed by the notebook.
blp.bds('AAPL US Equity', 'EQY_DVD_HIST_SPLITS')#find all splits information for AAPL

In [None]:
# Full dividend history of Apple between Jan. 1, 2018 and June 30, 2025.
# The end date must be a real calendar date: June has 30 days, so the override
# is '20250630' (the previous value '20250631' does not exist).
blp.bds('AAPL US Equity', 'DVD_Hist_All', DVD_Start_Dt='20180101', DVD_End_Dt='20250630')
#Note that the information for August 2020 includes stock split, not the dividend information.
#In addition, the dividends per share prior to the stock split are adjusted for stock splits.


#### For other information, check the following

In [None]:
## To find intraday data, check the documentation of bdib.
## NOTE(review): xbbg appears to document bdib as intraday *bar* data rather
## than tick-by-tick data - confirm against the help output below.
help(blp.bdib)

In [None]:
# Intraday data for SPY on 2025-07-01; only the first rows (.head()) are printed.
print(blp.bdib(ticker='SPY US Equity', dt='2025-07-01').head())

In [None]:
#To get dividend/split history, check the documentation of blp.dividend
help(blp.dividend)

In [None]:
#Citigroup, Morgan Stanley and Mirae Asset dividend history
# A list of three tickers is passed in one call, for 2018-01-01 to 2025-07-01.
blp.dividend(['C US Equity', 'MS US Equity', '006800 KS Equity'], start_date='2018-01-01', end_date='2025-07-01')

## Portfolio Example

Use stored EQS in Bloomberg to retrieve the results within Python code.

#### blp.beqs(): Equity screening

As explained in the class note, you can save the results of an equity screening (EQS) in Bloomberg and then, execute the stored screening within Python using blp.beqs() as shown below

In [None]:
###### Get the search result from Bloomberg EQS: check the documentation of beqs first
help(blp.beqs)

In [None]:
## Here, LowPE_KSE is the name of a screen stored in Bloomberg Equity Screening (EQS).
## The screen result is stored in df and its first rows are displayed.

df=blp.beqs('LowPE_KSE')
df.head()

In [None]:
#You can also specify the date for the search result
# Note: this overwrites df from the previous cell with the screen result as of
# 20210630; Appendix 1 builds the ticker list (dft) from whatever df holds.

df=blp.beqs('LowPE_KSE',asof='20210630')
print(df.columns)
print(df.head())

### We can calculate the returns using "TOT_RETURN_INDEX_GROSS_DVDS", which includes dividends and adjusted for stock splits and other events.

#### Retrieve the total return index including dividends to calculate returns

- *dft* is from Appendix 1 (add Equity to the ticker symbol so that the data can be retrieved from Bloomberg).
- You should execute Appendix 1 to create the list of ticker symbols with "Equity" attached


In [None]:
#######################################
### Get the return data.
### Use the total return index (gross dividends) to calculate returns;
### this is an index adjusted for stock splits, dividends and so forth.

# dft is from Appendix 1
# Run the cell in Appendix 1 before running this cell
# dft is the list of stock ids from df with " Equity" appended
# Period = 'M' is passed to request the series between 20120629 and 20250630.
ret=blp.bdh(dft, ['TOT_RETURN_INDEX_GROSS_DVDS'], '20120629', '20250630',Period = 'M')
print(ret.head())

#### Calculate the returns using the adprc (TOT_RETURN_INDEX_GROSS_DVDS)

- Before running the following cell, run Appendix 2 so that the dataframe with multi-level columns (ret) is converted to the dataframe with single-level columns, retd

In [None]:
#Run the cell in Appendix 2 before running this cell
# retd is the single-level-column version of ret created in Appendix 2.
print(retd.columns)# retd is from Appendix 2

In [None]:
# Order the observations by ticker and then by date, and renumber the rows.
retdata = retd.sort_values(['ticker', 'date'])
retdata = retdata.reset_index(drop=True)

# The lag has to be taken inside each ticker group: shift() combined with
# groupby keeps one ticker's price index from being used as the lagged value
# in the first row of the next ticker.
retdata['lagprc'] = retdata.groupby('ticker')['adprc'].shift(1)

# Simple return: change in the price index relative to its lagged value.
retdata['ret'] = (retdata['adprc'] - retdata['lagprc']) / retdata['lagprc']
print(retdata.head(150))

#### Retrieve the market cap in each month during June 2012 and June 2025

In [None]:
# Monthly market capitalization for every ticker in dft (from Appendix 1).
# The request starts on 20120629, the same start date used for the total return
# index and the first formation date in Appendix 3. 20120630 is a Saturday, so
# starting there can leave the June 2012 observation (dated 2012-06-29) out of
# the result and with it the first portfolio-formation date.
mc=blp.bdh(dft, ['CUR_MKT_CAP'], '20120629', '20250630',Period = 'M')
print(mc.head())

#### As was done before, please run Appendix 2-1 to change the dataframe with multi-level columns to the dataframe with single-level columns, and create mcd

In [None]:
# Run the cell in Appendix 2-1 before running this cell
# mcd is the single-level-column version of mc created in Appendix 2-1.
print(mcd.head())
print(mcd.date.drop_duplicates()) #to check the dates covered in the mcd

### Let's get the market-to-book ratio information

#### Get the MB and PB ratios at the end of June

- Run Appendix 3 to create the list of end-of-June dates for each of the 14 years (formlist)
- The steps from Appendix 2 are included here to convert the dataframe with multi-level columns returned by bdh into a dataframe with single-level columns, and then rename the columns

In [None]:
# For each date in formlist (created in Appendix 3), get the MB and PB ratios
# of every ticker in dft (created in Appendix 1).
# Run the cells in Appendix 1 and Appendix 3 before running this cell.

mb_frames = []  # one long-format frame (date, ticker, mb, pb) per formation date
for d in formlist:  # for each day in formlist
    # the starting and end dates are the same (d), so one day is requested
    mb = blp.bdh(dft, ['MARKET_CAPITALIZATION_TO_BV',
                       'PX_TO_BOOK_RATIO'], d, d, adjust='all')
    if mb.empty:
        # nothing came back for this date; there is nothing to reshape
        continue

    # Same as Appendix 2: move the ticker level of the multi-level columns
    # into the rows, turn the index into columns, then rename the columns.
    mbd = mb.stack(0, future_stack=True).reset_index()
    mbd = mbd.rename(columns={'level_0': 'date',
                              'level_1': 'ticker',
                              'MARKET_CAPITALIZATION_TO_BV': 'mb',
                              'PX_TO_BOOK_RATIO': 'pb'})
    mb_frames.append(mbd)

if not mb_frames:
    raise ValueError('No MB/PB data was returned for any date in formlist')

# Stack the per-date frames once, instead of outer-merging inside the loop.
# This needs no special case for the first date and still works when a date
# is missing one of the two ratio columns (it is filled with NaN).
mbdata0 = pd.concat(mb_frames, ignore_index=True)
mbdata0 = mbdata0.sort_values(['date', 'ticker'], ignore_index=True)

print(mbdata0.head())
print(mbdata0.tail())
print(mbdata0.shape)

### Form MB portfolios

- As was done in FDNote5W2025.ipynb, find the cutoff points and then use SQL to assign portfolio values.
- Alternatively, one can use pd.qcut to form portfolios as shown in Appendix 4

In [None]:
## First, find the cutoff points for the quintile formation on each date
mb2 = mbdata0.groupby(['date'])['mb'].describe(percentiles=[.2, .4, .6,.8]).reset_index()
mb2 = mb2[['date','20%','40%','60%','80%']]\
.rename(columns={'20%':'quint20','40%':'quint40','60%':'quint60','80%':'quint80'})
print(mb2.head())

# Add the cutoff points to the original mb data.
# The columns are listed explicitly so that the result has exactly one date
# column; there is no need to drop a duplicate by position afterwards.
query='''select a.date, a.ticker, a.mb, a.pb,
                b.quint20, b.quint40, b.quint60, b.quint80
              from mbdata0 as a
              left join mb2 as b
              on a.date = b.date'''

mb3=pysqldf(query)
#check
print(mb3.head())
print(mb3.columns)

# Find out which MB portfolio each stock belongs to in each year.
# In SQL a comparison with NULL is never true, so without the first branch a
# stock with a missing mb would fail every "<=" test and land in portfolio 5.
# Such stocks get a missing mbp instead.
query='''select a.*,
                case when a.mb is null then null
                     when a.mb <= a.quint20 then 1
                     when a.mb <= a.quint40 then 2
                     when a.mb <= a.quint60 then 3
                     when a.mb <= a.quint80 then 4
                     else 5
                end as mbp
            from mb3 as a'''
mbdata=pysqldf(query)
print(mbdata.head())

#### First, find out the market cap information at the end of June of each year
- This was done in Appendix 5 (output file is mcret)
- Please run the cell in Appendix 5 to create *mcret*

In the *mcret* data (which includes market cap at the end of June of each year), we need to add returns and market cap (at the beginning of month) from July - June of the year following the formation of MB portfolios at the end of June of each year
- Market cap is used to calculate the value-weighted return in each month.

In [None]:
#mcret is from Appendix 5
# Run the cell in Appendix 5 before running this cell
# Preview the two inputs of the merge performed in the next cell.
print(mcret.head()) #annual, junemc is the market cap in June on date
print(retdata.head()) #monthly

#### Add monthly return data to the annual mcret data

Here, notice that returns from July to June of the next year are added to June in the mcret data

Here, fdate is the date one year ahead of the date in **mcret** when the portfolio is formed

In [None]:
## Merge mcret and retdata to get the return data for each stock from July of
## the year that date (in mcret) belongs to, to June of the next year.
## Note that mcret is annual data, while retdata is monthly data: each annual
## row is matched with every monthly return dated after date and up to fdate.
q1='''
    select a.*,b.date as retdate,b.ret
    from mcret as a
    left join retdata as b
    on a.ticker=b.ticker and (a.date <b.date
                               and a.fdate >=b.date)
    order by a.ticker,a.date,b.date
    '''

mcretdata=pysqldf(q1)

# Convert date and retdate to the datetime format so that Python recognizes
# them as dates
mcretdata['date']=pd.to_datetime(mcretdata['date'])
mcretdata['retdate']=pd.to_datetime(mcretdata['retdate'])

# fdate was only needed for the join condition above
mcretdata=mcretdata.drop(columns='fdate')
print(mcretdata.head(13))

#### Combine the files
- Create a file to include the MB portfolio information, the market cap at the end of June, monthly returns and market caps as well as lagged market cap information
- Run Appendix 6 to create *mcrmbd*

#### Drop rows with missing data

In [None]:
## mcrmbd is the merged data created in Appendix 6
# Run the cell in Appendix 6 before running this cell
print(mcrmbd.shape)
print(mcrmbd.head())

# Keep only the rows in which no column is missing
# (dropna's defaults: row-wise, drop a row if any value is missing)
mcrmbd1 = mcrmbd.dropna()
print(mcrmbd1.head())
print(mcrmbd1.shape)

### Now, you are ready to calculate portfolio returns

In [None]:
#### Ready to calculate portfolio returns ###
print(mcrmbd1.columns)

# For every MB portfolio (mbp) and return month (retdate), compute:
#   amb, amc - average mb and average mc of the stocks in the portfolio
#   ewret    - equal-weighted return (simple average of ret)
#   vwret    - value-weighted return, using the lagged market cap (lmc) as weight
#   numstock - number of stocks with a return in that portfolio-month
portq='''
    select a.mbp, a.retdate, avg(a.mb) as amb, avg(a.mc) as amc,
        avg(a.ret) as ewret, 
        sum(a.ret*a.lmc)/sum(a.lmc) as vwret,
        count(a.ret) as numstock
    from mcrmbd1 as a
    group by a.mbp, a.retdate
    order by a.mbp, a.retdate
    '''
portret=pysqldf(portq)
print(portret.head())
# Time-series average of each numeric column, by portfolio
print(portret.groupby('mbp').mean(numeric_only=True))

# Appendix
- Adding "Equity" to the ticker
- Converting the format of the results
- Finding the list of dates
- Finding the market cap at the end of June of each year
- Adding monthly market cap data and finding lagged market cap 
- Conducting the statistical significance tests between high and low groups

## Appendix 1

Add "Equity" to the ticker so that you can retrieve the data in Bloomberg


In [None]:
###Add to use in Bloomberg, we need to add ' Equity' to the ticker symbol
import numpy as np

print(df.ticker.head())
dfticker = list(df.ticker)  # the ticker symbols in the DataFrame df, as a list
print(dfticker)

# Append ' Equity' to every ticker symbol; dft is the list passed to blp.bdh
# in the cells that retrieve return, market cap and MB/PB data.
dft = [ticker + ' Equity' for ticker in dfticker]

print(dft)

## Appendix 2: Convert the format of the results, ret (multi-level columns), to retd (single-level columns)

The result is a dataframe with multi-level columns

    - The first level column is ticker
    - The second level column is the data item

To convert it, you can stack it and then reset_index as explained here, https://pandas.pydata.org/pandas-docs/stable/user_guide/reshaping.html

DataFrame.stack(level=- 1, dropna=True)[source]
- level=-1 is default and it stacks the last level (data item)
- level=0 to stack the first level (Ticker)

Stack the prescribed level(s) from multi-level columns to index.

In [None]:
# ret has multi-level columns: first level - ticker, second level - data item
print(ret.head())

# Stack the first column level so that the columns become single-level, then
# reset the index so that the old index entries become ordinary columns.
retd = ret.stack(0, future_stack=True)
retd = retd.reset_index()
print('\n After converting the format \n')
print(retd.head())

# Change the names of the columns
retd_column_names = {
    'level_0': 'date',
    'level_1': 'ticker',
    'TOT_RETURN_INDEX_GROSS_DVDS': 'adprc',
}
retd = retd.rename(columns=retd_column_names)
print('\n After name changes \n')

print(retd.head())

## Appendix 2-1

Same as above for the market cap data

In [None]:
# Same reshaping as in Appendix 2, applied to the market cap data:
# stack the first column level, then turn the index into ordinary columns.
mcd = mc.stack(0, future_stack=True)
mcd = mcd.reset_index()

# Change the names of the columns
mcd_column_names = {
    'level_0': 'date',
    'level_1': 'ticker',
    'CUR_MKT_CAP': 'mc',
}
mcd = mcd.rename(columns=mcd_column_names)
print(mcd.head())

## Appendix 3: List of end of June dates to retrieve MB ratios

To retrieve the MB ratios at the end of June of each year, we want to first find out the list of dates (end of June of each year)


In [None]:
from pandas.tseries.offsets import BMonthEnd,MonthEnd,BDay
from datetime import datetime as dt

# dt.strptime ("string parse time") turns a text date into a datetime object;
# here the month/day/year string gives the starting date of the list.
sdate = dt.strptime('06/29/2012', "%m/%d/%Y")

print(sdate)

# Build the list of 14 dates: the starting date moved forward by 0, 12, 24, ...
# business month-ends, i.e. one date per year beginning with sdate.
formlist = []
for years_ahead in range(14):
    formlist.append(sdate + BMonthEnd(years_ahead * 12))
print(formlist)

## Appendix 4: Alternative ways to form MB portfolios using pd.qcut

In [None]:
### Form portfolios based on mb with pd.qcut.
# The result goes into a separate frame, mbdata_qcut, and mbdata0 is left
# untouched. Adding an mbp column to mbdata0 itself would make the SQL-based
# cell, which selects from mbdata0 and adds its own mbp, produce two mbp
# columns when it is run after this one.
mbdata_qcut = mbdata0.drop(columns='mbp', errors='ignore').copy()

# Within each date, cut mb into 5 quantile bins; labels=False gives bin
# numbers starting at 0, so 1 is added to number the portfolios from 1.
mbdata_qcut['mbp'] = mbdata_qcut.groupby('date')['mb'].\
        transform(lambda x: pd.qcut(x, 5, labels=False, duplicates='drop'))\
            + 1

print(mbdata_qcut.head())
print(mbdata_qcut.mbp.describe())

## Appendix 5: Market cap at the end of June

To form the MB portfolios at the end of June, we need to find the market cap and other information at the end of June of each year


In [None]:
### mcd includes monthly market capitalization

mcd['date']=pd.to_datetime(mcd['date']) #change the format to datetime format

# formd is the data that contain the June date of each year:
# get the June dates (drop duplicates) and reset the index
formd=mcd[(mcd.date.dt.month==6)].date.drop_duplicates().reset_index()
formd=formd.drop(columns='index')
print(formd)#list of the June dates on which portfolios are formed

### Figure out the date one year after each date in formd.
# A June observation is not always dated on the calendar month-end (e.g.
# Friday 2012-06-29). Adding MonthEnd(12) directly to such a date uses the
# first of the 12 steps just to reach 2012-06-30, and ends on 2013-05-31,
# which would leave the June return out of the holding period. Rolling forward
# to the month-end first with MonthEnd(0) makes fdate the end of June of the
# following year for every formation date.
formd.loc[:,'fdate']=formd.date+MonthEnd(0)+MonthEnd(12)
print(formd.head())

### For each June date, add the market cap on that date
mcret=pd.merge(formd,mcd,how='left',on='date')#merge two dataframes: formd has only June data
mcret=mcret.rename(columns={'mc':'junemc'})
print(mcret.head())
print(mcret.columns)

## Appendix 6: Combine files (return, monthly market cap and MB)

- Add monthly market cap data to the *mcretdata and then, merge it with the MB data.  In addition, find the lagged market cap to be used for the value-weighted return calculation
- You can use a sql program, instead of pd.merge used below


In [None]:
##########################################################################
### Add the actual market cap in each month to the return data.
# The renamed copy is kept in its own variable. mcd is not modified, so the
# cells that read mcd.date (the mcd check cell and Appendix 5) can be run again
# after this one.
mc_monthly=mcd.rename(columns={'date':'mcdate'})[['mcdate','ticker','mc']]
mcrd=pd.merge(mcretdata,mc_monthly,how='left',
              left_on=['retdate','ticker'],right_on=['mcdate','ticker'])
mcrd=mcrd.drop(columns='mcdate')# drop the mcdate, which is same as retdate

## To make sure that date is the end of a month for both MB and return files:
## convert the MB date column to datetime and roll both dates forward to the
## month-end. The MB data are adjusted in a copy, so mbdata is not modified.
mb_monthend=mbdata.assign(date=pd.to_datetime(mbdata['date'])+MonthEnd(0))
mcrd['date']=mcrd['date']+MonthEnd(0)

## Merge two dataframes
mcrmbd=pd.merge(mcrd,mb_monthend,how='left',on=['ticker','date'])

#######################################################################
# Get lagged mc to be used as weight
#######################################################################
mcrmbd=mcrmbd.sort_values(['ticker','retdate']) #sort by ticker and date

## Note that shift is used after sorting and with "groupby(['ticker'])",
## so the lag is always taken from the previous row of the same ticker
mcrmbd['lmc']=mcrmbd.groupby(['ticker'])['mc'].shift(1)
print(mcrmbd[['ticker','retdate','mc','lmc']].head(150))

## Appendix 7: Significance tests

Note that the MB portfolios are formed using the results of LowPE

Since the exercise is using the results of stocks with low PE ratios, the differences between low and high MB portfolios among the low PE ratios are not expected to be so significant.

### For the statistical significance test

Test the difference between High and Low groups

In [None]:
#Define a function to test the difference
def HighMinusLow(data,pvar,mvar,high=None,low=None):
    """Print a t-test of the difference in mean ``mvar`` between two portfolios.

    Parameters
    ----------
    data : DataFrame
        One row per portfolio and period.
    pvar : str
        Name of the column holding the portfolio label.
    mvar : str
        Name of the column holding the return to compare.
    high, low : optional
        Labels of the two portfolios to compare. When omitted, the largest and
        the smallest label found in ``data[pvar]`` are used, so the extreme
        portfolios are compared whatever the number of portfolios is
        (5 and 1 for quintiles).

    Returns
    -------
    None. The mean difference, its t-statistic and the two means are printed.
    """
    labels=data[pvar]
    if high is None:
        high=labels.max()#highest portfolio present in the data
    if low is None:
        low=labels.min()#lowest portfolio present in the data

    high_values=data.loc[labels==high,mvar]
    low_values=data.loc[labels==low,mvar]

    em1=high_values.mean()
    em2=low_values.mean()
    m=em1-em2#difference in mean
    #standard error of the difference, assuming zero covariance between the two portfolios
    s=np.sqrt(pow(high_values.sem(),2)+pow(low_values.sem(),2))
    tew=m/s#t-statistics
    print('Results','Mean Diff','{:10.6f}'.format(m),'T-stat',
          '({:10.2f})'.format(tew),
          'High Mean','{:10.6f}'.format(em1),
          'Low  Mean','{:10.6f}'.format(em2))

###### Run the test on the portfolio returns in portret, with mbp as the
###### portfolio variable: first for equal-weighted (ewret), then for
###### value-weighted (vwret) returns
HighMinusLow(portret,'mbp','ewret')
HighMinusLow(portret,'mbp','vwret')
########################################################################