In [1]:
import pandas as pd
import numpy as np
from functions import *
import re

%load_ext autoreload
%autoreload 2

pd.set_option('future.no_silent_downcasting', True)

In [2]:
# set paths to data
# The data folder defaults to the original Google Drive location. Set the
# THESIS_DATA_DIR environment variable to run the notebook on another machine
# without editing this cell. `path` stays a plain string.
import os

path = os.environ.get(
    'THESIS_DATA_DIR',
    '/Users/johan/Library/CloudStorage/GoogleDrive-johan.oelgaard@gmail.com/My Drive/04 Økonomi/10 Thesis/Data',
)

# read daily market data from eikon
daily = 'eikon_daily.xlsx'
# sheet_name=None reads every sheet and returns a dict of DataFrames keyed by sheet name
eikon_dfs = pd.read_excel(os.path.join(path, daily), sheet_name=None)
# names of the sheets that were loaded
eikon_keys = eikon_dfs.keys()

In [3]:
# load trade data (all columns but the first); copy so the header handling
# below never touches the sheet cached in eikon_dfs
trade_values_df = eikon_dfs['Trade Values'].iloc[:, 1:].copy()
# the first two rows carry the column headers -> promote them to a two-level column index
trade_values_df.columns = pd.MultiIndex.from_arrays(trade_values_df.iloc[:2].values)
# drop the first two rows as they are now headers
trade_values_df = trade_values_df.iloc[2:].reset_index(drop=True)
# set the first column as index
trade_values_df = trade_values_df.set_index(trade_values_df.columns[0])
trade_values_df.index.name = "timestamp"
# sort the columns (required for slicing on the column MultiIndex) and sort the
# rows oldest -> newest so the gap filling below does not depend on the order
# in which the sheet happened to be exported
trade_values_df = trade_values_df.sort_index(axis=1, level=0).sort_index(axis=0)

# keep only trade close values
idx = pd.IndexSlice
trade_values_df = trade_values_df.loc[:, idx[:, 'Trade Close']]
# set 0 values to NaN
trade_values_df = trade_values_df.replace(0, np.nan)

# Fill gaps inside each ticker's quoted period with the most recent earlier
# close. Rows before the first and after the last valid observation of a ticker
# stay NaN, and tickers without any valid observation are simply left empty.
# (The previous per-ticker loop sliced .loc[newest:oldest], which only selects
# rows when the index is ordered newest-first and breaks for an all-NaN ticker.)
has_later_quote = trade_values_df.bfill().notna()
trade_values_df = trade_values_df.ffill().where(has_later_quote)

# stack first level of columns to rows; the new stack implementation keeps
# all-NaN rows, so drop them explicitly to get one row per observed price
trade_values_df = (
    trade_values_df.stack(level=0, future_stack=True)
    .dropna(how='all')
    .reset_index()
)
# rename columns
trade_values_df.columns = ['timestamp', 'Ticker', 'Trade Close']
# set first column as index
trade_values_df = trade_values_df.set_index('timestamp')

# calculate the daily returns (per ticker, in chronological order)
trade_values_df = trade_values_df.sort_values(by=['Ticker', 'timestamp'], ascending=[True, True])
trade_values_df['stkre'] = trade_values_df.groupby('Ticker')['Trade Close'].pct_change()


  return Index(sequences[0], name=names)
  trade_values_df = trade_values_df.stack(level=0).reset_index()


In [4]:
# spot check: show the AAB.CO rows dated 2001-03-01 through 2001-04-01
is_aab = trade_values_df['Ticker'] == 'AAB.CO'
in_window = (trade_values_df.index >= '2001-03-01') & (trade_values_df.index <= '2001-04-01')
display(trade_values_df.loc[is_aab & in_window])

Unnamed: 0_level_0,Ticker,Trade Close,stkre
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2001-03-01,AAB.CO,3765.143726,0.0
2001-03-02,AAB.CO,3660.556401,-0.027778
2001-03-05,AAB.CO,3660.556401,0.0
2001-03-06,AAB.CO,3686.703232,0.007143
2001-03-07,AAB.CO,3686.703232,0.0
2001-03-08,AAB.CO,3738.996895,0.014184
2001-03-09,AAB.CO,3738.996895,0.0
2001-03-12,AAB.CO,3738.996895,0.0
2001-03-13,AAB.CO,3660.556401,-0.020979
2001-03-14,AAB.CO,3451.381749,-0.057143


In [5]:
# load index data: take the OMXCPI sheet without its first column
omxcpi_sheet = eikon_dfs['OMXCPI'].iloc[:, 1:]

# the first row holds the column labels; the remaining rows are the data
omxcpi_header = omxcpi_sheet.iloc[0]
omxcpi = omxcpi_sheet.iloc[1:].reset_index(drop=True)
omxcpi.columns = omxcpi_header

omxcpi = (
    omxcpi
    # the first column becomes the row index
    .set_index(omxcpi.columns[0])
    .sort_index(axis=1)
    # keep only closing values, as a one-column frame named after the index
    .loc[:, 'Trade Close']
    .to_frame('OMXCPI')
    # chronological order, so pct_change compares each row with the previous one
    .sort_index(ascending=True)
)

# market return: row-over-row relative change of the index level
omxcpi['mktre'] = omxcpi['OMXCPI'].pct_change()

  return Index(sequences[0], name=names)


In [None]:
# calculate the rolling beta
# attach the index columns to every stock row (left join on the row index)
stock_vs_market = trade_values_df.join(omxcpi, how='left')
# the price/level columns are not needed any more; drop them and every row with a missing value
stock_vs_market = stock_vs_market.drop(columns=['Trade Close', 'OMXCPI']).dropna()
# run rolling_beta once per ticker (presumably provided by `from functions import *` - confirm)
beta = stock_vs_market.groupby('Ticker').apply(rolling_beta)

# flatten the result into a frame with one row per (ticker, timestamp)
beta = beta.reset_index()
beta.columns = ['ticker', 'timestamp', 'beta']
# set the index to timestamp
beta = beta.set_index('timestamp')

# NOTE: an earlier draft replaced zero betas with NaN and forward-filled beta
# per ticker before saving; that step is currently disabled.

# save the beta to csv
beta.to_csv('data/beta.csv')

  beta = beta.groupby('Ticker').apply(rolling_beta)
