In [None]:
import pandas as pd
import numpy as np
from numpy import nan
import yfinance as yf
import matplotlib.pyplot as plt
import pandas_datareader.data as reader
import datetime as dt
import statsmodels.api as sm
import csv
from scipy import stats

Henter tickers og prishistorikk

In [None]:
# Symbols that are filtered out of the universe after normalisation.
# NOTE(review): the reason for excluding these is not visible in this notebook.
tickers_to_remove = ['FJORD.OL', 'SRBANK.OL','NOFI.OL', 'AIRX.OL', 'AKH.OL', 'AGLX.OL']

# The first table on the Wikipedia page supplies the 'Ticker' column.
osebx_table = pd.read_html('https://no.wikipedia.org/wiki/OSEBX-indeksen')[0]

tickersobx = []
for raw_symbol in osebx_table['Ticker'].to_list():
    # Strip the exchange prefix and append the '.OL' suffix used in the download below.
    symbol = raw_symbol.replace('OSE: ', '') + '.OL'
    # Two symbols are rewritten to a different spelling before the download.
    symbol = symbol.replace('TIETOO.OL', 'TIETO.OL')
    symbol = symbol.replace('SCHB.OL', 'SCHBA.OL')
    if symbol not in tickers_to_remove:
        tickersobx.append(symbol)

# Sample period: daily adjusted close prices, one column per ticker
historie = yf.download(
    tickersobx,
    start='2015-01-01',
    end='2022-01-01',
    interval='1d',
    threads=True,
)['Adj Close']

Lager modellporteføljen

In [None]:
# Window lengths (in rows of the daily price history) for the long and short moving averages
SMALong = 200
SMAKort = 50

# A ticker is held on a given day when its short moving average is above its long one.
df_ma50 = historie.rolling(window=SMAKort).mean()
df_ma200 = historie.rolling(window=SMALong).mean()
df_bool = df_ma50 > df_ma200
df_new = df_bool.mask(df_bool == False, np.nan)
# 'innhold' = comma-separated tickers held that day ('' when nothing is held)
df_new['innhold'] = df_new.apply(lambda row: ','.join(row.index[row == True]), axis=1)

returns = historie.pct_change()
logreturns = np.log(1 + returns)
logreturns['innhold'] = df_new['innhold']
# Float from the start: the column is filled with float returns further down.
logreturns['summerteLogreturns'] = 0.0
logreturnsUtenInnhold = logreturns.drop('innhold', axis=1)

prevInnhold = []
midlertidiglogreturn = 0
antHandler = 0  # running count of buys + sells
daily_portfolio_logreturns = []

# NOTE(review): the holdings for a day are derived from moving averages that
# include that same day's price, and are applied to that same day's return.
# Shifting the signal one row forward would avoid this apparent look-ahead
# bias -- confirm the intended methodology before changing it.
for index, row in logreturns.iterrows():
    # ''.split(',') yields [''], so drop empty strings to get the real holdings.
    innhold = [ticker for ticker in row['innhold'].split(',') if ticker]

    # Equal-weighted mean over the tickers actually held. The previous
    # divisor was len(innhold) - 1, which made the weights sum to n/(n-1)
    # and returned 0 whenever exactly one ticker was held.
    if innhold:
        midlertidiglogreturn = sum(row[ticker] for ticker in innhold) / len(innhold)
    else:
        midlertidiglogreturn = 0.0

    # One trade per ticker that entered (buy) or left (sell) the portfolio.
    antHandler += len(set(innhold) ^ set(prevInnhold))

    daily_portfolio_logreturns.append(midlertidiglogreturn)
    prevInnhold = innhold

# Single column assignment instead of one .loc write per row.
logreturnsUtenInnhold['summerteLogreturns'] = daily_portfolio_logreturns

# Compound the daily portfolio returns into a cumulative value series.
# NOTE(review): this compounds as cumprod(1 + r), i.e. it treats the averaged
# log returns as simple returns; np.exp(r.cumsum()) would compound them as
# log returns. Left unchanged -- confirm which one is intended.
logreturnsUtenInnhold['summerteLogreturns'] = np.exp(np.log(1 + logreturnsUtenInnhold['summerteLogreturns']).cumsum())
logreturnsUtenInnhold.index = logreturnsUtenInnhold.index.strftime('%Y-%m-%d')

# From odegardsjekk.ipynb: load the Odegaard pricing factors and parse the index as dates
df = pd.read_csv('./dataset/pricing_factors_daily.csv', index_col='date')
df.index = pd.to_datetime(df.index, format='%Y%m%d', errors='coerce')

# Keep only the portfolio dates that also exist in the factor data.
in_factor_data = pd.to_datetime(logreturnsUtenInnhold.index).isin(df.index)
matched_values = (
    logreturnsUtenInnhold
    .loc[in_factor_data, ['summerteLogreturns']]
    .rename(columns={'summerteLogreturns': 'portfolio'})
)

# The leading placeholder row labelled 'date' is kept because the next cell
# removes it with portfolioDF.drop('date').
placeholder_row = pd.DataFrame({'portfolio': [np.nan]}, index=['date'])
portfolioDF = pd.concat([placeholder_row, matched_values])

# `indeks` (last matched date) used to leak out of the row-by-row loop; it is
# kept defined for any later cell that still refers to it.
if len(matched_values):
    indeks = matched_values.index[-1]

Kombinerer CAPM-data med modellportefølje for å kunne kjøre regresjonsanalysen 

In [None]:
# --- Portfolio: cumulative value -> daily simple returns ---------------------
# NOTE: this cell reassigns portfolioDF and is therefore not re-runnable on its
# own; re-run the cell that builds portfolioDF first. The strict drop below
# raises KeyError on a second run instead of silently differencing twice.
portfolioDF = portfolioDF.drop('date')  # placeholder row added when portfolioDF was built
# Use a real datetime index so the joins below match on dates, not on strings.
portfolioDF.index = pd.to_datetime(portfolioDF.index, format='%Y-%m-%d')
portfolioDF.index.name = 'date'
portfolioDF = portfolioDF.astype(float).pct_change()
# Skip the first SMALong rows, where the long moving average is not yet defined.
portfolioDF = portfolioDF[SMALong:]

# Dates that were removed by hand from the portfolio series.
# NOTE(review): presumably dates missing from one of the factor files --
# confirm whether they are still needed now that inner joins are used.
excluded_dates = [
    '2017-04-13', '2004-02-13', '2004-02-18', '2004-10-01',
    '2005-09-05', '2009-04-28', '2009-11-25',
]
portfolioDF = portfolioDF.drop(index=pd.to_datetime(excluded_dates), errors='ignore')

# --- Factor data -------------------------------------------------------------
df = pd.read_csv('./Data_til_analyse/Daglige_tall_CAPM.csv', index_col='date')
df = df[['SMB', 'HML']]
df.index = pd.to_datetime(df.index, format='%Y%m%d')

rf = pd.read_csv('./dataset/rf_daily.csv', index_col='date')
rf.index = pd.to_datetime(rf.index, format='%Y-%m-%d')

df_mergedPricingFactors = pd.merge(df, rf, left_index=True, right_index=True, how='inner')

# --- Market return -----------------------------------------------------------
mr = pd.read_csv('./dataset/market_portfolios_daily.csv', index_col='date')
# The file is expected to contain a stray row labelled 'date'; tolerate its absence.
mr = mr.drop(index='date', errors='ignore')
mr = mr['OSEAX']
mr.index = pd.to_datetime(mr.index, format='%Y%m%d')

# --- Combine on date ---------------------------------------------------------
# Inner joins keep only the days present in every source. This replaces the
# row-by-row pd.concat loops, one of which looked rows up with a stale
# variable from the previous cell and so appended the same values for every date.
merged_df = (
    df_mergedPricingFactors
    .merge(portfolioDF, left_index=True, right_index=True, how='inner')
    .merge(mr, left_index=True, right_index=True, how='inner')
    .rename(columns={'rf(1d)': 'rf'})
)

if merged_df.empty:
    raise ValueError(
        'No overlapping dates between portfolio returns, pricing factors, '
        'risk-free rate and market returns.'
    )

OLS regresjon, 3-faktor CAPM

In [None]:
# Make sure the market return is numeric before doing arithmetic on it.
merged_df['OSEAX'] = merged_df['OSEAX'].astype(float)

# Excess returns: portfolio and market, each net of the risk-free rate.
risk_free = merged_df['rf']
merged_df['portfolio-rf'] = merged_df['portfolio'].sub(risk_free)
merged_df['MRKT-rf'] = merged_df['OSEAX'].sub(risk_free)

# Regress portfolio excess return on the three factors, with an intercept.
Y = merged_df.loc[:, 'portfolio-rf']
X = merged_df.loc[:, ['MRKT-rf', 'SMB', 'HML']]
X_sm = sm.add_constant(X)

model = sm.OLS(endog=Y, exog=X_sm)
results = model.fit()
results.summary()

Test av MLR assumptions

In [None]:
# Residuals-vs-fitted plot for the regression held in `results`.
# The previous version read `model_new`, `X_new` and `fitted_values`, none of
# which is defined anywhere in the notebook, so the cell failed on a fresh
# kernel. It now uses the fit from the regression cell above; run this cell
# before the auxiliary regressions further down, which overwrite `results`.
fitted_values = results.fittedvalues
predicted = fitted_values  # kept under the old name as well
residuals = results.resid

plt.scatter(fitted_values, residuals, label='Residualer', s=5)
plt.axhline(y=0, color='Black', linestyle='-', label='Null-linje')

# Fit a first-order polynomial (straight line) through the residuals
x = fitted_values
y = residuals
coefficients = np.polyfit(x, y, 1)
line = np.polyval(coefficients, x)

plt.plot(x, line, color='r', label='Regresjons linje')
plt.xlabel('Predikerte verdier')
plt.ylabel('Residualer')
plt.title('Residualer mot predikerte verdier 2015-2022')
plt.legend()
plt.show()

In [None]:
# Histogram of the regression residuals with a normal density fitted to
# their mean and standard deviation drawn on top.
# Uses `results` from the regression cell above: the previously referenced
# `model_new` is not defined anywhere in the notebook. Run this cell before
# the auxiliary regressions further down, which overwrite `results`.
residuals = results.resid
plt.hist(residuals, bins=100, density=True, alpha=0.5, color='blue')
x = np.linspace(residuals.min(), residuals.max(), 100)
mu, std = np.mean(residuals), np.std(residuals)
pdf = stats.norm.pdf(x, mu, std)

plt.plot(x, pdf, color='red')
plt.xlabel('Residualer')
plt.ylabel('Frekvens')
# Title period corrected to match the 2015-2022 sample downloaded above.
plt.title('Fordeling av residualene 2015-2022')
plt.show()


In [None]:
# Narrow merged_df to the regression variables and show their pairwise correlations.
regression_columns = ['portfolio-rf', 'MRKT-rf', 'SMB', 'HML']
merged_df = merged_df.loc[:, regression_columns]
merged_df.corr()

In [None]:
# Auxiliary regression: MRKT-rf explained by SMB and HML, with an intercept.
# NOTE(review): presumably a multicollinearity check between the factors -- confirm.
# Overwrites `model` and `results` from the main regression.
Y = merged_df.loc[:, 'MRKT-rf']
X = merged_df.loc[:, ['SMB', 'HML']]

X_sm = sm.add_constant(X)

model = sm.OLS(endog=Y, exog=X_sm)
results = model.fit()
results.summary()

In [None]:
# Auxiliary regression: SMB explained by MRKT-rf and HML, with an intercept.
# NOTE(review): presumably a multicollinearity check between the factors -- confirm.
# Overwrites `model` and `results`.
Y = merged_df.loc[:, 'SMB']
X = merged_df.loc[:, ['MRKT-rf', 'HML']]

X_sm = sm.add_constant(X)

model = sm.OLS(endog=Y, exog=X_sm)
results = model.fit()
results.summary()

In [None]:
# Auxiliary regression: HML explained by MRKT-rf and SMB, with an intercept.
# NOTE(review): presumably a multicollinearity check between the factors -- confirm.
# Overwrites `model` and `results`.
Y = merged_df.loc[:, 'HML']
X = merged_df.loc[:, ['MRKT-rf', 'SMB']]

X_sm = sm.add_constant(X)

model = sm.OLS(endog=Y, exog=X_sm)
results = model.fit()
results.summary()