In [16]:
### Installing the required packages if not already installed
packages = ['numpy', 'pandas', 'warnings', 'sqlite3', 'yfinance', 'numba', 'time']

for package in packages:
    try:
        __import__(package)
    except ImportError:
        %pip install {package}


### Start timer
# Wall-clock timestamp taken before the heavy imports and data loading.
import time
start = time.time()

import numpy as np
import pandas as pd
import warnings
import sqlite3
import os

### Ignoring the warnings
# NOTE: this silences every warning category for the rest of the session.
warnings.filterwarnings('ignore')

### Setting working directory
# The data paths used in this notebook ("Data/...") are relative to the
# project's Code folder. The folder can be overridden with the THESIS_CODE_DIR
# environment variable so the notebook also runs on other machines; when the
# variable is unset the original location is used.
project_dir = os.environ.get(
    'THESIS_CODE_DIR',
    '/Users/emilwilliamhansen/Desktop/Master-Thesis/Code',
)
os.chdir(project_dir)

In [17]:
### Loading the raw liquidity, market, pricing-factor and risk-free files
# Every file is comma separated and read with the latin1 encoding, so the
# shared reader options are collected once.
csv_opts = {'sep': ',', 'encoding': 'latin1'}

# Liquidity measures
liq_monthly = pd.read_csv("Data/ba-odegaard.no/liq_measures_ose_monthly.txt", **csv_opts)
liq_daily = pd.read_csv("Data/ba-odegaard.no/liq_measures_ose_daily.txt", **csv_opts)

# Market portfolios
mkt_daily = pd.read_csv("Data/ba-odegaard.no/market_portfolios_daily.txt", **csv_opts)
mkt_monthly = pd.read_csv("Data/ba-odegaard.no/market_portfolios_monthly.txt", **csv_opts)

# Pricing factors
factors_daily = pd.read_csv("Data/ba-odegaard.no/pricing_factors_daily.txt", **csv_opts)
factors_monthly = pd.read_csv("Data/ba-odegaard.no/pricing_factors_monthly.txt", **csv_opts)

# Risk-free rates (the first line of these two files is skipped)
rf_daily = pd.read_csv("Data/ba-odegaard.no/rf_daily.txt", skiprows=1, **csv_opts)
rf_monthly = pd.read_csv("Data/ba-odegaard.no/rf_monthly.txt", skiprows=1, **csv_opts)

In [18]:
### Fixing the dates and putting them as the index
def _index_by_date(frame):
    """Return `frame` indexed by its `date` column parsed with format %Y%m%d.

    A new DataFrame is returned; the input is not modified. If the frame has
    no `date` column but its index is already named `date` (i.e. this cell was
    run before), the frame is returned unchanged so the cell can safely be
    re-executed.

    Raises:
        KeyError: if there is neither a `date` column nor a `date` index.
        ValueError: if a value does not match the %Y%m%d format.
    """
    if 'date' not in frame.columns and frame.index.name == 'date':
        return frame
    parsed = pd.to_datetime(frame['date'], format='%Y%m%d')
    return frame.assign(date=parsed).set_index('date')


liq_monthly = _index_by_date(liq_monthly)
liq_daily = _index_by_date(liq_daily)

mkt_daily = _index_by_date(mkt_daily)
mkt_monthly = _index_by_date(mkt_monthly)

factors_daily = _index_by_date(factors_daily)
factors_monthly = _index_by_date(factors_monthly)

rf_daily = _index_by_date(rf_daily)
rf_monthly = _index_by_date(rf_monthly)

In [19]:
### Putting the factors, the risk free rate and the market portfolios together
def _prepare_factors(factors, rf, market, rf_column, start='1980', end='2023'):
    """Combine factor, risk-free and market data into one table.

    Steps, in order:
      1. Column-wise concat of `factors`, `rf` and the `EW`/`VW` columns of
         `market`, aligned on the index.
      2. Sort the index. concat's default is sort=False, so the combined index
         is not guaranteed to be in chronological order; both the interpolation
         and the date slice below rely on that order.
      3. Rename `rf_column` to `rf` and fill its gaps by linear interpolation.
      4. Keep only rows from `start` through `end` (inclusive).
      5. Replace `EW` and `VW` by their excess over `rf`.

    Args:
        factors: DataFrame of pricing factors.
        rf: DataFrame containing the risk-free column named `rf_column`.
        market: DataFrame containing at least the columns `EW` and `VW`.
        rf_column: name of the risk-free column in `rf`.
        start, end: bounds of the sample period, passed to `.loc` row slicing.

    Returns:
        A new DataFrame; none of the inputs is modified.
    """
    combined = pd.concat([factors, rf, market[["EW", "VW"]]], axis=1).sort_index()
    combined = combined.rename(columns={rf_column: 'rf'})

    ### Interpolating the risk free rate
    combined['rf'] = combined['rf'].interpolate()

    ### Restricting the period; .copy() so the assignments below write to an
    ### independent frame rather than to a slice of `combined`
    combined = combined.loc[start:end].copy()

    ### Getting the excess returns of EW and VW
    combined['EW'] = combined['EW'] - combined['rf']
    combined['VW'] = combined['VW'] - combined['rf']
    return combined


### Restricted to 1980-2023 (the helper's default period)
factors_daily = _prepare_factors(factors_daily, rf_daily, mkt_daily, rf_column='Rf(1d)')

### Doing the same for the monthly data
factors_monthly = _prepare_factors(factors_monthly, rf_monthly, mkt_monthly, rf_column='Rf(1m)')

In [20]:
### Adding the factors to the database
# Each table is replaced if it already exists. The writes are wrapped in
# try/finally so the connection is closed even when one of them raises.
conn = sqlite3.connect('Data/data.db')
try:
    factors_daily.to_sql('factors_daily', conn, if_exists='replace')
    factors_monthly.to_sql('factors_monthly', conn, if_exists='replace')
    liq_daily.to_sql('liq_daily', conn, if_exists='replace')
    liq_monthly.to_sql('liq_monthly', conn, if_exists='replace')
finally:
    conn.close()