In [1]:
### Installing the required packages if not already installed
packages = ['numpy', 'pandas', 'warnings', 'sqlite3', 'time']

for package in packages:
    try:
        __import__(package)
    except ImportError:
        %pip install {package}


### Start timer
import time
start = time.time()

import numpy as np
import pandas as pd
import warnings
import sqlite3
import os

### Ignoring the warnings
warnings.filterwarnings('ignore')

### Setting working directory
os.chdir('/Users/emilwilliamhansen/Desktop/Master-Thesis/Code')

#### Goal:

Here I will simply make sure all the monthly and daily data alligns with each other. So, we will simply import all data, make sure it has the same stocks and the same periods in it.

In [2]:
### Importing the monthly data
### Connecting to the database
conn = sqlite3.connect('Data/data.db')

### Getting data from the SQL database
monthly_returns = pd.read_sql_query("SELECT * FROM monthly_returns", conn, index_col='index')
daily_returns = pd.read_sql_query("SELECT * FROM daily_returns", conn, index_col='index')


filtered_monthly_returns = pd.read_sql_query("SELECT * FROM filtered_monthly_returns", conn, index_col='index')
filtered_daily_returns = pd.read_sql_query("SELECT * FROM filtered_daily_returns", conn, index_col='index')


factors_monthly = pd.read_sql_query("SELECT * FROM factors_monthly", conn, index_col='date')
factors_daily = pd.read_sql_query("SELECT * FROM factors_daily", conn, index_col='date')
liq_monthly = pd.read_sql_query("SELECT * FROM liq_monthly", conn, index_col='date')
liq_daily = pd.read_sql_query("SELECT * FROM liq_daily", conn, index_col='date')

index_monthly_returns = pd.read_sql_query("SELECT * FROM index_monthly_returns", conn, index_col='date')
index_daily_returns = pd.read_sql_query("SELECT * FROM index_daily_returns", conn, index_col='date')
index_monthly_prices = pd.read_sql_query("SELECT * FROM index_monthly_prices", conn, index_col='date')
index_monthly_prices = pd.read_sql_query("SELECT * FROM index_monthly_prices", conn, index_col='date')

### Closing the connection
conn.close()

#### Fixing the indexes

In [3]:
monthly_returns.index = pd.to_datetime(monthly_returns.index)
daily_returns.index = pd.to_datetime(daily_returns.index)

filtered_monthly_returns.index = pd.to_datetime(filtered_monthly_returns.index)
filtered_daily_returns.index = pd.to_datetime(filtered_daily_returns.index)

factors_monthly.index = pd.to_datetime(factors_monthly.index)
factors_daily.index = pd.to_datetime(factors_daily.index)
liq_monthly.index = pd.to_datetime(liq_monthly.index)
liq_daily.index = pd.to_datetime(liq_daily.index)

index_monthly_returns.index = pd.to_datetime(index_monthly_returns.index)
index_daily_returns.index = pd.to_datetime(index_daily_returns.index)
index_monthly_prices.index = pd.to_datetime(index_monthly_prices.index)
index_monthly_prices.index = pd.to_datetime(index_monthly_prices.index)

#### Renaming all the dates colums to 'Date'

In [4]:
monthly_returns.index.name = 'Date'
daily_returns.index.name = 'Date'

filtered_monthly_returns.index.name = 'Date'
filtered_daily_returns.index.name = 'Date'

factors_monthly.index.name = 'Date'
factors_daily.index.name = 'Date'
liq_monthly.index.name = 'Date'
liq_daily.index.name = 'Date'

index_monthly_returns.index.name = 'Date'
index_daily_returns.index.name = 'Date'
index_monthly_prices.index.name = 'Date'
index_monthly_prices.index.name = 'Date'

#### Restricting the period to 1980-2023

In [5]:
### Making all returns go from 1980-01-01 to 2023-12-31
monthly_returns = monthly_returns.loc['1980-01-01':'2023-12-31']
daily_returns = daily_returns.loc['1980-01-01':'2023-12-31']

filtered_monthly_returns = filtered_monthly_returns.loc['1980-01-01':'2023-12-31']
filtered_daily_returns = filtered_daily_returns.loc['1980-01-01':'2023-12-31']


factors_monthly = factors_monthly.loc['1980-01-01':'2023-12-31']
factors_daily = factors_daily.loc['1980-01-01':'2023-12-31']
liq_monthly = liq_monthly.loc['1980-01-01':'2023-12-31']
liq_daily = liq_daily.loc['1980-01-01':'2023-12-31']

index_monthly_returns = index_monthly_returns.loc['1980-01-01':'2023-12-31']
index_daily_returns = index_daily_returns.loc['1980-01-01':'2023-12-31']
index_monthly_prices = index_monthly_prices.loc['1980-01-01':'2023-12-31']
index_monthly_prices = index_monthly_prices.loc['1980-01-01':'2023-12-31']

#### Fixing the shapes

In [6]:
print('Monthly Returns:', monthly_returns.shape)
print('Filtered Monthly Returns:', filtered_monthly_returns.shape)
print('Factors Monthly:', factors_monthly.shape)
print('Liq Monthly:', liq_monthly.shape)
print('Index Monthly Returns:', index_monthly_returns.shape)
print('Index Monthly Prices:', index_monthly_prices.shape)

Monthly Returns: (528, 1074)
Filtered Monthly Returns: (528, 995)
Factors Monthly: (528, 6)
Liq Monthly: (491, 4)
Index Monthly Returns: (491, 156)
Index Monthly Prices: (492, 156)


In [7]:
liq_monthly = liq_monthly.reindex(monthly_returns.index)
index_monthly_returns = index_monthly_returns.reindex(monthly_returns.index)
index_monthly_prices = index_monthly_prices.reindex(monthly_returns.index)
factors_monthly = factors_monthly.reindex(monthly_returns.index)

In [8]:
print('Daily Returns:', daily_returns.shape)
print('Filtered Daily Returns:', filtered_daily_returns.shape)
print('Factors Daily:', factors_daily.shape)
print('Liq Daily:', liq_daily.shape)
print('Index Daily Returns:', index_daily_returns.shape)
print('Index Daily Prices:', index_daily_returns.shape)

Daily Returns: (11041, 1074)
Filtered Daily Returns: (11041, 995)
Factors Daily: (11102, 6)
Liq Daily: (10265, 2)
Index Daily Returns: (10131, 156)
Index Daily Prices: (10131, 156)


In [9]:
factors_daily = factors_daily.reindex(daily_returns.index)
liq_daily = liq_daily.reindex(daily_returns.index)
index_daily_returns = index_daily_returns.reindex(daily_returns.index)
index_monthly_prices = index_monthly_prices.reindex(daily_returns.index)

### Reshuffeling the factors

In [10]:
factors_monthly

Unnamed: 0_level_0,SMB,HML,UMD,rf,EW,VW
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1980-01-31,,,,0.00827,-0.013986,0.015765
1980-02-29,,,,0.00822,0.066012,-0.061009
1980-03-31,,,,0.00828,-0.084741,-0.222098
1980-04-30,,,,0.00834,0.008976,0.104559
1980-05-31,,,,0.00828,0.016639,0.114166
...,...,...,...,...,...,...
2023-08-31,-0.038960,0.052521,0.031523,0.00362,-0.022485,0.016426
2023-09-30,0.010048,0.006260,0.005747,0.00374,0.000380,0.033582
2023-10-31,-0.038059,0.027076,0.026071,0.00370,-0.036421,0.005711
2023-11-30,0.048981,-0.041344,0.006850,0.00372,0.089996,0.015369


In [11]:
factors_monthly = factors_monthly[['VW', 'EW', 'SMB', 'HML', 'UMD', 'rf']]
factors_daily = factors_daily[['VW', 'EW', 'SMB', 'HML', 'UMD', 'rf']]

#### Saving all the data again

In [12]:
### Saving all the data to the same database
conn = sqlite3.connect('Data/data.db')

monthly_returns.to_sql('monthly_returns', conn, if_exists='replace')
daily_returns.to_sql('daily_returns', conn, if_exists='replace')

filtered_monthly_returns.to_sql('filtered_monthly_returns', conn, if_exists='replace')
filtered_daily_returns.to_sql('filtered_daily_returns', conn, if_exists='replace')


factors_monthly.to_sql('factors_monthly', conn, if_exists='replace')
factors_daily.to_sql('factors_daily', conn, if_exists='replace')
liq_monthly.to_sql('liq_monthly', conn, if_exists='replace')
liq_daily.to_sql('liq_daily', conn, if_exists='replace')

index_monthly_returns.to_sql('index_monthly_returns', conn, if_exists='replace')
index_daily_returns.to_sql('index_daily_returns', conn, if_exists='replace')
index_monthly_prices.to_sql('index_monthly_prices', conn, if_exists='replace')
index_monthly_prices.to_sql('index_monthly_prices', conn, if_exists='replace')

conn.close()