In [1]:
### Installing the required packages if not already installed
packages = ['numpy', 'pandas', 'warnings', 'sqlite3', 'yfinance', 'numba', 'time']

for package in packages:
    try:
        __import__(package)
    except ImportError:
        %pip install {package}


### Start timer
import time
start = time.time()

import numpy as np
import pandas as pd
import warnings
import sqlite3
import yfinance as yf
import os

from numba import njit

### Ignoring the warnings
warnings.filterwarnings('ignore')

### Setting working directory
os.chdir('/Users/emilwilliamhansen/Library/Mobile Documents/com~apple~CloudDocs/School/Master Thesis/Code')

#### Reading the data - Monthly

In [2]:
### Reading the datasets
monthly_80_20 = (pd.read_csv("Data/monthly_stock_returns_ose.csv",
                      sep=';', encoding='latin1')[["ISIN", "ticker", "Last_Sec_Name", "Date", "MonthlyReturn", "LastPrice", "NoShares"]]
                      ).rename(columns={"Last_Sec_Name": "Name", "MonthlyReturn": "Return", "LastPrice": "Price", "NoShares": "Shares"})

monthly_20_24 = (pd.read_csv('Data/ose_equity_euronext_data/monthly_ose_stocks_nov_2020_aug_2024.csv',
                         sep=',', encoding='latin1')[["ISIN", "ticker", "Name", "Date", "Return", "Price", "SharesOutstanding"]]
                         ).rename(columns={"SharesOutstanding": "Shares"})

### Combining the datasets
monthly = pd.concat([monthly_80_20, monthly_20_24])
#del monthly_80_20, monthly_20_24

### Fixing the date format
monthly["Date"] = pd.to_datetime(monthly["Date"], format="%Y%m%d")

monthly

Unnamed: 0,ISIN,ticker,Name,Date,Return,Price,Shares
0,NO0003069908,AAT,Aust-Agder Trafikkselskap,1980-01-31,0.0000,240.00,4000.0
1,NO0003069908,AAT,Aust-Agder Trafikkselskap,1980-02-29,0.0000,240.00,4000.0
2,NO0003069908,AAT,Aust-Agder Trafikkselskap,1980-03-31,0.0000,240.00,4000.0
3,NO0003069908,AAT,Aust-Agder Trafikkselskap,1980-04-30,0.0417,250.00,4000.0
4,NO0003069908,AAT,Aust-Agder Trafikkselskap,1980-05-31,0.4600,325.00,4000.0
...,...,...,...,...,...,...,...
14953,US36467X2062,GIG,GAMING INNOVATION,2024-08-31,-0.0334,28.95,127132040.0
14954,VGG3175Q1081,EPIC,Epic Gas,2020-12-31,0.0191,16.00,106616352.0
14955,VGG3175Q1081,EPIC,Epic Gas,2021-01-31,-0.0437,15.30,106616352.0
14956,VGG3175Q1081,EPIC,EPIC GAS,2021-02-28,0.1432,16.30,106616352.0


#### Reading the data - Daily

In [3]:
daily_80_90 = (pd.read_csv("Data/daily_stock_returns_ose_csv/daily_stock_returns_ose_1980_1989.csv",
                        sep=';', encoding='latin1')[["ISIN", "ticker", "Last_Sec_Name", "Date", "Return", "ClosePrice", "SharesOutstanding"]]
                        ).rename(columns={"Last_Sec_Name": "Name", "ClosePrice": "Price", "SharesOutstanding": "Shares"})

daily_90_00 = (pd.read_csv("Data/daily_stock_returns_ose_csv/daily_stock_returns_ose_1990_1999.csv",
                        sep=';', encoding='latin1')[["ISIN", "ticker", "Last_Sec_Name", "Date", "Return", "ClosePrice", "SharesOutstanding"]]
                        ).rename(columns={"Last_Sec_Name": "Name", "ClosePrice": "Price", "SharesOutstanding": "Shares"})

daily_00_10 = (pd.read_csv("Data/daily_stock_returns_ose_csv/daily_stock_returns_ose_2000_2009.csv",
                        sep=';', encoding='latin1')[["ISIN", "ticker", "Last_Sec_Name", "Date", "Return", "ClosePrice", "SharesOutstanding"]]
                        ).rename(columns={"Last_Sec_Name": "Name", "ClosePrice": "Price", "SharesOutstanding": "Shares"})

daily_10_20 = (pd.read_csv("Data/daily_stock_returns_ose_csv/daily_stock_returns_ose_2010_2020.csv",
                        sep=';', encoding='latin1')[["ISIN", "ticker", "Last_Sec_Name", "Date", "Return", "ClosePrice", "SharesOutstanding"]]
                        ).rename(columns={"Last_Sec_Name": "Name", "ClosePrice": "Price", "SharesOutstanding": "Shares"})

daily_20_24 = (pd.read_csv('Data/ose_equity_euronext_data/daily_ose_stocks_nov_2020_aug_2024.csv',
                         sep=';', encoding='latin1')[["ticker", "Name", "Date", "Return", "ClosePrice", "SharesOutstanding"]]
                        ).rename(columns={"ClosePrice": "Price", "SharesOutstanding": "Shares"})

### Fixing the ISIN values for 2020-2024 dataset by extracing the ISIN values from the monthly dataset
daily_20_24['ISIN'] = np.nan
daily_20_24 = daily_20_24.merge(monthly[["ISIN", "ticker"]], on="ticker", how="left").drop(columns='ISIN_x').rename(columns={"ISIN_y": "ISIN"})

### Combining the datasets
daily = pd.concat([daily_80_90, daily_90_00, daily_00_10, daily_10_20, daily_20_24])
del daily_80_90, daily_90_00, daily_00_10, daily_10_20, daily_20_24


### Fixing the date format
daily["Date"] = pd.to_datetime(daily["Date"], format="%Y%m%d")

daily

Unnamed: 0,ISIN,ticker,Name,Date,Return,Price,Shares
0,NO0003069908,AAT,Aust-Agder Trafikkselskap,1980-01-04,0.0000,,4000.0
1,NO0003069908,AAT,Aust-Agder Trafikkselskap,1980-01-08,0.0000,,4000.0
2,NO0003069908,AAT,Aust-Agder Trafikkselskap,1980-01-11,0.0000,,4000.0
3,NO0003069908,AAT,Aust-Agder Trafikkselskap,1980-01-15,0.0000,,4000.0
4,NO0003069908,AAT,Aust-Agder Trafikkselskap,1980-01-18,0.0000,,4000.0
...,...,...,...,...,...,...,...
35407819,VGG3175Q1081,EPIC,EPIC GAS,2021-03-04,-0.0338,14.3,106616352.0
35407820,VGG3175Q1081,EPIC,EPIC GAS,2021-03-05,-0.0350,13.8,106616352.0
35407821,VGG3175Q1081,EPIC,EPIC GAS,2021-03-05,-0.0350,13.8,106616352.0
35407822,VGG3175Q1081,EPIC,EPIC GAS,2021-03-05,-0.0350,13.8,106616352.0


In [4]:
### Getting the values unique rows for ISIN and ticker
isin_ticker = pd.concat([monthly[["ISIN", "ticker"]], daily[["ISIN", "ticker"]]]).drop_duplicates().reset_index(drop=True)

### Getting the rows where the ISIN values are not unique
isin = isin_ticker[isin_ticker.duplicated(subset='ISIN', keep=False)].sort_index().sort_values(by='ISIN')

### If a row has a NaN value in the ticker column, then we keep the row with the ticker value
isin = isin.dropna(subset=['ticker'])

### Dropping the duplicate ISINs, but keeping the last one
isin = isin.drop_duplicates(subset='ISIN', keep='last')

### Creating a dictionary to map ISIN to ticker
isin_dict = isin.set_index('ISIN')['ticker'].to_dict()

### Updating the ticker column in the daily and monthly datasets
daily['ticker'] = daily['ISIN'].map(isin_dict).fillna(daily['ticker'])
monthly['ticker'] = monthly['ISIN'].map(isin_dict).fillna(monthly['ticker'])

#### Looking at all our tickers

In [5]:
### Lets see if the tickers overlap as expected
daily_tickers = daily['ticker'].unique()
monthly_tickers = monthly['ticker'].unique()

missing_tickers_daily = [ticker for ticker in monthly_tickers if ticker not in daily_tickers]
print(f"We have {len(missing_tickers_daily)} tickers in monthly that are not in daily")

missing_tickers_monthly = [ticker for ticker in daily_tickers if ticker not in monthly_tickers]
print(f"We have {len(missing_tickers_monthly)} tickers in daily that are not in monthly")

KeyboardInterrupt: 

In [None]:
### Looking at only the tickers that are in missing_tickers_monthly
daily[daily['ticker'].isin(missing_tickers_monthly)].value_counts("Name")

So, these dont even have a full trading month of returns, so no reason to include them.

In [None]:
### Removing the ticker is in missing_tickers_daily, remove the row
daily = daily[~daily['ticker'].isin(missing_tickers_daily)]

In [None]:
### Getting all the unique tickers from both datasets
companies = pd.concat([daily, monthly])[['ticker', 'Name']].drop_duplicates(subset='ticker').sort_values('ticker').reset_index(drop=True)
companies

In [None]:
# Ensure dates and tickers are in the right order
dates = daily['Date'].drop_duplicates().sort_values()
tickers = companies['ticker']

### Lets create the pivot tables
daily_returns = daily.pivot_table(
    index='Date',
    columns='ticker',
    values='Return',
    aggfunc='first'
).reindex(index=dates, columns=tickers)

daily_prices = daily.pivot_table(
    index='Date',
    columns='ticker',
    values='Price',
    aggfunc='first'
).reindex(index=dates, columns=tickers)

daily_shares = daily.pivot_table(
    index='Date',
    columns='ticker',
    values='Shares',
    aggfunc='first'
).reindex(index=dates, columns=tickers)

daily_returns

In [None]:
# Ensure dates and tickers are in the right order
dates = monthly['Date'].drop_duplicates().sort_values()

### Lets turn the long format into wide format
monthly_returns = monthly.pivot_table(
    index='Date',
    columns='ticker',
    values='Return',
    aggfunc='first'
).reindex(index=dates, columns=tickers)

monthly_prices = monthly.pivot_table(
    index='Date',
    columns='ticker',
    values='Price',
    aggfunc='first'
).reindex(index=dates, columns=tickers)

monthly_shares = monthly.pivot_table(
    index='Date',
    columns='ticker',
    values='Shares',
    aggfunc='first'
).reindex(index=dates, columns=tickers)

monthly_returns

### Lets download the daily prices of the missing data

In [None]:
# ### Getting the tickers
# yf_ticker = [i + ".OL" for i in daily_prices.columns]

# ### Downloading the data
# yf_price = yf.download(yf_ticker, start='2020-01-01', end='2020-12-31')['Close']

# ### Removing the .OL from the columns
# yf_price.columns = [i.replace(".OL", "") for i in yf_price.columns]

# ### Making sure the index is a datetime
# yf_price.index = pd.to_datetime(yf_price.index)

# ### Saving the downloaded data as a csv
# yf_price.to_csv('Data/data_2020_yfinance.csv')

### Reading the data
yf_price = pd.read_csv('Data/data_2020_yfinance.csv', index_col=0)

### Making sure the index is a datetime
yf_price.index = pd.to_datetime(yf_price.index)

### Getting the returns
yf_returns = yf_price.pct_change()

### Drop the first row
yf_returns = yf_returns.iloc[1:]

### Replace the 0 with NaN
yf_returns = yf_returns.replace(0, np.nan)

yf_returns

In [None]:
# ### Getting the extra prices from CIQ
# ciq_price = pd.read_excel("Data/data_2020_CIQ.xlsx", index_col=0)

# ### Making sure the index is a datetime
# ciq_price.index = pd.to_datetime(ciq_price.index)

# ### Removing -OB from the columns
# ciq_price.columns = [i.replace("-OB", "") for i in ciq_price.columns]

# ### Remove the columns that are only NaN
# ciq_price = ciq_price.dropna(axis=1, how='all')

# ### Removing 3 columns that are not in the original data
# ciq_price = ciq_price.drop(columns=['MELG-OSL', 'SB1NO', 'SOR.1', 'OBSRV', 'PROT', 'REACH', 'SCANA', 'STRO', 'SUBC', 'TGS', 'ULTI', 'ABG', 'AGAS'])

# ### Getting the returns
# ciq_returns = ciq_price.pct_change()

# ### Drop the first row
# ciq_returns = ciq_returns.iloc[1:]

# ### Replace the 0 with NaN
# ciq_returns = ciq_returns.replace(0, np.nan)

# ### Reindexing ciq_returns to match yf_returns
# ciq_returns = ciq_returns.reindex(index=yf_returns.index, columns=yf_returns.columns)

# #ciq_returns

In [None]:
# ### Checking for NaNs
# nanprice = yf_price.isnull().sum().sum()
# nanreturns = yf_returns.isnull().sum().sum()
# print(f"We have {nanprice} NaNs in the price dataset")
# print(f"We have {nanreturns} NaNs in the returns dataset")

# ### Lets use ciq_price to fill in the missing values in yf_price
# for tickers in ciq_price.columns:

#     ### Filling the missing prices
#     for i in range(len(ciq_price)):
#         if np.isnan(yf_price.loc[ciq_price.index[i], tickers]):
#             yf_price.loc[ciq_price.index[i], tickers] = ciq_price.loc[ciq_price.index[i], tickers]
        
#     ### Filling the missing returns
#     for i in range(len(ciq_returns)):
#         if np.isnan(yf_returns.loc[ciq_returns.index[i], tickers]):
#             yf_returns.loc[ciq_returns.index[i], tickers] = ciq_returns.loc[ciq_returns.index[i], tickers]

# ### Checking NaNs
# nanprice2 = yf_price.isnull().sum().sum()
# nanreturns2 = yf_returns.isnull().sum().sum()
# print(f"We have {nanprice2} NaNs in the price dataset, so we filled in {nanprice - nanprice2} NaNs")
# print(f"We have {nanreturns2} NaNs in the returns dataset, so we filled in {nanreturns - nanreturns2} NaNs")

In [None]:
### Getting the monthly data
yf_returns_monthly = yf_returns.resample('M').apply(lambda x: (1 + x).prod() - 1)
yf_price_monthly = yf_price.resample('M').last()

### Replace the 0 with NaN
yf_returns_monthly = yf_returns_monthly.replace(0, np.nan)
yf_price_monthly = yf_price_monthly.replace(0, np.nan)

### Restricting the data to only the ones we need
yf_returns = yf_returns.loc['2020-07-01':'2020-11-30']
yf_price = yf_price.loc['2020-07-01':'2020-11-30']
yf_returns_monthly = yf_returns_monthly.loc['2020-07-01':'2020-11-30']
yf_price_monthly = yf_price_monthly.loc['2020-07-01':'2020-11-30']

In [None]:
### Filling the index
idx = daily_prices.index.union(yf_returns.index)
daily_returns = daily_returns.reindex(idx)
daily_prices = daily_prices.reindex(idx)
daily_shares = daily_shares.reindex(idx)

### Filling the data
daily_returns.loc['2020-07-01':'2020-11-30'] = yf_returns
daily_prices.loc['2020-07-01':'2020-11-30'] = yf_price

### Filling the index
idx = monthly_prices.index.union(yf_returns_monthly.index)
monthly_returns = monthly_returns.reindex(idx)
monthly_prices = monthly_prices.reindex(idx)
monthly_shares = monthly_shares.reindex(idx)

### Filling the data
monthly_returns.loc['2020-07-01':'2020-11-30'] = yf_returns_monthly
monthly_prices.loc['2020-07-01':'2020-11-30'] = yf_price_monthly

#### Fixing number of share for the missing period.

More spesificaly, if we have the the numbers of share for the first or the last period, we simply fill the missing period in with this number. If we have the numbers of share for the first AND the last period, we interpolate.

In [None]:
### Getting the dates where we are missing data
shares_monthly_2020 = monthly_shares.loc['2020-06-01':'2020-12-31']
shares_daily_2020 = daily_shares.loc['2020-06-30':'2020-12-01']

### Looping trough it
for i in range(len(shares_monthly_2020.columns)):
    
    ### Monthly
    if not np.isnan(shares_monthly_2020.iloc[6, i]) or not np.isnan(shares_monthly_2020.iloc[6, i]):
        if not np.isnan(shares_monthly_2020.iloc[6, i]) and not np.isnan(shares_monthly_2020.iloc[0, i]):
            shares_monthly_2020.iloc[:, i] = shares_monthly_2020.iloc[:, i].interpolate()
        elif not np.isnan(shares_monthly_2020.iloc[6, i]):
            shares_monthly_2020.iloc[1:6, i] = shares_monthly_2020.iloc[6, i]
        elif not np.isnan(shares_monthly_2020.iloc[0, i]):
            shares_monthly_2020.iloc[1:6, i] = shares_monthly_2020.iloc[0, i]

    ### Daily
    if not np.isnan(shares_daily_2020.iloc[110, i]) or not np.isnan(shares_daily_2020.iloc[110, i]):
        if not np.isnan(shares_daily_2020.iloc[110, i]) and not np.isnan(shares_daily_2020.iloc[0, i]):
            shares_daily_2020.iloc[:, i] = shares_daily_2020.iloc[:, i].interpolate()
        elif not np.isnan(shares_daily_2020.iloc[110, i]):
            shares_daily_2020.iloc[1:110, i] = shares_daily_2020.iloc[110, i]
        elif not np.isnan(shares_daily_2020.iloc[0, i]):
            shares_daily_2020.iloc[1:110, i] = shares_daily_2020.iloc[0, i]

### Filling the data
monthly_shares.loc['2020-06-01':'2020-12-31'] = shares_monthly_2020
daily_shares.loc['2020-06-30':'2020-12-01'] = shares_daily_2020

In [None]:
### Getting the market cap
monthly_mcap = monthly_prices * monthly_shares
daily_mcap = daily_prices * daily_shares

#### Using the downloaded prices to compute all the numbers we can

In [None]:
# @njit
# def fill_missing(prices, returns, mcap, shares):
#     n_rows, n_cols = prices.shape
#     for col in range(0, n_cols):
#         for row in range(1, n_rows):
#             # Fill missing price using previous price and current return
#             if np.isnan(prices[row, col]):
#                 if not np.isnan(prices[row - 1, col]) and not np.isnan(returns[row, col]):
#                     prices[row, col] = prices[row - 1, col] * (1 + returns[row, col])
#             # Fill missing return using current and previous price
#             #if np.isnan(returns[row, col]):
#             #    if not np.isnan(prices[row - 1, col]) and not np.isnan(prices[row, col]):
#             #        returns[row, col] = (prices[row, col] / prices[row - 1, col]) - 1
#             # Fill missing market cap using price and shares
#             if np.isnan(mcap[row, col]):
#                 if not np.isnan(prices[row, col]) and not np.isnan(shares[row, col]):
#                     mcap[row, col] = prices[row, col] * shares[row, col]
#     return prices, returns, mcap

In [None]:
# ### Filling the missing values
# m1, m2, m3 = fill_missing(monthly_prices.values, monthly_returns.values, monthly_mcap.values, monthly_shares.values)
# d1, d2, d3 = fill_missing(daily_prices.values, daily_returns.values, daily_mcap.values, daily_shares.values)

# ### Turning the arrays back into dataframes
# daily_prices = pd.DataFrame(d1, columns=daily_prices.columns, index=daily_prices.index)
# daily_returns = pd.DataFrame(d2, columns=daily_returns.columns, index=daily_returns.index)
# daily_mcap = pd.DataFrame(d3, columns=daily_mcap.columns, index=daily_mcap.index)

# ### Turning the arrays back into dataframes
# monthly_prices = pd.DataFrame(m1, columns=monthly_prices.columns, index=monthly_prices.index)
# monthly_returns = pd.DataFrame(m2, columns=monthly_returns.columns, index=monthly_returns.index)
# monthly_mcap = pd.DataFrame(m3, columns=monthly_mcap.columns, index=monthly_mcap.index)

In [None]:
### Removing all columns that are only NaN or are dtypes object
monthly_returns = monthly_returns.dropna(axis=1, how='all')
monthly_returns = monthly_returns.select_dtypes(exclude=['object'])
monthly_prices = monthly_prices[monthly_returns.columns]
monthly_mcap = monthly_mcap[monthly_returns.columns]
monthly_shares = monthly_shares[monthly_returns.columns]

daily_returns = daily_returns[monthly_returns.columns]
daily_prices = daily_prices[monthly_returns.columns]
daily_mcap = daily_mcap[monthly_returns.columns]
daily_shares = daily_shares[monthly_returns.columns]

### Data manipulation done

In [None]:
### Connecting to the database
conn = sqlite3.connect('Data/data.db')

### Saving the unfiltred data in the database
daily_returns.to_sql('daily_returns', conn, if_exists='replace')
daily_prices.to_sql('daily_prices', conn, if_exists='replace')
daily_mcap.to_sql('daily_mcap', conn, if_exists='replace')
daily_shares.to_sql('daily_shares', conn, if_exists='replace')

monthly_returns.to_sql('monthly_returns', conn, if_exists='replace')
monthly_prices.to_sql('monthly_prices', conn, if_exists='replace')
monthly_mcap.to_sql('monthly_mcap', conn, if_exists='replace')
monthly_shares.to_sql('monthly_shares', conn, if_exists='replace')

### Closing the connection
conn.close()

## Filtering

- Removing all stock returns that trade at a price under 10 NOK that day/month. (If the market cap is missing or under 5 million)

- Removing all stock returns that trade at a marketcap under 10 million NOK that day/month.

In [None]:
@njit
def filter_returns(ret, mcap, price, mcapthreshold1=1000000, pricethreshold2=10):
    rows, cols = ret.shape
    for col in range(cols):
        for row in range(rows):
            if mcap[row, col] < mcapthreshold1:
                ret[row, col] = np.nan
                mcap[row, col] = np.nan
                price[row, col] = np.nan
            if mcap[row, col] == np.nan or mcap[row, col] < 10000000:
                if price[row, col] < pricethreshold2:
                    ret[row, col] = np.nan
                    mcap[row, col] = np.nan
                    price[row, col] = np.nan
    return ret, mcap, price

In [None]:
m1, m2, m3 = filter_returns(monthly_returns.values, monthly_mcap.values, monthly_prices.values)
d1, d2, d3 = filter_returns(daily_returns.values, daily_mcap.values, daily_prices.values)

### Turning the arrays back into dataframes
filtered_daily_returns = pd.DataFrame(d1, columns=daily_returns.columns, index=daily_returns.index)
filtered_daily_prices = pd.DataFrame(d3, columns=daily_prices.columns, index=daily_prices.index)
filtered_daily_mcap = pd.DataFrame(d2, columns=daily_mcap.columns, index=daily_mcap.index)

### Turning the arrays back into dataframes
filtered_monthly_returns = pd.DataFrame(m1, columns=monthly_returns.columns, index=monthly_returns.index)
filtered_monthly_prices = pd.DataFrame(m3, columns=monthly_prices.columns, index=monthly_prices.index)
filtered_monthly_mcap = pd.DataFrame(m2, columns=monthly_mcap.columns, index=monthly_mcap.index)

### If we dont have 4 months of data, we remove the company
filtered_monthly_returns = filtered_monthly_returns.dropna(axis=1, thresh=4)

In [None]:
print(f"We have {monthly_returns.shape[1]} tickers in the unfiltered monthly dataset")
print(f"We have {filtered_daily_returns.shape[1]} tickers in the unfiltered daily dataset")

### Dropping the columns that are only NaN
filtered_monthly_returns = filtered_monthly_returns.dropna(axis=1, how='all')
filtered_daily_returns = filtered_daily_returns.dropna(axis=1, how='all')

print(f"We have {filtered_monthly_returns.shape[1]} tickers in the filtered monthly dataset")
print(f"We have {filtered_daily_returns.shape[1]} tickers in the filtered daily dataset")
print(f"We will only use the tickers that are in both filtered datasets, so we have {len(filtered_monthly_returns.columns)} tickers")

In [None]:
### We will only use the stocks we have monthly returns for
filtered_daily_prices = filtered_daily_prices[filtered_monthly_returns.columns]
filtered_daily_mcap = filtered_daily_mcap[filtered_monthly_returns.columns]
filtered_daily_shares = daily_shares[filtered_monthly_returns.columns]
filtered_daily_returns = filtered_daily_returns[filtered_monthly_returns.columns]
filtered_monthly_prices = filtered_monthly_prices[filtered_monthly_returns.columns]
filtered_monthly_mcap = filtered_monthly_mcap[filtered_monthly_returns.columns]
filtered_monthly_shares = monthly_shares[filtered_monthly_returns.columns]

In [None]:
### Connecting to the database
conn = sqlite3.connect('Data/data.db')

### Saving the filtred data in the database
filtered_daily_returns.to_sql('filtered_daily_returns', conn, if_exists='replace')
filtered_daily_prices.to_sql('filtered_daily_prices', conn, if_exists='replace')
filtered_daily_mcap.to_sql('filtered_daily_mcap', conn, if_exists='replace')
filtered_daily_shares.to_sql('filtered_daily_shares', conn, if_exists='replace')

filtered_monthly_returns.to_sql('filtered_monthly_returns', conn, if_exists='replace')
filtered_monthly_prices.to_sql('filtered_monthly_prices', conn, if_exists='replace')
filtered_monthly_mcap.to_sql('filtered_monthly_mcap', conn, if_exists='replace')
filtered_monthly_shares.to_sql('filtered_monthly_shares', conn, if_exists='replace')

### Closing the connection
conn.close()

In [None]:
### End timer
end = time.time()

print(f"The script took {end - start} seconds to run")