In [1]:
### Installing the required packages if not already installed
packages = ['numpy', 'pandas', 'warnings', 'sqlite3', 'yfinance', 'numba', 'time']

for package in packages:
    try:
        __import__(package)
    except ImportError:
        %pip install {package}


### Start timer
import time
start = time.time()

import numpy as np
import pandas as pd
import warnings
import sqlite3
import os

### Ignoring the warnings
warnings.filterwarnings('ignore')

### Setting working directory
os.chdir('/Users/emilwilliamhansen/Desktop/Master-Thesis/Code')

In [2]:
### Reading the index daily data
index_83_99 = pd.read_csv("Data/obi/daily_equity_index_ose/stock_index_1983_1999.csv",
                     sep=';', encoding='latin1')

index_00_09 = pd.read_csv("Data/obi/daily_equity_index_ose/stock_index_2000_2009.csv",
                        sep=';', encoding='latin1')

index_10_20 = pd.read_csv("Data/obi/daily_equity_index_ose/stock_index_2010_2020.csv",
                        sep=';', encoding='latin1')

index_20_24 = pd.read_csv("Data/obi/daily_equity_index_ose/norway_index_observations_2020_2024.csv",
                        sep=';', encoding='latin1').rename(columns={'Date': 'date', 'symbol': 'ticker', 'ISIN': 'secid'})

### Concatenating the data
index_daily = pd.concat([index_83_99, index_00_09, index_10_20, index_20_24])

### Fixing the date format
index_daily['date'] = pd.to_datetime(index_daily['date'], format='%Y%m%d')  
index_daily

Unnamed: 0,date,secid,ticker,name,close
0,1983-01-03,9002,FRSX,Forsikringsindeks,100.00
1,1983-01-03,9004,INDX,Industriindeks,100.00
2,1983-01-03,9008,SKIX,Skipsindeks,100.00
3,1983-01-03,9009,TOTX,Totalindeks,100.00
4,1983-01-04,9000,BANX,Bankindeks,99.73
...,...,...,...,...,...
20656,2024-08-14,NO0007035327,OSEBX,OSEBX GR,1437.29
20657,2024-08-14,NO0010890429,OFINN,OBX FINANCIALS NR,157.04
20658,2024-08-14,NO0010890510,OCSN,OBX CONS STAPLE NR,115.41
20659,2024-08-14,NO0010890403,OHCP,OBX HEALTH C PR,42.78


In [3]:
### Looking at the values in daily where the ticker is NaN
print(index_daily[index_daily['ticker'].isnull()]["name"].value_counts())

Series([], Name: count, dtype: int64)


In [4]:
### Getting the tickers and names
index_tickers = index_daily[['ticker', 'name']].drop_duplicates(subset='ticker').sort_values('ticker').reset_index(drop=True)

### From long format to wide format
dates = index_daily['date'].drop_duplicates().sort_values().reset_index(drop=True)
tickers = index_tickers['ticker']

index_daily_prices = index_daily.pivot_table(
    index='date',
    columns='ticker',
    values='close',
    aggfunc='first'
).reindex(index=dates, columns=tickers)

### Resampling the data to monthly
index_monthly_prices = index_daily_prices.resample('M').last()

### Getting returns
index_daily_returns = index_daily_prices.pct_change().iloc[1:]
index_monthly_returns = index_monthly_prices.pct_change().iloc[1:]

### Connecting to the database
conn = sqlite3.connect('Data/data.db')

index_daily_prices.to_sql('index_daily_prices', conn, if_exists='replace')
index_monthly_prices.to_sql('index_monthly_prices', conn, if_exists='replace')
index_daily_returns.to_sql('index_daily_returns', conn, if_exists='replace')
index_monthly_returns.to_sql('index_monthly_returns', conn, if_exists='replace')
index_tickers.to_sql('index_tickers', conn, if_exists='replace')

### Closing the connection
conn.close()