In [4]:
import yfinance as yf
import pandas as pd
import os

In [5]:
class StockTicker:
    def __init__(self, stockName:str, timePeriod:str, timeInterval:str="1h") -> None:
        self.name=stockName
        periods=['1d', '5d', '1mo', '3mo', '6mo', '1y', '2y', '5y', '10y', 'ytd', 'max']
        if(timePeriod not in periods):
            raise ValueError(f"Supported periods are [{periods}]")
        self.period=timePeriod

        intervals=["1m", "2m", "5m", "15m", "30m", "60m", "90m", "1h", "1d", "5d", "1wk", "1mo", "3mo"]

        #I guess api doesn't care the given interval if the period is too large
        if(timeInterval not in intervals):
            raise ValueError(f"Supported intervals are [{intervals}]")
        self.interval=timeInterval

        self.ticker=yf.Ticker(self.name)

    def getOHLC(self):
        try:
            self.data=self.ticker.history(period=self.period).reset_index().drop(columns=["Dividends","Stock Splits"])
        except:
            if(self.period=="max"):
                return pd.DataFrame()
            self.period="max"

            return self.getOHLC()

        # Select columns with 'float64' dtype  
        float64_cols = list(self.data.select_dtypes(include='float64'))
        
        self.data[float64_cols] = self.data[float64_cols].astype('float32')
        # Select columns with 'float64' dtype  
        int64_cols = list(self.data.select_dtypes(include='int64'))
        self.data[int64_cols] = self.data[int64_cols].astype('int32')

        self.data['Volume']=self.data['Volume'].rolling(window=5, min_periods=1, center=True).mean()
        #TODO balance_sheet update: Do we really need these for the first stage
        return self.data
    

    #TODO find the required data titles and merge their get functions to a single function


In [6]:
stocks=[
"XULAS.IS",
"XHOLD.IS",
"XUSIN.IS",
"XKMYA.IS",
"XMANA.IS",
"XINSA.IS",
"XILTM.IS",
"XUTEK.IS",
"XGMYO.IS",
"XSGRT.IS",
"XUMAL.IS",
"XTCRT.IS",
"XELKT.IS"]



In [7]:
names=os.listdir("ScrapedData")

existingStocks={}
for name in names:
    existingStocks[name[:-4]]=1


for stock in stocks:
    if (stock in existingStocks):
        continue

    print(stock)
    ticker=StockTicker(stock,'5y', "1h")
    ticker.getOHLC().to_csv(os.path.join("ScrapedData", stock + ".csv"), index=False)
    ticker=None

XULAS.IS


XULAS.IS: Period '5y' is invalid, must be one of ['1d', '5d']
XULAS.IS: Period 'max' is invalid, must be one of ['1d', '5d']


XHOLD.IS


XHOLD.IS: Period '5y' is invalid, must be one of ['1d', '5d']
XHOLD.IS: Period 'max' is invalid, must be one of ['1d', '5d']


XUSIN.IS
XKMYA.IS


XKMYA.IS: Period '5y' is invalid, must be one of ['1d', '5d']
XKMYA.IS: Period 'max' is invalid, must be one of ['1d', '5d']


XMANA.IS


XMANA.IS: Period '5y' is invalid, must be one of ['1d', '5d']
XMANA.IS: Period 'max' is invalid, must be one of ['1d', '5d']


XINSA.IS


XINSA.IS: Period '5y' is invalid, must be one of ['1d', '5d']
XINSA.IS: Period 'max' is invalid, must be one of ['1d', '5d']


XILTM.IS


XILTM.IS: Period '5y' is invalid, must be one of ['1d', '5d']
XILTM.IS: Period 'max' is invalid, must be one of ['1d', '5d']


XUTEK.IS


XUTEK.IS: Period '5y' is invalid, must be one of ['1d', '5d']
XUTEK.IS: Period 'max' is invalid, must be one of ['1d', '5d']


XGMYO.IS


XGMYO.IS: Period '5y' is invalid, must be one of ['1d', '5d']
XGMYO.IS: Period 'max' is invalid, must be one of ['1d', '5d']


XSGRT.IS


XSGRT.IS: Period '5y' is invalid, must be one of ['1d', '5d']
XSGRT.IS: Period 'max' is invalid, must be one of ['1d', '5d']


XUMAL.IS


XUMAL.IS: Period '5y' is invalid, must be one of ['1d', '5d']
XUMAL.IS: Period 'max' is invalid, must be one of ['1d', '5d']


XTCRT.IS


XTCRT.IS: Period '5y' is invalid, must be one of ['1d', '5d']
XTCRT.IS: Period 'max' is invalid, must be one of ['1d', '5d']


XELKT.IS


XELKT.IS: Period '5y' is invalid, must be one of ['1d', '5d']
XELKT.IS: Period 'max' is invalid, must be one of ['1d', '5d']


In [60]:

# possible use case
# tickers = ['AAPL', 'GOOGL', 'MSFT', 'AMZN']

# # Download data for multiple stocks
# stock_data = yf.download(tickers, start='2023-01-01', end='2023-12-31')