In [2]:
import yfinance as yf
import sqlite3
import datetime
import pandas as pd
from sqlite3 import Error

In [7]:
DB_PATH = "chris_yfinance_testing.db"

class DBCursor:
    
    """
    Cursor context manager.
    Minimizes connection and cursor instance statements.
    
    """
    
    def __init__(self):
        self.db_path = DB_PATH

    def __enter__(self):
        self.connection = sqlite3.connect(self.db_path)
        self.connection.execute("PRAGMA foreign_keys = 1")
        self.cursor = self.connection.cursor()
        return self.cursor

    def __exit__(self, exc_type, exc_value, traceback):
        self.connection.commit()
        self.connection.close()
        if exc_type is not None:
            traceback.print_exception(exc_type, exc_value, traceback)
        return


class SecuritiesDB:

    def __init__(self):
        self.db_file = DB_PATH

    def initialize_schema(self):

        security_table = """CREATE TABLE IF NOT EXISTS security (
            ticker TEXT PRIMARY KEY,
            name TEXT,
            exchange TEXT,
            currency TEXT,
            type TEXT
            ) 
            """

        exchange_table = """CREATE TABLE IF NOT EXISTS exchange (
            exchange_name TEXT PRIMARY KEY,
            exchange_timezone TEXT,
            exchange_timezone_short TEXT
            )
            """
        
        company_table = """CREATE TABLE IF NOT EXISTS company (
            company_name TEXT,
            sector TEXT,
            hq_country TEXT,
            security_ticker TEXT,
            CONSTRAINT company_key PRIMARY KEY (security_ticker, company_name),
            FOREIGN KEY(security_ticker) REFERENCES security (ticker)
            )
            """

        price_per_day = """CREATE TABLE IF NOT EXISTS price_daily (
            date TEXT,
            open REAL,
            high REAL,
            low REAL,
            close REAL,
            adjusted_close REAL,
            volume INTEGER,
            security_ticker TEXT,
            CONSTRAINT price_day_key PRIMARY KEY (security_ticker, date),
            FOREIGN KEY(security_ticker) REFERENCES security (ticker)
            )
            """
        
        price_per_minute = """CREATE TABLE IF NOT EXISTS price_minutely (
            date TEXT,
            open REAL,
            high REAL,
            low REAL,
            close REAL,
            adjusted_close REAL,
            volume INTEGER,
            security_ticker TEXT,
            CONSTRAINT price_min_key PRIMARY KEY (security_ticker, date),
            FOREIGN KEY(security_ticker) REFERENCES security (ticker)
            )
            """
        
        actions_table = """CREATE TABLE IF NOT EXISTS actions (
            date TEXT,
            dividends REAL,
            stock_splits REAL,
            security_ticker TEXT,
            CONSTRAINT actions_key PRIMARY KEY (security_ticker, date),
            FOREIGN KEY(security_ticker) REFERENCES security (ticker)
            )
            """

        # make empty tables
        tables = [security_table, exchange_table, company_table, 
                  price_per_day, price_per_minute, actions_table]
        
        for table in tables:
            self.__create_table(table)
                   
    def start_end_max_week_intervals(self, optional_start = (datetime.datetime.today() - datetime.timedelta(29))):
        """Used to download any history (should be less than 30 days unsure...)
           in most efficient manner with interval less than 7d
        """
        intervals = []
        today = datetime.datetime.today()
        start = optional_start
        end = (start + datetime.timedelta(6))

        intervals.append([start.strftime('%Y-%m-%d'), end.strftime('%Y-%m-%d')])

        while end + datetime.timedelta(6) < today:
            start = (end + datetime.timedelta(1))
            end = (start + datetime.timedelta(6))

            intervals.append([start.strftime('%Y-%m-%d'), end.strftime('%Y-%m-%d')])

        if end + datetime.timedelta(1) < today:
            start = end + datetime.timedelta(1)
            end = today
            intervals.append([start.strftime('%Y-%m-%d'),end.strftime('%Y-%m-%d')])

        return intervals
     
    def add_tickers(self, symbols):
        
        for symbol in symbols:
            yf_ticker = yf.Ticker(symbol)
            ticker_info = yf_ticker.info
            with DBCursor() as cursor:

                # populate security table
                security_attributes = (symbol,
                                        self.__pad_dict(ticker_info, 'longName'),
                                        self.__pad_dict(ticker_info, 'exchange'),
                                        self.__pad_dict(ticker_info, 'currency'),
                                        self.__pad_dict(ticker_info, 'quoteType')) 
                
                cursor.execute("INSERT OR IGNORE INTO security VALUES (?,?,?,?,?)", security_attributes)

                # populate exchange table
                exchange_attributes = (self.__pad_dict(ticker_info, 'exchange'),
                                        self.__pad_dict(ticker_info, 'exchangeTimezoneName'),
                                        self.__pad_dict(ticker_info, 'exchangeTimezoneShortName'))

                cursor.execute("INSERT OR IGNORE INTO exchange VALUES (?,?,?)", exchange_attributes)

                # populate company table
                company_attributes = (self.__pad_dict(ticker_info, 'longName'),
                                      self.__pad_dict(ticker_info, 'sector'),
                                      self.__pad_dict(ticker_info, 'country'),
                                      symbol)
                
                cursor.execute("INSERT OR IGNORE INTO company VALUES (?,?,?,?)", company_attributes)

                # populate price_daily table

                time_series_daily = yf.download(symbol, period='max', interval='1d', threads='true', progress=False)
                time_series_daily['security_ticker'] = [symbol] * len(time_series_daily.index)
                time_series_daily.index = time_series_daily.index.strftime("%Y-%m-%d %H:%M:%S")

                time_series_formatted = time_series_daily.itertuples()
                data = tuple(time_series_formatted)

                wildcards = ','.join(['?'] * 8)

                cursor.executemany("INSERT OR IGNORE INTO price_daily VALUES (%s)" % wildcards, data)

                # populate price_minutely table
                
                date_intervals = self.start_end_max_week_intervals()
                
                for date in date_intervals:

                    time_series_minutely = yf.download(symbol, start=date[0], end=date[1], interval='1m', threads='true', progress=False)
                    time_series_minutely['security_ticker'] = [symbol] * len(time_series_minutely.index)
                    time_series_minutely.index = time_series_minutely.index.strftime("%Y-%m-%d %H:%M:%S")

                    time_series_formatted = time_series_minutely.itertuples()
                    data = tuple(time_series_formatted)

                    wildcards = ','.join(['?'] * 8)

                    cursor.executemany("INSERT OR IGNORE INTO price_minutely VALUES (%s)" % wildcards, data)
                    
                # populate actions table

                actions = yf_ticker.actions
                actions['security_ticker'] = [symbol] * len(actions.index)
                actions.index = actions.index.strftime("%Y-%m-%d %H:%M:%S")

                actions_formatted = actions.itertuples()
                data = tuple(actions_formatted)

                wildcards = ','.join(['?'] * 4)

                cursor.executemany("INSERT OR IGNORE INTO actions VALUES (%s)" % wildcards, data)
            print(symbol, " data downloaded and populated in tables. ")
                
    def fetch_minutely_starting_at(self, ticker, start):
        #must be run within 29 days to maintain continuity with existing dataset
        
        assert start > datetime.datetime.today() - datetime.timedelta(29)
        date_intervals = self.start_end_max_week_intervals(start)
                
        for date in date_intervals:

            time_series_minutely = yf.download(ticker, start=date[0], end=date[1], interval='1m', threads='true', progress=False)
            time_series_minutely['security_ticker'] = [ticker] * len(time_series_minutely.index)
            time_series_minutely.index = time_series_minutely.index.strftime("%Y-%m-%d %H:%M:%S")

            time_series_formatted = time_series_minutely.itertuples()
            data = tuple(time_series_formatted)

            return data
         
    def fetch_daily_between(self, ticker, start, end):
        
        time_series_daily = yf.download(ticker, start=start.strftime('%Y-%m-%d'),end=end.strftime('%Y-%m-%d'), interval='1d', threads='true', progress=False)
        time_series_daily['security_ticker'] = [ticker] * len(time_series_daily.index)
        time_series_daily.index = time_series_daily.index.strftime("%Y-%m-%d %H:%M:%S")

        time_series_formatted = time_series_daily.itertuples()
        data = tuple(time_series_formatted)

        return data

    def get_table(self, tablename):
        
        with DBCursor() as cursor:
            cursor.execute("SELECT rowid,* FROM %s"%tablename)
            fulltable = cursor.fetchall()

        return fulltable
    
    def drop_all_tables(self):
        
        with DBCursor() as cursor:
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
            tables = cursor.fetchall()
            for table in tables:
                cursor.execute("DROP TABLE %s"%table[0])

    def __create_table(self, create_table_sql):

        with DBCursor() as cursor:
            
            try:
                cursor.execute(create_table_sql)
            except Error as e:
                print(e)
                
    def __pad_dict(self, ticker_dict, key):
        
        if key not in ticker_dict.keys():
            return None
        else:
            return ticker_dict[key]
    
    def get_daily_per_ticker(self, ticker):
        
        with DBCursor() as cursor:
            query = "SELECT * FROM price_daily WHERE security_ticker=? ORDER BY date"
            cursor.execute(query, (ticker,))
            output = cursor.fetchall()
            cols = ["date","open", "high", "low", "close", "adjusted_close", "volume", "security_ticker"]
            df = pd.DataFrame(output, columns = cols)
            
            return df 
        
    def get_minutely_per_ticker(self, ticker):
        
        with DBCursor() as cursor:
            query = "SELECT * FROM price_minutely WHERE security_ticker=? ORDER BY date"
            cursor.execute(query, (ticker,))
            output = cursor.fetchall()
            cols = ["date","open", "high", "low", "close", "adjusted_close", "volume", "security_ticker"]
            df = pd.DataFrame(output, columns = cols)
            
            return df 
        
    def get_actions_per_ticker(self, ticker):
        
        with DBCursor() as cursor:
            query = "SELECT * FROM actions WHERE security_ticker=? ORDER BY date"
            cursor.execute(query, (ticker,))
            output = cursor.fetchall()
            cols = ["date","dividends","stock_splits", "security_ticker"]
            df = pd.DataFrame(output, columns = cols)
            
            return df 
        
    def get_present_tickers(self):
        
        with DBCursor() as cursor:
            query = "SELECT ticker FROM security"
            cursor.row_factory = lambda cursor, row: row[0]
            cursor.execute(query)
            output = cursor.fetchall()
        return output
    
    def actions_since_date(self, ticker, date):
    
        yf_ticker = yf.Ticker(ticker)
        actions = yf_ticker.actions
        actions['security_ticker'] = [ticker] * len(actions.index)
        actions.index = actions.index.strftime("%Y-%m-%d %H:%M:%S")
        actions_formatted = actions.itertuples()
        data = tuple(actions_formatted)
        
        cols = ["date","dividends","stock_splits", "security_ticker"]
        actions_df = pd.DataFrame(data, columns = cols)
        
        actions_since = actions_df[actions_df["date"]>date.strftime("%Y-%m-%d")]
        
        return actions_since
             
    def update(self):
        ''' Perform update of actions/daily-price/minutely-price and account for stock splits + dividends 
            Work in progress
        '''
        
        tickers_present = self.get_present_tickers()
        today = datetime.datetime.today()
        for ticker in tickers_present:
            
            latest_daily = datetime.datetime.strptime(self.get_daily_per_ticker(ticker)["date"].iloc[-1], "%Y-%m-%d %H:%M:%S")
            latest_minutely = datetime.datetime.strptime(self.get_minutely_per_ticker(ticker)["date"].iloc[-1], "%Y-%m-%d %H:%M:%S")
            
            actions_since = self.actions_since_date(ticker, latest_daily)
            
            if actions_since.empty:
                
                
                if today >  latest_daily + datetime.timedelta(1):
                    
                    # do daily updates
                    daily_data = self.fetch_daily_between(ticker, latest_daily, today)

                    with DBCursor() as cursor:
                        wildcards = ','.join(['?'] * 8)
                        cursor.executemany("INSERT OR IGNORE INTO price_daily VALUES (%s)" % wildcards, daily_data)
                        
                    # do minutely updates
                    if today - datetime.timedelta(1) < latest_daily + datetime.timedelta(29):
                    
                        minutely_data = self.fetch_minutely_starting_at(ticker, today-datetime.timedelta(1))

                        with DBCursor() as cursor:
                            wildcards = ','.join(['?'] * 8)
                            cursor.executemany("INSERT OR IGNORE INTO price_minutely VALUES (%s)" % wildcards, minutely_data)
                            
                    else:
                        print("Updating this late (>29 days since last update) will break timeseries continuity.")
                        
                else:
                    print(ticker, " data already up to date.")
                    
            # TODO: else (create multiplcation sequence to do serial dividend/split updates)
                
                

In [35]:
stocksDB = SecuritiesDB()
stocksDB.initialize_schema()

In [36]:
tickers=['MSFT', 'AAPL', 'HUT', 'HUT.TO', 'SPY', 'CADUSD=X', 'BTC-USD', 'ETH-USD', 'ETHX-U.TO']

stocksDB.add_tickers(tickers)
print("finished")

MSFT  data downloaded and populated in tables. 
AAPL  data downloaded and populated in tables. 
HUT  data downloaded and populated in tables. 
HUT.TO  data downloaded and populated in tables. 
SPY  data downloaded and populated in tables. 
CADUSD=X  data downloaded and populated in tables. 
BTC-USD  data downloaded and populated in tables. 
ETH-USD  data downloaded and populated in tables. 
ETHX-U.TO  data downloaded and populated in tables. 
finished


In [37]:
#print(stocksDB.get_table("security"))
#print(stocksDB.get_table("exchange"))
#print(stocksDB.get_table("company"))
#print(stocksDB.get_actions_per_ticker("AAPL"))
print(stocksDB.get_table("price_minutely")[0:1000])
#print(stocksDB.get_daily_per_ticker('PPL')[0:10])


[(1, '2021-11-09 09:30:00', 337.1070861816406, 337.75, 336.7699890136719, 337.1199951171875, 337.1199951171875, 798944, 'MSFT'), (2, '2021-11-09 09:31:00', 337.2099914550781, 337.7900085449219, 337.2099914550781, 337.364990234375, 337.364990234375, 85660, 'MSFT'), (3, '2021-11-09 09:32:00', 337.385009765625, 337.75, 337.29998779296875, 337.6000061035156, 337.6000061035156, 78572, 'MSFT'), (4, '2021-11-09 09:33:00', 337.5899963378906, 337.7333984375, 337.1199951171875, 337.5799865722656, 337.5799865722656, 85942, 'MSFT'), (5, '2021-11-09 09:34:00', 337.5, 337.5199890136719, 337.3599853515625, 337.4200134277344, 337.4200134277344, 102642, 'MSFT'), (6, '2021-11-09 09:35:00', 337.5, 337.6300048828125, 337.3500061035156, 337.6300048828125, 337.6300048828125, 61271, 'MSFT'), (7, '2021-11-09 09:36:00', 337.6099853515625, 337.6099853515625, 337.5199890136719, 337.55999755859375, 337.55999755859375, 70516, 'MSFT'), (8, '2021-11-09 09:37:00', 337.5, 337.7099914550781, 337.4599914550781, 337.6799

In [39]:
print(stocksDB.get_table("security"))
print(stocksDB.get_table("exchange"))
print(stocksDB.get_table("company"))
#print(stocksDB.get_table("price_daily"))
#print(stocksDB.get_table("price_minutely"))

[(1, 'MSFT', 'Microsoft Corporation', 'NMS', 'USD', 'EQUITY'), (2, 'AAPL', 'Apple Inc.', 'NMS', 'USD', 'EQUITY'), (3, 'HUT', 'Hut 8 Mining Corp.', 'NMS', 'USD', 'EQUITY'), (4, 'HUT.TO', 'Hut 8 Mining Corp.', 'TOR', 'CAD', 'EQUITY'), (5, 'SPY', 'SPDR S&P 500 ETF Trust', 'PCX', 'USD', 'ETF'), (6, 'CADUSD=X', None, 'CCY', 'USD', 'CURRENCY'), (7, 'BTC-USD', None, 'CCC', 'USD', 'CRYPTOCURRENCY'), (8, 'ETH-USD', None, 'CCC', 'USD', 'CRYPTOCURRENCY'), (9, 'ETHX-U.TO', 'CI Galaxy Ethereum ETF', 'TOR', 'USD', 'ETF')]
[(1, 'NMS', 'America/New_York', 'EST'), (2, 'TOR', 'America/Toronto', 'EST'), (3, 'PCX', 'America/New_York', 'EST'), (4, 'CCY', 'Europe/London', 'GMT'), (5, 'CCC', 'UTC', 'UTC')]
[(1, 'Microsoft Corporation', 'Technology', 'United States', 'MSFT'), (2, 'Apple Inc.', 'Technology', 'United States', 'AAPL'), (3, 'Hut 8 Mining Corp.', 'Financial Services', 'Canada', 'HUT'), (4, 'Hut 8 Mining Corp.', 'Financial Services', 'Canada', 'HUT.TO'), (5, 'SPDR S&P 500 ETF Trust', None, None, 'S

In [9]:
stocksDB = SecuritiesDB()

In [10]:
print(stocksDB.get_present_tickers())
stocksDB.update()


['AAPL', 'BTC-USD', 'CADUSD=X', 'ETH-USD', 'ETHX-U.TO', 'HUT', 'HUT.TO', 'MSFT', 'SPY']
AAPL  DB already up to date.
BTC-USD  DB already up to date.
CADUSD=X  DB already up to date.
ETH-USD  DB already up to date.
ETHX-U.TO  DB already up to date.
HUT  DB already up to date.
HUT.TO  DB already up to date.
MSFT  DB already up to date.
SPY  DB already up to date.


In [43]:
print(stocksDB.get_minutely_per_ticker("MSFT").tail())
print(stocksDB.get_daily_per_ticker("MSFT").tail())

                     date        open        high         low       close  \
6055  2021-12-07 15:55:00  333.799988  334.250000  333.524994  334.059998   
6056  2021-12-07 15:56:00  334.079987  334.350006  333.980011  334.010010   
6057  2021-12-07 15:57:00  334.015015  334.160004  333.809998  333.970001   
6058  2021-12-07 15:58:00  333.970001  334.320007  333.790009  334.279999   
6059  2021-12-07 15:59:00  334.290009  335.299988  334.290009  334.929993   

      adjusted_close   volume security_ticker  
6055      334.059998   251098            MSFT  
6056      334.010010   198713            MSFT  
6057      333.970001   222215            MSFT  
6058      334.279999   288498            MSFT  
6059      334.929993  1018407            MSFT  
                     date        open        high         low       close  \
9005  2021-12-02 00:00:00  330.299988  333.489990  327.799988  329.489990   
9006  2021-12-03 00:00:00  331.989990  332.700012  318.029999  323.010010   
9007  2021-12-06 0