In [1]:
import yfinance as yf
import sqlite3
import datetime
import pandas as pd
from sqlite3 import Error

In [12]:
DB_PATH = "chris_yfinance_testing.db"

class DBCursor:
    
    """
    Cursor context manager.
    Minimizes connection and cursor instance statements.
    
    """
    
    def __init__(self):
        self.db_path = DB_PATH

    def __enter__(self):
        self.connection = sqlite3.connect(self.db_path)
        self.connection.execute("PRAGMA foreign_keys = 1")
        self.cursor = self.connection.cursor()
        return self.cursor

    def __exit__(self, exc_type, exc_value, traceback):
        self.connection.commit()
        self.connection.close()
        if exc_type is not None:
            traceback.print_exception(exc_type, exc_value, traceback)
        return


class SecuritiesDB:

    def __init__(self):
        self.db_file = DB_PATH

    def initialize_schema(self):

        security_table = """CREATE TABLE IF NOT EXISTS security (
            ticker TEXT PRIMARY KEY,
            name TEXT UNIQUE,
            exchange TEXT,
            currency TEXT,
            type TEXT
            ) 
            """

        exchange_table = """CREATE TABLE IF NOT EXISTS exchange (
            exchange_name TEXT PRIMARY KEY,
            exchange_timezone TEXT,
            exchange_timezone_short TEXT
            )
            """
        
        company_table = """CREATE TABLE IF NOT EXISTS company (
            company_name TEXT PRIMARY KEY,
            sector TEXT,
            hq_country TEXT,
            FOREIGN KEY(company_name) REFERENCES security (name)
            )
            """

        price_per_day = """CREATE TABLE IF NOT EXISTS price_daily (
            date TEXT,
            open REAL,
            high REAL,
            low REAL,
            close REAL,
            adjusted_close REAL,
            volume INTEGER,
            security_ticker TEXT,
            CONSTRAINT price_day_key PRIMARY KEY (security_ticker, date),
            FOREIGN KEY(security_ticker) REFERENCES security (ticker)
            )
            """
        
        price_per_minute = """CREATE TABLE IF NOT EXISTS price_minutely (
            date TEXT,
            open REAL,
            high REAL,
            low REAL,
            close REAL,
            adjusted_close REAL,
            volume INTEGER,
            security_ticker TEXT,
            CONSTRAINT price_min_key PRIMARY KEY (security_ticker, date),
            FOREIGN KEY(security_ticker) REFERENCES security (ticker)
            )
            """
        
        actions_table = """CREATE TABLE IF NOT EXISTS actions (
            date TEXT,
            dividends REAL,
            stock_splits REAL,
            security_ticker TEXT,
            CONSTRAINT actions_key PRIMARY KEY (security_ticker, date),
            FOREIGN KEY(security_ticker) REFERENCES security (ticker)
            )
            """

        # make empty tables
        tables = [security_table, exchange_table, company_table, 
                  price_per_day, price_per_minute, actions_table]
        
        for table in tables:
            self.__create_table(table)
            
            
    def start_end_max_week_intervals(self):
        """Used to download any history (should be less than 30 days unsure...)
           in most efficient manner with interval less than 7d
        """
        intervals = []
        today = datetime.datetime.today()
        start = (today - datetime.timedelta(29))
        end = (start + datetime.timedelta(6))

        intervals.append([start.strftime('%Y-%m-%d'), end.strftime('%Y-%m-%d')])

        while end + datetime.timedelta(6) < today:
            start = (end + datetime.timedelta(1))
            end = (start + datetime.timedelta(6))

            intervals.append([start.strftime('%Y-%m-%d'), end.strftime('%Y-%m-%d')])

        if end + datetime.timedelta(1) < today:
            start = end + datetime.timedelta(1)
            end = today
            intervals.append([start.strftime('%Y-%m-%d'),end.strftime('%Y-%m-%d')])

        return intervals
     
    def add_tickers(self, symbols):
        
        for symbol in symbols:
            yf_ticker = yf.Ticker(symbol)
            ticker_info = yf_ticker.info
            with DBCursor() as cursor:

                # populate security table
                security_attributes = (symbol,
                                        self.__pad_dict(ticker_info, 'longName'),
                                        self.__pad_dict(ticker_info, 'exchange'),
                                        self.__pad_dict(ticker_info, 'currency'),
                                        self.__pad_dict(ticker_info, 'quoteType')) 
                
                cursor.execute("INSERT OR IGNORE INTO security VALUES (?,?,?,?,?)", security_attributes)

                # populate exchange table
                exchange_attributes = (self.__pad_dict(ticker_info, 'exchange'),
                                        self.__pad_dict(ticker_info, 'exchangeTimezoneName'),
                                        self.__pad_dict(ticker_info, 'exchangeTimezoneShortName'))

                cursor.execute("INSERT OR IGNORE INTO exchange VALUES (?,?,?)", exchange_attributes)

                # populate company table
                company_attributes = (self.__pad_dict(ticker_info, 'longName'),
                                      self.__pad_dict(ticker_info, 'sector'),
                                      self.__pad_dict(ticker_info, 'country'))
                
                cursor.execute("INSERT OR IGNORE INTO company VALUES (?,?,?)", company_attributes)

                # populate price_daily table

                time_series_daily = yf.download(symbol, period='max', interval='1d', threads='true')
                time_series_daily['security_ticker'] = [symbol] * len(time_series_daily.index)
                time_series_daily.index = time_series_daily.index.strftime("%Y-%m-%d %H:%M:%S")

                time_series_formatted = time_series_daily.itertuples()
                data = tuple(time_series_formatted)

                wildcards = ','.join(['?'] * 8)

                cursor.executemany("INSERT OR IGNORE INTO price_daily VALUES (%s)" % wildcards, data)

                # populate price_minutely table
                
                date_intervals = self.start_end_max_week_intervals()
                
                for date in date_intervals:

                    time_series_minutely = yf.download(symbol, start=date[0], end=date[1], interval='1m', threads='true')
                    time_series_minutely['security_ticker'] = [symbol] * len(time_series_minutely.index)
                    time_series_minutely.index = time_series_minutely.index.strftime("%Y-%m-%d %H:%M:%S")

                    time_series_formatted = time_series_minutely.itertuples()
                    data = tuple(time_series_formatted)

                    wildcards = ','.join(['?'] * 8)

                    cursor.executemany("INSERT OR IGNORE INTO price_minutely VALUES (%s)" % wildcards, data)
                    
                # populate actions table

                actions = yf_ticker.actions
                actions['security_ticker'] = [symbol] * len(actions.index)
                actions.index = actions.index.strftime("%Y-%m-%d %H:%M:%S")

                actions_formatted = actions.itertuples()
                data = tuple(actions_formatted)

                wildcards = ','.join(['?'] * 4)

                cursor.executemany("INSERT OR IGNORE INTO actions VALUES (%s)" % wildcards, data)
            
    def get_table(self, tablename):
        
        with DBCursor() as cursor:
            cursor.execute("SELECT rowid,* FROM %s"%tablename)
            fulltable = cursor.fetchall()

        return fulltable
    
    def drop_all_tables(self):
        
        with DBCursor() as cursor:
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
            tables = cursor.fetchall()
            for table in tables:
                cursor.execute("DROP TABLE %s"%table[0])

    def __create_table(self, create_table_sql):

        with DBCursor() as cursor:
            
            try:
                cursor.execute(create_table_sql)
            except Error as e:
                print(e)
                
    def __pad_dict(self, ticker_dict, key):
        
        if key not in ticker_dict.keys():
            return None
        else:
            return ticker_dict[key]
    
    def get_daily_per_ticker(self, ticker):
        
        with DBCursor() as cursor:
            query = "SELECT * FROM price_daily WHERE security_ticker=? ORDER BY date"
            cursor.execute(query, (ticker,))
            output = cursor.fetchall()
            cols = ["date","open", "high", "low", "close", "adjusted_close", "volume", "security_ticker"]
            df = pd.DataFrame(output, columns = cols)
            
            return df 
        
    def get_minutely_per_ticker(self, ticker):
        
        with DBCursor() as cursor:
            query = "SELECT * FROM price_minutely WHERE security_ticker=? ORDER BY date"
            cursor.execute(query, (ticker,))
            output = cursor.fetchall()
            cols = ["date","open", "high", "low", "close", "adjusted_close", "volume", "security_ticker"]
            df = pd.DataFrame(output, columns = cols)
            
            return df 
        
    def get_actions_per_ticker(self, ticker):
        
        with DBCursor() as cursor:
            query = "SELECT * FROM actions WHERE security_ticker=? ORDER BY date"
            cursor.execute(query, (ticker,))
            output = cursor.fetchall()
            cols = ["date","dividends","stock_splits", "security_ticker"]
            df = pd.DataFrame(output, columns = cols)
            
            return df 
        

In [13]:
stocksDB = SecuritiesDB()
stocksDB.initialize_schema()

In [14]:
#tickers=['MSFT', 'AAPL', 'HUT', 'HUT.TO', 'SPY', 'CADUSD=X', 'BTC-USD', 'ETH-USD', 'ETHX-U.TO']
tickers=['ETHX-U.TO', 'PPL']

stocksDB.add_tickers(tickers)
print("finished")

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
finished


In [17]:
#print(stocksDB.get_table("security"))
#print(stocksDB.get_table("exchange"))
#print(stocksDB.get_table("company"))
print(stocksDB.get_actions_per_ticker("PPL"))
#print(stocksDB.get_table("price_minutely")[0:1000])
#print(stocksDB.get_daily_per_ticker('PPL')[0:10])


                    date  dividends  stock_splits security_ticker
0    1985-06-04 00:00:00   0.149024           0.0             PPL
1    1985-09-04 00:00:00   0.149024           0.0             PPL
2    1985-12-04 00:00:00   0.149024           0.0             PPL
3    1986-03-04 00:00:00   0.149024           0.0             PPL
4    1986-06-04 00:00:00   0.149024           0.0             PPL
..                   ...        ...           ...             ...
139  2020-09-09 00:00:00   0.415000           0.0             PPL
140  2020-12-09 00:00:00   0.415000           0.0             PPL
141  2021-03-09 00:00:00   0.415000           0.0             PPL
142  2021-06-09 00:00:00   0.415000           0.0             PPL
143  2021-09-09 00:00:00   0.415000           0.0             PPL

[144 rows x 4 columns]


In [16]:
stocksDB.drop_all_tables()

In [24]:
stocksDB = SecuritiesDB()
stocksDB.initialize_schema()

ticker="MSFT"
stocksDB.add_ticker(ticker)

ticker="AAPL"
stocksDB.add_ticker(ticker)

ticker="BTC-USD"
stocksDB.add_ticker(ticker)



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [26]:
print(stocksDB.get_table("security"))
print(stocksDB.get_table("exchange"))
print(stocksDB.get_table("company"))
print(stocksDB.get_table("price_daily"))
#print(stocksDB.get_table("price_minutely"))

[(1, 'MSFT', 'Microsoft Corporation', 'NMS', 'USD', 'EQUITY'), (2, 'AAPL', 'Apple Inc.', 'NMS', 'USD', 'EQUITY'), (3, 'BTC-USD', None, 'CCC', 'USD', 'CRYPTOCURRENCY')]
[(1, 'NMS', 'America/New_York', 'EST'), (2, 'CCC', 'UTC', 'UTC')]
[(1, 'Microsoft Corporation', 'Technology', 'United States', 'MSFT'), (2, 'Apple Inc.', 'Technology', 'United States', 'AAPL'), (3, None, None, None, 'BTC-USD')]
[(1, '2021-11-24 00:00:00', 336.2799987792969, 338.1600036621094, 333.9100036621094, 337.9100036621094, 337.9100036621094, 21620600, 'MSFT'), (2, '2021-11-24 00:00:00', 160.75, 162.13999938964844, 159.63999938964844, 161.94000244140625, 161.94000244140625, 69396000, 'AAPL'), (3, '2021-11-25 00:00:00', 57409.09765625, 58807.63671875, 57106.0078125, 58652.73046875, 58652.73046875, 34721878016, 'BTC-USD')]
