In [1]:
import io
import os
from urllib.parse import quote_plus

import pandas as pd
import pymysql
import requests
import sqlalchemy
import yfinance as yf
from dotenv import load_dotenv
pymysql.install_as_MySQLdb()

In [2]:
class ArkxEtfHoldings:
    """Downloads the holdings CSV of one ARK ETF and exposes its ticker column.

    Args:
        name: Fund symbol; must be one of the keys of ``_CSV_FILES``
            ('ARKK', 'ARKF', 'ARKW', 'ARKQ', 'ARKX', 'ARKG').
    """

    # Directory on the ARK web site that holds the per-fund CSV files.
    _CSV_BASE_URL = 'https://ark-funds.com/wp-content/fundsiteliterature/csv/'

    # Fund symbol -> CSV file name under _CSV_BASE_URL.
    _CSV_FILES = {
        'ARKK': 'ARK_INNOVATION_ETF_ARKK_HOLDINGS.csv',
        'ARKF': 'ARK_FINTECH_INNOVATION_ETF_ARKF_HOLDINGS.csv',
        'ARKW': 'ARK_NEXT_GENERATION_INTERNET_ETF_ARKW_HOLDINGS.csv',
        'ARKQ': 'ARK_AUTONOMOUS_TECHNOLOGY_&_ROBOTICS_ETF_ARKQ_HOLDINGS.csv',
        'ARKX': 'ARK_SPACE_EXPLORATION_&_INNOVATION_ETF_ARKX_HOLDINGS.csv',
        'ARKG': 'ARK_GENOMIC_REVOLUTION_MULTISECTOR_ETF_ARKG_HOLDINGS.csv',
    }

    # Seconds to wait for the web server before giving up instead of hanging.
    _REQUEST_TIMEOUT = 30

    def __init__(self, name):
        self.name = name

    # Step 1: Get the ARKx fund holdings CSV file from the ARK web site
    def Get_ARK_CSV(self):
        """Fetch the fund's holdings CSV and return it as a DataFrame.

        Returns:
            pandas.DataFrame parsed from the downloaded CSV text.

        Raises:
            ValueError: if ``self.name`` is not a supported fund symbol.
            requests.HTTPError: if the server answers with an error status.
        """
        csv_file = self._CSV_FILES.get(self.name)
        if csv_file is None:
            # Fail with a clear message instead of an unbound local further down.
            supported = ', '.join(sorted(self._CSV_FILES))
            raise ValueError(
                f"Unknown ARK fund {self.name!r}; expected one of: {supported}"
            )
        url = self._CSV_BASE_URL + csv_file

        # A browser User-Agent is sent with the request.
        header = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/601.3.9 (KHTML, like Gecko) Version/9.0.2 Safari/601.3.9'
        }
        response = requests.get(url, headers=header, timeout=self._REQUEST_TIMEOUT)
        # Do not try to parse an HTML error page as if it were the CSV.
        response.raise_for_status()
        file_object = io.StringIO(response.content.decode('utf-8'))
        return pd.read_csv(file_object)

    # Step 2: Extract the tickers to a list
    def get_tickerlist(self):
        """Return the values of the CSV's 'ticker' column as a list.

        Missing values are returned as-is (NaN); callers filter them out.
        """
        df = self.Get_ARK_CSV()  # call step 1
        return df['ticker'].to_list()

In [3]:
class LoadDb:
    """Builds MySQL connections from credentials held in environment variables.

    ``load_dotenv()`` is called first, then the variables ``DB_ACCESS_KEY``
    (used as the host/endpoint), ``USERNAME`` and ``USERPASS`` are read.
    """

    def __init__(self):
        load_dotenv()
        self.endpoint = os.getenv("DB_ACCESS_KEY")
        # NOTE(review): USERNAME is often already set by the operating system,
        # which may take precedence over the .env value - confirm this is the
        # intended variable name.
        self.username = os.getenv("USERNAME")
        self.password = os.getenv("USERPASS")

    def _buildUrl(self, schema=None, hidePassword=False):
        """Return the ``mysql+pymysql://`` URL for this server.

        Args:
            schema: Optional schema name appended as the URL path.
            hidePassword: When true the password is replaced by ``***`` so the
                result is safe to print or log.

        User name and password are URL-escaped so that characters such as
        ``@``, ``:`` or ``/`` in the credentials do not corrupt the URL.
        """
        password = '***' if hidePassword else quote_plus(self.password)
        url = "mysql+pymysql://" + quote_plus(self.username) + ":" + password + "@" + self.endpoint
        if schema is not None:
            url = url + "/" + schema
        return url

    def getDbConn(self, schema):
        """Open and return a SQLAlchemy connection to ``schema``.

        The connection URL is printed with the password masked, so notebook
        output never contains the credential.
        """
        print(self._buildUrl(schema, hidePassword=True))
        return sqlalchemy.create_engine(self._buildUrl(schema)).connect()

    def getDb(self, schema):
        """Open and return a raw PyMySQL connection to ``schema``."""
        return pymysql.connect(host=self.endpoint, user=self.username, password=self.password, database=schema)

    def createEtfxSchema(self, schema):
        """Create ``schema`` on the server; any failure is printed, not raised."""
        engine = None
        try:
            engine = sqlalchemy.create_engine(self._buildUrl())
            # Engine.execute() no longer exists in SQLAlchemy 2.0; run the DDL
            # on a connection inside a transaction block instead.
            with engine.begin() as conn:
                conn.execute(sqlalchemy.schema.CreateSchema(schema))
        except Exception as e:
            print('error', e)
        finally:
            if engine is not None:
                engine.dispose()

In [4]:
class LoadArkxEtf:
    """Loads one ARK ETF's holdings into the 'YfData' and 'Ta' MySQL schemas.

    ``procEtfx`` fetches the fund's tickers, stores a price-history table per
    ticker in 'YfData', and records the fund, its stocks and their relations
    in the 'Ta' tables. Both connections opened in ``__init__`` are closed by
    that run, so an instance is meant to be used for a single ``procEtfx`` call.

    Args:
        name: Fund symbol, also used as the ``StockGroup`` name.
    """

    tkers = []  # class-level default kept for compatibility; see __init__

    def __init__(self, name):
        self.name = name
        # Per-instance list so instances never share the class-level default.
        self.tkers = []
        loadDb = LoadDb()
        self.connYfData = loadDb.getDbConn('YfData')
        try:
            self.dbTa = loadDb.getDb('Ta')
        except Exception:
            # Do not leak the first connection when the second one fails.
            self.connYfData.close()
            raise

    @staticmethod
    def _fetchId(cursor, sql, params):
        """Run a one-column id lookup; return the last id read, or 0 if no row."""
        cursor.execute(sql, params)
        found = 0
        for (found,) in cursor:
            pass
        return found

    def loadYfTables(self, conn, tkers):
        """Download price history for each ticker and store it as a table.

        Args:
            conn: SQLAlchemy connection to write to; it is always closed
                before this method returns.
            tkers: Iterable of ticker strings; NaN entries are skipped.

        Each table is named after the ticker (text after the first space is
        dropped) and replaced if it already exists. Per-ticker failures are
        printed and the loop continues with the next ticker.
        """
        goodTkers = [x for x in tkers if x == x]  # drop all nan (nan != nan)
        try:
            for symbol in goodTkers:
                # Some tickers carry a suffix after a space; keep the first part.
                symbol = symbol.partition(' ')[0]
                print(symbol)

                try:
                    df = yf.download(symbol, start='2020-07-01')
                    if len(df) > 0:
                        df = df.reset_index()
                        try:
                            df.to_sql(symbol, conn, if_exists='replace')
                        except Exception as e:
                            print(e)
                except Exception as e:
                    # Best effort: report the failure rather than hiding it.
                    print('download failed for', symbol, e)
        finally:
            conn.close()

    def loadGrpStkRel(self, db, tkers):
        """Ensure group, stock and group-stock relation rows exist.

        Args:
            db: DB-API connection (PyMySQL) to the schema holding the
                ``StockGroup``, ``Stock`` and ``GrpStkRel`` tables; it is
                always closed before this method returns.
            tkers: Iterable of ticker strings; NaN entries are skipped.

        All inserts are committed together at the end. On any error the
        transaction is rolled back and the exception is re-raised.
        """
        goodTkers = [x for x in tkers if x == x]  # drop all nan (nan != nan)
        try:
            mycursor = db.cursor()

            # Get the Gid (group id); insert the group if it does not exist.
            Gid = self._fetchId(
                mycursor, "SELECT Id Gid FROM `StockGroup` WHERE Name = %s", (self.name,)
            )
            if Gid == 0:
                print("no Group record found for ", self.name)
                mycursor.execute("INSERT INTO `StockGroup` (Name) Values (%s)", (self.name,))
                Gid = mycursor.lastrowid
                print('new Gid=', Gid)

            for tker in goodTkers:
                # Get the Sid (stock id); insert the stock if it does not exist.
                Sid = self._fetchId(
                    mycursor, "SELECT Id Sid FROM `Stock` WHERE Symbol = %s", (tker,)
                )
                if Sid == 0:
                    # NOTE(review): the stock's Name column receives the fund
                    # name, as in the original code - confirm this is intended.
                    mycursor.execute(
                        "INSERT INTO `Stock` (Name,Symbol) Values (%s,%s)", (self.name, tker)
                    )
                    Sid = mycursor.lastrowid

                # Get the relation id; insert the relation if it does not exist.
                Rid = self._fetchId(
                    mycursor,
                    "SELECT Id Rid FROM `GrpStkRel` WHERE Gid = %s and Sid = %s",
                    (Gid, Sid),
                )
                if Rid == 0:
                    mycursor.execute(
                        "INSERT INTO `GrpStkRel` (Gid,Sid) Values (%s,%s)", (Gid, Sid)
                    )
            db.commit()
        except Exception:
            db.rollback()
            raise
        finally:
            db.close()

    def procEtfx(self):
        """Fetch the fund's tickers, then load the YfData and Ta tables."""
        arkxHoldings = ArkxEtfHoldings(self.name)
        self.tkers = arkxHoldings.get_tickerlist()
        self.loadYfTables(self.connYfData, self.tkers)  # load yahoo finance data tables
        self.loadGrpStkRel(self.dbTa, self.tkers)  # load Ta tables

In [5]:
# Funds to ingest. Trim this list (e.g. to just 'ARKK') for a quick partial run.
arkxEtfLst = [
    'ARKK',
    'ARKF',
    'ARKW',
    'ARKQ',
    'ARKX',
    'ARKG',
]

# Each fund gets its own loader, which runs the full load for that fund.
for etf in arkxEtfLst:
    arkx = LoadArkxEtf(etf)
    arkx.procEtfx()

mysql+pymysql://root:***@localhost/YfData
no Group record found for  ARKK
new Gid= 280
mysql+pymysql://root:***@localhost/YfData
no Group record found for  ARKF
new Gid= 281
mysql+pymysql://root:***@localhost/YfData
no Group record found for  ARKW
new Gid= 282
mysql+pymysql://root:***@localhost/YfData
no Group record found for  ARKQ
new Gid= 283
mysql+pymysql://root:***@localhost/YfData
no Group record found for  ARKX
new Gid= 284
mysql+pymysql://root:***@localhost/YfData
no Group record found for  ARKG
new Gid= 285


### Create schema

In [6]:
# arkxEtfLst=['ARKF']
# for etf in arkxEtfLst:
#     arkx= LoadArkxEtf(etf)
#     arkx.procEtfx()

### Get symbols

In [7]:
# arkk = ArkxEtfHoldings('ARKK')
# arkkTks = arkk.get_tickerlist()
# arkkTks = [x for x in arkkTks if x == x] #drop all nan
# print(arkkTks)

### Create tables