In [None]:
#################################################
###### Code to read stock data from the Interactive Brokers (IB)
###### and Alpha Vantage (AV) API sources
#################################################

In [None]:
# install required IB packages
import sys
!{sys.executable} -m pip install --user msgpack
!{sys.executable} -m pip install --user ib_insync

# import required libraries
import requests
import urllib.request
import time
import re
import pickle
from bs4 import BeautifulSoup
import os
from os import walk
import errno
from pathlib import Path
import json
import ib_insync as iblib

# begin synchronous library for IB requests
# NOTE(review): startLoop() is an ib_insync utility; presumably it is what lets
# the blocking ib.* calls in later cells run inside the notebook's already
# running event loop -- confirm against the ib_insync documentation.
iblib.util.startLoop()

In [None]:
#Details of connections
# Every setting can be supplied through an environment variable of the same
# name, so real credentials never have to be saved inside the notebook. The
# original placeholder values are kept as the defaults.
AV_API_KEY = os.environ.get('AV_API_KEY', 'SECRET')   # Alpha Vantage API key
IB_IP = os.environ.get('IB_IP', '')                    # host handed to ib.connect
IB_PORT = int(os.environ.get('IB_PORT', 0))            # port handed to ib.connect
IB_CLIENTID = int(os.environ.get('IB_CLIENTID', 0))    # clientId handed to ib.connect

In [None]:
#Exchange-listing pages on the IB website: the base page plus one page per region
urls = ['https://www.interactivebrokers.co.uk/en/index.php?f=37908' + region_query
        for region_query in ('', '&p=europe', '&p=asia', '&p=south_america')]

#accumulator for the exchange symbols found on those pages
exchange_symbols = list()

def get_exchange(text):
    """Return the value of the ``exch=`` query parameter found in ``text``.

    The value is the shortest non-empty run of characters between the first
    usable ``exch=`` and a following ``&``. An empty string is returned when
    ``text`` contains no such run (for example when ``exch=`` is the last
    parameter and is not followed by ``&``).
    """
    hit = re.search('exch=(.+?)&', text)
    return "" if hit is None else hit.group(1)

#generate the list of exchange symbols
for regional_url in urls:
    response = requests.get(regional_url)
    soup = BeautifulSoup(response.text, "html.parser")
    # href=True limits the search to anchors that actually carry an href;
    # indexing attrs['href'] on an anchor without one raises KeyError.
    for atag in soup.find_all('a', href=True):
        symbol = get_exchange(atag['href'])
        # keep each exchange only once so it is not scraped repeatedly later on
        if symbol != "" and symbol not in exchange_symbols:
            exchange_symbols.append(symbol)

In [None]:
#Now we have all the exchanges in a list we need to pull back all stock codes
# URL pieces for one page of an exchange's stock listing; the exchange code is
# inserted between them and the page number is appended at the end.
symbol_prefix = "https://www.interactivebrokers.co.uk/en/index.php?f=41295&exch="
symbol_postfix = "&showcategories=STK&p=&cc=&limit=1000&page="
# exchange_symbols is deliberately NOT reset here: it holds the exchanges
# collected by the previous cell, and emptying it would leave nothing to scrape.
all_symbols = []

#check each exchange
for exchange in exchange_symbols:
    #check if this exchange has stocks
    exchange_url = "https://www.interactivebrokers.co.uk/en/index.php?f=2222&exch=" + exchange

    response = requests.get(exchange_url)
    soup = BeautifulSoup(response.text, "html.parser")
    if soup.find("a", {"id": "STK"}) is None:
        print(f"{exchange} has NO stock listing")
        continue

    print(f"{exchange} has stock listing")
    # walk the paginated listing until a page yields no usable rows
    page = 1
    while True:
        url = symbol_prefix + exchange + symbol_postfix + str(page)
        response = requests.get(url)
        soup = BeautifulSoup(response.text, "html.parser")

        # the listing rows sit in the third tbody, or in the second one when
        # the page only has two; check the count instead of catching every error
        tbodies = soup.find_all('tbody')
        if len(tbodies) > 2:
            all_tags = tbodies[2].find_all('tr')
        elif len(tbodies) == 2:
            print("third tag not found - assuming only two")
            all_tags = tbodies[1].find_all('tr')
        else:
            print(f"no listing table found for {exchange} on page {page}")
            all_tags = []

        rows_added = 0
        for tr in all_tags:
            #extract the details
            tds = tr.find_all('td')
            if len(tds) < 4:
                # not a data row (fewer than the four cells read below)
                continue
            company_link = tds[1].find('a')
            # fall back to the cell text when the company name is not a link
            company = (company_link if company_link is not None else tds[1]).text.strip()
            details = {
                "Exchange": exchange,
                "IBSymbol": tds[0].text.strip(),
                "Company": company,
                "Symbol": tds[2].text.strip(),
                "Currency": tds[3].text.strip(),
            }
            #append the details
            all_symbols.append(details)
            rows_added += 1

        if rows_added == 0:
            #none found - this exchange is finished
            break
        print(F"processing page {page}")
        page += 1
    
# now we save all_symbols to disk
with open('all_symbols.pkl', 'wb') as f:
    # dump the list that the scrape loop fills; this is the object the next
    # cell reads back from all_symbols.pkl
    pickle.dump(all_symbols, f)


In [None]:
#Read in the stock symbols
# NOTE: pickle.load can execute arbitrary code - only load a file that this
# notebook wrote itself.
# The with-block closes the file even when unpickling fails.
with open('all_symbols.pkl', 'rb') as infile:
    all_symbols = pickle.load(infile)

In [None]:
#Open a read-only IB session using the IB_IP / IB_PORT / IB_CLIENTID settings
ib = iblib.IB()
ib.connect(host=IB_IP, port=IB_PORT, clientId=IB_CLIENTID, readonly=True)

In [None]:
#Extract the data from IB and write to disk
def _save_text(path, text):
    """Write ``text`` to ``path``, creating the parent directory if it is missing."""
    os.makedirs(os.path.dirname(path), exist_ok=True)
    with open(path, 'w') as filehandle:
        filehandle.write(text)

#total entries
IB_symbol_count = len(all_symbols)
print(IB_symbol_count)

savepath = "IB/"
# the failure log is written directly under savepath, so make sure it exists
os.makedirs(savepath, exist_ok=True)

cur_idx = 0
for cur_idx, entry in enumerate(all_symbols, start=1):
    this_symbol = entry["IBSymbol"]
    this_exchange = entry["Exchange"]
    this_ccy = entry["Currency"]

    this_path = savepath + this_ccy + "/" + this_symbol
    this_path_attempt = savepath + "nodata/" + this_ccy + "/" + this_symbol

    #we only request symbols whose data file does not exist yet
    # NOTE(review): an existing "nodata" attempt file does not stop a new
    # request on the next run - confirm whether that is intended.
    if not Path(this_path).is_file():
        contract = iblib.contract.Stock(symbol=this_symbol, exchange=this_exchange, currency=this_ccy)
        try:
            fund = ib.reqFundamentalData(contract=contract, reportType='ReportsFinStatements')
            if isinstance(fund, str) and len(fund) > 1:
                #we have data - write it to disk
                _save_text(this_path, fund)
            else:
                #save an attempt file; a non-string result (presumably what
                #comes back when IB has nothing - TODO confirm) becomes an
                #empty marker instead of making write() fail
                _save_text(this_path_attempt, fund if isinstance(fund, str) else "")
        except Exception as exc:
            # Exception rather than a bare except, so that interrupting the
            # kernel still stops this long-running loop
            print(f"error with entry {this_path}: {exc!r}")
            with open("IB/log.txt", "a+") as myfile:
                myfile.write(this_path + "\n")

    #cur_idx counts the entries handled so far
    if cur_idx % 500 == 0:
        print(f"processing number {cur_idx} of {IB_symbol_count}")

In [None]:
#Read in all the currency and symbols we were able to get data for
exclude="nodata"
have_data = []
for (dirpath, dirnames, filenames) in walk(savepath,topdown=True):
    # prune in place so walk() never descends into the "nodata" tree; compare
    # for equality, because `d not in exclude` is a substring test on a string
    # and would also drop folders named e.g. "no" or "data"
    dirnames[:] = [d for d in dirnames if d != exclude]
    # skip the root itself (it only holds log.txt) and any non-leaf folder
    if dirpath == savepath or dirnames:
        continue
    # the currency is the first folder below savepath
    ccy = Path(dirpath).relative_to(savepath).parts[0]
    for sym in filenames:
        have_data.append({"Currency":ccy, "IBSymbol": sym })

print(f"writing list of {len(have_data)} data items to disk")
with open('have_ib_data.pkl', 'wb') as f:
    pickle.dump(have_data, f)

In [None]:
#Read in the stock symbols for which we have IB data
# NOTE: pickle.load can execute arbitrary code - only load a file that this
# notebook wrote itself.
# The with-block closes the file even when unpickling fails.
with open('have_ib_data.pkl', 'rb') as infile:
    have_data = pickle.load(infile)
print(len(have_data))

In [None]:
#Read in AV data
#now step through each symbol and find it on alphavantage
from urllib.parse import quote

symbol_search = "https://www.alphavantage.co/query?function=SYMBOL_SEARCH&apikey=" + AV_API_KEY + "&keywords="
# the API key has to be concatenated in; with mismatched quotes the text
# `" + AV_API_KEY + "` was sent to the server literally
alpha_venture = 'https://www.alphavantage.co/query?function=TIME_SERIES_DAILY_ADJUSTED&outputsize=full&apikey=' + AV_API_KEY + '&symbol='


def _av_get_json(url, attempts=3, pause=30):
    """Fetch ``url`` and return the decoded JSON body, or None after ``attempts`` failures.

    A failed request or an undecodable body is printed and retried after
    ``pause`` seconds, so one bad response never moves the caller's record
    index around.
    """
    for attempt in range(1, attempts + 1):
        try:
            return requests.get(url).json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers JSON decode errors raised by response.json()
            print(e)
            if attempt < attempts:
                time.sleep(pause)
    return None


def _av_find_match(ib_symbol, ib_currency):
    """Return ``(av_symbol, av_currency)`` for an IB symbol, or None when nothing matches.

    An exact symbol match in the same currency is preferred. Only when there
    is none is the search repeated for suffixed symbols ("<symbol>.<suffix>")
    in the same currency.
    """
    keyword = quote(ib_symbol, safe='')

    #first we try a direct search on the IB symbol
    res_set = _av_get_json(symbol_search + keyword) or {}
    for match in res_set.get('bestMatches', []):
        if match['1. symbol'] == ib_symbol and match['8. currency'] == ib_currency:
            return match['1. symbol'], match['8. currency']

    #continue searching if not found yet
    dotted = ib_symbol + '.'
    res_set = _av_get_json(symbol_search + keyword + '.') or {}
    for match in res_set.get('bestMatches', []):
        if match['1. symbol'].startswith(dotted) and match['8. currency'] == ib_currency:
            return match['1. symbol'], match['8. currency']
    return None


for idx, record in enumerate(have_data):
    ib_symbol = record["IBSymbol"]
    ib_currency = record["Currency"]

    found = _av_find_match(ib_symbol, ib_currency)
    if found is None:
        #no match found
        print(f"no match found to IBSymbol={ib_symbol} for Currency={ib_currency}")
    else:
        #match found now we retrieve the data
        av_symbol, av_currency = found
        data = _av_get_json(alpha_venture + quote(av_symbol, safe=''))
        if data is None:
            # nothing was downloaded, so do not leave an empty file behind
            print(f"could not retrieve data for {av_symbol}")
        else:
            #now save this to disk
            this_path = "AV/" +av_currency + "/" +ib_symbol +"_" + av_symbol
            os.makedirs(os.path.dirname(this_path), exist_ok=True)
            with open(this_path, 'w') as filehandle:
                filehandle.write(json.dumps(data))
    print(f"processing record {idx}")
