In [38]:
# Imports

from ibpythonic import ibConnection, message
from ibapi.contract import Contract

from time import sleep, strftime
from datetime import datetime
import pickle
import os
from multiprocessing import Pool

import workers

import pandas as pd
import numpy as np

from tinydb import TinyDB, Query

from tqdm import tqdm_notebook as tqdm

In [39]:
# Create contract object

def makeStkContract(contractTuple):
    newContract = Contract()
    newContract.symbol = contractTuple[0]
    newContract.secType = contractTuple[1]
    newContract.exchange = contractTuple[2]
    newContract.currency = contractTuple[3]
    return newContract

In [40]:
# Control variables declaration

bar_data_list = []
busy = False
active_contract_id = 0

In [51]:
# Historical data handler

def my_hist_data_handler(msg):
    global bar_data_list
    bar_data_list.append(msg.bar)

In [47]:
# End of historical data handler

def my_hist_data_end_handler(msg):
    print('my_hist_data_end_handler: ' + str(msg))
    global bar_data_list
    global busy
    global active_contract_id
    

    ####### Write data to pickle file
    if False:
        # Merge and write received data to pickle file
        symbol_db = TinyDB('symbol_db.json')
        my_query = Query()
        result = symbol_db.search(my_query.Id == active_contract_id)
        current_symbol = result[0]
        sy = current_symbol['Symbol']
        ex = current_symbol['Exchange']

        # textfile = open('data/' + sy + '_' + ex + '.txt', 'w+')
        # textfile.write("\n".join(bar_data_list))
        # textfile.close()

        with open('data/pickles/' + sy + '_' + ex, 'wb') as fp:
            pickle.dump(bar_data_list, fp)

        # write finished info to symbol database
        timestamp_now = datetime.now()
        string_now = timestamp_now.strftime('%Y-%m-%d')
        symbol_db.update({'Status': string_now}, my_query.Id == active_contract_id)
        symbol_db.close()

        bar_data_list = []
        active_contract_id = []
        busy = False    
    

    ####### Append data to existing CSV file
    if True:
        # Merge collected data into dataframe
        df_new = pd.DataFrame(columns=('date', 'open', 'high', 'low', 'close', 'volume'))
        for bar in bar_data_list:
            row = {'date': bar.date, 'open': bar.open, 'high': bar.high, 'low': bar.low, 'close': bar.close, 'volume': bar.volume}
            df_new = df_new.append(row, ignore_index=True)
        # df_new.loc[:, 'date'] = pd.to_datetime(df_new.date, format='%Y%m%d') #'%Y%m%d  %H:%M:%S'
        df_new = df_new.set_index('date')
        
        # Check if there is already existing data
        print('active_contract_id: ' + str(active_contract_id))
        symbol_db = TinyDB('symbol_db.json')
        my_query = Query()
        result = symbol_db.search(my_query.Id == active_contract_id)
        symbol_db.close()
        print('result: ' + str(result))
        current_contract = result[0]        
        contract_status = current_contract['Status']
        contract_symbol = current_contract['Symbol']
        contract_exchange = current_contract['Exchange']
        filename = '/Users/martin/Google Drive/data/screener/' + contract_symbol + '_' + contract_exchange + '.csv'
        print('data_end: ' + contract_symbol + '_' + contract_exchange)
        
        # Append to existing file or create new file
        if symbol_status.startswith('data_ends:') or symbol_status.startswith('2019'):
            print('data_end: if')
            df_old = pd.read_csv(filename, index_col='date')
            df_combined = df_new.combine_first(df_old)
            os.remove(filename)
            df_combined.to_csv(filename)
        else:
            print('data_end: else')
            if os.path.exists(filename):
                os.remove(filename)
            df_new.to_csv(filename)

        # write finished info to symbol database
        timestamp_now = datetime.now()
        string_now = timestamp_now.strftime('%Y-%m-%d')
        symbol_db = TinyDB('symbol_db.json')
        my_query = Query()
        symbol_db.update({'Status': 'data_ends:'+string_now}, my_query.Id == active_contract_id)
        symbol_db.close()

        bar_data_list = []
        active_contract_id = 0
        busy = False    
    
    
    ####### Write data to new CSV file
    if False:
        # Merge collected data into dataframe
        df = pd.DataFrame(columns=('date', 'open', 'high', 'low', 'close', 'volume'))
        for bar in bar_data_list:
            row = {'date': bar.date, 'open': bar.open, 'high': bar.high, 'low': bar.low, 'close': bar.close, 'volume': bar.volume}
            df = df.append(row, ignore_index=True)
        df.loc[:, 'date'] = pd.to_datetime(df.date, format='%Y%m%d') #'%Y%m%d  %H:%M:%S'
        df = df.set_index('date')

        # Write dataframe to csv file
        symbol_db = TinyDB('symbol_db.json')
        my_query = Query()
        result = symbol_db.search(my_query.Id == active_contract_id)
        symbol_db.close()

        current_symbol = result[0]
        sy = current_symbol['Symbol']
        ex = current_symbol['Exchange']
        df.to_csv('data/' + sy + '_' + ex + '.csv') 


In [48]:
# Error handler

def my_error_handler(msg):
    print('my_error_handler: ' + str(msg))
    
    global bar_data_list
    global busy
    global active_contract_id
    
    # write msg info to symbol database
    symbol_db = TinyDB('symbol_db.json')
    my_query = Query()
    # todo: clean message
    symbol_db.update({'Status': 'error:'+msg.errorMsg}, my_query.Id == active_contract_id)
    symbol_db.close()
    
    bar_data_list = []
    active_contract_id = 0
    busy = False

In [49]:
# Data retrieval constants

endtime = strftime('%Y%m%d %H:%M:%S')
start_id = 0
end_id = 1

In [50]:
# Create connection object

con = ibConnection(port=7497)
con.register(my_hist_data_handler, message.historicalData)
con.register(my_hist_data_end_handler, message.historicalDataEnd)
con.register(my_error_handler, message.error)

True

In [52]:
# Main function

con.connect()
    
for index in tqdm(range(start_id, end_id)):
    global busy
    global active_contract_id
    
    # Get symbol data from symbol db
    symbol_db = TinyDB('symbol_db.json')
    my_query = Query()
    result = symbol_db.search(my_query.Id == index)
    symbol_db.close()
    current_symbol = result[0]
    print(current_symbol)
    symbol_status = current_symbol['Status']       
    
    # Check if CSV file is already present    
    if symbol_status.startswith('data_ends:') or symbol_status.startswith('2019'):
        if symbol_status.startswith('data_ends:'):
            start_date = (symbol_status.split(':'))[1]
        else:
            start_date = symbol_status
        end_date = datetime.today().strftime('%Y-%m-%d')
        ndays = np.busday_count(start_date, end_date)
        ndays += 4
        req_string = str(ndays) + ' D'  
    else:
        req_string = "10 Y"
    print(req_string)

    # todo: stop on certain error status, eg no connection
    
    # Create contract and request data
    contractTuple = (current_symbol['Symbol'], 'STK', current_symbol['Exchange'], current_symbol['Currency'])
    stkContract = makeStkContract(contractTuple)
    active_contract_id = index
    busy = True
    con.reqHistoricalData(7,stkContract,endtime,req_string,'1 day','MIDPOINT',1,1, keepUpToDate=False, chartOptions=[])
    
    # Wait for data download and storage
    while busy is True:
        sleep(0.2)
    sleep(0.2)
    print('-------------------------')
con.disconnect()

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

my_error_handler: <error id=-1, errorCode=2104, errorMsg=Market data farm connection is OK:eufarm>
{'Id': 0, 'Symbol': 'BNQF', 'Name': 'Collateralized ETC on RICI Enhanced Gas Oil TR Index', 'Currency': 'EUR', 'Exchange': 'FWB', 'Status': 'data_ends:2019-10-05'}
4 D
my_error_handler: <error id=-1, errorCode=2104, errorMsg=Market data farm connection is OK:usfarm>
my_error_handler: <error id=-1, errorCode=2106, errorMsg=HMDS data farm connection is OK:euhmds>
my_error_handler: <error id=-1, errorCode=2106, errorMsg=HMDS data farm connection is OK:fundfarm>
my_error_handler: <error id=-1, errorCode=2106, errorMsg=HMDS data farm connection is OK:ushmds>
my_error_handler: <error id=-1, errorCode=2158, errorMsg=Sec-def data farm connection is OK:secdefnj>
<historicalData reqId=7, bar=Date: 20190930, Open: 45.936000, High: 46.162000, Low: 45.629000, Close: 45.741000, Volume: -1, Average: -1.000000, BarCount: -1>
<historicalData reqId=7, bar=Date: 20191001, Open: 45.510000, High: 45.890000, L

True

In [11]:
# Qurey data from symbol database

symbol_db = TinyDB('symbol_db.json')
my_query = Query()
result = symbol_db.search(my_query.Id <= 203)
for item in result:
    print(item)
symbol_db.close()

{'Id': 0, 'Symbol': 'BNQF', 'Name': 'Collateralized ETC on RICI Enhanced Gas Oil TR Index', 'Currency': 'EUR', 'Exchange': 'FWB', 'Status': 'Market data farm connection is OK:usfarm.nj'}
{'Id': 1, 'Symbol': 'CNB', 'Name': 'Lyxor Euro Corporate Bond Ex Financials UCITS ETF', 'Currency': 'EUR', 'Exchange': 'FWB', 'Status': '2019-09-29'}
{'Id': 2, 'Symbol': 'UEF5', 'Name': 'UBS ETF - MSCI Emerging Markets Socially Responsible UCITS ETF', 'Currency': 'EUR', 'Exchange': 'FWB', 'Status': '2019-09-29'}
{'Id': 3, 'Symbol': 'GOMA', 'Name': 'BNP Paribas Easy Barclays Euro Government Inflation Linked All Maturities ETF', 'Currency': 'EUR', 'Exchange': 'FWB', 'Status': '2019-09-29'}
{'Id': 4, 'Symbol': 'X0BM', 'Name': 'Coba ETC -2 x Platinum Daily Short Index', 'Currency': 'EUR', 'Exchange': 'FWB', 'Status': '2019-09-29'}
{'Id': 5, 'Symbol': 'FLOT', 'Name': 'Lyxor Barclays Floating Rate Euro 0-7Y UCITS ETF', 'Currency': 'EUR', 'Exchange': 'FWB', 'Status': '2019-09-29'}
{'Id': 6, 'Symbol': 'X0E2', 

In [10]:
# Read all symbol data to dataframe

symbol_db = TinyDB('symbol_db.json')
data_list = str(symbol_db.all())
symbol_db.close()

data_list = data_list.replace("'", '"')
df = pd.read_json(data_list, orient='records')
df

ValueError: Unexpected character found when decoding object value

In [47]:
 # Manual disconnection of connection object
    
con.disconnect()

False

In [27]:
# Create CSV files from pickles

file_list = []
for filename in os.listdir('data/pickles/'):
    file_list.append(filename)
    
if __name__ ==  '__main__': 
    with Pool(4) as p:
        p.map(workers.pick_to_csv, file_list)

UnpicklingError: invalid load key, '\x00'.

# Enhancements

### Speedup
- download next symbol as soon as data is in file

### Extra features for downloading
- start time for downloading is end time of data. last candle of data is considered invalid
- check for splits/mergers
- Stream logger for status messages

### Symbol in multiple exchanges
- if symbol is multiple times in list
    - load data from primary exchange if ppossible
    - if primary is not available, load from exchange with highest average (price-)volume

### Symbol database
- pull symbols from ib webpage
- pull symbol data from ib
- abillity to create own univestes (=symbol lists) based on symbol data
- Ignore Messages with error id = -1 (see source)
    <error id=-1, errorCode=2104, errorMsg=Market data farm connection is OK:usfarm.nj>
    <error id=-1, errorCode=2104, errorMsg=Market data farm connection is OK:eufarm>
    <error id=-1, errorCode=2104, errorMsg=Market data farm connection is OK:cashfarm>
    <error id=-1, errorCode=2104, errorMsg=Market data farm connection is OK:usfarm>
    <error id=-1, errorCode=2106, errorMsg=HMDS data farm connection is OK:euhmds>
    <error id=-1, errorCode=2106, errorMsg=HMDS data farm connection is OK:ushmds>
    <error id=1, errorCode=162, errorMsg=Historical Market Data Service error message:HMDS query returned no data: 0I9M@LSE Ask>

### Storage of price data
- store price data in database? sqlite, as handeled in one file?
- cloud storage of data

### Misc
- 

In [14]:
symbol_db = TinyDB('symbol_db.json')
my_query = Query()
result = symbol_db.search(my_query.Id == 0)
symbol_db.close()


current_contract = result[0]
print(current_contract)

{'Id': 0, 'Symbol': 'BNQF', 'Name': 'Collateralized ETC on RICI Enhanced Gas Oil TR Index', 'Currency': 'EUR', 'Exchange': 'FWB', 'Status': 'error:Sec-def data farm connection is OK:secdefnj'}
