# Building base data
We will run a set of experiments to understand how `ib_insync` retrieves data using `asyncio`.

## SET THE MARKET

In [1]:
# Market to process; read later by the port selection and by get_syms().
MARKET = "SNP"

## IMPORTS, CONNECTIONS, LOG, TIMER

In [2]:
import asyncio
import pickle
import sys
import time
import pandas as pd
import random

from collections import defaultdict
from datetime import datetime
from pprint import pprint

from ib_insync import *

from ib01_getsyms import get_syms
from support import timestr

# Fixed seed so the random.sample() picks made later in the notebook are repeatable.
random.seed(8888)

In [3]:
import nest_asyncio
# Event-loop setup for using ib_insync from notebook cells.
# NOTE(review): presumably both calls enable nested use of the event loop that the
# Jupyter kernel is already running — confirm against the ib_insync / nest_asyncio docs.
util.startLoop()
nest_asyncio.apply()

# Do not truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None

In [4]:
# Connection parameters, used below as ib.connect(HOST, PORT, CID).
HOST = '127.0.0.1'
# The port depends on the market: 3000 for 'NSE', 1300 for anything else.
PORT = 3000 if MARKET.upper() == 'NSE' else 1300
CID = 0  # third positional argument of ib.connect (the client id)
MASTERCID = 10  # NOTE(review): not referenced by any cell visible in this notebook

In [5]:
# Direct logs to file with level at WARNING (30)
util.logToFile(path='./data/data.log', level=30)
# Re-opening the same path in 'w' mode truncates it, so every run starts with an empty log.
with open('./data/data.log', 'w'): # to clear the log
    pass

## UNDERLYING SYMBOLS AND LOTS

In [6]:
# Symbol table for the selected market; its symbol / exchange / currency columns are used below.
df_syms = get_syms(MARKET)

# Distinct symbol names.
symbols = set(df_syms.symbol)

# For every row build two candidate contracts, a Stock followed by an Index,
# giving the flat list [Stock(row0), Index(row0), Stock(row1), Index(row1), ...].
raw_cts = [
    contract_type(symbol, exchange, currency)
    for symbol, exchange, currency in zip(df_syms.symbol, df_syms.exchange, df_syms.currency)
    for contract_type in (Stock, Index)
]

# raw_cts = raw_cts[18:25]  # !!! DATA LIMITER !!!

In [7]:
%%time

## Qualify the underlyings
ib = IB()

# The connection is scoped to the `with` block; `qunds` holds whatever
# ib.qualifyContracts() returns for the candidate contracts.
with ib.connect(HOST, PORT, CID) as ib:
    qunds = ib.qualifyContracts(*raw_cts)

Wall time: 4.63 s


In [None]:
# Peek at the first five qualified contracts.
qunds[:5]

## OHLC DATA COROUTINE

In [9]:
async def ohlcCoro(c, DURATION=365):
    """Fetch daily OHLC bars for one contract and return them as a DataFrame.

    Parameters
    ----------
    c : contract
        Contract whose history is requested; its ``symbol`` labels the rows.
    DURATION : int, default 365
        Number of days of history, sent to the API as ``"<DURATION> D"``.

    Returns
    -------
    pandas.DataFrame
        The bars converted by ``util.df`` with a leading ``symbol`` column.
        When the request yields no bars, an empty DataFrame that has only the
        ``symbol`` column is returned, so callers can still ``pd.concat`` it.
    """
    ohlc = await ib.reqHistoricalDataAsync(
        contract=c,
        endDateTime="",
        durationStr=str(DURATION) + ' D',
        barSizeSetting="1 day",
        whatToShow="Trades",
        useRTH=True)

    # util.df() gives back None (not an empty frame) for an empty bar list;
    # calling .insert() on that would raise AttributeError.
    df = util.df(ohlc)
    if df is None:
        return pd.DataFrame(columns=['symbol'])

    df.insert(0, 'symbol', c.symbol)

    # NOTE: an extra `await asyncio.sleep(5)` was tried here, expecting 365 days
    # of data to need processing time; it turned out to be unnecessary.
    return df

## Single contract test

In [10]:
# First qualified contract, used as the single-contract test case (displayed below).
ct1 = qunds[0]
ct1

Index(conId=6886747, symbol='DJX', exchange='CBOE', currency='USD', localSymbol='DJX')

This does not work and **hangs indefinitely**.

But why?!!

Because `qunds[0]` is an Index. The program is not able to fetch OHLC data for an Index when the market is not open.

We will need to modify the `ib.run` call and pass a `timeout` to it.

**Note**: passing a `timeout` to `ib.run` is good for one contract only. For multiple tasks it is advisable to use `asyncio.wait_for()`.

In [11]:
%%time

# Single-contract run with a 6 second limit passed to ib.run().
util.logToConsole(level=30)
with ib.connect(HOST, PORT, CID) as ib:
    try:
        one_error_ohlc = ib.run(ohlcCoro(ct1), timeout=6)
    except Exception as e:
        # Any failure (including the time limit being hit) falls back to an empty list.
        # The recorded output shows an empty exception text after "Error".
        one_error_ohlc = []
        print(f'Error{e}: Contract {ct1.symbol} does not give out an OHLC!!!')

one_error_ohlc

Error: Contract DJX does not give out an OHLC!!!
Wall time: 6.05 s


[]

Testing the same with a non-index contract

In [12]:
# Pick the contract at position 10 (a Stock in the recorded output) for comparison.
ct2 = qunds[10]
ct2

Stock(conId=4065, symbol='ABT', exchange='SMART', primaryExchange='NYSE', currency='USD', localSymbol='ABT', tradingClass='ABT')

In [13]:
%%time

# Same single-contract run as before, now for `ct2`, 5 days of bars and a 7 second limit.
util.logToConsole(level=30) # to check out any errors in the console
with ib.connect(HOST, PORT, CID) as ib:
    try:
        one_ohlc = ib.run(ohlcCoro(ct2, 5), timeout=7)
    except Exception as e:
        # Any failure falls back to an empty list.
        one_ohlc = []
        print(f'Error{e}: Contract {ct2.symbol} does not give out an OHLC!!!')
util.logToFile(path='./data/data.log', level=30) # remove INFOs and log to file
pprint(one_ohlc, depth = None)

  symbol        date    open    high     low   close  volume   average  \
0    ABT  2020-07-20  100.00  100.71   98.72   99.08   41964   99.6735   
1    ABT  2020-07-21   98.88   99.48   97.76   98.19   31481   98.4470   
2    ABT  2020-07-22   98.11  100.57   98.00  100.19   32047   99.5895   
3    ABT  2020-07-23  100.28  101.98  100.07  100.82   39651  100.9755   
4    ABT  2020-07-24  100.21  100.21   98.36   98.95   31654   99.0460   

   barCount  
0     24263  
1     20045  
2     21046  
3     25659  
4     20869  
Wall time: 2.99 s


## Multiple contract test
#### Test 50 good contracts using a simple `gather`
We found that OHLC doesn't work for SNP Index. So, let us start by using a list of good (non-Index) contracts, using a simple `gather`

In [None]:
# Keep only the Stock contracts and cap the list at 50 entries.
und_cts1 = [q for q in qunds if isinstance(q, Stock)][:50]  # !!! DATA LIMITER for 50 max simultaneous API hist records !!!
len(und_cts1)
# und_cts1

In [None]:
%%time

# Request OHLC for all contracts in `und_cts1` at once; no per-request timeout is applied.
with ib.connect(HOST, PORT, CID) as ib:
    task1 = ib.run(asyncio.gather(*[ohlcCoro(c) for c in und_cts1]))

# Stack the per-contract frames and show the first row of each symbol.
pd.concat(task1, ignore_index=True).groupby('symbol').head(1)

#### Test a mixed-bag of 6 good and bad contracts using wait_for

In [14]:
# Mixed bag: the first three Index contracts plus three randomly chosen Stock contracts.
first_three_indexes = [q for q in qunds if isinstance(q, Index)][:3]
three_random_stocks = random.sample([q for q in qunds if isinstance(q, Stock)], 3)

# Sampling all six elements returns them in shuffled order.
und_cts2 = random.sample(first_three_indexes + three_random_stocks, 6)
und_cts2

[Stock(conId=7930, symbol='HD', exchange='SMART', primaryExchange='NYSE', currency='USD', localSymbol='HD', tradingClass='HD'),
 Index(conId=416904, symbol='SPX', exchange='CBOE', currency='USD', localSymbol='SPX'),
 Stock(conId=107113386, symbol='FB', exchange='SMART', primaryExchange='NASDAQ', currency='USD', localSymbol='FB', tradingClass='NMS'),
 Index(conId=13455763, symbol='VIX', exchange='CBOE', currency='USD', localSymbol='VIX'),
 Index(conId=6886747, symbol='DJX', exchange='CBOE', currency='USD', localSymbol='DJX'),
 Stock(conId=273544, symbol='QCOM', exchange='SMART', primaryExchange='NASDAQ', currency='USD', localSymbol='QCOM', tradingClass='NMS')]

In [33]:
%%time

with ib.connect(HOST, PORT, CID) as ib:
    
    async def get_ohlc():
        tasks = [ohlcCoro(c) for c in und_cts2]
        return asyncio.gather(*tasks)
    
    tasks2 = await get_ohlc()

SyntaxError: 'await' outside function (<timed exec>, line 7)

In [26]:
    # Drain loop over `tasks2`: print the result of each finished task, then continue
    # with whatever is still pending.
    # NOTE(review): asyncio.wait_for() wraps a single awaitable and returns a coroutine;
    # it is not awaited here, so the two-name unpacking fails with the TypeError recorded
    # below. asyncio.wait() is the call that yields a (done, pending) pair.
    while tasks2:
        done, pending = asyncio.wait_for(tasks2, 10)
        
        for task in done:
            result = task.result()
            pprint(result)
        tasks2 = pending



TypeError: cannot unpack non-iterable coroutine object

In [None]:
async def mktdataCoro(c, FILL_DELAY=5):
    """Subscribe to market data for ``c``, wait for it to fill, then unsubscribe.

    Parameters
    ----------
    c : contract
        Contract to request market data for.
    FILL_DELAY : int or float, default 5
        Seconds to sleep between subscribing and cancelling.

    Returns
    -------
    dict
        ``{c.symbol: {'mdata': tick}}`` where ``tick`` is the object returned by
        ``ib.reqMktData``.
    """
    tick = ib.reqMktData(c, '456, 104, 106, 100, 101, 165')
    try:
        await asyncio.sleep(FILL_DELAY)
    finally:
        # Always release the subscription, also when this coroutine is cancelled
        # (e.g. by a timeout) while it is sleeping.
        ib.cancelMktData(c)

    return {c.symbol: {'mdata': tick}}

In [None]:
async def chainsCoro(c):
    """Start an option-chain parameter request for contract ``c``.

    Returns ``{c.symbol: {'chains': <request>}}`` where ``<request>`` is whatever
    ``ib.reqSecDefOptParamsAsync`` returns.

    NOTE(review): the request is stored as-is and is not awaited here; a later
    inspection cell calls ``.result()`` on the stored object, so this looks
    deliberate — confirm before adding an ``await``.
    """
    request_args = dict(
        underlyingSymbol=c.symbol,
        futFopExchange="",
        underlyingSecType=c.secType,
        underlyingConId=c.conId,
    )
    pending_chains = ib.reqSecDefOptParamsAsync(**request_args)
    return {c.symbol: {'chains': pending_chains}}

In [None]:
%%time
with ib.connect(HOST, PORT, CID) as ib:
    qunds = ib.qualifyContracts(*raw_cts[:50])
    async def coro(c):
        ohlc_task = [ohlcCoro(c, 365) for c in qunds]
        mkt_task = [mktdataCoro(c, 5) for c in qunds]
        chain_task = [chainsCoro(c) for c in qunds]
        tasks = ohlc_task + mkt_task + chain_task
        return await asyncio.gather(*tasks)
    
    tasks = [coro(c) for c in qunds]
    
    r = []
    
    while tasks:
        done, pending = ib.run(asyncio.as_completed(tasks))
        
        for task in done:
            result = task.result()
            
            if result:
                r.append(result)
        tasks = pending

# print(r)

In [None]:
# Inspect one collected entry.
r[20]

In [None]:
# Persist the collected results `r` to ./data/first.pkl using the highest pickle protocol.
with open('./data/first.pkl', 'wb') as f:
    pickle.dump(r, f, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
        
    
    
    # Fragment (indented as if it belonged inside a `with ib.connect(...)` block):
    # wait for the first task in `tasks` to finish and print it, then give the
    # remaining ones up to 15 more seconds.
    # NOTE(review): relies on `tasks` and an open connection left over from another cell.
    print("Get first result:")
    done, pending = ib.run(asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED))
    
    for task in done:
        print(task.result())
    print("pending:", len(pending))
    
    print("\nGet more results in 15 seconds:\n")
    # With a timeout, asyncio.wait() returns after 15 s even if some tasks are unfinished.
    done2, pending2 = ib.run(asyncio.wait(pending, timeout=15))
    
    for task in done2:
        print(task.result())
    print("pending:", len(pending2))
    

In [None]:
    # Split the contracts into consecutive batches of `blk` and run coro() once per
    # batch, one batch after another.
    blk = 5
    cb = [qunds[i: i + blk] for i in range(0, len(qunds), blk)]
    result = [ib.run(coro(c)) for c in cb]

In [None]:
# Flatten the per-batch result lists into one flat list.
r = []
for batch in result:
    r.extend(batch)

In [None]:
# Dig into one entry of the form {symbol: {key: value}} and call .result() on the value.
# NOTE(review): the hard-coded index only makes sense for the run that produced `r`.
list(list(r[5939].values())[0].values())[0].result()

In [None]:
# IPython help lookup for ib.run (development aid; not needed for the analysis itself).
ib.run?

In [None]:
async def get_data(raw_cts, timeout=None):
    '''Qualify ``raw_cts`` and collect data sequentially, one contract at a time.

    For each qualified contract three requests run concurrently (market data,
    OHLC and option-chain parameters); the next contract is started only after
    the current one is finished.

    Parameters
    ----------
    raw_cts : iterable of contracts
        Unqualified contracts, passed to ``ib.qualifyContractsAsync``.
    timeout : float or None, default None
        Maximum number of seconds to wait for the *next* result of a contract.
        When it elapses with nothing finished, the contract's outstanding
        requests are cancelled and processing moves on. ``None`` waits without
        limit (the original behaviour).

    Returns
    -------
    list
        One ``[symbol, type(data), data]`` entry per finished request, in
        completion order.
    '''

    d = []

    # Awaited directly: handing a bare coroutine to asyncio.wait() is deprecated
    # since Python 3.8 and rejected by Python 3.11+.
    qct = await ib.qualifyContractsAsync(*raw_cts)

    for c in qct:

        tasks = {asyncio.ensure_future(mktdataCoro(c)),
                 asyncio.ensure_future(ohlcCoro(c)),
                 asyncio.ensure_future(ib.reqSecDefOptParamsAsync(underlyingSymbol=c.symbol,
                                       futFopExchange="",
                                       underlyingSecType=c.secType,
                                       underlyingConId=c.conId))}
        while tasks:
            done, tasks = await asyncio.wait(tasks, timeout=timeout,
                                             return_when=asyncio.FIRST_COMPLETED)

            if not done:
                # Nothing finished within `timeout`: give up on this contract.
                for stuck in tasks:
                    stuck.cancel()
                print(f'{c.symbol} timed out with {len(tasks)} request(s) unfinished!')
                break

            for y in done:
                data = y.result()

                print(f'{c.symbol} data collected!')
                d.append([c.symbol, type(data), data])

    return d

In [None]:
# Fresh IB client instance for the runs below.
ib = IB()

In [None]:
%%time
# Run the sequential collector over every candidate contract inside one connection.
with ib.connect(HOST, PORT, CID) as ib:
    data = ib.run(get_data(raw_cts))

In [None]:
# Inspect the second collected [symbol, type, data] entry.
data[1]

In [None]:
async def make_tasks(c):
    """Schedule the OHLC, market-data and chain requests for contract ``c``.

    Each request becomes a named task (``<symbol>_ohlc``, ``<symbol>_mdata``,
    ``<symbol>_chains``).

    Returns the ``asyncio.gather`` future that aggregates the three tasks; it is
    returned without being awaited, so the caller decides when to wait for it.
    """
    tasks = [asyncio.create_task(ohlcCoro(c, DURATION=2), name=c.symbol+'_'+'ohlc'),
             asyncio.create_task(mktdataCoro(c, FILL_DELAY=5), name=c.symbol+'_'+'mdata'),
             asyncio.create_task(chainsCoro(c), name=c.symbol+'_'+'chains')]

    # gather() takes the awaitables as separate positional arguments, so the
    # list has to be unpacked.
    return asyncio.gather(*tasks)

In [None]:
async def get_pll_data(raw_cts):
    '''Qualify ``raw_cts`` and start the data requests of all contracts in parallel.

    For every qualified contract one ``asyncio.gather`` future is created that
    combines its OHLC (2 days), market-data (5 s fill delay) and chain requests.

    Returns
    -------
    dict
        ``{symbol: gather_future}``. The futures are scheduled but not awaited
        here; callers read them later (e.g. via ``.result()``). Contracts that
        share a symbol overwrite each other in the dict.
    '''

    d = dict()

    qunds = await ib.qualifyContractsAsync(*raw_cts)

    for c in qunds:
        # One gather per contract. (A former exec()-built copy of this line
        # concatenated a str with a contract object, which raises TypeError,
        # and would have issued every request twice.)
        d[c.symbol] = asyncio.gather(ohlcCoro(c, 2), mktdataCoro(c, 5), chainsCoro(c))

    # An earlier variant drained a flat task list with
    # asyncio.wait(..., return_when=asyncio.FIRST_COMPLETED) instead.
    return d

In [None]:
%%time
# Run the parallel collector over every candidate contract inside one connection.
with ib.connect(HOST, PORT, CID) as ib:
    data = ib.run(get_pll_data(raw_cts))

In [None]:
# Show the {symbol: gather future} mapping returned by get_pll_data().
data

In [None]:
# Read the result of the first contract's gather future.
# NOTE(review): .result() raises if that future has not completed yet.
list(data.values())[0].result()

In [None]:
import asyncio
from pprint import pprint

import random

async def coro(tag):
    """Demo coroutine: print on entry, sleep 1-3 seconds, print on exit, return ``tag``."""
    print(">", tag)
    await asyncio.sleep(random.uniform(1, 3))
    print("<", tag)
    return tag


loop = asyncio.get_event_loop()

# Three independent groups of demo coroutines ...
group1 = asyncio.gather(*[coro("group 1.{}".format(i)) for i in range(1, 6)])
group2 = asyncio.gather(*[coro("group 2.{}".format(i)) for i in range(1, 4)])
group3 = asyncio.gather(*[coro("group 3.{}".format(i)) for i in range(1, 10)])

# ... combined into one awaitable; the result is a list of three lists of tags,
# each in submission order.
all_groups = asyncio.gather(group1, group2, group3)

results = loop.run_until_complete(all_groups)

# The loop is deliberately NOT closed: in a notebook it is the kernel's shared,
# still-running event loop, so loop.close() raises RuntimeError and would break
# every later async cell (ib.run, ...).

pprint(results)