# Building base data
We will run a set of programs to understand how `ib_insync` generates data using `asyncio`.

## SET THE MARKET

In [None]:
# Market to build data for. It is compared (upper-cased) against 'NSE' further
# down to choose the connection port and the exchange used for options.
MARKET = "NSE"

## IMPORTS, CONNECTIONS, LOG, TIMER

In [None]:
import asyncio
import pickle
import sys
import time
import pandas as pd
import random

from collections import defaultdict
from datetime import datetime
from pprint import pprint

from ib_insync import *

from ib01_getsyms import get_syms
from support import timestr

from typing import Callable, Coroutine

# Seed the global random generator with a fixed value.
random.seed(8888)

# On Windows under Python 3.8+, make asyncio use the selector event loop policy.
if (sys.platform.startswith('win')
        and sys.version_info.major == 3
        and sys.version_info.minor >= 8):
    asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

In [None]:
import nest_asyncio
# NOTE(review): per the ib_insync docs, util.startLoop() enables a nested
# asyncio event loop for Jupyter; nest_asyncio.apply() is then applied as well.
# Confirm whether both calls are needed.
util.startLoop()
nest_asyncio.apply()

# Do not truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None

In [None]:
# Connection settings passed to ib.connect() in the cells below.
HOST = '127.0.0.1'
PORT = 4004 if MARKET.upper() == 'NSE' else 4002 # Paper trades!
CID = 0  # client id used for every ib.connect() call in this notebook
MASTERCID = 10  # not referenced anywhere in the visible part of this notebook

# Shared client object; the coroutines below use this module-level `ib`.
ib = IB()

In [None]:
# Direct logs to file with level at WARNING (30)
util.logToFile(path='./data/data.log', level=30)
# Opening the same file in 'w' mode truncates it, so every run of this cell
# starts with an empty log.
with open('./data/data.log', 'w'): # to clear the log
    pass

## UNDERLYING SYMBOLS AND LOTS

In [None]:
%%time
# Fetch the symbol table for the selected market
df_syms = get_syms(MARKET)

# ...unique symbol names
symbols = set(df_syms.symbol)

# ...build the contracts: each row is tried both as a Stock and as an Index,
# in that order
raw_cts = [make_contract(symbol, exchange, currency)
           for symbol, exchange, currency
           in zip(df_syms.symbol, df_syms.exchange, df_syms.currency)
           for make_contract in (Stock, Index)]

# raw_cts = raw_cts[18:25]  # !!! DATA LIMITER !!!

In [None]:
%%time

## Qualify the underlyings
# The `as ib` clause rebinds the module-level `ib` to whatever the context
# manager returns (presumably the same client, disconnected again on exit).
with ib.connect(HOST, PORT, CID) as ib:
    qunds = ib.qualifyContracts(*raw_cts)

# Collapse duplicates by passing the contracts through a set
qunds = list(set(qunds))

# COROUTINES

## OHLC coroutine

In [None]:
async def ohlcCoro(c, DURATION=365):
    """Fetch daily OHLC bars for one contract.

    Args:
        c: contract whose history is requested; its ``symbol`` is added to
            the output.
        DURATION: number of days of history, sent as ``'<DURATION> D'``.

    Returns:
        A DataFrame of the bars with a leading ``symbol`` column, or ``None``
        when no bars came back.
    """
    ohlc = await ib.reqHistoricalDataAsync(
        contract=c,
        endDateTime="",
        durationStr=f'{DURATION} D',
        barSizeSetting="1 day",
        whatToShow="Trades",
        useRTH=True)

    # Fixed pause after every request.
    # NOTE(review): presumably here to respect IB historical-data pacing - confirm.
    await asyncio.sleep(5)

    df = util.df(ohlc)

    # util.df() yields None for an empty bar list. Check for that explicitly
    # instead of catching AttributeError, which would also hide unrelated
    # attribute errors (e.g. on the contract object).
    if df is None:
        return None

    df.insert(0, 'symbol', c.symbol)
    return df

## Market data coroutine

In [None]:
async def mktdataCoro(c, FILL_DELAY=5):
    """Snapshot market data for one contract into a single-row DataFrame.

    A market-data subscription is opened, given ``FILL_DELAY`` seconds to
    fill, and then cancelled. The resulting ticker is flattened into one row.

    Args:
        c: contract to request market data for.
        FILL_DELAY: seconds to wait for the ticker to be populated.

    Returns:
        A one-row DataFrame with a leading ``symbol`` column, keeping only the
        columns whose value is neither missing nor falsy.
    """
    tick = ib.reqMktData(c, '456, 104, 106, 100, 101, 165')
    try:
        await asyncio.sleep(FILL_DELAY)
    finally:
        # Release the subscription even when this task is cancelled while
        # waiting; otherwise the market-data line stays open.
        ib.cancelMktData(c)

    m_df = pd.DataFrame(util.df([tick]))

    # Expand the `dividends` field into its own columns.
    div_df = pd.DataFrame(m_df.dividends.tolist())
    # Keyword form: the positional `axis` argument of drop() is gone in pandas 2.
    df1 = m_df.drop(columns='dividends').join(div_df)
    df1.insert(0, 'symbol', [contract.symbol for contract in df1.contract])

    # Drop every column that holds a missing value
    df2 = df1.dropna(axis=1)

    # Extract columns with legit (truthy) values in them
    df3 = df2[[col for col in df2.columns if df2.loc[0, col]]]

    return df3

## Chains coroutine

In [None]:
async def chainsCoro(c):
    """Build the expiry x strike grid of the option chain of one underlying.

    Only the first chain returned for the underlying is used.

    Args:
        c: qualified underlying contract (``symbol``, ``secType`` and
            ``conId`` are sent with the request).

    Returns:
        A DataFrame with columns ``symbol`` (the chain's trading class),
        ``expiry``, ``strike``, ``exchange`` and ``mult``, one row per
        expiry/strike combination. It is empty when the underlying has no
        option chain.
    """
    columns = ['symbol', 'expiry', 'strike', 'exchange', 'mult']

    chains = await ib.reqSecDefOptParamsAsync(underlyingSymbol=c.symbol,
                                               futFopExchange="",
                                               underlyingSecType=c.secType,
                                               underlyingConId=c.conId)

    # Underlyings without options give nothing back; return an empty frame
    # rather than failing on chains[0].
    if not chains:
        return pd.DataFrame(columns=columns)

    # Pick up one chain if it is a list
    chain = chains[0] if isinstance(chains, list) else chains

    # Cartesian product of expiries and strikes, expiry-major
    grid = [(expiry, strike)
            for expiry in chain.expirations
            for strike in chain.strikes]

    df = pd.DataFrame(grid, columns=['expiry', 'strike']).assign(
        symbol=chain.tradingClass,
        exchange=chain.exchange,
        mult=chain.multiplier)

    return df[columns]

## Base Coroutine

In [None]:
async def baseCoro(qunds:list) -> None:
    """Schedule the OHLC, market-data and chain tasks for every underlying.

    Each task is registered in the module-level ``todo`` set and named
    ``<symbol>_ohlc``, ``<symbol>_und`` or ``<symbol>_chains``.
    """
    # (coroutine function, keyword arguments, task-name suffix)
    jobs = (
        (ohlcCoro, {'DURATION': 365}, '_ohlc'),
        (mktdataCoro, {'FILL_DELAY': 11}, '_und'),
        (chainsCoro, {}, '_chains'),
    )
    for contract in qunds:
        for coro_fn, kwargs, suffix in jobs:
            todo.add(asyncio.create_task(coro_fn(contract, **kwargs),
                                         name=contract.symbol+suffix))

## Progress Coroutine

In [None]:
async def progressAsync(cts, 
                        algo: Callable[..., Coroutine],
                        save_algo: Callable[..., None],
                        pkl_timeout: float=4.0,
                        total_timeout: float=0.0,
                        FSPATH: str='./data/',
                        ) -> None:
    """Run ``algo`` and everything it schedules, saving results as they land.

    The function works on the module-level sets ``todo`` (tasks still running)
    and ``result`` (finished tasks). ``algo`` is expected to add its own
    sub-tasks to ``todo``.

    Args:
        cts: contracts handed to ``algo``.
        algo: coroutine function started as the first task.
        save_algo: plain function called as ``save_algo(FSPATH, result)``
            whenever new tasks have finished, and once more at the end.
        pkl_timeout: seconds to wait between progress reports / saves.
        total_timeout: overall time limit in seconds; ``0`` disables it.
        FSPATH: folder prefix handed to ``save_algo``.
    """
    # create a task for the algo
    controller = asyncio.create_task(algo(cts), name=algo.__name__)

    todo.add(controller) # add task to the asyncio loop

    start = time.time()

    while todo:

        done, _ = await asyncio.wait(todo, timeout=pkl_timeout)

        # move finished tasks from todo to result; save only when something
        # new has actually completed
        todo.difference_update(done)
        if done:
            result.update(done)
            save_algo(FSPATH, result)

        # report pendings
        pending_names = (t.get_name() for t in todo)
        print(f"{len(todo)}: "+ " ".join(sorted(pending_names))[-75:])

        # check for total_timeout; skip when nothing is left, because
        # asyncio.wait() refuses an empty set
        timed_out = total_timeout > 0.0 and time.time() - start > total_timeout
        if timed_out and todo:
            print(f'\nProgram exceeded total_timeout of {total_timeout} seconds')
            print('Cancelling pending todos')
            for straggler in todo:
                straggler.cancel()
            done, pending = await asyncio.wait(todo, timeout=1.0)
            # cancelled tasks are dropped, not moved into result
            todo.difference_update(done)
            todo.difference_update(pending)

    # success!
    save_algo(FSPATH, result)
    end = time.time()
    print(f"Took {int(end-start)} seconds")

## Saving base progress output function

In [None]:
def saveBase(FSPATH, result):
    """Pickle the finished base tasks, grouped by the suffix of the task name.

    Task names ending in ``_ohlc``, ``_und`` and ``_chains`` are written to
    ``df_ohlcs.pkl``, ``df_unds.pkl`` and ``df_chains.pkl`` under ``FSPATH``.
    Tasks with any other name, failed or cancelled tasks, and tasks that
    returned ``None`` are skipped.

    Args:
        FSPATH: path prefix (ending with a separator) for the pickle files.
        result: iterable of finished asyncio tasks.
    """
    frames = {'ohlc': [], 'und': [], 'chains': []}

    for task in result:
        # Take the text after the LAST underscore so that symbols which
        # themselves contain '_' are still classified correctly.
        _, sep, basetype = task.get_name().rpartition('_')
        if not sep or basetype not in frames:
            continue  # not a base task (e.g. the scheduling task itself)

        # result() would re-raise here and abort the whole save
        if task.cancelled() or task.exception() is not None:
            continue

        df = task.result()
        if df is not None:
            frames[basetype].append(df)

    # build the dataframes and pickle
    targets = (('und', 'df_unds.pkl'),
               ('ohlc', 'df_ohlcs.pkl'),
               ('chains', 'df_chains.pkl'))
    for basetype, filename in targets:
        if frames[basetype]:
            combined = pd.concat(frames[basetype], ignore_index=True)
            combined.to_pickle(FSPATH+filename)

In [None]:
%%time

# Getting the base ready
cts = qunds
# Fresh module-level work sets: the coroutines add their tasks to `todo` and
# progressAsync moves finished tasks into `result`.
todo = set()
result = set()
with ib.connect(HOST, PORT, CID) as ib:
    # total_timeout=0 leaves the overall time limit switched off
    ib.run(progressAsync(cts=cts, algo=baseCoro, save_algo=saveBase, total_timeout=0))

# Making df_opts

In [None]:
FSPATH: str='./data/'
df_chains = pd.read_pickle(FSPATH+'df_chains.pkl')

# Chain rows stacked once per right (all puts first, then all calls)
df_ch1 = pd.concat([df_chains.assign(right='P'), 
          df_chains.assign(right='C')], 
          ignore_index=False)

# Exchange used for every option of the selected market
opt_exchange = 'NSE' if MARKET.upper() == 'NSE' else 'SMART'

# Build each right from df_chains (one row per expiry/strike). Iterating
# df_ch1 here would create every option twice, because df_ch1 already holds
# each chain row once per right.
puts = [Option(symbol=s, lastTradeDateOrContractMonth=e, strike=k, right='P', exchange=opt_exchange)
        for s, e, k in zip(df_chains.symbol, df_chains.expiry, df_chains.strike)]

calls = [Option(symbol=s, lastTradeDateOrContractMonth=e, strike=k, right='C', exchange=opt_exchange)
         for s, e, k in zip(df_chains.symbol, df_chains.expiry, df_chains.strike)]

raw_opts = puts + calls

# Work on a small sample first
cts = raw_opts[:300]
cts

## Qualifying options

In [None]:
# Seconds to wait in each asyncio.wait() round of the cell-level loops below
pkl_timeout = 10
# Start from empty work sets: `todo` holds running tasks, `result` finished ones
todo = set()
result = set()

In [None]:
async def qualCoro(opts: list):
    """Qualify a batch of option contracts and return what the client gives back."""
    qualified = await ib.qualifyContractsAsync(*opts)
    # Zero-second sleep: hand control back to the event loop once before returning
    await asyncio.sleep(0)
    return qualified

In [None]:
async def qOptsCoro(opts: list) -> None:
    """Split the options into batches of 100 and schedule one task per batch.

    Every task is added to the module-level ``todo`` set. Its name is the
    label of the first option of the batch followed by the label of the last.
    """
    def label(opt):
        # symbol + expiry without its last two characters + right + strike
        return f'{opt.symbol}{opt.lastTradeDateOrContractMonth[:-2]}{opt.right}{opt.strike}'

    batch_size = 100
    for offset in range(0, len(opts), batch_size):
        batch = opts[offset: offset+batch_size]
        todo.add(asyncio.create_task(qualCoro(batch),
                                     name=label(batch[0])+label(batch[-1])))

In [None]:
with ib.connect(HOST, PORT, CID) as ib:
    
    asyncio.create_task(qOptsCoro(cts), name=qOptsCoro.__name__)
    
    while len(todo):

        done, pending = await asyncio.wait(todo, timeout=pkl_timeout)

        # remove done task from todo after the timeout, update result and pickle it
        todo.difference_update(done)
        result.update(done)

        # report pendings
        pending_names = (t.get_name() for t in todo)
        print(f"{len(todo)}: "+ " ".join(sorted(pending_names))[-75:])

In [None]:
result

In [None]:
done

In [None]:

total_timeout = 0

In [None]:
todo.add(task) # add task to the asyncio loop

In [None]:
todo

In [None]:
# Scratch copy of the wait / report / timeout loop used inside progressAsync,
# run directly at cell level. It relies on `todo`, `result`, `pkl_timeout` and
# `total_timeout` already being defined by earlier cells.
# NOTE(review): top-level `await` only works in an IPython/Jupyter cell - confirm
# this cell is still needed.
start = time.time()

while len(todo):

    done, pending = await asyncio.wait(todo, timeout=pkl_timeout)

    # remove done tasks from todo after the timeout and add them to result
    # (nothing is pickled in this cell)
    todo.difference_update(done)
    result.update(done)

    # report pendings
    pending_names = (t.get_name() for t in todo)
    print(f"{len(todo)}: "+ " ".join(sorted(pending_names))[-75:])

    # check for total_timeout (0 disables the check)
    if total_timeout > 0.0:
        if time.time() - start > total_timeout:
            print(f'\nProgram exceeded total_timeout of {total_timeout} seconds')
            print(f'Cancelling pending todos')
            for task in todo:
                task.cancel()
            # give the cancelled tasks a second to wind down, then forget them
            done, pending = await asyncio.wait(todo, timeout=1.0)
            todo.difference_update(done)
            todo.difference_update(pending)

# success!
# saveQuals(FSPATH, result)
end = time.time()
print(f"Took {int(end-start)} seconds")

In [None]:
task

## Saving option qualification progress output function

In [None]:
def saveQuals(FSPATH, result):
    """Pickle the results of the finished qualification tasks.

    Writes ``qualed_opts.pkl`` under ``FSPATH`` containing a list with one
    entry per successful task (whatever that task returned). Failed or
    cancelled tasks and tasks that returned ``None`` are left out.

    Args:
        FSPATH: path prefix (ending with a separator) for the pickle file.
        result: iterable of finished asyncio tasks.
    """
    # Materialise the results first: a generator object cannot be pickled.
    qualified = []
    for task in result:
        if task.cancelled() or task.exception() is not None:
            continue  # result() would re-raise here
        outcome = task.result()
        if outcome is not None:
            qualified.append(outcome)

    with open(FSPATH+'qualed_opts.pkl', 'wb') as handle:
        pickle.dump(qualified, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
%%time

# Getting the qualifications ready

# Start from a new client object for this run
ib = IB()

# First 500 raw options only
cts = raw_opts[:500]
# Fresh work sets for progressAsync and the coroutines it drives
todo = set()
result = set()
with ib.connect(HOST, PORT, CID) as ib:
    # total_timeout=0 leaves the overall time limit switched off
    ib.run(progressAsync(cts=cts, algo=qOptsCoro, save_algo=saveQuals, total_timeout=0))