In [1]:
# Drive the QuantConnect research notebook: send code to a cell, then read back its response

from selenium import webdriver
from selenium.common.exceptions import NoSuchWindowException,WebDriverException,StaleElementReferenceException,NoSuchElementException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains

import datetime as dt
from datetime import timedelta

import pandas as pd

import time as tm
import json
import pickle
import pprint
from mt_utils import mt_dict_to_df,mt_df_to_dict

#from mt_data import options_cache,concat_dfs,add_underlying_cache,search_underlying_cache,add_options_cache



In [2]:
def wait_for_element(func,id,tries):
    """Call ``func(id)`` until it stops raising NoSuchElementException.

    Parameters
    ----------
    func : callable
        Lookup taking one locator argument (e.g. a bound
        ``driver.find_element_by_*`` method).
    id : str
        Locator handed to ``func``. The name shadows the builtin ``id`` but
        is kept so existing callers are unaffected.
    tries : int
        Maximum number of attempts; 0.1 s is slept between attempts.
        Values below 1 behave like a single attempt.

    Returns
    -------
    Whatever ``func(id)`` returns on its first successful call.

    Raises
    ------
    NoSuchElementException
        Re-raised from the last failed attempt once ``tries`` is used up.
    """
    while True:
        try:
            return func(id)

        except NoSuchElementException:
            tries -= 1
            # `<= 0` instead of `== 0`: a caller passing 0 or a negative
            # count would otherwise step past zero and retry forever.
            if tries <= 0:
                print("raise 5")
                raise
            tm.sleep(.1)
            
def wait_minutes(count):
    """Sleep for ``count`` minutes, printing the remaining count each minute.

    Parameters
    ----------
    count : int or float
        Number of minutes to wait. Zero or negative values return at once.
    """
    # `> 0` rather than truthiness: a negative or fractional count would
    # otherwise never reach exactly zero and the loop would never end.
    while count > 0:
        print("waiting {} minutes".format(count))
        tm.sleep(60)
        count -= 1

def manual_login(driver,qc_info):
    """Log in through the site's social-login button using stored credentials.

    Parameters
    ----------
    driver : selenium WebDriver
        Browser already pointed at the site's landing page.
    qc_info : dict
        Must provide the 'email' and 'password' entries typed into the form.

    Each page element is awaited with ``wait_for_element`` (10 attempts), so a
    missing element surfaces as the exception that helper re-raises.
    """
    # Follow the "/login" link from the landing page.
    wait_for_element(driver.find_element_by_xpath,'//a[@href="/login"]',10).click()

    # The second button of the social-login group is the provider used here
    # (presumably Facebook, going by the form that follows -- confirm).
    provider_buttons = wait_for_element(
        driver.find_element_by_class_name,'btn-social-group',10
    ).find_elements_by_class_name('btn')
    provider_buttons[1].click()

    # Fill in and submit the provider's login form.
    login_form = wait_for_element(driver.find_element_by_id,'loginform',10)
    login_form.find_element_by_id('email').send_keys(qc_info['email'])
    login_form.find_element_by_id('pass').send_keys(qc_info['password'])
    login_form.find_element_by_id('loginbutton').click()

    # Wait for the 'start-here-section' element, which is looked up only
    # after the login form has been submitted.
    wait_for_element(driver.find_element_by_class_name,'start-here-section',10)

def load_research_notebook(driver):
    """Open 'research.ipynb' from the project file list and enter its frame.

    Polls until at least one element with class 'file-name' exists, clicks the
    one whose text is 'research.ipynb' (if any), then switches the driver into
    the 'notebook-frame' element so later lookups run inside the notebook.

    NOTE(review): the polling loop has no upper bound -- if the file list never
    appears this function does not return.
    """
    entries = wait_for_element(driver.find_elements_by_class_name,"file-name",10)
    while not len(entries):
        tm.sleep(.1)
        entries = wait_for_element(driver.find_elements_by_class_name,"file-name",10)

    # First entry named research.ipynb, or None when the project has no such file.
    notebook_entry = next((entry for entry in entries if entry.text == 'research.ipynb'), None)
    if notebook_entry is not None:
        print("clicking",notebook_entry.text)
        notebook_entry.click()

    frame = wait_for_element(driver.find_element_by_class_name,"notebook-frame",10)
    driver.switch_to.frame(frame)
            
def load_project(driver,info):
    """Navigate ``driver`` to the project page stored under ``info['url_project']``."""
    project_url = info['url_project']
    driver.get(project_url)
    
def datetime_as_args(date):
    """Render a datetime as 'year,month,day,hour,minute'.

    The result is spliced into generated code as the argument list of a
    ``dt.datetime(...)`` call, so the fields are not zero-padded.
    """
    clock = date.time()
    fields = (date.year, date.month, date.day, clock.hour, clock.minute)
    return ','.join(str(field) for field in fields)

def send_combo_keys(driver,key1,key2):
    """Queue a modifier combo: press ``key1``, send ``key2``, release ``key1``.

    NOTE(review): the chain is built but ``perform()`` is never called, so as
    written no key events reach the browser. Adding ``perform()`` would also
    change what send_multiline_text does right after the Ctrl+A selection made
    in execute_cell_lines -- verify in the browser before enabling it.
    """
    ActionChains(driver).key_down(key1).send_keys(key2).key_up(key1)

def send_multiline_text(driver,element,text_lines):
    """Type ``text_lines`` into the textarea inside ``element``, one line at a time.

    Each line is followed by ENTER. Before every line the ALT+LEFT combo is
    requested via send_combo_keys (meant to return to the start of the line
    in case the editor auto-indented).
    """
    editor_input = element.find_element_by_tag_name("textarea")
    for code_line in text_lines:
        # go to beginning of line in case of auto indention
        send_combo_keys(driver,Keys.ALT,Keys.ARROW_LEFT)
        keystrokes = code_line + Keys.ENTER
        editor_input.send_keys(keystrokes)

class Error(Exception):
    """Base class for the exceptions defined in this notebook."""

class QcResponseError(Error):
    """Raised when the remote cell reports an exception or gives no usable output.

    Attributes
    ----------
    error_string : the text passed to the constructor.
    """

    def __init__(self,error_word):
        # Kept as a plain attribute; handlers read ``e.error_string``.
        self.error_string = error_word
        
class MissingStrikeError(Error):
    """Raised when the remote cell reports a 'MissingStrike' error line.

    The three constructor arguments are the words that follow 'MissingStrike'
    on that line, stored as ``eq_strike``, ``lt_strike`` and ``gt_strike``.
    """

    def __init__(self,eq_string,lt_string,gt_string):
        self.eq_strike, self.lt_strike, self.gt_strike = eq_string, lt_string, gt_string

class MissingExpireError(Error):
    """Raised when the remote cell reports a 'MissingExpire' error line.

    The three constructor arguments are the words that follow 'MissingExpire'
    on that line, stored as ``eq_expire``, ``lt_expire`` and ``gt_expire``.
    """

    def __init__(self,eq_string,lt_string,gt_string):
        self.eq_expire, self.lt_expire, self.gt_expire = eq_string, lt_string, gt_string

def get_cell(cell_num):
    """Return the ``cell_num``-th element with class 'cell' in the current frame.

    Uses the module-level ``driver``; raises IndexError when the notebook has
    fewer cells than requested.
    """
    return driver.find_elements_by_class_name("cell")[cell_num]


def get_code_mirror(cell):
    """Return the 'CodeMirror' editor element nested inside ``cell``."""
    return cell.find_element_by_class_name("CodeMirror")


def execute_cell(cell):
    """Run ``cell`` by sending SHIFT+ENTER to its editor textarea."""
    editor_input = get_code_mirror(cell).find_element_by_tag_name("textarea")
    # execute cell with SHIFT_ENTER
    editor_input.send_keys(Keys.SHIFT + Keys.ENTER)
    
def enter_cell(cell):
    """Click into the editor of ``cell`` and return that editor element."""
    editor = get_code_mirror(cell)
    editor.click()
    return editor

 

def execute_cell_lines(cell,cell_lines):
    """Replace the contents of ``cell`` with ``cell_lines`` and run it.

    Steps: focus the editor, select everything with CTRL+A so the typed text
    replaces it, type the new lines, then execute the cell. Relies on the
    module-level ``driver``.
    """
    enter_cell(cell)

    editor = get_code_mirror(cell)
    editor_input = editor.find_element_by_tag_name("textarea")
    # select all text in cell so it will be replaced
    editor_input.send_keys(Keys.CONTROL + 'a')
    send_multiline_text(driver,editor,cell_lines)

    execute_cell(cell)
    

def get_cell_response(cell):
    """Return the text of the cell's 'output_subarea' element.

    Waits for the element with up to 10000 attempts of wait_for_element, so a
    cell that takes long to produce output is tolerated.
    """
    # read results
    return wait_for_element(cell.find_element_by_class_name,"output_subarea",10000).text

def get_parsed_cell_response(cell,tries = 200):
    """Poll the cell output until it holds a result or a recognised error.

    Parameters
    ----------
    cell : element
        Notebook cell whose output is read with get_cell_response.
    tries : int
        Maximum number of polls, 0.2 s apart.

    Returns
    -------
    str or None
        The output text starting at the first line that begins with '[',
        or None when an unclassified error line was found or the polls ran out.

    Raises
    ------
    MissingStrikeError, MissingExpireError
        For an error line of the form '<word> MissingStrike|MissingExpire a b c'.
    QcResponseError
        For an error line whose first word contains 'Exception'.
    """
    while tries > 0:
        text = get_cell_response(cell)

        error = find_error(text)
        if error is not None:
            words = error.split()
            print("-----------------------------------found error:",error)
            # The Missing* errors carry three values after the kind, so the
            # line needs five words; shorter lines (e.g. a bare "Exception:")
            # used to raise IndexError here.
            if len(words) >= 5:
                if words[1] == 'MissingStrike':
                    raise MissingStrikeError(words[2],words[3],words[4])
                if words[1] == 'MissingExpire':
                    raise MissingExpireError(words[2],words[3],words[4])
            if words and words[0].find("Exception") >= 0:
                raise QcResponseError(words[0])
            print("reached here 1")
            return None

        start_index = find_start_line_sequence(text,'[')
        if start_index is not None:
            return text[start_index:]

        tm.sleep(.2)
        tries -= 1

    return None

def find_start_line_sequence(text,start_char):
    """Return the index of the first ``start_char`` that begins a line.

    A character begins a line when it is the first character of ``text`` or
    directly follows a newline. Returns None when there is no such character.
    """
    for position in range(len(text)):
        at_line_start = position == 0 or text[position - 1] == '\n'
        if at_line_start and text[position] == start_char:
            return position
    return None

def find_error(text):
    """Return the first line of ``text`` that looks like an error report.

    A line qualifies when its first whitespace-separated word is 'Error:' or
    contains 'Exception'. Blank lines are skipped.

    Returns
    -------
    str or None
        The matching line, or None when no line qualifies.
    """
    for line in text.splitlines():
        words = line.split()
        # Blank or whitespace-only lines have no first word; the 'Exception'
        # check used to index words[0] regardless and raise IndexError.
        if not words:
            continue
        if words[0] == 'Error:':
            print("found error",line)
            return line
        if words[0].find('Exception') >= 0:
            return line

    return None

def execute_cell_code(cell_lines):
    """Run ``cell_lines`` in notebook cell 1 and return the result as a frame.

    The remote code is expected to print a dict literal; that text is
    evaluated locally and converted with ``mt_dict_to_df``.

    Returns
    -------
    The frame built by ``mt_dict_to_df``; its 'expiry' column, when present
    and the frame is non-empty, is converted with ``pd.to_datetime``.

    Raises
    ------
    QcResponseError
        When no parsable output was obtained (also propagated from
        get_parsed_cell_response, along with the Missing* errors).
    """
    cell = get_cell(1)

    execute_cell_lines(cell,cell_lines)

    output_text = get_parsed_cell_response(cell)

    if not output_text:
        print("raise 2")
        raise QcResponseError("Timed out, no response")

    # SECURITY NOTE(review): eval() executes whatever text the remote notebook
    # printed. Kept because the printed dict may not be a pure literal, but the
    # output should be treated as trusted-only; consider ast.literal_eval if
    # the payload allows it.
    results = eval(output_text)

    df = mt_dict_to_df(results)

    # Not every request returns an 'expiry' column (e.g. history requests that
    # keep only some columns), so convert it only when it exists.
    if len(df) and 'expiry' in df:
        df['expiry'] = [pd.to_datetime(ts) for ts in df['expiry']]

    return df
        

def get_option(sym,start_time,end_time,strike_date,spread_type,strike):
    """Request option data for one strike from the remote notebook.

    Builds the cell source that calls the remote ``get_option_data`` helper
    with the given symbol, time window, strike date, spread type and strike,
    runs it through execute_cell_code and returns the resulting frame.
    """
    print("get_option:",sym,start_time,end_time,spread_type,strike)

    call_args = (
        sym,
        datetime_as_args(start_time),
        datetime_as_args(end_time),
        datetime_as_args(strike_date),
        spread_type,
        strike,
    )
    # The first variant is sent as a comment line (it starts with '#'); only
    # the second one actually runs remotely.
    disabled_call = "#df = get_option_data(qb,'{}',dt.datetime({}),dt.datetime({}),dt.datetime({}),'{}',{},Resolution.Minute)".format(*call_args)
    active_call = "df = get_option_data(qb,'{}',dt.datetime({}),dt.datetime({}),dt.datetime({}),'{}',{})".format(*call_args)

    return execute_cell_code([
        "import datetime as dt",
        "#from mt_utils import get_option_data",
        "#from mt_utils import df_to_dict",
        "qb = QuantBook()",
        disabled_call,
        active_call,
        "dct = df_to_dict(df)",
        "print(dct)",
    ])

def get_history(symbol,start_time,end_time,keep_list = []):
    """Request equity history for ``symbol`` from the remote notebook.

    ``keep_list`` is formatted into the remote ``keep_columns(history, ...)``
    call. Returns the frame produced by execute_cell_code.
    """
    add_equity_line = "equity = qb.AddEquity('{}')".format(symbol)
    start_line = "start_time = dt.datetime({})".format(datetime_as_args(start_time))
    end_line = "end_time = dt.datetime({})".format(datetime_as_args(end_time))
    keep_line = "keep_columns(history,{})".format(keep_list)

    remote_source = [
        "import datetime as dt",
        "qb = QuantBook()",
        add_equity_line,
        start_line,
        end_line,
        "history = qb.History(equity.Symbol, start_time, end_time)",
        "history.reset_index(inplace = True)",
        "#print('columns: ',history.columns)",
        keep_line,
        "dct = df_to_dict(history)",
        "print(dct)",
    ]
    return execute_cell_code(remote_source)

def get_futures(symbol,start_time,end_time,keep_list = []):
    """Request futures history for ``symbol`` from the remote notebook.

    NOTE(review): ``keep_list`` is accepted but not used -- the remote
    ``keep_columns`` call always keeps
    ['expiry','symbol','time','close','volume'].

    Returns the frame produced by execute_cell_code.
    """
    add_future_line = "es = qb.AddFuture('{}')".format(symbol)
    start_line = "start_time = dt.datetime({})".format(datetime_as_args(start_time))
    end_line = "end_time = dt.datetime({})".format(datetime_as_args(end_time))

    remote_source = [
        "import datetime as dt",
        "qb = QuantBook()",
        add_future_line,
        "#es = qb.AddFuture(Futures.Indices.SP500EMini)",
        "es.SetFilter(timedelta(0), timedelta(100))",
        start_line,
        end_line,
        "future_history = qb.GetFutureHistory(es.Symbol,start_time,end_time)",
        "futures_data = future_history.GetAllData()",
        "futures_data.reset_index(inplace = True)",
        "keep_columns(futures_data,['expiry','symbol','time','close','volume'])",
        "dct = df_to_dict(futures_data)",
        "print(dct)",
    ]
    return execute_cell_code(remote_source)

        
def get_option_bars(sym,start_time,end_time,strike_date,spread_type,strikes):
    """Make sure option bars for every strike in ``strikes`` are cached.

    For each strike: skip it when a MissingStrike/MissingExpire error was
    recorded earlier, skip the download when the options cache already has
    data, otherwise download via get_option and store the result. Missing
    strike/expiry errors raised by the download are recorded in the error
    collection. Uses the module-level ``mtd``, ``c_o`` and ``c_o_e``.

    Returns None. Nothing is processed when the strikes span more than 80%
    of the lowest strike.
    """
    lowest_strike = min(strikes)
    highest_strike = max(strikes)
    if (highest_strike - lowest_strike)/ lowest_strike > .8:
        print("****************************************    error in strikes")
        return

    # (error kind stored in the error collection, message printed when found)
    recorded_error_kinds = (
        ('MissingStrike',"Pre existing error MissingStrike"),
        ('MissingExpire',"Pre existing error MissingExpire"),
    )

    for strike in strikes:
        known_message = None
        for error_kind,message in recorded_error_kinds:
            if len(mtd.search_options_error(c_o_e,error_kind,sym,start_time,end_time,strike_date,spread_type,strike)):
                known_message = message
                break
        if known_message is not None:
            print(known_message,sym,strike_date,spread_type,strike)
            continue

        df = mtd.search_options_cache(c_o,sym,strike_date,strike,spread_type,start_time,end_time)
        if len(df) != 0:
            continue

        print("downloading option",sym,spread_type,strike)

        try:
            df = get_option(sym,start_time,end_time,strike_date,spread_type,strike)
            if len(df):
                df['expiry'] = list(map(pd.to_datetime, df['expiry']))

        except MissingStrikeError as e:
            info = "{} {}".format(e.lt_strike,e.gt_strike)
            mtd.add_options_error(c_o_e,"MissingStrike",sym,start_time,end_time,strike_date,spread_type,strike,info)
            print("MissingStrikeError",e.eq_strike,e.lt_strike,e.gt_strike)
            df = pd.DataFrame()
        except MissingExpireError as e:
            info = "{} {}".format(e.lt_expire,e.gt_expire)
            mtd.add_options_error(c_o_e,"MissingExpire",sym,start_time,end_time,strike_date,spread_type,strike,info)
            print("missingexpireerror",e.eq_expire,e.lt_expire,e.gt_expire)
            df = pd.DataFrame()

        print("get_option length:",len(df))

        if len(df):
            df['symbol'] = sym  #symbol return is not always symbol searched for
            mtd.add_options_cache(c_o,sym,df)
        
def get_underlying_bars(collection,sym,start_time,end_time,keep_list = []):
    """Make sure bars for ``sym`` over the window exist in ``collection``.

    Looks the window up in the underlying cache first; on a miss the history
    is downloaded with get_history (``keep_list`` passed through) and, if
    non-empty, stored under ``sym``. Returns None.
    """
    cached = mtd.search_underlying_cache(collection,sym,start_time,end_time)
    if len(cached) != 0:
        print("found in underlying_cache",sym)
        return

    print("downloading underlying",sym)
    downloaded = get_history(sym,start_time,end_time,keep_list)
    if not len(downloaded):
        print("could not download:",sym)
        return

    downloaded['symbol'] = sym  #symbol return is not always symbol searched for
    mtd.add_underlying_cache(collection,sym,downloaded)
        
def get_futures_bars(collection,sym,start_time,end_time,keep_list = []):
    """Make sure futures bars for ``sym`` over the window exist in ``collection``.

    Looks the window up in the futures cache first; on a miss the data is
    downloaded with get_futures (``keep_list`` passed through) and, if
    non-empty, stored under ``sym``. Returns None.
    """
    df = mtd.search_futures_cache(collection,sym,start_time,end_time)

    if len(df) == 0:
        # Message corrected: this path downloads futures, not an underlying.
        print("downloading futures",sym)
        df = get_futures(sym,start_time,end_time,keep_list)
        if len(df):
            df['symbol'] = sym  #symbol return is not always symbol searched for
            mtd.add_futures_cache(collection,sym,df)
        else:
            print("could not download:",sym)
    else:
        print("found in futures cache",sym)




## load project and notebook

In [3]:
# JSON file with the site settings and credentials used by this notebook
# (the code reads its 'url_main', 'url_project', 'email' and 'password' keys).
# NOTE(review): absolute, machine-specific path.
QC_INFO_PATH = '/home/cc/QuantConnect/qc_info.json'

with open(QC_INFO_PATH,'r') as infile:
    qc_info = json.load(infile)

# `driver` is a module-level global used by get_cell / execute_cell_lines.
# NOTE(review): chromedriver location is hard-coded.
driver = webdriver.Chrome("/usr/local/bin/chromedriver")

# Open the landing page, then log in through the browser.
driver.get(qc_info['url_main'])

manual_login(driver,qc_info)




In [4]:
# Navigate to the project page, then open research.ipynb and switch the
# driver into the notebook frame so cell lookups work.
load_project(driver,qc_info)

load_research_notebook(driver)

#tm.sleep(2)



clicking research.ipynb


In [5]:
# Focus and run the remote notebook's first cell
# (presumably it defines the helpers the generated code calls -- confirm).
cell = get_cell(0)
enter_cell(cell)
execute_cell(cell)


# Load data

In [None]:
import json
import datetime as dt
from datetime import timedelta
import pystore


import mt_pystore as mtd

# Open the local pystore collections. c_o and c_o_e are read as globals by
# get_option_bars; c_u is passed to get_underlying_bars by process_events.
pystore.set_path("pystore_mt")
store_mt = pystore.store('mt_store')
c_o = store_mt.collection('mt_options')
c_o_e = store_mt.collection('mt_options_error')
c_u = store_mt.collection('mt_underlying')


# Download the list of events to process; process_events iterates over it.
actions = json.loads(mtd.download("https://www.dropbox.com/s/psgut932tw77b82/actions.json?dl=1"))
print(actions[0])

# Symbols skipped by process_events, mapped to the reason printed when skipped.
ignore_sym_list = {
    'HES': "Can't find correct options. HES changes to AHC",
    'SPX': 'QC does not have data'}



# Process Events

In [None]:


# Hours added to each event timestamp before use
# (presumably shifts the feed's timezone to US Eastern -- TODO confirm).
HOURS_TO_EST = 3


def process_events():
    """Download and cache bars for every 'shorting' event in ``actions``.

    For each qualifying event the underlying bars and the option bars of all
    listed strikes are fetched from the event time (shifted by HOURS_TO_EST)
    until 16:00 on the strike date. Events lacking a required key, events
    with another action, and symbols in ``ignore_sym_list`` are skipped.

    Uses the module-level ``actions``, ``ignore_sym_list`` and ``c_u``.
    Exceptions from the download helpers (e.g. QcResponseError) propagate.
    """
    # 'ts' is required as well: it is parsed below, and an event without it
    # used to pass this check and then fail with KeyError.
    required_keys = ('action','sym','spread_type','strike_date','strikes','ts')

    for i,event in enumerate(actions):

        if not all(key in event for key in required_keys):
            continue

        if event['action'] != 'shorting':
            continue

        print(i,event['sym'],event['strike_date'],event['strikes'])

        ts = dt.datetime.strptime(event['ts'], '%Y-%m-%dT%H:%M:%S') + timedelta(hours=HOURS_TO_EST)

        if event['sym'] in ignore_sym_list:
            print(event['sym'],ignore_sym_list[event['sym']])
            continue

        # The download window ends at 16:00 on the strike date.
        end_date = dt.datetime.strptime(event['strike_date'],'%Y-%m-%d')
        end_time = end_date.replace(hour = 16,minute = 0)

        get_underlying_bars(c_u,event['sym'],ts,end_time,['symbol','time','close'])

        get_option_bars(event['sym'],ts,end_time,end_time,event['spread_type'],event['strikes'])

        
        
            
        
            
            


In [None]:
# Re-run the first remote cell and process all events; when the remote side
# reports an error, wait five minutes and start over.
keep_trying = True

while keep_trying:
    try:
        cell = get_cell(0)
        enter_cell(cell)
        execute_cell(cell)
        tm.sleep(2)
        process_events()
    except QcResponseError as e:
        print("except 4")
        print("QcResponse error:",e.error_string)
        wait_minutes(5)
    else:
        # A full pass finished without a remote error: stop retrying.
        keep_trying = False

print("finished")



### symbols that convert after get_history

- HES is returned as AHC

## symbol download

In [None]:
import pystore
import mt_pystore as mtd


# Switch to the "large" store. This rebinds c_u, which earlier pointed at the
# 'mt_underlying' collection of the mt store.
pystore.set_path("pystore_large")
store_large = pystore.store('store_large')
c_u = store_large.collection('underlying')


In [None]:
from pandas_market_calendars.exchange_calendar_nyse import NYSEExchangeCalendar
# Build the NYSE schedule for 2019-01-01..2020-01-01 and keep its index
# entries as a plain list for the chunked download below.
nysecal = NYSEExchangeCalendar()
nysesch = nysecal.schedule('2019-01-01', '2020-01-01')

all_dates = list(nysesch.index)

# Cell output: membership check of a date against the schedule index.
# NOTE(review): 2020-07-03 lies outside the schedule range requested above.
dt.datetime(2020, 7, 3) in nysesch.index


In [None]:
# Number of schedule dates collected above.
print(len(all_dates))

In [None]:
# Download SPY bars in chunks of `num_days` schedule dates: each request runs
# from 09:31 on the chunk's first date to 16:00 on its last date.
num_days = 20
total_dates = len(all_dates)
for chunk_first in range(0,total_dates,num_days):
    chunk_last = min(chunk_first + num_days,total_dates) - 1

    start_time = all_dates[chunk_first].replace(hour = 9,minute = 31)
    end_time = all_dates[chunk_last].replace(hour = 16,minute = 0)

    print(chunk_first,start_time)

    get_underlying_bars(c_u,'SPY',start_time,end_time,['symbol','time','close','volume'])

In [None]:
import datetime as dt
# Single request for SPY covering 2018-01-01 09:31 to 2019-01-01 09:30,
# keeping only the listed columns.
start_time = dt.datetime(2018,1,1,9,31)
end_time = dt.datetime(2019,1,1,9,30)
keep_list = ['symbol','time','close','volume']
    
get_underlying_bars(c_u,'SPY',start_time,end_time,keep_list)

## Add to large from mt

In [None]:
import pystore
import random

# Collect the item names stored in the mt store's underlying collection ...
pystore.set_path("pystore_mt")
print("stores",pystore.list_stores())
mt_store = pystore.store('mt_store')
c_u = mt_store.collection('mt_underlying')
syms = list(c_u.list_items())

# ... in random order (unseeded, so the order differs between runs).
random.shuffle(syms)

print(syms)


# Target collection in the large store for the downloads that follow.
pystore.set_path("pystore_large")
store_large = pystore.store('store_large')
c_u_large = store_large.collection('underlying')




In [None]:
# Fetch 2021-01-01 09:31 .. 2021-07-05 14:30 bars for every symbol collected
# from the mt store and cache them in the large underlying collection.
start_time = dt.datetime(2021,1,1,9,31)
end_time = dt.datetime(2021,7,5,14,30)
keep_list = ['symbol','time','close','volume']

for position,symbol_name in enumerate(syms):
    print(position,symbol_name)
    get_underlying_bars(c_u_large,symbol_name,start_time,end_time,keep_list)

print("finished")
    
    

## Add futures to large

In [None]:
import pystore
import random

# Futures collection of the large store; passed to get_futures_bars below.
pystore.set_path("pystore_large")
store_large = pystore.store('store_large')
c_f_large = store_large.collection('futures')


In [None]:
# Download ES futures bars week by week between start_time and end_time.
start_time = dt.datetime(2020,1,1,9,31)
end_time = dt.datetime(2021,7,10,14,30)
# Fixed: a misplaced quote made the first two entries concatenate into the
# single string 'expiry,symbol'.
keep_list = ['expiry','symbol','time','close','volume']

# Weekly boundaries. freq='W' yields Sunday-anchored dates, so the first one
# can fall after start_time (and the range can even be empty); add both ends
# explicitly so the whole window is covered.
date_index = pd.date_range(start_time,end_time,freq = 'W')
date_list = list(date_index)
if not date_list or date_list[0] != start_time:
    date_list.insert(0,start_time)
if date_list[-1] != end_time:
    date_list.append(end_time)

# Consecutive boundary pairs; separate names keep start_time/end_time intact.
for chunk_start,chunk_end in zip(date_list[:-1],date_list[1:]):
    get_futures_bars(c_f_large,'ES',chunk_start,chunk_end,keep_list)

print("finished")


## Some checks on large

In [None]:
# Sanity check: print the row count of every item in the large underlying
# collection.
pystore.set_path("pystore_large")
store_large = pystore.store('store_large')
c_u_large = store_large.collection('underlying')

for i,sym in enumerate(c_u_large.list_items()):
    
    item = c_u_large.item(sym)
    
    # `data` and `metadata` are read but not used further in this cell.
    data = item.data  # <-- Dask dataframe (see dask.pydata.org)
    metadata = item.metadata
    underlying_cache = item.to_pandas()
    print(i,sym,len(underlying_cache))




In [None]:
# Scratch arithmetic (presumably 15-minute intervals in a 6.5-hour session -- confirm).
6.5 * 4

In [None]:
# Scratch: 27 timestamps spaced 15 minutes apart, starting 2020-06-05 09:31.
start_time = dt.datetime(2020,6,5,9,31)

[start_time + timedelta(minutes = 15) *x for x in range(27)]

In [None]:
# Scratch: weekly dates between 2017-05-05 and 2019-05-05 (rebinds date_index).
date_index = pd.date_range(start="2017-05-05",end = "2019-05-05",freq = 'W')

In [None]:
# Print every date of the weekly index built in the previous cell.
for date in date_index:
    print(date)