In [1]:
# Drive the QuantConnect research notebook: send code into a cell and read back its response.

from selenium import webdriver
from selenium.common.exceptions import NoSuchWindowException,WebDriverException,StaleElementReferenceException,NoSuchElementException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains

import datetime as dt
from datetime import timedelta

import pandas as pd

import time as tm
import json
import pickle
import pprint
from mt_utils import mt_dict_to_df,mt_df_to_dict

#from mt_data import options_cache,concat_dfs,add_underlying_cache,search_underlying_cache,add_options_cache



In [2]:
def wait_for_element(func,id,tries):
    """Call ``func(id)`` until it stops raising NoSuchElementException.

    Args:
        func: lookup callable taking one locator argument (for example a
            bound ``find_element_by_*`` method).
        id: locator value passed straight through to ``func``.
        tries: maximum number of attempts; values below 1 are treated as a
            single attempt.

    Returns:
        Whatever ``func(id)`` returns on the first successful call.

    Raises:
        NoSuchElementException: re-raised from the last failed attempt.
    """
    while True:
        try:
            return func(id)
        except NoSuchElementException:
            tries -= 1
            # `<= 0` instead of `not tries`: a zero or negative budget used to
            # count down past zero and therefore never terminate.
            if tries <= 0:
                raise
            # short pause between attempts
            tm.sleep(.1)


def manual_login(driver,qc_info):
    """Log in through the second social-login button of the login page.

    Steps, each waiting for its element via ``wait_for_element``:
      1. click the link whose href is "/login";
      2. click the second 'btn' inside the 'btn-social-group' container
         (presumably the Facebook button, judging by the form ids -- confirm);
      3. fill the 'loginform' fields 'email' and 'pass' from
         ``qc_info['email']`` / ``qc_info['password']`` and click 'loginbutton';
      4. wait until an element with class 'start-here-section' exists.

    Returns None.
    """
    wait_for_element(driver.find_element_by_xpath,'//a[@href="/login"]',10).click()

    social_container = wait_for_element(driver.find_element_by_class_name,'btn-social-group',10)
    social_container.find_elements_by_class_name('btn')[1].click()

    login_form = wait_for_element(driver.find_element_by_id,'loginform',10)

    # (form field id, qc_info key) pairs, typed in this order
    for field_id,info_key in (('email','email'),('pass','password')):
        field = login_form.find_element_by_id(field_id)
        field.send_keys(qc_info[info_key])

    login_form.find_element_by_id('loginbutton').click()

    # block until the post-login landing page has rendered
    wait_for_element(driver.find_element_by_class_name,'start-here-section',10)

def load_research_notebook(driver):
    """Open 'research.ipynb' from the file list and switch into its frame.

    Polls for elements with class 'file-name' every 0.1 s until at least one
    exists (there is no upper bound on this polling), clicks the first entry
    whose text is 'research.ipynb' if there is one, then waits for the
    'notebook-frame' element and makes it the driver's active frame.
    """
    entries = wait_for_element(driver.find_elements_by_class_name,"file-name",10)
    while not len(entries):
        tm.sleep(.1)
        entries = wait_for_element(driver.find_elements_by_class_name,"file-name",10)

    notebook_entry = next((entry for entry in entries if entry.text == 'research.ipynb'),None)
    if notebook_entry is not None:
        print("clicking",notebook_entry.text)
        notebook_entry.click()

    frame = wait_for_element(driver.find_element_by_class_name,"notebook-frame",10)
    driver.switch_to.frame(frame)
            
def load_project(driver,info):
    """Navigate ``driver`` to the address stored under ``info['url_project']``."""
    project_url = info['url_project']
    driver.get(project_url)
    
def datetime_as_args(date):
    """Render ``date`` as 'year,month,day,hour,minute' without zero padding.

    The result is meant to be spliced between the parentheses of a
    ``dt.datetime(...)`` call in generated cell code; seconds are dropped.
    """
    clock = date.time()
    fields = (date.year,date.month,date.day,clock.hour,clock.minute)
    return ','.join('{}'.format(field) for field in fields)

def send_combo_keys(driver,key1,key2):
    """Queue the chord key1+key2 (hold key1, press key2, release key1).

    NOTE(review): the chain is only built, never ``.perform()``ed, so per
    Selenium's ActionChains semantics nothing is dispatched to the browser.
    Confirm the intent before adding ``perform()``: the visible caller runs
    this right after a select-all, and a real cursor move would drop that
    selection.
    """
    ActionChains(driver).key_down(key1).send_keys(key2).key_up(key1)

def send_multiline_text(driver,element,text_lines):
    """Type every entry of ``text_lines``, each followed by ENTER.

    The keystrokes go to the first <textarea> found inside ``element``.
    """
    editor_input = element.find_element_by_tag_name("textarea")
    for text in text_lines:
        # intended to jump to the start of the line so that the editor's
        # auto-indentation does not carry over into the next typed line
        send_combo_keys(driver,Keys.ALT,Keys.ARROW_LEFT)
        editor_input.send_keys(text + Keys.ENTER)

class Error(Exception):
    """Base class for the exceptions defined in this notebook."""


class QcResponseError(Error):
    """The remote cell reported an exception or produced no usable output."""


class MissingStrikeError(Error):
    """The remote cell answered with the error code 'MissingStrike'."""


class MissingExpireError(Error):
    """The remote cell answered with the error code 'MissingExpire'."""

def get_cell(cell_num):
    """Return the ``cell_num``-th element with class 'cell' on the current page.

    Reads the module-level ``driver``; raises IndexError when fewer cells exist.
    """
    return driver.find_elements_by_class_name("cell")[cell_num]


def get_code_mirror(cell):
    """Return the first descendant of ``cell`` that has class 'CodeMirror'."""
    return cell.find_element_by_class_name("CodeMirror")


def execute_cell(cell):
    """Run ``cell`` by sending SHIFT+ENTER to the textarea of its editor."""
    editor_input = get_code_mirror(cell).find_element_by_tag_name("textarea")
    run_shortcut = Keys.SHIFT + Keys.ENTER
    editor_input.send_keys(run_shortcut)
    
def enter_cell(cell):
    """Click the editor of ``cell`` to focus it and return that editor element."""
    editor = get_code_mirror(cell)
    editor.click()
    return editor

 

def execute_cell_lines(cell,cell_lines):
    """Replace the contents of ``cell`` with ``cell_lines`` and run it.

    Focuses the cell, selects its current text with CTRL+a so that typing
    overwrites it, types the new lines and finally executes the cell.
    Uses the module-level ``driver``.
    """
    enter_cell(cell)

    editor = get_code_mirror(cell)
    select_all = Keys.CONTROL + 'a'
    editor.find_element_by_tag_name("textarea").send_keys(select_all)

    send_multiline_text(driver,editor,cell_lines)
    execute_cell(cell)
    

def get_cell_response(cell):
    """Return the text of the first 'output_subarea' element inside ``cell``.

    Waits for the element through ``wait_for_element`` with a budget of
    10000 attempts before giving up.
    """
    return wait_for_element(cell.find_element_by_class_name,"output_subarea",10000).text

def get_parsed_cell_response(cell,tries = 100):
    """Poll the output of ``cell`` until it holds a result or an error.

    Each round reads the output text and then:
      * if ``find_error`` reports something, prints it and raises
        MissingStrikeError / MissingExpireError for those two codes,
        QcResponseError when the token contains "Exception", and otherwise
        returns None;
      * else, if some line starts with '[', returns the text from that
        character to the end;
      * else sleeps 0.2 s and uses up one of ``tries``.

    Returns None once ``tries`` is exhausted.
    """
    coded_errors = {
        'MissingStrike': MissingStrikeError,
        'MissingExpire': MissingExpireError,
    }

    remaining = tries
    while remaining:
        output = get_cell_response(cell)

        error = find_error(output)
        if error is None:
            payload_start = find_start_line_sequence(output,'[')
            if payload_start is not None:
                return output[payload_start:]
            # nothing conclusive yet -- wait and look again
            tm.sleep(.2)
            remaining -= 1
            continue

        print("-----------------------------------found error:",error)
        if error in coded_errors:
            raise coded_errors[error]
        if "Exception" in error:
            raise QcResponseError
        return None

    return None


def get_option(sym,start_time,end_time,strike_date,spread_type,strike):
    """Fetch minute option data for one contract through the remote notebook.

    Writes a small program into notebook cell 1, runs it and parses what it
    prints. The generated code calls ``get_option_data`` and ``df_to_dict``
    without importing them (those import lines are commented out), so both
    must already exist in the remote kernel.

    Args:
        sym: symbol, inserted as a quoted string into the generated code.
        start_time, end_time, strike_date: datetimes, rendered with
            ``datetime_as_args`` (minute precision).
        spread_type: inserted as a quoted string (e.g. 'CALL').
        strike: inserted unquoted into the generated code.

    Returns:
        The DataFrame built by ``mt_dict_to_df``; when it is non-empty its
        'expiry' column is converted with ``pd.to_datetime``.

    Raises:
        QcResponseError: when no parsable output was obtained.
        MissingStrikeError, MissingExpireError: propagated from
            ``get_parsed_cell_response``.
    """
    print("get_option:",sym,start_time,end_time,spread_type,strike)

    cell_lines = [
        "import datetime as dt",
        "#from mt_utils import get_option_data",
        "#from mt_utils import df_to_dict",
        "qb = QuantBook()",
        "df = get_option_data(qb,'{}',dt.datetime({}),dt.datetime({}),dt.datetime({}),'{}',{},Resolution.Minute)".format(
            sym,
            datetime_as_args(start_time),
            datetime_as_args(end_time),
            datetime_as_args(strike_date),
            spread_type,
            strike),
        "dct = df_to_dict(df)",
        "print(dct)"
    ]

    cell = get_cell(1)
    execute_cell_lines(cell,cell_lines)

    output_text = get_parsed_cell_response(cell)
    if not output_text:
        raise QcResponseError

    # SECURITY: eval() executes whatever text the remote cell printed. It is
    # kept because the printed dict may not be a pure literal; switch to
    # ast.literal_eval if the payload is confirmed to contain literals only.
    results = eval(output_text)

    df = mt_dict_to_df(results)
    if len(df):
        df['expiry'] = [pd.to_datetime(ts) for ts in df['expiry']]
    return df

def find_start_line_sequence(text,start_char):
    """Return the index of the first ``start_char`` that begins a line.

    A character begins a line when it is the very first character of
    ``text`` or directly follows a newline. Returns None when no line
    starts with ``start_char``.
    """
    for position in range(len(text)):
        at_line_start = position == 0 or text[position - 1] == '\n'
        if at_line_start and text[position] == start_char:
            return position
    return None

def find_error(text):
    """Scan cell output for the first error marker.

    Lines are examined in order:
      * a line whose first word is 'Error:' is printed and its second word
        (the error code, e.g. 'MissingStrike') is returned;
      * a line whose first word contains 'Exception' yields that first word.

    Args:
        text: complete output text of a notebook cell.

    Returns:
        The matched token, '' when an 'Error:' line carries no code, or
        None when no line matches.
    """
    for line in text.splitlines():
        words = line.split()
        if not words:
            # Blank lines used to fall through to words[0] and raise IndexError.
            continue

        if words[0] == 'Error:':
            print("found error",line)
            # A bare 'Error:' line has no second word to report.
            return words[1] if len(words) > 1 else ''

        if 'Exception' in words[0]:
            return words[0]

    return None
        
        
        
def get_history(symbol,start_time,end_time):
    """Run a minute-bar history request for ``symbol`` in the remote notebook.

    Writes the request into notebook cell 1 and executes it. The generated
    code calls ``df_to_dict`` without importing it, so it must already exist
    in the remote kernel.

    NOTE(review): the function currently returns an empty DataFrame right
    after running the cell and never reads the response; everything after
    that return is unreachable and kept only as the intended implementation.

    Args:
        symbol: equity symbol, inserted as a quoted string.
        start_time, end_time: datetimes, rendered with ``datetime_as_args``.

    Returns:
        An empty ``pd.DataFrame`` (see the note above).
    """
    drop_string = "'askclose', 'askhigh', 'asklow', 'askopen', 'asksize', 'bidclose','bidhigh', 'bidlow', 'bidopen', 'bidsize', 'high', 'low','open', 'volume'"

    cell_lines = [
            "import datetime as dt",
            "#from mt_utils import df_to_dict",
            "qb = QuantBook()",
            "equity = qb.AddEquity('{}')".format(symbol),
            "start_time = dt.datetime({})".format(datetime_as_args(start_time)),
            "end_time = dt.datetime({})".format(datetime_as_args(end_time)),
            "history = qb.History(equity.Symbol, start_time, end_time, Resolution.Minute)",
            "history.reset_index(inplace = True)",
            "drop_list = [{}] if len(history) else [] ".format(drop_string),
            "history.drop(drop_list,axis=1,inplace = True)",
            "dct = df_to_dict(history)",
            "print(dct)"]

    cell = get_cell(1)
    execute_cell_lines(cell,cell_lines)

    # Short-circuit: the response is deliberately not read.
    print("get_history returning empty dataframe")
    return pd.DataFrame()

    # ---- unreachable until the early return above is removed ----
    output_text = get_parsed_cell_response(cell)
    if not output_text:
        raise QcResponseError

    # SECURITY: eval() executes whatever text the remote cell printed.
    results = eval(output_text)
    return mt_dict_to_df(results)
        
        
def get_option_bars(sym,start_time,end_time,strike_date,spread_type,strikes):
    """Make sure option bars for every strike are present in the options cache.

    The strikes are rejected as a whole (message printed, nothing
    downloaded) when (max - min) / min exceeds 0.8. Otherwise each strike
    missing from ``mtd.search_options_cache`` is downloaded with
    ``get_option`` and, when non-empty, stored via ``mtd.add_options_cache``.
    Strikes that raise MissingStrikeError or MissingExpireError are skipped;
    any other exception propagates.

    Returns None.
    """
    lowest,highest = min(strikes),max(strikes)
    relative_width = (highest - lowest)/ lowest
    if relative_width > .8:
        print("****************************************    error in strikes")
        return

    for strike in strikes:
        cached = mtd.search_options_cache(sym,strike_date,strike,spread_type,start_time,end_time)
        if len(cached) != 0:
            continue

        print("downloading option",sym,spread_type,strike)
        try:
            bars = get_option(sym,start_time,end_time,strike_date,spread_type,strike)
        except (MissingStrikeError,MissingExpireError):
            continue

        print("get_option length:",len(bars))
        if len(bars):
            # the returned symbol is not always the one that was requested
            bars['symbol'] = sym
            mtd.add_options_cache(sym,bars)
        
def get_underlying_bars(sym,start_time,end_time):
    """Make sure bars for the underlying ``sym`` are in the underlying cache.

    Looks the range up with ``search_underlying_cache``; on a miss downloads
    it with ``get_history`` and, when non-empty, stores it with
    ``add_underlying_cache``.

    NOTE(review): ``search_underlying_cache`` and ``add_underlying_cache``
    are not imported anywhere in the visible notebook (their import line is
    commented out) -- confirm where they should come from.

    Args:
        sym: symbol of the underlying.
        start_time, end_time: bounds of the requested range.

    Returns None.
    """
    # Use this function's own arguments: the previous body read `event`,
    # `ts` and `end_date`, which are neither parameters nor module-level
    # names in the visible notebook.
    df = search_underlying_cache(sym,start_time,end_time)

    if len(df) == 0:
        print("downloading underlying")
        df = get_history(sym,start_time,end_time)
        if len(df):
            df['symbol'] = sym  # the returned symbol is not always the one requested
            add_underlying_cache(df)
        else:
            print("could not download:",sym)
    else:
        print("found in underlying_cache")

def wait_minutes(count):
    """Sleep for ``count`` minutes, announcing the remaining time each minute.

    Args:
        count: number of minutes to wait; zero or negative returns at once.
    """
    # `> 0` instead of truthiness: a negative or fractional count never
    # reaches exactly zero and used to loop forever.
    while count > 0:
        print("waiting {} minutes".format(count))
        tm.sleep(60)
        count -= 1



## Load project and notebook

In [3]:
# Absolute, machine-specific path of the JSON settings file. The code in this
# notebook reads the keys 'email', 'password', 'url_main' and 'url_project'.
QC_INFO_PATH = '/home/cc/QuantConnect/qc_info.json'

with open(QC_INFO_PATH,'r') as infile:
    qc_info = json.load(infile)

# Start a Chrome session through the chromedriver binary at this fixed path.
driver = webdriver.Chrome("/usr/local/bin/chromedriver")

# Open the main page, then log in through its login form.
driver.get(qc_info['url_main'])

manual_login(driver,qc_info)




In [4]:
# Navigate to the project page (qc_info['url_project']).
load_project(driver,qc_info)

# Click 'research.ipynb' in the file list and switch the driver into the
# notebook frame, so later element lookups happen inside that frame.
load_research_notebook(driver)


clicking research.ipynb


# Load data

In [5]:
import json
import datetime as dt
from datetime import timedelta

# Selects which storage backend is bound to the name `mtd`:
# True -> mt_data, False -> mt_pystore.
mt_data_source = False

if mt_data_source:
    import mt_data as mtd
#    from mt_data import download,data_global_set,data_init,data_info,get_holidays
else:
    import mt_pystore as mtd
    #from mt_pystore import data_global_set,data_init,data_info,download,get_holidays

# Initialise the backend: whatever data_init() returns is unpacked into
# data_global_set().
mtd.data_global_set(*mtd.data_init())

mtd.data_info()

# Event records downloaded as JSON; the first one is printed as a sample.
actions = json.loads(mtd.download("https://www.dropbox.com/s/psgut932tw77b82/actions.json?dl=1"))
print(actions[0])

# Symbols that process_events skips, mapped to the reason that is printed
# when they are skipped (a dict, despite the "_list" name).
ignore_sym_list = {
    'HES': "Can't find correct options. HES changes to AHC",
    'SPX': 'QC does not have data'}



{'ts': '2020-05-26T06:52:24', 'action': 'shorting', 'sym': 'ZM', 'spread_type': 'CALL', 'strike_date': '2020-05-29', 'strikes': [180.0, 185.0]}


# Process Events

In [6]:


# Hours added to every event's 'ts' in process_events.
# NOTE(review): the name suggests a shift to US Eastern time from a zone three
# hours behind it -- confirm the time zone of the source timestamps.
HOURS_TO_EST = 3

#EVENT_LIMIT = 125

def process_events():
    """Download option bars for every 'shorting' event in ``actions``.

    An event is handled only when it has all of the keys 'action', 'sym',
    'spread_type', 'strike_date' and 'strikes' and its action is 'shorting'.
    For such an event the function prints a progress line, shifts the event
    timestamp by HOURS_TO_EST hours, skips symbols listed in
    ``ignore_sym_list`` (printing the reason) and otherwise requests bars
    from the shifted timestamp up to 16:00 on the strike date.

    NOTE(review): 'ts' is read from the event but is not among the required
    keys, so an event without it raises KeyError.

    Exceptions from ``get_option_bars`` (e.g. QcResponseError) propagate.
    """
    required_keys = ('action','sym','spread_type','strike_date','strikes')

    for i,event in enumerate(actions):
        # optional debugging cap:
        #        if i > EVENT_LIMIT:
        #            break

        if not all(key in event.keys() for key in required_keys):
            continue
        if event['action'] != 'shorting':
            continue

        print(i,event['sym'],event['strike_date'],event['strikes'])

        event_time = dt.datetime.strptime(event['ts'], '%Y-%m-%dT%H:%M:%S')
        ts = event_time + timedelta(hours=HOURS_TO_EST)

        if event['sym'] in ignore_sym_list:
            print(event['sym'],ignore_sym_list[event['sym']])
            continue

        # the strike date at 16:00 serves both as end of the requested range
        # and as the strike_date argument
        expiry_day = dt.datetime.strptime(event['strike_date'],'%Y-%m-%d')
        end_time = expiry_day.replace(hour = 16,minute = 0)

        # downloading the underlying is currently disabled:
        #        get_underlying_bars(event['sym'],ts,end_time)

        get_option_bars(event['sym'],ts,end_time,end_time,event['spread_type'],event['strikes'])
        
        
            
        
            
            


In [None]:
# Retry the whole run until it completes without a QcResponseError.
# Every attempt first re-runs notebook cell 0 (presumably the cell defining
# the helpers that the generated cell code calls -- confirm) and gives it
# two seconds before processing starts.
keep_trying = True

while keep_trying:
    try:
        cell = get_cell(0)
        enter_cell(cell)
        execute_cell(cell)
        tm.sleep(2)
        process_events()
    except QcResponseError:
        print("QcResponse error")
        wait_minutes(5)
    else:
        keep_trying = False

print("finished")



0 ZM 2020-05-29 [180.0, 185.0]
1 PYPL 2020-06-05 [155.0, 160.0]
2 HES 2020-05-29 [46.0, 44.0]
HES Can't find correct options. HES changes to AHC
3 AAPL 2020-05-29 [315.0, 310.0]
4 GS 2020-05-29 [185.0, 180.0]
6 W 2020-05-29 [170.0, 175.0]
10 AMD 2020-06-05 [56.0, 58.0]
13 SPX 2020-05-27 [2930.0, 2910.0]
SPX QC does not have data
14 SPX 2020-05-27 [2930.0, 2910.0]
SPX QC does not have data
15 HES 2020-06-05 [46.0, 41.0]
HES Can't find correct options. HES changes to AHC
16 MAR 2020-06-05 [94.0, 90.0]
17 BA 2020-06-05 [140.0, 135.0]
19 DKNG 2020-06-05 [40.0, 45.0]
20 COST 2020-06-05 [315.0, 320.0]
24 SBUX 2020-06-05 [80.0, 82.0]
25 DKNG 2020-06-05 [42.0, 50.0]
26 SPY 2020-05-29 [299.0, 289.0]
28 EA 2020-06-05 [118.0, 114.0]
29 NFLX 2020-06-05 [400.0, 390.0]
30 RCL 2020-06-05 [49.0, 47.0]
31 TSLA 2020-06-05 [820.0, 810.0]
32 DKNG 2020-06-05 [48.0, 537.0]
****************************************    error in strikes
34 BYND 2020-06-05 [150.0, 160.0]
35 SMH 2020-06-05 [137.0, 133.0]
38 DKNG 

### Symbols returned under a different ticker by get_history

- HES is returned as AHC