In [1]:
# Drive the QuantConnect research notebook: send code to a cell, run it, and read the response

from selenium import webdriver
from selenium.common.exceptions import NoSuchWindowException,WebDriverException,StaleElementReferenceException,NoSuchElementException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains

import datetime as dt
from datetime import timedelta

import time as tm
import json
import pickle
import pprint
from mt_utils import mt_dict_to_df,mt_df_to_dict
from mt_data import options_cache,concat_dfs,add_underlying_cache,search_underlying_cache,add_options_cache
from mt_data import search_options_cache






In [2]:
def wait_for_element(func,id,tries):
    """Call ``func(id)`` until it stops raising NoSuchElementException.

    Args:
        func: Lookup callable taking one locator argument; callers in this
            notebook pass bound ``find_element_by_*`` methods.
        id: Locator value forwarded to ``func`` unchanged.  The name shadows
            the ``id`` builtin but is kept so existing callers keep working.
        tries: Maximum number of attempts.  Values below 1 are treated as a
            single attempt.

    Returns:
        Whatever ``func(id)`` returns on the first successful attempt.

    Raises:
        NoSuchElementException: re-raised from the last failed attempt.
    """
    while True:
        try:
            return func(id)
        except NoSuchElementException:
            tries -= 1
            # `<= 0` instead of `not tries`: with tries == 0 on entry the
            # counter goes negative and a truthiness test would never fire,
            # leaving the loop spinning forever on a missing element.
            if tries <= 0:
                raise  # bare raise keeps the original traceback
            tm.sleep(.1)


def manual_login(driver,qc_info):
    """Log in through the site's social-login form and wait for the landing page.

    Args:
        driver: Selenium WebDriver already showing the page that carries the
            ``/login`` link.
        qc_info: Mapping providing the ``'email'`` and ``'password'`` entries
            typed into the login form.

    Returns:
        None.  Returns once an element with class ``start-here-section`` is
        found; ``wait_for_element`` raises if any step's element never shows.
    """
    wait_for_element(driver.find_element_by_xpath,'//a[@href="/login"]',10).click()

    # The second button of the social group is the provider used here
    # (presumably Facebook, judging by the form ids below - confirm).
    provider_group = wait_for_element(driver.find_element_by_class_name,'btn-social-group',10)
    provider_group.find_elements_by_class_name('btn')[1].click()

    login_form = wait_for_element(driver.find_element_by_id,'loginform',10)

    # Fill the credential fields in order: (form element id, qc_info key).
    for field_id,info_key in (('email','email'),('pass','password')):
        login_form.find_element_by_id(field_id).send_keys(qc_info[info_key])

    login_form.find_element_by_id('loginbutton').click()

    # Block until the post-login page has rendered.
    wait_for_element(driver.find_element_by_class_name,'start-here-section',10)

def load_research_notebook(driver):
    """Open ``research.ipynb`` from the file list and switch into its frame.

    Polls until at least one ``file-name`` element exists, clicks the first
    one whose text is ``research.ipynb`` (if any), then switches the driver
    into the element with class ``notebook-frame``.

    NOTE(review): the poll has no upper bound, and a missing
    ``research.ipynb`` entry is not reported - the code simply goes on to
    wait for the notebook frame.
    """
    entries = wait_for_element(driver.find_elements_by_class_name,"file-name",10)
    while len(entries) == 0:
        tm.sleep(.1)
        entries = wait_for_element(driver.find_elements_by_class_name,"file-name",10)

    notebook_entry = next((entry for entry in entries if entry.text == 'research.ipynb'),None)
    if notebook_entry is not None:
        print("clicking",notebook_entry.text)
        notebook_entry.click()

    frame = wait_for_element(driver.find_element_by_class_name,"notebook-frame",10)
    driver.switch_to.frame(frame)
            
def load_project(driver,info):
    """Navigate ``driver`` to the project page stored under ``info['url_project']``."""
    project_url = info['url_project']
    driver.get(project_url)
    
def datetime_as_args(date):
    """Render a datetime as ``'year,month,day,hour,minute'`` (no zero padding).

    The result is spliced into generated ``dt.datetime(...)`` calls, so it is
    the comma-separated argument list rather than a display format.
    """
    clock = date.time()
    fields = (date.year,date.month,date.day,clock.hour,clock.minute)
    return ','.join(str(field) for field in fields)

def send_combo_keys(driver,key1,key2):
    """Queue a ``key1`` + ``key2`` chord on a fresh ActionChains object.

    NOTE(review): the chain is only built - ``perform()`` is never called, so
    no key events are actually dispatched.  Do not "fix" this in isolation:
    ``send_multiline_text`` calls this right after the cell text has been
    selected, and a working cursor-movement chord would presumably collapse
    that selection - confirm before enabling.
    """
    chord = ActionChains(driver)
    for queue_step,key in (
        (chord.key_down,key1),
        (chord.send_keys,key2),
        (chord.key_up,key1),
    ):
        queue_step(key)

def send_multiline_text(driver,element,text_lines):
    """Type each entry of ``text_lines`` into ``element``'s textarea, one per line.

    Args:
        driver: WebDriver handed on to ``send_combo_keys``.
        element: Element containing the ``<textarea>`` that receives the keys.
        text_lines: Iterable of strings; each is sent followed by ENTER.
    """
    editor_input = element.find_element_by_tag_name("textarea")
    for text in text_lines:
        # Intended to jump to the start of the line in case of auto-indentation.
        send_combo_keys(driver,Keys.ALT,Keys.ARROW_LEFT)
        editor_input.send_keys(text + Keys.ENTER)

class Error(Exception):
    """Base class for the exceptions defined in this notebook."""


class QcResponseError(Error):
    """Raised when a notebook cell yields no parsable response."""

def get_cell(cell_num):
    """Return the element at index ``cell_num`` among those with class ``cell``.

    Reads the module-level ``driver``; raises IndexError when fewer than
    ``cell_num + 1`` such elements are present.
    """
    return driver.find_elements_by_class_name("cell")[cell_num]


def get_code_mirror(cell):
    """Return the descendant of ``cell`` that has class ``CodeMirror``."""
    return cell.find_element_by_class_name("CodeMirror")


def execute_cell(cell):
    """Run ``cell`` by sending SHIFT+ENTER to its editor textarea."""
    editor_input = get_code_mirror(cell).find_element_by_tag_name("textarea")
    run_shortcut = Keys.SHIFT + Keys.ENTER
    editor_input.send_keys(run_shortcut)
    
def enter_cell(cell):
    """Click the editor of ``cell`` and return that editor element."""
    editor = get_code_mirror(cell)
    editor.click()
    return editor

 

def execute_cell_lines(cell,cell_lines):
    """Replace the contents of ``cell`` with ``cell_lines`` and run it.

    Uses the module-level ``driver`` when typing the text.
    """
    enter_cell(cell)

    # Look the editor up again after the click rather than reusing the
    # element returned by enter_cell.
    editor = get_code_mirror(cell)

    # Select everything currently in the cell so the typed text replaces it.
    editor.find_element_by_tag_name("textarea").send_keys(Keys.CONTROL + 'a')
    send_multiline_text(driver,editor,cell_lines)

    execute_cell(cell)
    

def get_cell_response(cell):
    """Return the text of ``cell``'s ``output_subarea`` element.

    Waits for the element through ``wait_for_element`` with a budget of
    10000 attempts.
    """
    return wait_for_element(cell.find_element_by_class_name,"output_subarea",10000).text

def get_parsed_cell_response(cell,tries = 100):
    """Poll ``cell``'s output until a line starts with ``[`` and return from there.

    Args:
        cell: Cell element whose output is read.
        tries: Number of polls, 0.2 seconds apart.

    Returns:
        The output text from the first line-initial ``[`` to the end, or
        None if no such line showed up within ``tries`` polls.
    """
    remaining = tries
    while remaining:
        output = get_cell_response(cell)

        offset = find_start_line_sequence(output,'[')
        if offset is not None:
            return output[offset:]

        tm.sleep(.2)
        remaining -= 1

    return None


def get_option(sym,start_time,end_time,spread_type,strike):
    """Run an option-data query in the remote notebook and return it as a DataFrame.

    Types a short script into notebook cell 1, executes it, and parses the
    printed result.  The script calls ``get_option_data`` and ``df_to_dict``,
    whose imports are sent commented out, so both are expected to exist in
    the remote kernel already (presumably defined by cell 0 - confirm).

    Args:
        sym: Underlying symbol, inserted into the script as a quoted string.
        start_time: datetime for the start of the requested range.
        end_time: datetime for the end of the requested range.
        spread_type: Option type string, inserted as a quoted string.
        strike: Strike value, inserted unquoted.

    Returns:
        The result of ``mt_dict_to_df`` applied to the evaluated cell output.

    Raises:
        QcResponseError: when no parsable output was read from the cell.
    """
    cell_lines = [
        "import datetime as dt",
        "#from mt_utils import get_option_data",
        "#from mt_utils import df_to_dict",
        "qb = QuantBook()",
        "df = get_option_data(qb,'{}',dt.datetime({}),dt.datetime({}),'{}',{},Resolution.Minute)".format(
            sym,
            datetime_as_args(start_time),
            datetime_as_args(end_time),
            spread_type,
            strike),
        "dct = df_to_dict(df)",
        "print(dct)"
    ]

    cell = get_cell(1)

    execute_cell_lines(cell,cell_lines)

    output_text = get_parsed_cell_response(cell)

    if not output_text:
        raise QcResponseError(
            "no parsable output for option {} {} {}".format(sym,spread_type,strike))

    # SECURITY: eval() executes whatever text the remote cell printed.  Kept
    # because the printed structure may contain non-literal reprs; switch to
    # ast.literal_eval if the output is confirmed to be plain literals.
    results = eval(output_text)

    return mt_dict_to_df(results)

def find_start_line_sequence(text,start_char):
    """Return the index of the first ``start_char`` that begins a line.

    A character begins a line when it is the very first character of
    ``text`` or directly follows a newline.  Returns None when no line
    starts with ``start_char``.
    """
    at_line_start = True  # position 0 counts as the start of a line

    for index,current in enumerate(text):
        if at_line_start and current == start_char:
            return index
        at_line_start = current == '\n'

    return None
        
        
        
def get_history(symbol,start_time,end_time):
    """Run a minute-bar history query in the remote notebook and return a DataFrame.

    Types a short script into notebook cell 1, executes it, and parses the
    printed result.  The script calls ``df_to_dict``, whose import is sent
    commented out, so it is expected to exist in the remote kernel already
    (presumably defined by cell 0 - confirm).

    Args:
        symbol: Equity symbol, inserted into the script as a quoted string.
        start_time: datetime for the start of the requested range.
        end_time: datetime for the end of the requested range.

    Returns:
        The result of ``mt_dict_to_df`` applied to the evaluated cell output.

    Raises:
        QcResponseError: when no parsable output was read from the cell.
    """
    # Columns removed remotely before the frame is printed.
    drop_string = "'askclose', 'askhigh', 'asklow', 'askopen', 'asksize', 'bidclose','bidhigh', 'bidlow', 'bidopen', 'bidsize', 'high', 'low','open', 'volume'"

    cell_lines = [
            "import datetime as dt",
            "#from mt_utils import df_to_dict",
            "qb = QuantBook()",
            "equity = qb.AddEquity('{}')".format(symbol),
            "start_time = dt.datetime({})".format(datetime_as_args(start_time)),
            "end_time = dt.datetime({})".format(datetime_as_args(end_time)),
            "history = qb.History(equity.Symbol, start_time, end_time, Resolution.Minute)",
            "history.reset_index(inplace = True)",
            "drop_list = [{}] if len(history) else [] ".format(drop_string),
            "history.drop(drop_list,axis=1,inplace = True)",
            "dct = df_to_dict(history)",
            "print(dct)"]

    cell = get_cell(1)

    execute_cell_lines(cell,cell_lines)

    output_text = get_parsed_cell_response(cell)

    if not output_text:
        raise QcResponseError("no parsable output for history of {}".format(symbol))

    # SECURITY: eval() executes whatever text the remote cell printed.  Kept
    # because the printed structure may contain non-literal reprs; switch to
    # ast.literal_eval if the output is confirmed to be plain literals.
    results = eval(output_text)

    return mt_dict_to_df(results)
        
def get_option_bars(sym,start_time,end_time,strike_date,spread_type,strikes):
    """Make sure option bars for every strike in ``strikes`` are in the cache.

    Each strike is looked up with ``search_options_cache``; when nothing is
    found the bars are fetched with ``get_option`` and, if non-empty, added
    through ``add_options_cache``.  Returns None.
    """
    # A manual remap of 'HES' to 'AHC' used to live here and is disabled.
    for strike in strikes:
        bars = search_options_cache(sym,strike_date,strike,spread_type,start_time,end_time)

        print("get_option_bars after search",sym,len(bars))

        if len(bars) != 0:
            continue  # already cached

        print("downloading option",sym,spread_type,strike)
        bars = get_option(sym,start_time,end_time,spread_type,strike)

        if not len(bars):
            continue  # nothing came back; leave the cache untouched

        # The symbol returned is not always the symbol searched for.
        bars['symbol'] = sym
        add_options_cache(bars)
        
def get_underlying_bars(sym,start_time,end_time):
    """Make sure underlying bars for ``sym`` over the given range are cached.

    Looks the range up with ``search_underlying_cache``; when nothing is
    found the bars are fetched with ``get_history`` and, if non-empty, added
    through ``add_underlying_cache``.

    Args:
        sym: Underlying symbol.
        start_time: datetime for the start of the range.
        end_time: datetime for the end of the range.
    """
    # Use this function's own parameters: the previous body referenced
    # `event`, `ts` and `end_date`, which are not defined in this scope.
    df = search_underlying_cache(sym,start_time,end_time)

    if len(df) == 0:
        print("downloading underlying")
        df = get_history(sym,start_time,end_time)
        if len(df):
            df['symbol'] = sym  # the symbol returned is not always the symbol searched for
            add_underlying_cache(df)
        else:
            print("could not download:",sym)
    else:
        print("found in underlying_cache")




## Load project and notebook

In [3]:
# Machine-specific location of the JSON settings file.  The keys read from
# it in this notebook are 'url_main', 'url_project', 'email' and 'password'.
QC_INFO_PATH = '/home/cc/QuantConnect/qc_info.json'

with open(QC_INFO_PATH,'r') as infile:
    qc_info = json.load(infile)

# Start Chrome through the chromedriver binary at this absolute path.
# `driver` is a module-level name that the helper functions above rely on.
driver = webdriver.Chrome("/usr/local/bin/chromedriver")

# Open the main page, which carries the login link manual_login() clicks.
driver.get(qc_info['url_main'])

manual_login(driver,qc_info)




In [4]:
# Navigate to the project page, then open research.ipynb and switch the
# driver into the notebook frame so later cell lookups happen inside it.
load_project(driver,qc_info)

load_research_notebook(driver)


clicking research.ipynb


# Load data

In [5]:
from mt_data import download,data_global_set,data_init,data_info
from mt_data import get_holidays
import json
import datetime as dt
from datetime import timedelta




# Initialise the mt_data module state from data_init()'s return values.
data_global_set(*data_init())

data_info()

# `actions` is the event list iterated by process_events(); show the first
# entry as a quick check.
actions = json.loads(download("https://www.dropbox.com/s/psgut932tw77b82/actions.json?dl=1"))
print(actions[0])

print("holidays = ",get_holidays())



download_data: https://www.dropbox.com/s/1lszx4xveis9x5i/qc_option_bars_100.json?dl=1
download_data dict data:  28
Download data df len: 137477
prepare_options_cache Index(['symbol', 'time', 'type', 'expiry', 'strike', 'mid'], dtype='object')
sorting options_cache
finished
after drop 137477
data_init
options_cache
       symbol                time  type      expiry  strike    mid
1472     AAPL 2020-05-26 10:59:00   Put  2020-05-29   310.0  0.540
1473     AAPL 2020-05-26 11:00:00   Put  2020-05-29   310.0  0.545
1474     AAPL 2020-05-26 11:01:00   Put  2020-05-29   310.0  0.530
1475     AAPL 2020-05-26 11:02:00   Put  2020-05-29   310.0  0.520
1476     AAPL 2020-05-26 11:03:00   Put  2020-05-29   310.0  0.500
...       ...                 ...   ...         ...     ...    ...
137472     ZM 2020-06-05 15:56:00  Call  2020-06-05   235.0  0.050
137473     ZM 2020-06-05 15:57:00  Call  2020-06-05   235.0  0.050
137474     ZM 2020-06-05 15:58:00  Call  2020-06-05   235.0  0.050
137475     ZM 

# Process Events

In [6]:
from mt_data import get_holidays

print("holidays")
print(get_holidays())

# Hours added to every event timestamp in process_events()
# (presumably shifts the source timestamps to US Eastern - confirm).
HOURS_TO_EST = 3

# process_events() stops once the event index exceeds this value.
EVENT_LIMIT = 125

def process_events():
    """Download option bars for the 'shorting' events in ``actions``.

    Reads the module-level ``actions``, ``EVENT_LIMIT`` and ``HOURS_TO_EST``.
    An event is skipped when it lacks a required key, when its action is not
    ``'shorting'``, or when its symbol is ``'SPX'``.  For every other event,
    option bars are requested from the shifted event timestamp up to 16:00
    on the strike date.

    Raises:
        Whatever ``get_option_bars`` raises (e.g. QcResponseError).
    """
    # 'ts' is read below, so it belongs in the guard: without it an event
    # missing a timestamp raises KeyError instead of being skipped like
    # events missing any other field.
    required_keys = ('action','sym','spread_type','strike_date','strikes','ts')

    for i,event in enumerate(actions):

        # NOTE(review): `>` lets index EVENT_LIMIT itself through, so up to
        # EVENT_LIMIT + 1 events are considered - confirm this is intended.
        if i > EVENT_LIMIT:
            break

        if not all(key in event for key in required_keys):
            continue

        if event['action'] != 'shorting':
            continue

        ts = dt.datetime.strptime(event['ts'], '%Y-%m-%dT%H:%M:%S') + timedelta(hours=HOURS_TO_EST)

        print(i,event['sym'],ts,event['strike_date'])
        if event['sym'] == 'SPX':
            print("don't bother downloading SPX, it's not there")
            continue

        # Request data up to 16:00 on the strike date.
        end_date = dt.datetime.strptime(event['strike_date'],'%Y-%m-%d')
        end_time = end_date.replace(hour = 16,minute = 0)

        # Underlying download is currently disabled:
        # get_underlying_bars(event['sym'],ts,end_time)

        get_option_bars(event['sym'],ts,end_time,end_time.date(),event['spread_type'],event['strikes'])
        
        
            
        
            
# Runs when this cell is executed, i.e. once process_events is defined;
# process_events() itself is not called in this cell.
print("finished")
            


holidays
[]
finished


In [7]:
# Re-run notebook cell 0, then process all events; on failure wait five
# minutes and start over until one full pass succeeds.
keep_trying = True

while keep_trying:
    try:
        cell = get_cell(0)
        enter_cell(cell)
        execute_cell(cell)
        tm.sleep(2)  # give cell 0 a moment before driving cell 1
        process_events()
        keep_trying = False
    except Exception as err:
        # `Exception` rather than a bare except so that KeyboardInterrupt
        # still stops the loop; the error itself is printed because not
        # every failure here is a QcResponseError.
        print("QcResponse error, trying again in 5 minutes",dt.datetime.now(),repr(err))
        tm.sleep(5*60)
        




0 ZM 2020-05-26 09:52:24 2020-05-29
search_options_cache found ZM 1538
missing_data_by_date found all days
get_option_bars after search ZM 1538
search_options_cache found ZM 1538
missing_data_by_date found all days
get_option_bars after search ZM 1538
1 PYPL 2020-05-26 10:33:38 2020-06-05
search_options_cache found PYPL 3447
missing_data_by_date found all days
get_option_bars after search PYPL 3447
search_options_cache found PYPL 3447
missing_data_by_date found all days
get_option_bars after search PYPL 3447
2 HES 2020-05-26 10:41:19 2020-05-29
search_options_cache found HES 0
get_option_bars after search HES 0
downloading option HES PUT 46.0
search_options_cache found HES 0
get_option_bars after search HES 0
downloading option HES PUT 44.0
3 AAPL 2020-05-26 10:58:17 2020-05-29
search_options_cache found AAPL 1472
missing_data_by_date found all days
get_option_bars after search AAPL 1472
search_options_cache found AAPL 1472
missing_data_by_date found all days
get_option_bars after sear

In [None]:
# Scratch: run notebook cell 0 by hand (the same steps the retry loop does).
cell = get_cell(0)
enter_cell(cell)
execute_cell(cell)

In [None]:
# execute_cell() needs a cell element, not an index: it calls
# find_element_by_class_name on its argument, which an int does not have.
execute_cell(get_cell(0))

In [None]:
# Scratch: display the element for notebook cell 0.
get_cell(0)

In [None]:
# Scratch.  NOTE(review): in the visible notebook `underlying_cache` is only
# assigned in later cells (via get_underlying_cache()), so this depends on
# out-of-order execution.
len(underlying_cache)

In [None]:
# Scratch: display the whole cache (see the note on execution order above
# any earlier use - `underlying_cache` is assigned in a later cell).
underlying_cache

In [None]:
# Scratch: keep the first whitespace-separated token of each string.
ar = ['aa b','b c']
[a.split()[0] for a in ar]


In [None]:
# Scratch: display the whole cache again.
underlying_cache

In [None]:
import pandas as pd
# Scratch: number of business days pandas reports for the single day
# 2020-09-07.  Note this binds the name `date` to a datetime instance.
date = dt.datetime(2020,9,7)
len(pd.bdate_range(date,date))

In [None]:
from mt_data import get_underlying_cache
from datetime import datetime
from datetime import date

from pandas.tseries.holiday import USFederalHolidayCalendar
import pprint

# Fetch the underlying cache from mt_data and dump it for inspection.
underlying_cache = get_underlying_cache()
pprint.pprint(underlying_cache)

print("len(underlying_cache):",len(underlying_cache))

# NOTE(review): second full dump of the same object.
pprint.pprint(underlying_cache)

# Calendar object and result list used by find_holidays() below.
cal = USFederalHolidayCalendar()
holidays = []

def find_holidays(df):
    """Store in the global ``holidays`` the calendar's holiday dates.

    The range runs from ``df['time'][0]`` to today, using the module-level
    ``cal`` calendar.

    NOTE(review): ``df['time'][0]`` is a lookup by key 0; if ``df`` is a
    DataFrame whose index does not contain 0 this raises - confirm the
    shape of the cache object.
    """
    global holidays

    first_time = df['time'][0]
    holiday_stamps = cal.holidays(start = first_time, end= date.today())
    holidays = [stamp.date() for stamp in holiday_stamps]
    
    
# Fill the global `holidays` list from the cache's first timestamp, then show it.
find_holidays(underlying_cache)

print(holidays)
    
    


In [None]:
import datetime as datetime
# 16:31:00.  NOTE(review): the import just above rebinds the name
# `datetime` to the module, replacing the class imported in an earlier cell.
after_hours_start = datetime.time(16,31,0)

In [None]:
from mt_data import get_underlying_cache
# Refresh the local reference to mt_data's underlying cache.
underlying_cache = get_underlying_cache()

In [None]:
# Scratch: slicing from index 1 drops the first character.
text = 'abcdefg'
text[1:]

In [None]:
from mt_data import save_underlying_cache

# Write the underlying cache to a machine-specific Dropbox path.
save_underlying_cache('/home/cc/Dropbox/qc_underlying.json')


In [8]:
from mt_data import save_options_cache


# NOTE(review): the recorded output of this cell is
# "TypeError: Object of type date is not JSON serializable", so the cache
# currently holds date objects that save_options_cache does not convert
# before dumping - the file is not written until that is handled in mt_data.
save_options_cache('/home/cc/Dropbox/qc_option_bars_125.json')


TypeError: Object of type date is not JSON serializable

In [None]:
# Distinct symbols present in the options cache.
set(options_cache['symbol'])

In [None]:
# Scratch: display the cache's 'symbol' column.
options_cache['symbol']

### Symbols that come back renamed from `get_history`

`HES` is returned as `AHC`.

In [None]:
from mt_data import get_options_cache
# Fetch the options cache from mt_data for inspection.
df = get_options_cache()

In [None]:
# Scratch: plain-text dump of the options cache fetched above.
print(df)

In [None]:
# Scratch: display the datetime.date class object.
dt.date

In [None]:
# Scratch: current local time.
dt.datetime.now()