In [2]:
#%%
import pip
import time
import os
import pandas as pd
import datetime as dt
import requests
import json
import numpy as np

def _pip_install(package):
    """Install `package` into the environment of the running interpreter.

    `pip.main()` is not part of pip's supported API (it was removed from the
    public namespace in pip 10), so calling it from the ImportError fallback
    fails instead of installing anything. Running pip as a subprocess of the
    current interpreter is the supported way to install from code, and it
    guarantees the package lands in the same environment as this kernel.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    import subprocess
    import sys

    subprocess.check_call([sys.executable, '-m', 'pip', 'install', package])


# Import BeautifulSoup, installing it on first use if it is missing.
try:
    from bs4 import BeautifulSoup as bs
except ImportError:
    _pip_install('bs4')
    from bs4 import BeautifulSoup as bs

# Import Selenium, installing it on first use if it is missing.
try:
    from selenium import webdriver
    from selenium.common.exceptions import TimeoutException
except ImportError:
    _pip_install('selenium')
    from selenium import webdriver
    from selenium.common.exceptions import TimeoutException

# Directory in which later cells look for the chromedriver executable.
chrome_dir = os.getcwd()

In [283]:
# Sector labels, kept as a plain list of strings.
sectors = [
    'Basic Materials',
    'Communication Services',
    'Consumer Cyclical',
    'Consumer Defensive',
    'Energy',
    'Financial Services',
    'Healthcare',
    'Industrials',
    'Real Estate',
    'Utilities',
    'Technology',
]

# Hand-written SQL text that selects an explicit list of columns from `data`,
# ordered by market_cap descending and limited to 100 rows.
# NOTE(review): a later cell reassigns `aq_query` using a column list scraped from
# the site, and no visible code reads this literal before that happens — confirm
# whether this hard-coded column list is still needed.
aq_query = """
SELECT 
ticker,
name,
exchange,
sector,
industry,
free_float,
market_cap,
average_volume,
beta,
held_by_insiders_pct,
held_by_institutions_pct,
close_price,
open_price,
high_price,
low_price,
volume,
previous_close_price,
previous_open_price,
previous_high_price,
previous_low_price,
previous_volume,
high_price_52_week,
high_price_26_week,
high_price_13_week,
high_price_4_week,
high_price_2_week,
high_price_1_week,
low_price_52_week,
low_price_26_week,
low_price_13_week,
low_price_4_week,
low_price_2_week,
low_price_1_week,
high_volume_52_week,
high_volume_26_week,
high_volume_13_week,
high_volume_4_week,
high_volume_2_week,
high_volume_1_week,
low_volume_52_week,
low_volume_26_week,
low_volume_13_week,
low_volume_4_week,
low_volume_2_week,
low_volume_1_week,
money_flow_52_week,
money_flow_26_week,
money_flow_13_week,
money_flow_4_week,
money_flow_2_week,
money_flow_1_week,
money_flow_1_day,
total_volume_52_week,
total_volume_26_week,
total_volume_13_week,
total_volume_4_week,
total_volume_2_week,
total_volume_1_week,
percent_change_price_52_week,
percent_change_price_26_week,
percent_change_price_13_week,
percent_change_price_4_week,
percent_change_price_2_week,
percent_change_price_1_week,
percent_change_price_1_day,
new_high_price_52_week,
new_high_price_26_week,
new_high_price_13_week,
new_high_price_4_week,
new_high_price_2_week,
new_high_price_1_week,
new_low_price_52_week,
new_low_price_26_week,
new_low_price_13_week,
new_low_price_4_week,
new_low_price_2_week,
new_low_price_1_week,
consecutive_up_price_day,
consecutive_up_price_week,
consecutive_down_price_day,
consecutive_down_price_week,
consecutive_up_volume_day,
consecutive_up_volume_week,
consecutive_down_volume_day,
consecutive_down_volume_week,
new_high_volume_52_week,
new_high_volume_26_week,
new_high_volume_13_week,
new_high_volume_4_week,
new_high_volume_2_week,
new_high_volume_1_week,
new_low_volume_52_week,
new_low_volume_26_week,
new_low_volume_13_week,
new_low_volume_4_week,
new_low_volume_2_week,
new_low_volume_1_week,
sma_5,
sma_10,
sma_20,
sma_50,
sma_100,
sma_200,
previous_sma_5,
previous_sma_10,
previous_sma_20,
previous_sma_50,
previous_sma_100,
previous_sma_200,
macd_12_26,
macd_signal_12_26_9,
previous_macd_12_26,
previous_macd_signal_12_26_9,
rsi_14,
previous_rsi_14,
stochastic_14_3_3_k,
stochastic_14_3_3_d,
previous_stochastic_14_3_3_k,
previous_stochastic_14_3_3_d,
bollinger_upper_20_2,
bollinger_lower_20_2,
previous_bollinger_upper_20_2,
previous_bollinger_lower_20_2,
quarterly_revenue,
quarterly_net_income,
previous_quarterly_revenue_qoq,
previous_quarterly_revenue_yoy,
previous_quarterly_net_income_qoq,
previous_quarterly_net_income_yoy,
revenue,
net_income,
previous_revenue,
previous_net_income,
cost_goods_sold,
gross_profit,
operating_expenses,
operating_income,
non_operating_income,
pre_tax_income,
normalized_pre_tax_income,
income_after_taxes,
income_continuous_operations,
consolidated_net_income,
normalized_income_after_taxes,
ebit,
ebitda,
current_assets,
property_plant_equipment,
long_term_assets,
total_assets,
current_liabilities,
long_term_debt,
long_term_liabilities,
total_liabilities,
common_equity,
tangible_stockholders_equity,
shareholders_equity,
common_shares_outstanding,
cash_flow_operating_activity,
cash_flow_investing_activity,
cash_flow_financial_activity,
beginning_cash,
ending_cash,
net_change_cash,
pe_ratio,
pe_ratio_ttm,
peg_ratio,
price_sales_ratio,
price_book_ratio,
price_cash_ratio,
pre_tax_margin,
net_margin,
return_on_equity,
return_on_assets,
current_ratio,
quick_ratio,
debt_equity_ratio,
inventory_turnover,
book_value_per_share,
fiscal_period_end_qr1,
earnings_estimate_qr1,
expected_report_date_qr1,
days_from_expected_report_date_qr1,
fiscal_period_end_qr0,
earnings_estimate_qr0,
earnings_actual_qr0,
report_date_qr0,
days_since_report_date_qr0,
eps_year,
eps_ttm,
last_dividend_date,
last_dividend_amount,
days_since_last_dividend,
annual_dividend,
dividend_yield,
percent_growth_quarterly_revenue_qoq,
percent_growth_quarterly_revenue_yoy,
percent_growth_annual_revenue,
percent_growth_quarterly_net_income_qoq,
percent_growth_quarterly_net_income_yoy,
percent_growth_annual_net_income,
new_high_annual_revenue_5_year,
new_low_annual_revenue_5_year,
new_high_quarterly_revenue_qoq_5_year,
new_low_quarterly_revenue_qoq_5_year,
new_high_quarterly_revenue_yoy_5_year,
new_low_quarterly_revenue_yoy_5_year,
new_high_annual_net_income_5_year,
new_low_annual_net_income_5_year,
new_high_quarterly_net_income_qoq_5_year,
new_low_quarterly_net_income_qoq_5_year,
new_high_quarterly_net_income_yoy_5_year,
new_low_quarterly_net_income_yoy_5_year,
consecutive_up_quarterly_revenue_qoq,
consecutive_down_quarterly_revenue_qoq,
consecutive_up_quarterly_revenue_yoy,
consecutive_down_quarterly_revenue_yoy,
consecutive_up_annual_revenue,
consecutive_down_annual_revenue,
consecutive_up_quarterly_net_income_qoq,
consecutive_down_quarterly_net_income_qoq,
consecutive_up_quarterly_net_income_yoy,
consecutive_down_quarterly_net_income_yoy,
consecutive_up_annual_net_income,
consecutive_down_annual_net_income,
last_split_date,
last_split_ratio,
days_since_last_split,
percentile_industry_percent_change_price_52_week,
percentile_industry_percent_change_price_26_week,
percentile_industry_percent_change_price_13_week,
percentile_industry_percent_change_price_4_week,
percentile_industry_percent_change_price_2_week,
percentile_industry_percent_change_price_1_week,
percentile_industry_percent_change_price_1_day,
percentile_industry_percent_growth_quarterly_revenue_qoq,
percentile_industry_percent_growth_quarterly_revenue_yoy,
percentile_industry_percent_growth_annual_revenue,
percentile_industry_percent_growth_quarterly_net_income_qoq,
percentile_industry_percent_growth_quarterly_net_income_yoy,
percentile_industry_percent_growth_annual_net_income,
percentile_industry_pe_ratio,
percentile_industry_pe_ratio_ttm,
percentile_industry_peg_ratio,
percentile_industry_price_sales_ratio,
percentile_industry_price_book_ratio,
percentile_industry_price_cash_ratio,
percentile_industry_pre_tax_margin,
percentile_industry_net_margin,
percentile_industry_return_on_equity,
percentile_industry_return_on_assets,
percentile_industry_current_ratio,
percentile_industry_quick_ratio,
percentile_industry_debt_equity_ratio,
percentile_industry_inventory_turnover,
percentile_industry_dividend_yield,
percentile_industry_held_by_insiders_pct,
percentile_industry_held_by_institutions_pct,
percentile_sector_percent_change_price_52_week,
percentile_sector_percent_change_price_26_week,
percentile_sector_percent_change_price_13_week,
percentile_sector_percent_change_price_4_week,
percentile_sector_percent_change_price_2_week,
percentile_sector_percent_change_price_1_week,
percentile_sector_percent_change_price_1_day,
percentile_sector_percent_growth_quarterly_revenue_qoq,
percentile_sector_percent_growth_quarterly_revenue_yoy,
percentile_sector_percent_growth_annual_revenue,
percentile_sector_percent_growth_quarterly_net_income_qoq,
percentile_sector_percent_growth_quarterly_net_income_yoy,
percentile_sector_percent_growth_annual_net_income,
percentile_sector_pe_ratio,
percentile_sector_pe_ratio_ttm,
percentile_sector_peg_ratio,
percentile_sector_price_sales_ratio,
percentile_sector_price_book_ratio,
percentile_sector_price_cash_ratio,
percentile_sector_pre_tax_margin,
percentile_sector_net_margin,
percentile_sector_return_on_equity,
percentile_sector_return_on_assets,
percentile_sector_current_ratio,
percentile_sector_quick_ratio,
percentile_sector_debt_equity_ratio,
percentile_sector_inventory_turnover,
percentile_sector_dividend_yield,
percentile_sector_held_by_insiders_pct,
percentile_sector_held_by_institutions_pct,
percentile_market_percent_change_price_52_week,
percentile_market_percent_change_price_26_week,
percentile_market_percent_change_price_13_week,
percentile_market_percent_change_price_4_week,
percentile_market_percent_change_price_2_week,
percentile_market_percent_change_price_1_week,
percentile_market_percent_change_price_1_day,
percentile_market_percent_growth_quarterly_revenue_qoq,
percentile_market_percent_growth_quarterly_revenue_yoy,
percentile_market_percent_growth_annual_revenue,
percentile_market_percent_growth_quarterly_net_income_qoq,
percentile_market_percent_growth_quarterly_net_income_yoy,
percentile_market_percent_growth_annual_net_income,
percentile_market_pe_ratio,
percentile_market_pe_ratio_ttm,
percentile_market_peg_ratio,
percentile_market_price_sales_ratio,
percentile_market_price_book_ratio,
percentile_market_price_cash_ratio,
percentile_market_pre_tax_margin,
percentile_market_net_margin,
percentile_market_return_on_equity,
percentile_market_return_on_assets,
percentile_market_current_ratio,
percentile_market_quick_ratio,
percentile_market_debt_equity_ratio,
percentile_market_inventory_turnover,
percentile_market_dividend_yield,
percentile_market_held_by_insiders_pct,
percentile_market_held_by_institutions_pct
FROM data
ORDER BY market_cap DESC
LIMIT 100
"""

In [290]:
# Run a small probe query on the AlphaQuery page and collect the column names
# listed in the second table the page renders (presumably the catalogue of
# available columns — confirm against the live page).
#
# These imports are kept local to the cell so they run only after the Selenium
# install guard at the top of the notebook has executed.
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

aq_query_url = 'https://www.alphaquery.com/query'

# NOTE(review): the driver path is Windows-specific ("\\chromedriver.exe").
browser = webdriver.Chrome(executable_path = chrome_dir + "\\chromedriver.exe")

# try/finally guarantees the Chrome process is shut down even when a lookup,
# click or table index below raises; previously the browser was left running.
try:
    browser.get(aq_query_url)

    browser.delete_all_cookies()

    # find_element(s)(By.…) is the locator API available in both Selenium 3 and 4;
    # the find_element(s)_by_* helpers were removed in Selenium 4.3.
    text_area_list = browser.find_elements(By.TAG_NAME, 'textarea')
    browser.find_element(By.XPATH, "//div[@class='CodeMirror-code']").click()

    # Try each <textarea> in turn and stop at the first one that accepts input.
    for text_area in text_area_list:
        try:
            # Select the existing editor contents so the typed query replaces them.
            text_area.send_keys(Keys.CONTROL, "a")
            text_area.send_keys("SELECT ticker FROM data LIMIT 10")
            break
        except WebDriverException:
            # Only WebDriver failures (e.g. a non-interactable element) mean
            # "try the next textarea"; a bare except would also swallow
            # KeyboardInterrupt and genuine programming errors.
            continue

    # click submit button
    run_button = browser.find_element(By.ID, 'btn-run-query')

    run_button.click()

    browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    # Fixed pause to give the page time to render the tables after the query runs.
    time.sleep(2)

    aq_tables = browser.find_elements(By.TAG_NAME, 'table')

    # Snapshot the table's HTML now; the WebElement is unusable after quit().
    aq_headers = bs(aq_tables[1].get_attribute('innerHTML'), "html.parser")
finally:
    browser.quit()

# col_headers maps each column name to an (initially empty) list of values;
# col_headers_lst keeps the names in the order they appear on the page.
col_headers = {}

col_headers_lst = []

for row in aq_headers.find_all('tr'):
    if 'class' in row.attrs.keys():
        # Only rows classed "even"/"odd" are read; other rows are skipped.
        if 'even' in row['class'] or 'odd' in row['class']:
            column_name = row.find(attrs={"class" : "data-var data-var-column-name"}).text
            col_headers[column_name] = []
            col_headers_lst.append(column_name)

In [294]:
# Comma-separated list of every column name collected so far.
query_columns = ', '.join(col_headers_lst)

# Assemble the query line by line; the empty first and last entries reproduce the
# leading and trailing newline of the query text.
aq_query = "\n".join([
    "",
    "SELECT " + query_columns,
    "FROM data",
    "WHERE sector IS NOT null",
    "ORDER BY market_cap DESC",
    "LIMIT 100",
    "",
])

In [295]:
# Submit `aq_query` on the AlphaQuery page and capture the HTML of the second and
# third tables on the page (used by the following cell as the header table and
# the result body respectively).
#
# These imports are kept local to the cell so they run only after the Selenium
# install guard at the top of the notebook has executed.
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

aq_query_url = 'https://www.alphaquery.com/query'

# NOTE(review): the driver path is Windows-specific ("\\chromedriver.exe").
browser = webdriver.Chrome(executable_path = chrome_dir + "\\chromedriver.exe")

# try/finally guarantees the Chrome process is shut down even when a lookup,
# click or table index below raises; previously the browser was left running.
try:
    browser.get(aq_query_url)

    browser.delete_all_cookies()

    # find_element(s)(By.…) is the locator API available in both Selenium 3 and 4;
    # the find_element(s)_by_* helpers were removed in Selenium 4.3.
    text_area_list = browser.find_elements(By.TAG_NAME, 'textarea')
    browser.find_element(By.XPATH, "//div[@class='CodeMirror-code']").click()

    # Try each <textarea> in turn and stop at the first one that accepts input.
    for text_area in text_area_list:
        try:
            # Select the existing editor contents so the typed query replaces them.
            text_area.send_keys(Keys.CONTROL, "a")
            text_area.send_keys(aq_query)
            break
        except WebDriverException:
            # Only WebDriver failures (e.g. a non-interactable element) mean
            # "try the next textarea"; a bare except would also swallow
            # KeyboardInterrupt and genuine programming errors.
            continue

    # click submit button
    run_button = browser.find_element(By.ID, 'btn-run-query')

    run_button.click()

    browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    # Fixed pause to give the page time to render the tables after the query runs.
    time.sleep(2)

    aq_tables = browser.find_elements(By.TAG_NAME, 'table')

    # Snapshot the tables' HTML now; the WebElements are unusable after quit().
    aq_headers = bs(aq_tables[1].get_attribute('innerHTML'), "html.parser")
    aq_body = bs(aq_tables[2].get_attribute('innerHTML'), "html.parser")
finally:
    browser.quit()

In [297]:
# Start from empty containers: a name -> values mapping and the ordered name list.
col_headers = {}
col_headers_lst = []

# Pass 1: read the column names from the header table. Only rows whose class
# list contains "even" or "odd" are considered.
for header_row in aq_headers.find_all('tr'):
    row_classes = header_row.attrs.get('class', [])
    if not ('even' in row_classes or 'odd' in row_classes):
        continue
    column_name = header_row.find(attrs={"class" : "data-var data-var-column-name"}).text
    col_headers[column_name] = []
    col_headers_lst.append(column_name)

# Pass 2: walk the result rows and file each cell under the column name that
# sits at the same position in col_headers_lst.
for body_row in aq_body.find('tbody').find_all('tr'):
    cells = body_row.find_all('td')
    for position in range(len(cells)):
        target_column = col_headers_lst[position]
        col_headers[target_column].append(cells[position].text.strip())

In [303]:
# Turn the column-name -> values mapping into a DataFrame, then index it by ticker.
aq_df = pd.DataFrame(col_headers)
aq_df = aq_df.set_index('ticker')

# Display the sector column as the cell's output.
aq_df.loc[:, 'sector']

ticker
MSFT             Technology
AAPL             Technology
AMZN      Consumer Cyclical
GOOGL            Technology
GOOG             Technology
FB               Technology
BRK.B    Financial Services
BRK.A    Financial Services
BABA      Consumer Cyclical
JPM      Financial Services
Name: sector, dtype: object