In [1]:
from IPython.display import display, HTML
from datetime import datetime
from lxml import html, etree, objectify
import pandas as pd
import requests
import threading
import queue
import csv
import os

In [2]:
def get_symbols():
    with open("n16_symbols.csv", "r") as f:
        symbols = f.read().split()
    return list(set(symbols))
symbols = get_symbols()
len(symbols)

3198

In [3]:
def get_data(symbol):
    url = "http://www.nasdaq.com/symbol/{}/institutional-holdings".format(symbol.lower())
    page = requests.get(url)
    tree = html.fromstring(page.content)    
    
    data = {}
    # Price 
    div = tree.find('.//div[@id="qwidget_lastsale"]')
    if div is not None:
        data["price"] = div.text
        
    def get_text_val(t):
        return ("0" if t is None else t).replace(",", "")
    
    # Institutional Ownership
    table = tree.find('.//div[@class="infoTable marginT15px marginL15px"]')
    if table is not None:
        for row in table.findall('.//tr'):
            th_cell, td_cell = row
            title, value = th_cell.text, get_text_val(td_cell.text)
            if title == "Total Shares Outstanding (millions)":
                data["total_shares"] = value
            elif title == "Institutional Ownership":
                data["institutional_ownership"] = value
    
    # Active positions
    table = tree.find('.//div[@class="infoTable paddingT5px"]')
    if table is not None:
        for row in table.findall('.//tr'):
            first, second, third = row
            label, holders, shares = first.text, get_text_val(second.text), get_text_val(third.text)
            if label == "Increased Positions":
                data["increased_holders"] = holders
                data["increased_shares"] = shares
            elif label == "Decreased Positions":
                data["decreased_holders"] = holders
                data["decreased_shares"] = shares
            elif label == "Held Positions":
                data["held_holders"] = holders
                data["held_shares"] = shares
    
    # New and sold out positions    
    table = tree.find('.//div[@class="infoTable floatL marginT15px"]')
    if table is not None:
        for row in table.findall('.//tr'):
            first, second, third = row
            label, holders, shares = first.text, get_text_val(second.text), get_text_val(third.text)
            if label == "New Positions":
                data["new_holders"] = holders
                data["new_shares"] = shares
            elif label == "Sold Out Positions":
                data["sold_holders"] = holders
                data["sold_shares"] = shares
            
    return data

get_data("TSLA")

{'decreased_holders': '264',
 'decreased_shares': '15767469',
 'held_holders': '127',
 'held_shares': '74301228',
 'increased_holders': '393',
 'increased_shares': '5260346',
 'institutional_ownership': '56.72%',
 'new_holders': '122',
 'new_shares': '1344778',
 'price': '$300.75',
 'sold_holders': '63',
 'sold_shares': '1765435',
 'total_shares': '168'}

In [4]:
directory = os.path.join("n16_data_cache", str(datetime.now().date()))
if not os.path.exists(directory):
    os.makedirs(directory)

def save_data_to_cache(symbols):
    
    num_worker_threads = 10
    
    def worker():
        while True:
            symbol = q.get()
            if symbol is None:
                break
                
            data = get_data(symbol)
            if data:
                fname = os.path.join(directory, "{}.csv".format(symbol))
                with open(fname, 'w') as f:
                    w = csv.DictWriter(f, data.keys())
                    w.writeheader()
                    w.writerow(data)
            else:
                print("Couldn't get data for {}".format(symbol))
            
            q.task_done()

    q = queue.Queue()

    threads = []
    for i in range(num_worker_threads):
        t = threading.Thread(target=worker)
        t.start()
        threads.append(t)

    for symbol in symbols:
        fname = os.path.join(directory, "{}.csv".format(symbol))
        if not os.path.isfile(fname):  # not in the cache
            q.put(symbol)

    # block until all tasks are done
    q.join()

    # stop workers
    for i in range(num_worker_threads):
        q.put(None)
    
    # join threads
    for t in threads:
        t.join()
        
save_data_to_cache(symbols)

Couldn't get data for PVTB
Couldn't get data for PNRA
Couldn't get data for AAPC
Couldn't get data for SPNC
Couldn't get data for NVET
Couldn't get data for UNXL
Couldn't get data for ARIS
Couldn't get data for OKSB
Couldn't get data for CLACU
Couldn't get data for MXPT
Couldn't get data for PAACU
Couldn't get data for SPLS
Couldn't get data for MOCO
Couldn't get data for GSOL
Couldn't get data for TUTT
Couldn't get data for INNL
Couldn't get data for WMAR
Couldn't get data for SPAN
Couldn't get data for ZBZZT
Couldn't get data for FUEL
Couldn't get data for AXARU
Couldn't get data for COVS
Couldn't get data for AGNCP
Couldn't get data for DRWI
Couldn't get data for GNVC
Couldn't get data for CPAAW
Couldn't get data for RTK
Couldn't get data for HOTRW
Couldn't get data for EVBS
Couldn't get data for EGT
Couldn't get data for PTXP
Couldn't get data for ELOS
Couldn't get data for PLYAW
Couldn't get data for ACTX
Couldn't get data for SAJA
Couldn't get data for AXAR
Couldn't get data for 

In [5]:
def get_data_from_cache(symbol):
    fname = os.path.join(directory, "{}.csv".format(symbol))
    if os.path.isfile(fname):
        with open(fname, 'r') as f:
            reader = csv.reader(f)
            header, values = reader
            values = [float(v) if v.isnumeric() else v for v in values]
            data = dict(zip(header, values))
            return data

get_data_from_cache("TSLA")

{'decreased_holders': 264.0,
 'decreased_shares': 15767469.0,
 'held_holders': 127.0,
 'held_shares': 74301228.0,
 'increased_holders': 393.0,
 'increased_shares': 5260346.0,
 'institutional_ownership': '56.72%',
 'new_holders': 122.0,
 'new_shares': 1344778.0,
 'price': '$300.5',
 'sold_holders': 63.0,
 'sold_shares': 1765435.0,
 'total_shares': 168.0}

In [6]:
data = []
for symbol in symbols:
    symbol_data = get_data_from_cache(symbol)
    if symbol_data:
        symbol_data["symbol"] = symbol
        data.append(symbol_data)
df = pd.DataFrame.from_records(data)
df[:10]

Unnamed: 0,decreased_holders,decreased_shares,held_holders,held_shares,increased_holders,increased_shares,institutional_ownership,new_holders,new_shares,price,sold_holders,sold_shares,symbol,total_shares
0,16.0,171421.0,7.0,4094440.0,41.0,2123056.0,38.14%,14.0,329081.0,$38.17,6.0,130120.0,FEP,17.0
1,1.0,2100.0,2.0,209476.0,7.0,120416.0,04.78%,6.0,109199.0,$64.82,0.0,0.0,GRVY,7.0
2,54.0,912755.0,21.0,14559300.0,55.0,1492969.0,101.44%,8.0,274237.0,$73.62,21.0,374656.0,MGPI,17.0
3,38.0,1356376.0,16.0,27088300.0,55.0,1244120.0,76.81%,15.0,303104.0,$9.725,8.0,158492.0,LOCO,39.0
4,9.0,7083.0,11.0,349042.0,8.0,33461.0,17.37%,6.0,29927.0,$57.302,4.0,1410.0,FSFG,2.0
5,16.0,322392.0,6.0,195663.0,7.0,144512.0,06.46%,3.0,124195.0,$2.73,10.0,217414.0,CETX,10.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0%,0.0,0.0,$24.4,0.0,0.0,GFNSL,3.0
7,87.0,10449534.0,27.0,26332400.0,139.0,12142273.0,68.41%,49.0,2816846.0,$98.02,27.0,710296.0,SINA,72.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,,0.0,0.0,HRMNW,0.0
9,111.0,5259309.0,23.0,38096500.0,95.0,3776271.0,97.58%,24.0,1312169.0,$16.3,43.0,2508992.0,VECO,48.0


In [7]:
def draw_table(sub_df, headers, fields):
    s = "<table><tr><th>" + "</th><th>".join(headers) + "</th></tr>"
    for r in sub_df.itertuples():
        s += "<tr><td>" + "</td><td>".join(str(getattr(r, f)) for f in fields) + "</td></tr>"
    s += "</table>"
    display(HTML(s))
    
draw_table(df[:10], ("Symbol", "Price"), ("symbol", "price"))

Symbol,Price
FEP,$38.17
GRVY,$64.82
MGPI,$73.62
LOCO,$9.725
FSFG,$57.302
CETX,$2.73
GFNSL,$24.4
SINA,$98.02
HRMNW,
VECO,$16.3


In [8]:
df["ratio"] = (df.increased_holders - df.decreased_holders) / df.held_holders
df.sort_values("ratio", ascending=False, inplace=True)
display(HTML("<h3>Top 100 stocks by Active Bullish Activity By Number of Funds((Increased – Decreased)/Held)</h3>"))

draw_table(
    df[:100], 
    ("Stock", "Share price", "Inst.Ownership", "Increased", "Decreased", "Held", "Ratio"),
    ("symbol", "price", "institutional_ownership", "increased_holders", "decreased_holders", "held_holders", "ratio")
)

Stock,Share price,Inst.Ownership,Increased,Decreased,Held,Ratio
GOODM,$26.01,05.79%,1.0,0.0,0.0,inf
IMRNW,$1.4,0%,2.0,0.0,0.0,inf
MBSD,$23.84,70.79%,8.0,6.0,0.0,inf
MOXC,$2.47,00.02%,2.0,0.0,0.0,inf
CPTAG,$24.9,10.41%,4.0,0.0,0.0,inf
FMCIU,$10.1508,0%,14.0,2.0,0.0,inf
RFEM,$69.393,47.43%,19.0,4.0,0.0,inf
FRSX,$5.53,00.12%,1.0,0.0,0.0,inf
ANGI,$12.02,00.08%,1.0,0.0,0.0,inf
MPAC,$9.74,43.49%,20.0,0.0,0.0,inf


In [9]:
df["ratio"] = df.increased_shares / df.decreased_shares
df.sort_values("ratio", ascending=False, inplace=True)
display(HTML("<h3>Top 100 stocks by Active Bullish Activity By Shares (Increased / Decreased)</h3>"))

draw_table(
    df[:100], 
    ("Stock", "Share price", "Inst.Ownership", "Increased", "Decreased", "Held", "Ratio"),
    ("symbol", "price", "institutional_ownership", "increased_shares", "decreased_shares", "held_shares", "ratio")
)

Stock,Share price,Inst.Ownership,Increased,Decreased,Held,Ratio
GOODM,$26.01,05.79%,42553.0,0.0,152365.0,inf
APWC,$2.9,00.38%,33770.0,0.0,18675.0,inf
PAVMW,$1.5,0%,3500.0,0.0,19444.0,inf
MACQW,$0.4793,0%,153150.0,0.0,1779533.0,inf
MMDMR,$0.4,0%,12187496.0,0.0,0.0,inf
USOI,$25.63,0%,265885.0,0.0,0.0,inf
DTUL,$64.71,22.24%,10800.0,0.0,3881.0,inf
EDBI,$32.66,76.28%,296381.0,0.0,85043.0,inf
CHSCM,$27.0901,00.07%,101.0,0.0,12828.0,inf
MSDIW,$0.0311,0%,452.0,0.0,255.0,inf


In [10]:
df["ratio"] = df.new_holders / df.sold_holders
df.sort_values("ratio", ascending=False, inplace=True)
display(HTML("<h3>Top 100 stocks by New Bullish Activity By Funds (New / Sold Out)</h3>"))

draw_table(
    df[:100], 
    ("Stock", "Share price", "Inst.Ownership", "New", "Sold out", "Ratio"),
    ("symbol", "price", "institutional_ownership", "new_holders", "sold_holders", "ratio")
)

Stock,Share price,Inst.Ownership,New,Sold out,Ratio
MACQW,$0.4793,0%,2.0,0.0,inf
MIIIU,$10.52,19.60%,2.0,0.0,inf
HYXE,$51.65,89.83%,1.0,0.0,inf
INFR,$29.77,04.83%,2.0,0.0,inf
AAME,$3.6,05.06%,1.0,0.0,inf
QYLD,$24.4548,18.64%,4.0,0.0,inf
AETI,$1.5415,40.94%,2.0,0.0,inf
FCAP,$35.75,01.41%,1.0,0.0,inf
OCC,$2.45,12.27%,1.0,0.0,inf
VVPR,$3.1,02.72%,1.0,0.0,inf


In [11]:
df["ratio"] = df.new_shares / df.sold_shares
df.sort_values("ratio", ascending=False, inplace=True)
display(HTML("<h3>Top 100 stocks by New Bullish Activity By Shares (New / Sold Out)</h3>"))

draw_table(
    df[:100], 
    ("Stock", "Share price", "Inst.Ownership", "New", "Sold out", "Ratio"),
    ("symbol", "price", "institutional_ownership", "new_shares", "sold_shares", "ratio")
)

Stock,Share price,Inst.Ownership,New,Sold out,Ratio
MACQW,$0.4793,0%,151950.0,0.0,inf
JSYNR,$0.2876,0%,500.0,0.0,inf
VRNA,$13.8,34.33%,3287489.0,0.0,inf
USOI,$25.63,0%,265885.0,0.0,inf
KAAC,$9.71,40.29%,14113792.0,0.0,inf
JSYNU,$10.76,15.96%,20900.0,0.0,inf
SSNT,$4.14,04.91%,220352.0,0.0,inf
FTXN,$19.8812,47.36%,88649.0,0.0,inf
TATT,$11,02.93%,11913.0,0.0,inf
FMCIR,$0.35,0%,9165127.0,0.0,inf
