In [1]:
from IPython.display import display, HTML
from datetime import datetime
from lxml import html, etree, objectify
import pandas as pd
import requests
import threading
import queue
import csv
import os

In [2]:
def get_symbols():
    with open("n16_symbols.csv", "r") as f:
        symbols = f.read().split()
    return list(set(symbols))
symbols = get_symbols()
len(symbols)

3198

In [3]:
def get_data(symbol):
    url = "http://www.nasdaq.com/symbol/{}/institutional-holdings".format(symbol.lower())
    page = requests.get(url)
    tree = html.fromstring(page.content)    
    
    data = {}
    # Price 
    div = tree.find('.//div[@id="qwidget_lastsale"]')
    if div is not None:
        data["price"] = div.text
        
    def get_text_val(t):
        return ("0" if t is None else t).replace(",", "")
    
    # Institutional Ownership
    table = tree.find('.//div[@class="infoTable marginT15px marginL15px"]')
    if table is not None:
        for row in table.findall('.//tr'):
            th_cell, td_cell = row
            title, value = th_cell.text, get_text_val(td_cell.text)
            if title == "Total Shares Outstanding (millions)":
                data["total_shares"] = value
            elif title == "Institutional Ownership":
                data["institutional_ownership"] = value
    
    # Active positions
    table = tree.find('.//div[@class="infoTable paddingT5px"]')
    if table is not None:
        for row in table.findall('.//tr'):
            first, second, third = row
            label, holders, shares = first.text, get_text_val(second.text), get_text_val(third.text)
            if label == "Increased Positions":
                data["increased_holders"] = holders
                data["increased_shares"] = shares
            elif label == "Decreased Positions":
                data["decreased_holders"] = holders
                data["decreased_shares"] = shares
            elif label == "Held Positions":
                data["held_holders"] = holders
                data["held_shares"] = shares
    
    # New and sold out positions    
    table = tree.find('.//div[@class="infoTable floatL marginT15px"]')
    if table is not None:
        for row in table.findall('.//tr'):
            first, second, third = row
            label, holders, shares = first.text, get_text_val(second.text), get_text_val(third.text)
            if label == "New Positions":
                data["new_holders"] = holders
                data["new_shares"] = shares
            elif label == "Sold Out Positions":
                data["sold_holders"] = holders
                data["sold_shares"] = shares
            
    return data

get_data("TSLA")

{'decreased_holders': '268',
 'decreased_shares': '15736105',
 'held_holders': '125',
 'held_shares': '74016938',
 'increased_holders': '387',
 'increased_shares': '5156362',
 'institutional_ownership': '56.87%',
 'new_holders': '129',
 'new_shares': '1311576',
 'price': '$320.87',
 'sold_holders': '63',
 'sold_shares': '1765026',
 'total_shares': '167'}

In [4]:
directory = os.path.join("n16_data_cache", str(datetime.now().date()))
if not os.path.exists(directory):
    os.makedirs(directory)

def save_data_to_cache(symbols):
    
    num_worker_threads = 10
    
    def worker():
        while True:
            symbol = q.get()
            if symbol is None:
                break
                
            data = get_data(symbol)
            if data:
                fname = os.path.join(directory, "{}.csv".format(symbol))
                with open(fname, 'w') as f:
                    w = csv.DictWriter(f, data.keys())
                    w.writeheader()
                    w.writerow(data)
            else:
                print("Couldn't get data for {}".format(symbol))
            
            q.task_done()

    q = queue.Queue()

    threads = []
    for i in range(num_worker_threads):
        t = threading.Thread(target=worker)
        t.start()
        threads.append(t)

    for symbol in symbols:
        fname = os.path.join(directory, "{}.csv".format(symbol))
        if not os.path.isfile(fname):  # not in the cache
            q.put(symbol)

    # block until all tasks are done
    q.join()

    # stop workers
    for i in range(num_worker_threads):
        q.put(None)
    
    # join threads
    for t in threads:
        t.join()
        
save_data_to_cache(symbols)

Couldn't get data for SHOR
Couldn't get data for GLADO
Couldn't get data for RTK
Couldn't get data for DRWI
Couldn't get data for ZBZZT
Couldn't get data for PAACR
Couldn't get data for FITS
Couldn't get data for CRDT
Couldn't get data for HNH
Couldn't get data for CLACU
Couldn't get data for OXLCN
Couldn't get data for CPAAU
Couldn't get data for HOTRW
Couldn't get data for GUID
Couldn't get data for MXPT
Couldn't get data for WBB
Couldn't get data for ZXZZT
Couldn't get data for WBMD
Couldn't get data for PVTBP
Couldn't get data for AMRI
Couldn't get data for SAJA
Couldn't get data for MOCO
Couldn't get data for PLYAW
Couldn't get data for FULLL
Couldn't get data for WSFSL
Couldn't get data for MAUI
Couldn't get data for INNL
Couldn't get data for MRVC
Couldn't get data for FUEL
Couldn't get data for ZWZZT
Couldn't get data for NDRM
Couldn't get data for AXAR
Couldn't get data for OKSB
Couldn't get data for MSLI
Couldn't get data for ZAZZT
Couldn't get data for EVBS
Couldn't get data

In [5]:
def get_data_from_cache(symbol):
    fname = os.path.join(directory, "{}.csv".format(symbol))
    if os.path.isfile(fname):
        with open(fname, 'r') as f:
            reader = csv.reader(f)
            header, values = reader
            values = [float(v) if v.isnumeric() else v for v in values]
            data = dict(zip(header, values))
            return data

get_data_from_cache("TSLA")

{'decreased_holders': 268.0,
 'decreased_shares': 15736105.0,
 'held_holders': 125.0,
 'held_shares': 74016938.0,
 'increased_holders': 387.0,
 'increased_shares': 5156362.0,
 'institutional_ownership': '56.87%',
 'new_holders': 129.0,
 'new_shares': 1311576.0,
 'price': '$320.87',
 'sold_holders': 63.0,
 'sold_shares': 1765026.0,
 'total_shares': 167.0}

In [6]:
data = []
for symbol in symbols:
    symbol_data = get_data_from_cache(symbol)
    if symbol_data:
        symbol_data["symbol"] = symbol
        data.append(symbol_data)
df = pd.DataFrame.from_records(data)
df[:10]

Unnamed: 0,decreased_holders,decreased_shares,held_holders,held_shares,increased_holders,increased_shares,institutional_ownership,new_holders,new_shares,price,sold_holders,sold_shares,symbol,total_shares
0,36.0,4456121.0,12.0,1349570.0,29.0,2075342.0,21.24%,14.0,635845.0,$1.72,24.0,2167192.0,GNMX,37.0
1,,,,,,,,,,$27.94,,,CHSCN,
2,24.0,1234759.0,21.0,9140030.0,68.0,1777506.0,68.09%,21.0,762305.0,$35.65,7.0,1161515.0,PGC,18.0
3,141.0,21153734.0,47.0,147285000.0,127.0,33188980.0,82.82%,37.0,18158866.0,$15.1,40.0,2586535.0,WEN,243.0
4,10.0,68686.0,4.0,1342320.0,6.0,135306.0,35.32%,1.0,2689.0,$14.48,3.0,25924.0,BOTJ,4.0
5,26.0,386397.0,23.0,19267400.0,76.0,5002018.0,44.59%,27.0,2624359.0,$19.25,10.0,122397.0,AXDX,55.0
6,18.0,701655.0,16.0,20816600.0,12.0,502371.0,57.67%,4.0,23620.0,$5.6438,5.0,55694.0,BKEP,38.0
7,5.0,23905.0,2.0,2008450.0,2.0,25348.0,83.99%,2.0,25348.0,$18.5361,3.0,17443.0,KRMA,2.0
8,11.0,208050.0,5.0,2440920.0,36.0,2448010.0,41.03%,16.0,1221550.0,$10.8,4.0,54953.0,ASUR,12.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,,0.0,0.0,GPIA,0.0


In [7]:
def draw_table(sub_df, headers, fields):
    s = "<table><tr><th>" + "</th><th>".join(headers) + "</th></tr>"
    for r in sub_df.itertuples():
        s += "<tr><td>" + "</td><td>".join(str(getattr(r, f)) for f in fields) + "</td></tr>"
    s += "</table>"
    display(HTML(s))
    
draw_table(df[:10], ("Symbol", "Price"), ("symbol", "price"))

Symbol,Price
GNMX,$1.72
CHSCN,$27.94
PGC,$35.65
WEN,$15.1
BOTJ,$14.48
AXDX,$19.25
BKEP,$5.6438
KRMA,$18.5361
ASUR,$10.8
GPIA,


In [8]:
df["ratio"] = (df.increased_holders - df.decreased_holders) / df.held_holders
df.sort_values("ratio", ascending=False, inplace=True)
display(HTML("<h3>Top 100 stocks by Active Bullish Activity By Number of Funds((Increased – Decreased)/Held)</h3>"))

draw_table(
    df[:100], 
    ("Stock", "Share price", "Inst.Ownership", "Increased", "Decreased", "Held", "Ratio"),
    ("symbol", "price", "institutional_ownership", "increased_holders", "decreased_holders", "held_holders", "ratio")
)

Stock,Share price,Inst.Ownership,Increased,Decreased,Held,Ratio
ATNX,$16.91,01.10%,17.0,0.0,0.0,inf
MOXC,$2.4,00.02%,2.0,0.0,0.0,inf
ESGU,$56.57,38.74%,3.0,2.0,0.0,inf
ORIG,$26.79,00.14%,1.0,0.0,0.0,inf
CCCR,$3.16,00.78%,4.0,2.0,0.0,inf
FINX,$21.62,03.26%,3.0,0.0,0.0,inf
JSMD,$36.66,58.03%,7.0,3.0,0.0,inf
CLWT,$3.9,00.35%,2.0,1.0,0.0,inf
ADOM,$4.82,0%,2.0,1.0,0.0,inf
CID,$35.4162,54.04%,8.0,4.0,0.0,inf


In [9]:
df["ratio"] = df.increased_shares / df.decreased_shares
df.sort_values("ratio", ascending=False, inplace=True)
display(HTML("<h3>Top 100 stocks by Active Bullish Activity By Shares (Increased / Decreased)</h3>"))

draw_table(
    df[:100], 
    ("Stock", "Share price", "Inst.Ownership", "Increased", "Decreased", "Held", "Ratio"),
    ("symbol", "price", "institutional_ownership", "increased_shares", "decreased_shares", "held_shares", "ratio")
)

Stock,Share price,Inst.Ownership,Increased,Decreased,Held,Ratio
ATNX,$16.91,01.10%,560739.0,0.0,66904.0,inf
JCTCF,$14.45,12.79%,2819.0,0.0,284506.0,inf
JSYNU,$10.7792,15.96%,20900.0,0.0,601409.0,inf
MACQW,$0.3351,0%,153150.0,0.0,1779533.0,inf
ITEQ,$32.34,18.74%,137241.0,0.0,22019.0,inf
TATT,$10.8,02.93%,101421.0,0.0,157571.0,inf
DSLV,$24.7799,54.86%,448190.0,0.0,300001.0,inf
HUNT,$9.7889,61.51%,7614358.0,0.0,1719000.0,inf
RDIB,$22,06.50%,600.0,0.0,108716.0,inf
SBNYW,$96.07,0%,112.0,0.0,43309.0,inf


In [10]:
df["ratio"] = df.new_holders / df.sold_holders
df.sort_values("ratio", ascending=False, inplace=True)
display(HTML("<h3>Top 100 stocks by New Bullish Activity By Funds (New / Sold Out)</h3>"))

draw_table(
    df[:100], 
    ("Stock", "Share price", "Inst.Ownership", "New", "Sold out", "Ratio"),
    ("symbol", "price", "institutional_ownership", "new_holders", "sold_holders", "ratio")
)

Stock,Share price,Inst.Ownership,New,Sold out,Ratio
ATNX,$16.91,01.10%,16.0,0.0,inf
FLAT,$61.28,39.24%,1.0,0.0,inf
CIL,$39.6755,66.86%,1.0,0.0,inf
MGCD,$8.56,22.61%,1.0,0.0,inf
INFR,$29.51,04.85%,4.0,0.0,inf
IDSA,$1.7001,03.93%,2.0,0.0,inf
KBLMU,$10.2,0%,17.0,0.0,inf
HBK,$14.645,29.35%,2.0,0.0,inf
AZRX,$3.2,06.06%,6.0,0.0,inf
IRDMB,$434.87,33.95%,2.0,0.0,inf


In [11]:
df["ratio"] = df.new_shares / df.sold_shares
df.sort_values("ratio", ascending=False, inplace=True)
display(HTML("<h3>Top 100 stocks by New Bullish Activity By Shares (New / Sold Out)</h3>"))

draw_table(
    df[:100], 
    ("Stock", "Share price", "Inst.Ownership", "New", "Sold out", "Ratio"),
    ("symbol", "price", "institutional_ownership", "new_shares", "sold_shares", "ratio")
)

Stock,Share price,Inst.Ownership,New,Sold out,Ratio
ATNX,$16.91,01.10%,440703.0,0.0,inf
FNBG,$34.8,19.68%,1045326.0,0.0,inf
FMCIW,$0.37,0%,3596225.0,0.0,inf
IMRN,$5.06,00.34%,11207.0,0.0,inf
BPFHW,$10,0%,10000.0,0.0,inf
OTTW,$14.01,14.57%,71646.0,0.0,inf
MMDMR,$0.4,0%,12187496.0,0.0,inf
ARGX,$23,24.70%,6641679.0,0.0,inf
JSYNU,$10.7792,15.96%,20900.0,0.0,inf
MACQW,$0.3351,0%,151950.0,0.0,inf
