In [1]:
from IPython.display import display, HTML
from datetime import datetime
from lxml import html, etree, objectify
import pandas as pd
import requests
import threading
import queue
import csv
import os

In [2]:
def get_symbols():
    with open("n16_symbols.csv", "r") as f:
        symbols = f.read().split()
    return list(set(symbols))
symbols = get_symbols()
len(symbols)

3198

In [3]:
def get_data(symbol):
    url = "http://www.nasdaq.com/symbol/{}/institutional-holdings".format(symbol.lower())
    page = requests.get(url)
    tree = html.fromstring(page.content)    
    
    data = {}
    # Price 
    div = tree.find('.//div[@id="qwidget_lastsale"]')
    if div is not None:
        data["price"] = div.text
        
    def get_text_val(t):
        return ("0" if t is None else t).replace(",", "")
    
    # Institutional Ownership
    table = tree.find('.//div[@class="infoTable marginT15px marginL15px"]')
    if table is not None:
        for row in table.findall('.//tr'):
            th_cell, td_cell = row
            title, value = th_cell.text, get_text_val(td_cell.text)
            if title == "Total Shares Outstanding (millions)":
                data["total_shares"] = value
            elif title == "Institutional Ownership":
                data["institutional_ownership"] = value
    
    # Active positions
    table = tree.find('.//div[@class="infoTable paddingT5px"]')
    if table is not None:
        for row in table.findall('.//tr'):
            first, second, third = row
            label, holders, shares = first.text, get_text_val(second.text), get_text_val(third.text)
            if label == "Increased Positions":
                data["increased_holders"] = holders
                data["increased_shares"] = shares
            elif label == "Decreased Positions":
                data["decreased_holders"] = holders
                data["decreased_shares"] = shares
            elif label == "Held Positions":
                data["held_holders"] = holders
                data["held_shares"] = shares
    
    # New and sold out positions    
    table = tree.find('.//div[@class="infoTable floatL marginT15px"]')
    if table is not None:
        for row in table.findall('.//tr'):
            first, second, third = row
            label, holders, shares = first.text, get_text_val(second.text), get_text_val(third.text)
            if label == "New Positions":
                data["new_holders"] = holders
                data["new_shares"] = shares
            elif label == "Sold Out Positions":
                data["sold_holders"] = holders
                data["sold_shares"] = shares
            
    return data

get_data("TSLA")

{'decreased_holders': '320',
 'decreased_shares': '8502436',
 'held_holders': '158',
 'held_shares': '80210424',
 'increased_holders': '369',
 'increased_shares': '10649665',
 'institutional_ownership': '58.82%',
 'new_holders': '108',
 'new_shares': '788939',
 'price': '$294.09',
 'sold_holders': '90',
 'sold_shares': '2301657',
 'total_shares': '169'}

In [4]:
directory = os.path.join("n16_data_cache", str(datetime.now().date()))
if not os.path.exists(directory):
    os.makedirs(directory)

def save_data_to_cache(symbols):
    
    num_worker_threads = 3
    
    def worker():
        while True:
            symbol = q.get()
            if symbol is None:
                break
            
            try:
                data = get_data(symbol)
            except Exception as e:
                print(e)
            else:
                if data:
                    fname = os.path.join(directory, "{}.csv".format(symbol))
                    with open(fname, 'w') as f:
                        w = csv.DictWriter(f, data.keys())
                        w.writeheader()
                        w.writerow(data)
                else:
                    print("No data for {}".format(symbol))
            
            q.task_done()

    q = queue.Queue()

    threads = []
    for i in range(num_worker_threads):
        t = threading.Thread(target=worker)
        t.start()
        threads.append(t)

    for symbol in symbols:
        fname = os.path.join(directory, "{}.csv".format(symbol))
        if not os.path.isfile(fname):  # not in the cache
            q.put(symbol)

    # block until all tasks are done
    q.join()

    # stop workers
    for i in range(num_worker_threads):
        q.put(None)
    
    # join threads
    for t in threads:
        t.join()
        
save_data_to_cache(symbols)

No data for CLACU
No data for HRMNU
No data for FSCFL
No data for PCO
No data for ZBZZT
No data for IMED
No data for GNVC
No data for SHOR
No data for ISM
No data for CBMXW
No data for CPAAW
No data for NRCIB
No data for BWLD
No data for HRMNW
No data for SNBC
No data for ZAZZT
No data for OACQR
No data for DELTW
No data for APLP
No data for CMLS
No data for ABCO
No data for AAAP
No data for LBIO
No data for PTXP
No data for ROKA
No data for KITE
No data for ZJZZT
No data for CLAC
No data for THLD
No data for SPNC
No data for BKMU
No data for NDRM
No data for RLOG
No data for MAUI
No data for WYIGU
No data for WLB
No data for TESO
No data for PVTBP
No data for KTEC
No data for FHCO
No data for MSFG
No data for BUR
No data for OACQU
No data for SNC
No data for ANDAR
No data for QPACU
No data for EVAR
No data for DRAM
No data for SRSC
No data for NVDQ
No data for RXDX
No data for CPAAU
No data for CSBK
No data for FSBK
No data for FSC
No data for CBMX
No data for CASC
No data for ZVZZT
N

In [5]:
def get_data_from_cache(symbol):
    fname = os.path.join(directory, "{}.csv".format(symbol))
    if os.path.isfile(fname):
        with open(fname, 'r') as f:
            reader = csv.reader(f)
            header, values = reader
            values = [float(v) if v.isnumeric() else v for v in values]
            data = dict(zip(header, values))
            return data

get_data_from_cache("TSLA")

{'decreased_holders': 320.0,
 'decreased_shares': 8502436.0,
 'held_holders': 158.0,
 'held_shares': 80210424.0,
 'increased_holders': 369.0,
 'increased_shares': 10649665.0,
 'institutional_ownership': '58.82%',
 'new_holders': 108.0,
 'new_shares': 788939.0,
 'price': '$294.09',
 'sold_holders': 90.0,
 'sold_shares': 2301657.0,
 'total_shares': 169.0}

In [6]:
data = []
for symbol in symbols:
    symbol_data = get_data_from_cache(symbol)
    if symbol_data:
        symbol_data["symbol"] = symbol
        data.append(symbol_data)
df = pd.DataFrame.from_records(data)
df[:10]

Unnamed: 0,decreased_holders,decreased_shares,held_holders,held_shares,increased_holders,increased_shares,institutional_ownership,new_holders,new_shares,price,sold_holders,sold_shares,symbol,total_shares
0,9.0,9423.0,12.0,1057070.0,12.0,37337.0,24.03%,3.0,10582.0,$17.5,2.0,480.0,UG,5.0
1,56.0,1343872.0,29.0,68573400.0,135.0,8988074.0,77.38%,43.0,2564218.0,$51.19,16.0,391841.0,VMBS,102.0
2,127.0,1737700.0,43.0,21086000.0,148.0,3058055.0,74.49%,45.0,801209.0,$75.5,37.0,396383.0,IDCC,35.0
3,46.0,4661161.0,19.0,18956800.0,61.0,7590855.0,58.18%,23.0,3661435.0,$9.35,16.0,2609858.0,USAT,54.0
4,15.0,468726.0,11.0,7765000.0,17.0,879174.0,33.86%,8.0,495694.0,$3.37,6.0,309080.0,FLL,27.0
5,6.0,34919.0,2.0,398221.0,7.0,76221.0,95.35%,4.0,39378.0,$16.98,3.0,9188.0,VIIX,1.0
6,27.0,1472770.0,10.0,29346500.0,46.0,994702.0,71.15%,22.0,528645.0,$33.01,6.0,202956.0,TUSK,45.0
7,18.0,80111.0,7.0,151263.0,9.0,30221.0,40.25%,3.0,11656.0,$69.78,3.0,4576.0,PSCC,1.0
8,12.0,182956.0,11.0,8846730.0,9.0,583660.0,51.45%,1.0,222260.0,$0.768,5.0,95406.0,SUMR,19.0
9,58.0,3716620.0,29.0,18738800.0,54.0,2932577.0,45.50%,24.0,982332.0,$22,25.0,2325627.0,AXDX,56.0


In [7]:
def draw_table(sub_df, headers, fields):
    s = "<table><tr><th>" + "</th><th>".join(headers) + "</th></tr>"
    for r in sub_df.itertuples():
        s += "<tr><td>" + "</td><td>".join(str(getattr(r, f)) for f in fields) + "</td></tr>"
    s += "</table>"
    display(HTML(s))
    
draw_table(df[:10], ("Symbol", "Price"), ("symbol", "price"))

Symbol,Price
UG,$17.5
VMBS,$51.19
IDCC,$75.5
USAT,$9.35
FLL,$3.37
VIIX,$16.98
TUSK,$33.01
PSCC,$69.78
SUMR,$0.768
AXDX,$22


In [8]:
df["ratio"] = (df.increased_holders - df.decreased_holders) / df.held_holders
df.sort_values("ratio", ascending=False, inplace=True)
display(HTML("<h3>Top 100 stocks by Active Bullish Activity By Number of Funds((Increased – Decreased)/Held)</h3>"))

draw_table(
    df[:100], 
    ("Stock", "Share price", "Inst.Ownership", "Increased", "Decreased", "Held", "Ratio"),
    ("symbol", "price", "institutional_ownership", "increased_holders", "decreased_holders", "held_holders", "ratio")
)

Stock,Share price,Inst.Ownership,Increased,Decreased,Held,Ratio
IMRN,$9,01.58%,3.0,0.0,0.0,inf
FTXD,$21.1915,34.98%,2.0,1.0,0.0,inf
FORK,$3.95,00.78%,5.0,2.0,0.0,inf
CALI,$2.78,00.85%,4.0,3.0,0.0,inf
GFNCP,$100.01,14.03%,2.0,0.0,0.0,inf
FALN,$26.35,22.37%,9.0,4.0,0.0,inf
BMLP,$47.04,02.04%,3.0,1.0,0.0,inf
DWLD,$26.12,40.96%,23.0,4.0,0.0,inf
CLDC,$2.11,00.02%,1.0,0.0,0.0,inf
CID,$35.7,47.83%,12.0,1.0,0.0,inf


In [9]:
df["ratio"] = df.increased_shares / df.decreased_shares
df.sort_values("ratio", ascending=False, inplace=True)
display(HTML("<h3>Top 100 stocks by Active Bullish Activity By Shares (Increased / Decreased)</h3>"))

draw_table(
    df[:100], 
    ("Stock", "Share price", "Inst.Ownership", "Increased", "Decreased", "Held", "Ratio"),
    ("symbol", "price", "institutional_ownership", "increased_shares", "decreased_shares", "held_shares", "ratio")
)

Stock,Share price,Inst.Ownership,Increased,Decreased,Held,Ratio
IMRN,$9,01.58%,45806.0,0.0,10691.0,inf
HIHO,$4.95,04.35%,14699.0,0.0,150686.0,inf
BYFC,$2.18,10.43%,299.0,0.0,1951659.0,inf
GBLIL,$25.5992,01.32%,1138.0,0.0,67525.0,inf
GAINM,$25.35,01.11%,233.0,0.0,25350.0,inf
OSBCP,$10.499,00.15%,4000.0,0.0,0.0,inf
RDIB,$27.66,07.24%,2051.0,0.0,119592.0,inf
BNTCW,$0.24,0%,459.0,0.0,140578.0,inf
JSM,$21.7,00.14%,1.0,0.0,15403.0,inf
GBLIZ,$25.29,02.36%,377.0,0.0,97347.0,inf


In [10]:
df["ratio"] = df.new_holders / df.sold_holders
df.sort_values("ratio", ascending=False, inplace=True)
display(HTML("<h3>Top 100 stocks by New Bullish Activity By Funds (New / Sold Out)</h3>"))

draw_table(
    df[:100], 
    ("Stock", "Share price", "Inst.Ownership", "New", "Sold out", "Ratio"),
    ("symbol", "price", "institutional_ownership", "new_holders", "sold_holders", "ratio")
)

Stock,Share price,Inst.Ownership,New,Sold out,Ratio
IMRN,$9,01.58%,1.0,0.0,inf
SSNT,$3.8,06.31%,2.0,0.0,inf
VIIZ,$6.94,31.11%,2.0,0.0,inf
PSAU,$18.955,36.13%,3.0,0.0,inf
FTXR,$24.2403,30.72%,4.0,0.0,inf
MRUS,$17.34,25.75%,5.0,0.0,inf
DDBI,$29.8176,89.04%,2.0,0.0,inf
PBBI,$10.5,31.36%,1.0,0.0,inf
VSDA,$27.66,19.00%,4.0,0.0,inf
LNGR,$19.552,35.35%,2.0,0.0,inf


In [11]:
df["ratio"] = df.new_shares / df.sold_shares
df.sort_values("ratio", ascending=False, inplace=True)
display(HTML("<h3>Top 100 stocks by New Bullish Activity By Shares (New / Sold Out)</h3>"))

draw_table(
    df[:100], 
    ("Stock", "Share price", "Inst.Ownership", "New", "Sold out", "Ratio"),
    ("symbol", "price", "institutional_ownership", "new_shares", "sold_shares", "ratio")
)

Stock,Share price,Inst.Ownership,New,Sold out,Ratio
IMRN,$9,01.58%,400.0,0.0,inf
MARPS,$3.4008,01.86%,3225.0,0.0,inf
XBIO,$1.8282,03.91%,2476.0,0.0,inf
MOXC,$3.17,00.01%,6993.0,0.0,inf
GFNCP,$100.01,14.03%,49121.0,0.0,inf
LEXEB,$39.1672,00.47%,27.0,0.0,inf
NXTDW,$0.3391,0%,45603.0,0.0,inf
BOSC,$2.22,00.88%,15955.0,0.0,inf
RDIB,$27.66,07.24%,1.0,0.0,inf
JASNW,$0.0121,0%,47065.0,0.0,inf
