# Find out the ETFs that a given stock belongs to

In [1]:
import requests
import json
import re
import time
import random
import html

import pandas as pd
# Lift pandas' display limits (None = no cap) so DataFrames shown in the
# notebook are not truncated in either columns or rows.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
def getLandingPage(ticker: str, timeout: float = 30):
    """Fetch the etfdb.com landing page for a stock.

    Args:
        ticker: Stock symbol; interpolated into ``https://etfdb.com/stock/{ticker}/``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The ``requests.Response`` of the GET request. The HTTP status is not
        checked here; callers inspect the response themselves.

    Raises:
        requests.RequestException: If the connection fails or times out.
    """
    url = f"https://etfdb.com/stock/{ticker}/"

    # A plain GET: no form body and no custom headers are needed.
    return requests.get(url, timeout=timeout)

def getId(ticker: str):
    """Return the numeric id etfdb.com embeds in a stock's landing page.

    The id is taken from the first ``{"by_stock":<digits>}`` fragment found in
    the landing page text, and is returned as a string of digits.

    Args:
        ticker: Stock symbol passed on to ``getLandingPage``.

    Returns:
        The id as a ``str`` containing only digits.

    Raises:
        IndexError: If the page contains no ``{"by_stock":<digits>}`` fragment
            (same exception type as before, now with an explanatory message).
    """
    landingPageResponse = getLandingPage(ticker)

    # Raw string so ``\d`` reaches the regex engine untouched; ``\d+`` (not
    # ``\d*``) so a fragment with an empty id is never accepted.
    match = re.search(r'\{"by_stock":(\d+)\}', landingPageResponse.text)
    if match is None:
        raise IndexError(
            f'No {{"by_stock":<id>}} fragment found on the landing page for {ticker!r} '
            f'(HTTP status {landingPageResponse.status_code})'
        )
    return match.group(1)

def getExposures(ticker: str):
    """Download every ETF-exposure row etfdb.com reports for a stock.

    The ``data_set`` endpoint is queried page by page, using the number of
    rows collected so far as the ``offset``, until the number of collected
    rows reaches the ``total`` reported in the response. Progress is printed
    as one ``.`` per request, and a random 5-10 second pause is inserted
    between consecutive requests.

    Args:
        ticker: Stock symbol; resolved to the site's numeric id via ``getId``.

    Returns:
        A list with the raw row objects from the ``rows`` field of each
        response, in the order the server returned them.

    Raises:
        requests.HTTPError: If a page request does not return HTTP 200.
        requests.RequestException: If a request fails or times out.
        IndexError: Propagated from ``getId`` when the id cannot be found.
    """
    stock_id = getId(ticker)

    rows = []

    total = float('inf')
    print(f"Retrieving exposures for {ticker}")

    while len(rows) < total:
        url = f'https://etfdb.com/data_set/?tm=40274&cond={{"by_stock":{stock_id}}}&no_null_sort=&count_by_id=true&sort=weighting&order=desc&offset={len(rows)}'

        response = requests.get(url, timeout=30)
        if response.status_code != 200:
            # Fail loudly: continuing would either hit undefined variables
            # (first page) or append the previous page a second time.
            raise requests.HTTPError(
                f"etfdb.com returned HTTP {response.status_code} "
                f"while fetching exposures at offset {len(rows)}",
                response=response,
            )

        page = json.loads(response.text)
        total = page['total']
        page_rows = page['rows']

        rows.extend(page_rows)
        # print(f'Retrieved {len(rows)} rows...', end='\r')
        print('.', end='')

        if not page_rows:
            # The server has nothing more to give even though `total` says
            # otherwise; stop instead of requesting the same offset forever.
            break

        if len(rows) < total:
            # Pause only when another request follows, to stay polite to the
            # server without delaying the final return.
            time.sleep(random.randint(5, 10))

    print('Done!')
    return rows

In [11]:
# Stock symbol whose ETF exposures are looked up in the following cells.
ticker = 'WM'

In [12]:
# Fetch all raw exposure rows for the ticker; getExposures sleeps several
# seconds per request, so this cell can take a while.
exposure_rows = getExposures(ticker)

Retrieving exposures for WM
.Done!


In [13]:
# Output key -> key in the raw row, for the fields that are HTML-unescaped and
# then stripped of anything that looks like a tag.
_markup_fields = {
    'symbol': 'symbol',
    'name': 'sather_etfs.name',
    'etf_category': 'etf_category',
}

# Build one flat record per raw row; key order here is the column order of
# the DataFrame created from this list.
exposures = []
for row in exposure_rows:
    e = {
        out_key: re.sub('<.*?>', '', html.unescape(row[raw_key]))
        for out_key, raw_key in _markup_fields.items()
    }
    # These two fields are copied through unchanged.
    e.update(expense_ratio=row['expense_ratio'], weighting=row['weighting'])
    exposures.append(e)

In [14]:
# for row in exposure_rows:
#     print(row)

In [15]:
# One DataFrame row per exposure record; columns follow the records' key order.
exposures_df = pd.DataFrame(exposures)

In [16]:
# Bare expression as the last statement of the cell: the notebook renders the table.
exposures_df

Unnamed: 0,symbol,name,etf_category,expense_ratio,weighting
0,EVX,VanEck Vectors Environmental Services ETF,Industrials Equities,0.55%,10.20%
1,CCOR,Core Alternative ETF,Large Cap Growth Equities,1.09%,2.89%
2,HUSV,First Trust Horizon Managed Volatility Domesti...,Volatility Hedged Equity,0.70%,1.97%
3,VTRN,VictoryShares Top Veteran Employers ETF,,0.60%,1.97%
4,VETS,Pacer Military Times Best Employers ETF,Large Cap Blend Equities,0.60%,1.95%
5,AUSF,Global X Adaptive U.S. Factor ETF,All Cap Equities,0.27%,1.63%
6,TPHD,Timothy Plan High Dividend Stock ETF,All Cap Equities,0.52%,1.53%
7,USMV,iShares MSCI USA Min Vol Factor ETF,Large Cap Growth Equities,0.15%,1.47%
8,ACWV,iShares MSCI Global Min Vol Factor ETF,Large Cap Blend Equities,0.20%,1.31%
9,LGLV,SPDR SSGA US Large Cap Low Volatility Index ETF,Volatility Hedged Equity,0.12%,1.26%
