In [5]:
import requests
import time
from bs4 import BeautifulSoup

# Browser-like request headers sent with every Yahoo Finance lookup request.
# NOTE(review): "authority", "method" and "scheme" look like HTTP/2
# pseudo-headers (":authority", ":method", ":scheme") copied from browser dev
# tools without the leading colon; as written they are sent as ordinary
# header fields -- confirm whether they are needed at all.
hdr = {
    "authority": "finance.yahoo.com",
    "method": "GET",
    "scheme": "https",
    "accept": "text/html",
    # "br" is deliberately not advertised: Brotli responses are only decoded
    # when an optional brotli package is installed, otherwise the body would
    # come back still compressed and the text parsing downstream would find
    # nothing.
    "accept-encoding": "gzip, deflate",
    "accept-language": "en-US,en;q=0.9",
    "cache-control": "no-cache",
    "dnt": "1",
    "pragma": "no-cache",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "same-origin",
    "sec-fetch-user": "?1",
    "upgrade-insecure-requests": "1",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"
}



In [6]:
def get_counts(body, srch):
    """Extract the result count shown as ``Stocks (<count>)`` in a lookup page.

    Args:
        body: Text of a lookup page. Empty or non-string values are tolerated
            and yield ``"0"``.
        srch: Search term the page was fetched for; only used in messages.

    Returns:
        str: The text between ``Stocks (`` and the next ``)`` with thousands
        separators removed, or ``"0"`` when the marker is missing, when no
        closing parenthesis follows within the 12 characters scanned, or
        when ``body`` cannot be searched.
    """
    marker = 'Stocks ('
    try:
        count_beg = body.find(marker)
        if count_beg == -1:
            print(f"Warning: Could not find stock count for search term: {srch}")
            return "0"  # Return 0 if stock count is not found

        start = count_beg + len(marker)
        rest = body[start: start + 12]
        count_end = rest.find(')')
        if count_end == -1:
            # Without this guard, rest[0:-1] would silently drop the last
            # character and return a truncated, wrong number.
            print(f"Warning: Could not find stock count for search term: {srch}")
            return "0"

        # Drop thousands separators so the caller can pass the result
        # straight to int().
        return rest[:count_end].replace(",", "").strip()
    except (AttributeError, TypeError) as e:
        # body is None or not a str (e.g. bytes); treat it as "no count".
        print(f"Error extracting count for {srch}: {e}")
        return "0"

def call_url(url, hdr, max_attempts=5, retry_delay=1, timeout=30):
    """Fetch ``url`` with a GET request and return the response body as text.

    Any ``requests`` error (connection problems, timeouts, 4xx/5xx statuses)
    is printed and the request is retried, up to ``max_attempts`` times in
    total, so a permanently failing URL can no longer loop forever.

    Args:
        url: Address to fetch.
        hdr: Mapping of HTTP headers to send with the request.
        max_attempts: Total number of attempts before giving up. Values
            below 1 make no request at all.
        retry_delay: Seconds to wait between two attempts.
        timeout: Seconds to wait for the server before an attempt is counted
            as failed.

    Returns:
        str: The response text, or ``""`` when every attempt failed.
    """
    for attempt in range(1, max_attempts + 1):
        try:
            r = requests.get(url, headers=hdr, timeout=timeout)
            r.raise_for_status()  # Raise an exception for 4xx/5xx HTTP errors
            return r.text
        except requests.exceptions.RequestException as err:
            print(f"Error with URL {url}: {err}")
            if attempt < max_attempts:
                time.sleep(retry_delay)  # Wait before retrying
    return ""  # Return an empty string if the request fails

def process_block(body, srch, yh_all_sym, hdr):
    """Page through the equity lookup results for ``srch`` and collect symbols.

    Result pages are requested 100 rows at a time (offsets 0, 100, ... 9900).
    Every anchor whose ``href`` contains ``/quote/`` and whose ``data-symbol``
    ends in ``.NS`` or ``.BO`` is appended to ``yh_all_sym``.

    Args:
        body: Unused. Every page, including the first, is fetched again
            inside this function; the parameter is kept so existing callers
            keep working.
        srch: Search term placed in the ``s`` query parameter.
        yh_all_sym: List that is extended in place with dictionaries of the
            form ``{'company_name': ..., 'symbol': ...}``.
        hdr: HTTP headers forwarded to ``call_url``.

    Returns:
        None. Results are delivered through ``yh_all_sym``.
    """
    for block in range(0, 9999, 100):
        url = f"https://finance.yahoo.com/lookup/equity?s={srch}&t=A&b={block}&c=100"
        page = call_url(url, hdr)
        if not page:  # If the page is empty or failed, skip this block
            print(f"Failed to retrieve data for search term: {srch}, block {block}")
            continue

        soup = BeautifulSoup(page, 'html.parser')
        is_empty = True
        for link in soup.find_all('a'):
            # Anchors without an href return None from .get(); fall back to ""
            # so the membership test cannot raise TypeError.
            href = link.get('href') or ""
            if "/quote/" not in href:
                continue
            symbol = link.get('data-symbol')
            if symbol and symbol.endswith((".NS", ".BO")):
                is_empty = False
                company_name = link.get_text(strip=True)
                # Add the company name and symbol as a dictionary
                yh_all_sym.append({'company_name': company_name, 'symbol': symbol})
        # NOTE(review): paging stops at the first page that contains no
        # .NS/.BO symbol, even if that page listed other quotes; later pages
        # are then never requested -- confirm this is intended.
        if is_empty:
            break





In [7]:
def _collect_symbols(prefix, search_set, yh_all_sym, hdr, max_len=4, threshold=9000):
    """Collect symbols for ``prefix``, splitting it into longer prefixes if needed.

    The first lookup page for ``prefix`` is fetched to read its result count.
    Below ``threshold`` the prefix is paged through directly; otherwise every
    character of ``search_set`` is appended and the longer prefixes are
    handled the same way. Prefixes of ``max_len`` characters are paged
    through without a further count check.

    NOTE(review): the threshold of 9000 is presumably tied to process_block
    paging only up to offset 9900 -- confirm.
    """
    url = f"https://finance.yahoo.com/lookup/equity?s={prefix}&t=A&b=0&c=25"
    body = call_url(url, hdr)
    raw_count = get_counts(body, prefix)

    try:
        count = int(raw_count)
    except ValueError:
        print(f"Error parsing number for {prefix}: {raw_count}")
        return  # Skip this search term if parsing fails

    if count < threshold:
        process_block(body, prefix, yh_all_sym, hdr)
        return

    for char in search_set:
        longer = prefix + char
        if len(longer) >= max_len:
            process_block(body, longer, yh_all_sym, hdr)
        else:
            _collect_symbols(longer, search_set, yh_all_sym, hdr, max_len, threshold)


def main():
    """Crawl the Yahoo Finance equity lookup and return the symbols found.

    Every two-character combination of ``A-Z`` and ``0-9`` is used as a search
    term; terms reporting 9000 or more results are refined to three and, if
    necessary, four characters. Each collected entry is printed as
    ``<company_name>: <symbol>``.

    Returns:
        list[dict]: Entries of the form ``{'company_name': ..., 'symbol': ...}``
        in discovery order, one per distinct symbol.
    """
    search_set = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789")

    collected = []
    for term_1 in search_set:
        for term_2 in search_set:
            _collect_symbols(term_1 + term_2, search_set, collected, hdr)

    # The same symbol can be appended under several search terms; keep only
    # its first occurrence.
    yh_all_sym = []
    seen_symbols = set()
    for entry in collected:
        if entry['symbol'] in seen_symbols:
            continue
        seen_symbols.add(entry['symbol'])
        yh_all_sym.append(entry)

    # Output the results as a list of dictionaries
    for entry in yh_all_sym:
        print(f"{entry['company_name']}: {entry['symbol']}")
    return yh_all_sym

In [8]:
# Run the full crawl and keep the collected entries. main() issues at least
# one HTTP request for each of the 36 x 36 two-character search terms.
yh_all_sym = main()

Error with URL https://finance.yahoo.com/lookup/equity?s=A5&t=A&b=0&c=100: 404 Client Error: Not Found for url: https://finance.yahoo.com/lookup/equity/?s=A5&t=A&b=0&c=100
Error with URL https://finance.yahoo.com/lookup/equity?s=CI&t=A&b=0&c=100: ("Connection broken: InvalidChunkLength(got length b'', 0 bytes read)", InvalidChunkLength(got length b'', 0 bytes read))
Error with URL https://finance.yahoo.com/lookup/equity?s=EF&t=A&b=0&c=25: 404 Client Error: Not Found for url: https://finance.yahoo.com/lookup/equity/?s=EF&t=A&b=0&c=25
Error with URL https://finance.yahoo.com/lookup/equity?s=EV&t=A&b=0&c=100: ("Connection broken: InvalidChunkLength(got length b'', 0 bytes read)", InvalidChunkLength(got length b'', 0 bytes read))
Error with URL https://finance.yahoo.com/lookup/equity?s=FK&t=A&b=0&c=100: ("Connection broken: InvalidChunkLength(got length b'', 0 bytes read)", InvalidChunkLength(got length b'', 0 bytes read))
Error with URL https://finance.yahoo.com/lookup/equity?s=IV&t=A&b=0

In [10]:
# NOTE(review): the recorded output of this cell is `None`, but main() as
# defined in this notebook returns the collected list, and the cell numbers
# skip from 8 to 10 -- the output is likely stale; re-run from a fresh kernel
# to confirm.
print(yh_all_sym)

None
