In [2]:
from pathlib import Path

import pandas as pd
import requests

def get_ticker_cik_mapping(timeout=30):
    """Download the SEC ticker list and return a ticker -> CIK dictionary.

    The response body is read line by line; each line is expected to hold a
    ticker and a CIK separated by a single tab. Lines that do not split into
    exactly two tab-separated fields are skipped.

    Args:
        timeout: Seconds to wait for the server before giving up. Passed
            straight to ``requests.get``.

    Returns:
        A dict mapping the upper-cased ticker to its CIK, left-padded with
        zeros to 10 characters.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    url = "https://www.sec.gov/include/ticker.txt"
    headers = {'User-Agent': 'Your Name yourname@example.com'}  # Replace with your actual information
    # requests has no default timeout; without one a stalled connection
    # would block this call indefinitely.
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()  # Check for HTTP errors

    ticker_cik = {}
    for line in response.text.splitlines():
        parts = line.strip().split('\t')
        if len(parts) == 2:
            ticker, cik = parts
            ticker_cik[ticker.upper()] = cik.zfill(10)
    return ticker_cik

# Read S&P 500 components (the first table on the page is used)
url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
tables = pd.read_html(url)
df = tables[0]

# Rename columns
df = df.rename(columns={
    'Symbol': 'Ticker',
    'Security': 'Company',
    'GICS Sector': 'Sector',
    'GICS Sub-Industry': 'Sub-Industry',
    'Headquarters Location': 'Headquarters',
    'Date added': 'Date_Added',
    'Founded': 'Founded_Year'
})

# Get the ticker to CIK mapping
ticker_cik = get_ticker_cik_mapping()

# Add CIK column ('' when the ticker is not in the mapping).
# Share-class tickers may be spelled with '.' in this table but with '-' in
# the SEC list (e.g. BRK.B vs BRK-B), so retry with the hyphenated form
# before giving up. The retry only runs when the exact spelling misses.
df['CIK'] = df['Ticker'].apply(
    lambda x: ticker_cik.get(x.upper())
    or ticker_cik.get(x.upper().replace('.', '-'), '')
)

# Reorder columns
columns_order = ['Ticker', 'CIK', 'Company', 'Sector', 'Sub-Industry', 'Headquarters', 'Date_Added', 'Founded_Year']
df = df[columns_order]

# Save to CSV; create the output directory first because to_csv does not
# create missing parent directories.
output_path = Path('data/sp500_companies.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)

print("CSV file 'sp500_companies.csv' has been created.")


CSV file 'sp500_companies.csv' has been created.
