In [1]:
import yfinance as yf
import pandas as pd
from datetime import datetime
from datetime import datetime
import uuid
import concurrent.futures
import requests

In [2]:

def get_sp500_tickers():
    """
    Extrae la lista de tickers y nombres de empresas del S&P 500 desde Wikipedia utilizando requests.

    Returns:
        pd.DataFrame: Un DataFrame con dos columnas, 'Symbol' y 'Security', que contienen
                      los tickers y los nombres de las empresas respectivamente.
    """
    url = 'https://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    response = requests.get(url, verify=False)  # Desactivar verificación SSL
    sp500_table = pd.read_html(response.text)[0]
    return sp500_table[['Symbol', 'Security']]

def SP_500_Stockdata(start_date='2000-01-01'):
    """
    Extrae datos históricos de precios de las empresas del S&P 500 desde Yahoo Finance,
    combinándolos en un único DataFrame con un ID único para cada empresa y una columna de timestamp
    para indicar la fecha de extracción. Incluye el nombre completo de cada empresa.

    Args:
        start_date (str): Fecha de inicio en formato 'YYYY-MM-DD'. Por defecto es '2000-01-01'.
        
    Returns:
        pd.DataFrame: Un DataFrame con los datos históricos de precios de las empresas del S&P 500,
                      incluyendo el ticker, un ID único, el nombre de la empresa y el timestamp de extracción.
    """
    # Extraer tickers y nombres de empresas desde Wikipedia
    sp500_data = get_sp500_tickers()
    
    # Fecha de finalización como la fecha actual
    end_date = datetime.now().strftime('%Y-%m-%d')
    extraction_timestamp = datetime.now()
    
    # DataFrame para almacenar los datos combinados
    all_data = pd.DataFrame()
    
    # Iterar sobre cada fila para extraer el ticker y nombre de la empresa
    for _, row in sp500_data.iterrows():
        ticker = row['Symbol']
        company_name = row['Security']
        
        # Generar un ID único persistente para cada empresa basado en el ticker
        company_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, ticker))
        
        # Descargar datos de precios de Yahoo Finance
        stock_data = yf.download(ticker, start=start_date, end=end_date)
        
        # Filtrar solo las columnas necesarias y agregar las columnas adicionales
        stock_data = stock_data[['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']]
        stock_data['Ticker'] = ticker
        stock_data['Company_Name'] = company_name
        stock_data['Extraction_Timestamp'] = extraction_timestamp
        stock_data['Company_ID'] = company_id

                # Reiniciar el índice para que Date sea una columna en lugar de índice
        stock_data = stock_data.reset_index()
        
        # Concatenar al DataFrame principal
        all_data = pd.concat([all_data, stock_data], ignore_index=True)
    
    return all_data



In [3]:
sp500_tickers=get_sp500_tickers()

  sp500_table = pd.read_html(response.text)[0]


In [4]:
sp500_tickers

Unnamed: 0,Symbol,Security
0,MMM,3M
1,AOS,A. O. Smith
2,ABT,Abbott Laboratories
3,ABBV,AbbVie
4,ACN,Accenture
...,...,...
498,XYL,Xylem Inc.
499,YUM,Yum! Brands
500,ZBRA,Zebra Technologies
501,ZBH,Zimmer Biomet


In [2]:
sp500_tickers = [
    'A', 'AAPL', 'ABBV', 'ABMD', 'ACN', 'ADBE', 'ADI', 'ADM', 'ADP', 'ADSK', 'AEE', 'AEP', 'AES', 'AFL',
    'AIG', 'AIZ', 'AJG', 'AKAM', 'ALB', 'ALGN', 'ALK', 'ALL', 'AMAT', 'AMCR', 'AMD', 'AME', 'AMGN', 'AMP',
    'AMT', 'AMZN', 'ANET', 'ANSS', 'AON', 'AOS', 'APA', 'APD', 'APH', 'ARE', 'ATO', 'ATVI', 'AVB', 'AVGO',
    'AVY', 'AWK', 'AXP', 'AZO', 'BA', 'BAC', 'BALL', 'BAX', 'BBWI', 'BBY', 'BIO', 'BK', 'BKNG', 'BKR',
    'BLK', 'BLL', 'BMY', 'BR', 'BRK.B', 'BRO', 'BSX', 'BWA', 'BXP', 'C', 'CAG', 'CAH', 'CARR', 'CAT',
    'CB', 'CBOE', 'CBRE', 'CCI', 'CCL', 'CDAY', 'CDNS', 'CDW', 'CE', 'CERN', 'CF', 'CFG', 'CHD', 'CHRW',
    'CHTR', 'CI', 'CINF', 'CL', 'CLX', 'CMA', 'CMCSA', 'CME', 'CMG', 'CMS', 'CNC', 'CNP', 'COF', 'COO',
    'COP', 'COST', 'CPB', 'CRL', 'CRM', 'CSCO', 'CSX', 'CTAS', 'CTLT', 'CTRA', 'CTSH', 'CTVA', 'CVS', 'CVX',
    'D', 'DAL', 'DD', 'DE', 'DFS', 'DG', 'DGX', 'DHI', 'DHR', 'DIS', 'DISCA', 'DISCK', 'DISH', 'DLR', 'DLTR',
    'DOV', 'DOW', 'DPZ', 'DRE', 'DRI', 'DTE', 'DUK', 'DVA', 'DVN', 'DXCM', 'EA', 'EBAY', 'ECL', 'ED', 'EFX',
    'EIX', 'EL', 'EMN', 'EMR', 'ENPH', 'EOG', 'EPAM', 'EQIX', 'EQR', 'ES', 'ESS', 'ETN', 'ETR', 'EVRG',
    'EW', 'EXC', 'EXPD', 'EXPE', 'EXR', 'F', 'FANG', 'FAST', 'FBHS', 'FCX', 'FDX', 'FE', 'FFIV', 'FIS',
    'FISV', 'FITB', 'FLIR', 'FLS', 'FLT', 'FMC', 'FOX', 'FOXA', 'FRC', 'FRT', 'FTNT', 'FTV', 'GD', 'GE',
    'GILD', 'GIS', 'GL', 'GLW', 'GM', 'GNRC', 'GOOG', 'GOOGL', 'GPC', 'GPN', 'GRMN', 'GS', 'GWW', 'HAL',
    'HAS', 'HBAN', 'HBI', 'HCA', 'HD', 'HES', 'HIG', 'HII', 'HLT', 'HOLX', 'HON', 'HPE', 'HPQ', 'HRL',
    'HSIC', 'HST', 'HSY', 'HUM', 'HWM', 'IBM', 'ICE', 'IDXX', 'IEX', 'IFF', 'ILMN', 'INCY', 'INTC', 'INTU',
    'IP', 'IPG', 'IPGP', 'IQV', 'IR', 'IRM', 'ISRG', 'IT', 'ITW', 'IVZ', 'J', 'JBHT', 'JCI', 'JKHY', 'JNJ',
    'JNPR', 'JPM', 'K', 'KEY', 'KEYS', 'KHC', 'KIM', 'KLAC', 'KMB', 'KMI', 'KO', 'KR', 'L', 'LDOS', 'LEG',
    'LEN', 'LH', 'LHX', 'LIN', 'LKQ', 'LMT', 'LNC', 'LNT', 'LOW', 'LRCX', 'LUMN', 'LUV', 'LW', 'LYB', 'LYV',
    'MA', 'MAA', 'MAR', 'MAS', 'MCD', 'MCHP', 'MCK', 'MCO', 'MDLZ', 'MDT', 'MET', 'MGM', 'MHK', 'MKC', 'MKTX',
    'MLM', 'MMC', 'MMM', 'MNST', 'MO', 'MOS', 'MPC', 'MPWR', 'MRK', 'MRO', 'MS', 'MSCI', 'MSFT', 'MSI', 'MTB',
    'MTD', 'MU', 'NCLH', 'NDAQ', 'NDSN', 'NEE', 'NEM', 'NFLX', 'NI', 'NKE', 'NLOK', 'NLSN', 'NOC', 'NOV',
    'NOW', 'NRG', 'NSC', 'NTAP', 'NTRS', 'NUE', 'NVDA', 'NVR', 'NWL', 'NWS', 'NWSA', 'NXPI', 'O', 'ODFL',
    'OKE', 'OMC', 'ORCL', 'ORLY', 'OTIS', 'OXY', 'PARA', 'PAYC', 'PAYX', 'PBCT', 'PCAR', 'PEAK', 'PEG', 'PENN',
    'PEP', 'PFE', 'PFG', 'PG', 'PGR', 'PH', 'PHM', 'PKG', 'PKI', 'PLD', 'PM', 'PNC', 'PNR', 'PNW', 'POOL',
    'PPG', 'PPL', 'PRGO', 'PRU', 'PSA', 'PSX', 'PVH', 'PWR', 'PXD', 'PYPL', 'QCOM', 'QRVO', 'RCL', 'RE', 'REG',
    'REGN', 'RF', 'RHI', 'RJF', 'RL', 'RMD', 'ROK', 'ROL', 'ROP', 'ROST', 'RSG', 'RTX', 'SBAC', 'SBNY', 'SBUX',
    'SCHW', 'SEE', 'SHW', 'SIVB', 'SJM', 'SLB', 'SNA', 'SNPS', 'SO', 'SPG', 'SPGI', 'SRE', 'STE', 'STT', 'STX',
    'STZ', 'SWK', 'SWKS', 'SYF', 'SYK', 'SYY', 'T', 'TAP', 'TDG', 'TDY', 'TEL', 'TER', 'TFC', 'TFX', 'TGT',
    'TJX', 'TMO', 'TMUS', 'TPR', 'TRMB', 'TROW', 'TRV', 'TSCO', 'TSLA', 'TSN', 'TT', 'TTWO', 'TWTR', 'TXN',
    'TXT', 'TYL', 'UAL', 'UDR', 'UHS', 'ULTA', 'UNH', 'UNM', 'UNP', 'UPS', 'URI', 'USB', 'V', 'VFC', 'VIAC',
    'VLO', 'VMC', 'VNO', 'VRSK', 'VRSN', 'VRTX', 'VTR', 'VTRS', 'VZ', 'WAB', 'WAT', 'WBA', 'WDC', 'WEC',
    'WELL', 'WFC', 'WHR', 'WLTW', 'WM', 'WMB', 'WMT', 'WRB', 'WRK', 'WST', 'WU', 'WY', 'WYNN', 'XEL', 'XLNX',
    'XOM', 'XRAY', 'XYL', 'YUM', 'ZBH', 'ZBRA', 'ZION', 'ZTS'
]

In [3]:
sp500_company_names = [
    'Agilent Technologies, Inc.',
    'Apple Inc.',
    'AbbVie Inc.',
    'Abiomed, Inc.',
    'Accenture plc',
    'Adobe Inc.',
    'Analog Devices, Inc.',
    'Archer-Daniels-Midland Company',
    'Automatic Data Processing, Inc.',
    'Autodesk, Inc.',
    'Ameren Corporation',
    'American Electric Power Company, Inc.',
    'The AES Corporation',
    'Aflac Incorporated',
    'American International Group, Inc.',
    'Assurant, Inc.',
    'Arthur J. Gallagher & Co.',
    'Akamai Technologies, Inc.',
    'Albemarle Corporation',
    'Align Technology, Inc.',
    'Alaska Air Group, Inc.',
    'The Allstate Corporation',
    'Applied Materials, Inc.',
    'Amcor plc',
    'Advanced Micro Devices, Inc.',
    'AMETEK, Inc.',
    'Amgen Inc.',
    'Ameriprise Financial, Inc.',
    'American Tower Corporation',
    'Amazon.com, Inc.',
    'Arista Networks, Inc.',
    'ANSYS, Inc.',
    'Aon plc',
    'A. O. Smith Corporation',
    'APA Corporation',
    'Air Products and Chemicals, Inc.',
    'Amphenol Corporation',
    'Alexandria Real Estate Equities, Inc.',
    'Atmos Energy Corporation',
    'Activision Blizzard, Inc.',
    'AvalonBay Communities, Inc.',
    'Broadcom Inc.',
    'Avery Dennison Corporation',
    'American Water Works Company, Inc.',
    'American Express Company',
    'AutoZone, Inc.',
    'The Boeing Company',
    'Bank of America Corporation',
    'Ball Corporation',
    'Baxter International Inc.',
    'Bath & Body Works, Inc.',
    'Best Buy Co., Inc.',
    'Bio-Rad Laboratories, Inc.',
    'The Bank of New York Mellon Corporation',
    'Booking Holdings Inc.',
    'Baker Hughes Company',
    'BlackRock, Inc.',
    'Bristol-Myers Squibb Company',
    'Broadridge Financial Solutions, Inc.',
    'Berkshire Hathaway Inc.',
    'Brown & Brown, Inc.',
    'Boston Scientific Corporation',
    'BorgWarner Inc.',
    'Boston Properties, Inc.',
    'Citigroup Inc.',
    'Conagra Brands, Inc.',
    'Cardinal Health, Inc.',
    'Carrier Global Corporation',
    'Caterpillar Inc.',
    'Chubb Limited',
    'Cboe Global Markets, Inc.',
    'CBRE Group, Inc.',
    'Crown Castle Inc.',
    'Carnival Corporation & plc',
    'Ceridian HCM Holding Inc.',
    'Cadence Design Systems, Inc.',
    'CDW Corporation',
    'Celanese Corporation',
    'Cerner Corporation',
    'CF Industries Holdings, Inc.',
    'Citizens Financial Group, Inc.',
    'Church & Dwight Co., Inc.',
    'C.H. Robinson Worldwide, Inc.',
    'Charter Communications, Inc.',
    'Cigna Corporation',
    'Cincinnati Financial Corporation',
    'Colgate-Palmolive Company',
    'The Clorox Company',
    'Comerica Incorporated',
    'Comcast Corporation',
    'CME Group Inc.',
    'Chipotle Mexican Grill, Inc.',
    'CMS Energy Corporation',
    'Centene Corporation',
    'CenterPoint Energy, Inc.',
    'Capital One Financial Corporation',
    'The Cooper Companies, Inc.',
    'ConocoPhillips',
    'Costco Wholesale Corporation',
    'Campbell Soup Company',
    'Charles River Laboratories International, Inc.',
    'Salesforce, Inc.',
    'Cisco Systems, Inc.',
    'CSX Corporation',
    'Cintas Corporation',
    'Catalent, Inc.',
    'Coterra Energy Inc.',
    'Cognizant Technology Solutions Corporation',
    'Corteva, Inc.',
    'CVS Health Corporation',
    'Chevron Corporation',
    'Dominion Energy, Inc.',
    'Delta Air Lines, Inc.',
    'DuPont de Nemours, Inc.',
    'Deere & Company',
    'Discover Financial Services',
    'Dollar General Corporation',
    'Quest Diagnostics Incorporated',
    'D.R. Horton, Inc.',
    'Danaher Corporation',
    'The Walt Disney Company',
    'Digital Realty Trust, Inc.',
    'Dollar Tree, Inc.',
    'Dover Corporation',
    'Dow Inc.',
    'Domino\'s Pizza, Inc.',
    'Duke Realty Corporation',
    'Darden Restaurants, Inc.',
    'DTE Energy Company',
    'Duke Energy Corporation',
    'DaVita Inc.',
    'Devon Energy Corporation',
    'DexCom, Inc.',
    'Electronic Arts Inc.',
    'eBay Inc.',
    'Ecolab Inc.',
    'Consolidated Edison, Inc.',
    'Equifax Inc.',
    'Edison International',
    'The Estée Lauder Companies Inc.',
    'Eastman Chemical Company',
    'Emerson Electric Co.',
    'Enphase Energy, Inc.',
    'EOG Resources, Inc.',
    'EPAM Systems, Inc.',
    'Equinix, Inc.',
    'Equity Residential',
    'Eversource Energy',
    'Essex Property Trust, Inc.',
    'Eaton Corporation plc',
    'Entergy Corporation',
    'Evergy, Inc.',
    'Edwards Lifesciences Corporation',
    'Exelon Corporation',
    'Expeditors International of Washington, Inc.',
    'Expedia Group, Inc.',
    'Extra Space Storage Inc.',
    'Ford Motor Company',
    'Diamondback Energy, Inc.',
    'Fastenal Company',
    'Fortune Brands Home & Security, Inc.',
    'Freeport-McMoRan Inc.',
    'FedEx Corporation',
    'FirstEnergy Corp.',
    'F5, Inc.',
    'Fidelity National Information Services, Inc.',
    'Fiserv, Inc.',
    'Fifth Third Bancorp',
    'FleetCor Technologies, Inc.',
    'FMC Corporation',
    'Fox Corporation',
    'First Republic Bank',
    'Federal Realty Investment Trust',
    'Fortinet, Inc.',
    'Fortive Corporation',
    'General Dynamics Corporation',
    'General Electric Company',
    'Gilead Sciences, Inc.',
    'General Mills, Inc.',
    'Globe Life Inc.',
    'Corning Incorporated',
    'General Motors Company',
    'Generac Holdings Inc.',
    'Alphabet Inc.',
    'Google Inc.',
    'Genuine Parts Company',
    'Global Payments Inc.',
    'Garmin Ltd.',
    'The Goldman Sachs Group, Inc.',
    'W.W. Grainger, Inc.',
    'Halliburton Company',
    'Hasbro, Inc.',
    'Huntington Bancshares Incorporated',
    'Hanesbrands Inc.',
    'HCA Healthcare, Inc.',
    'The Home Depot, Inc.',
    'Hess Corporation',
    'The Hartford Financial Services Group, Inc.',
    'Huntington Ingalls Industries, Inc.',
    'Hilton Worldwide Holdings Inc.',
    'Hologic, Inc.',
    'Honeywell International Inc.',
    'HP Inc.',
    'Hormel Foods Corporation',
    'Henry Schein, Inc.',
    'Host Hotels & Resorts, Inc.',
    'The Hershey Company',
    'Humana Inc.',
    'Howmet Aerospace Inc.',
    'International Business Machines Corporation',
    'Intercontinental Exchange, Inc.',
    'IDEXX Laboratories, Inc.',
    'IEX Corporation',
    'International Flavors & Fragrances Inc.',
    'Illumina, Inc.',
    'Incyte Corporation',
    'Intel Corporation',
    'Intuit Inc.',
    'International Paper Company',
    'The Interpublic Group of Companies, Inc.',
    'IPG Photonics Corporation',
    'IQVIA Holdings Inc.',
    'Ingersoll Rand Inc.',
    'Iron Mountain Incorporated',
    'Intuitive Surgical, Inc.',
    'Gartner, Inc.',
    'Illinois Tool Works Inc.',
    'Invesco Ltd.',
    'Jacobs Engineering Group Inc.',
    'J.B. Hunt Transport Services, Inc.',
    'Johnson Controls International plc',
    'Jack Henry & Associates, Inc.',
    'Johnson & Johnson',
    'Juniper Networks, Inc.',
    'JPMorgan Chase & Co.',
    'The Kellogg Company',
    'KeyCorp',
    'Keysight Technologies Inc.',
    'The Kraft Heinz Company',
    'Kroger Co.',
    'Laboratory Corporation of America Holdings',
    'LEIDOS Holdings, Inc.',
    'Lennar Corporation',
    'Linde plc',
    'LKQ Corporation',
    'Lockheed Martin Corporation',
    'Lincoln National Corporation',
    'Alliant Energy Corporation',
    'Lowe\'s Companies, Inc.',
    'Lam Research Corporation',
    'Southwest Airlines Co.',
    'Lumen Technologies, Inc.',
    'LyondellBasell Industries N.V.',
    'Marriott International, Inc.',
    'The Marsh & McLennan Companies, Inc.',
    'Mastercard Incorporated',
    'M&T Bank Corporation',
    'McDonald\'s Corporation',
    'Medtronic plc',
    'MetLife, Inc.',
    'Mohawk Industries, Inc.',
    'Molina Healthcare, Inc.',
    'McKesson Corporation',
    'Mondelez International, Inc.',
    'The Mosaic Company',
    'Moderna, Inc.',
    'Monster Beverage Corporation',
    'Morgan Stanley',
    'Microsoft Corporation',
    'Microchip Technology Incorporated',
    'MSCI Inc.',
    'The Walt Disney Company',
    'Visa Inc.',
    'Pfizer Inc',
    'Principal Financial Group, Inc.',
    'Procter & Gamble Co.',
    'Progressive Corporation',
    'Parker-Hannifin Corporation',
    'PulteGroup, Inc.',
    'Packaging Corporation of America',
    'PerkinElmer, Inc.',
    'Prologis, Inc.',
    'Philip Morris International Inc.',
    'PNC Financial Services Group, Inc.',
    'Pentair plc',
    'Pinnacle West Capital Corporation',
    'Pool Corporation',
    'PPG Industries, Inc.',
    'PPL Corporation',
    'Perrigo Company plc',
    'Prudential Financial, Inc.',
    'Public Storage',
    'Phillips 66',
    'PVH Corp.',
    'Quanta Services, Inc.',
    'Pioneer Natural Resources Company',
    'PayPal Holdings, Inc.',
    'QUALCOMM Incorporated',
    'Qorvo, Inc.',
    'Royal Caribbean Group',
    'Everest Re Group, Ltd.',
    'Regency Centers Corporation',
    'Regeneron Pharmaceuticals, Inc.',
    'Regions Financial Corporation',
    'Robert Half International Inc.',
    'Raymond James Financial, Inc.',
    'Ralph Lauren Corporation',
    'ResMed Inc.',
    'Rockwell Automation, Inc.',
    'Rollins, Inc.',
    'Roper Technologies, Inc.',
    'Ross Stores, Inc.',
    'Republic Services, Inc.',
    'Raytheon Technologies Corporation',
    'SBA Communications Corporation',
    'Signature Bank',
    'Starbucks Corporation',
    'The Charles Schwab Corporation',
    'Sealed Air Corporation',
    'Sherwin-Williams Company',
    'SVB Financial Group',
    'The J. M. Smucker Company',
    'Schlumberger Limited',
    'Snap-on Incorporated',
    'Synopsys, Inc.',
    'The Southern Company',
    'Simon Property Group, Inc.',
    'S&P Global Inc.',
    'Sempra Energy',
    'STERIS plc',
    'State Street Corporation',
    'Seagate Technology Holdings plc',
    'Constellation Brands, Inc.',
    'Stanley Black & Decker, Inc.',
    'Skyworks Solutions, Inc.',
    'Synchrony Financial',
    'Stryker Corporation',
    'Sysco Corporation',
    'AT&T Inc.',
    'Molson Coors Beverage Company',
    'TransDigm Group Incorporated',
    'Teledyne Technologies Incorporated',
    'TE Connectivity Ltd.',
    'Teradyne, Inc.',
    'Truist Financial Corporation',
    'Teleflex Incorporated',
    'Target Corporation',
    'The TJX Companies, Inc.',
    'Thermo Fisher Scientific Inc.',
    'T-Mobile US, Inc.',
    'Tapestry, Inc.',
    'Trimble Inc.',
    'T. Rowe Price Group, Inc.',
    'The Travelers Companies, Inc.',
    'Tractor Supply Company',
    'Tesla, Inc.',
    'Tyson Foods, Inc.',
    'Trane Technologies plc',
    'Take-Two Interactive Software, Inc.',
    'Texas Instruments Incorporated',
    'Textron Inc.',
    'Tyler Technologies, Inc.',
    'United Airlines Holdings, Inc.',
    'UDR, Inc.',
    'Universal Health Services, Inc.',
    'Ulta Beauty, Inc.',
    'UnitedHealth Group Incorporated',
    'Unum Group',
    'Union Pacific Corporation',
    'United Parcel Service, Inc.',
    'United Rentals, Inc.',
    'U.S. Bancorp',
    'Visa Inc.',
    'VF Corporation',
    'ViacomCBS Inc.',
    'Valero Energy Corporation',
    'Vulcan Materials Company',
    'Vornado Realty Trust',
    'Verisk Analytics, Inc.',
    'VeriSign, Inc.',
    'Vertex Pharmaceuticals Incorporated',
    'Ventas, Inc.',
    'ViacomCBS Inc.',
    'The Western Union Company',
    'The Williams Companies, Inc.',
    'Wells Fargo & Company',
    'Whirlpool Corporation',
    'Willis Towers Watson Public Limited Company',
    'Walmart Inc.',
    'Waste Management, Inc.',
    'W. R. Berkley Corporation',
    'WestRock Company',
    'West Pharmaceutical Services, Inc.',
    'The Walt Disney Company',
    'Xcel Energy Inc.',
    'Exxon Mobil Corporation',
    'Xylem Inc.',
    'Yum! Brands, Inc.',
    'Zimmer Biomet Holdings, Inc.',
    'Zebra Technologies Corporation',
    'Zions Bancorporation, N.A.',
    'Zoetis Inc.'
]

In [6]:

def SP_500_Stockdata(tickers, company_names, start_date='2000-01-01'):
    """
    Extrae datos históricos de precios de las empresas del S&P 500 desde Yahoo Finance,
    combinándolos en un único DataFrame con un ID único para cada empresa y una columna de timestamp
    para indicar la fecha de extracción. Incluye el nombre completo de cada empresa.

    Args:
        tickers (list): Lista de tickers de empresas.
        company_names (list): Lista de nombres de empresas correspondientes a los tickers.
        start_date (str): Fecha de inicio en formato 'YYYY-MM-DD'. Por defecto es '2000-01-01'.
        
    Returns:
        pd.DataFrame: Un DataFrame con los datos históricos de precios de las empresas del S&P 500,
                      incluyendo el ticker, un ID único, el nombre de la empresa y el timestamp de extracción.
    """
    
    # Fecha de finalización como la fecha actual
    end_date = datetime.now().strftime('%Y-%m-%d')
    extraction_timestamp = datetime.now()
    
    # DataFrame para almacenar los datos combinados
    all_data = pd.DataFrame()
    
    # Iterar sobre cada ticker y nombre de empresa
    for ticker, company_name in zip(tickers, company_names):
        # Generar un ID único persistente para cada empresa basado en el ticker
        company_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, ticker))
        
        # Descargar datos de precios de Yahoo Finance
        stock_data = yf.download(ticker, start=start_date, end=end_date)
        
        # Filtrar solo las columnas necesarias y agregar las columnas adicionales
        stock_data = stock_data[['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']]
        stock_data['Ticker'] = ticker
        stock_data['Company_Name'] = company_name
        stock_data['Extraction_Timestamp'] = extraction_timestamp
        stock_data['Company_ID'] = company_id
        
        # Reiniciar el índice para que Date sea una columna en lugar de índice
        stock_data = stock_data.reset_index()
        
        # Concatenar al DataFrame principal
        all_data = pd.concat([all_data, stock_data], ignore_index=True)
    
    return all_data




In [5]:

SP_500_data = SP_500_Stockdata()


  sp500_table = pd.read_html(response.text)[0]
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of

: 

In [32]:
SP_500_data.head()

In [None]:
SP_500_data.to_csv("sp500_historial_data_2000_2024.csv", index=False)

In [11]:
def fetch_stock_data(ticker, start_date, end_date, extraction_timestamp):
    try:
        # Genera un ID único para la empresa
        company_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, ticker))
        
        # Información de la empresa
        stock_info = yf.Ticker(ticker).info
        company_name = stock_info.get('longName', 'N/A') 
        
        # Descarga de datos históricos de precios
        stock_data = yf.download(ticker, start=start_date, end=end_date)
        
        # Agregar columnas adicionales al DataFrame
        stock_data['Ticker'] = ticker
        stock_data['Company_ID'] = company_id
        stock_data['Company_Name'] = company_name
        stock_data['Extraction_Timestamp'] = extraction_timestamp

        return stock_data
    
    except Exception as e:
        print(f"Error al descargar datos para {ticker}: {e}")
        return pd.DataFrame()  # Retorna un DataFrame vacío en caso de error

def SP_500_Stockdata(tickers, start_date='2000-01-01'):
    """
    Extrae datos históricos de precios de las empresas del S&P 500 desde Yahoo Finance,
    utilizando paralelización para acelerar el proceso.
    """
    end_date = datetime.now().strftime('%Y-%m-%d')
    extraction_timestamp = datetime.now()
    
    all_data = pd.DataFrame()

    # Usar ThreadPoolExecutor para ejecutar múltiples descargas en paralelo
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        # Crear tareas para cada ticker
        futures = [executor.submit(fetch_stock_data, ticker, start_date, end_date, extraction_timestamp) for ticker in tickers]
        
        # Combina los resultados cuando estén disponibles
        for future in concurrent.futures.as_completed(futures):
            data = future.result()
            all_data = pd.concat([all_data, data], ignore_index=True)
    
    return all_data


In [13]:
# Llamada a la función principal para obtener los datos históricos
data = SP_500_Stockdata(sp500_tickers, start_date='2000-01-01')

# Mostramos los primeros registros del DataFrame resultante
print(data.head())


404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/ABMD?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=ABMD&crumb=U9X8kwlqtcw
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed







[*********************100%***********************]  1 of 1 completed




[*********************100%***********************]  1 of 1 completed






Error al descargar datos para AFL: No objects to concatenateError al descargar datos para ALB: No objects to concatenate

Error al descargar datos para AKAM: No objects to concatenate
Error al descargar datos para AEE: No objects to concatenate
Error al descargar datos para AES: No objects to concatenate


[*********************100%***********************]  1 of 1 completed


[*********************100%***********************]  1 of 1 completed






[*********************100%***********************]  1 of 1 completed






[*********************100%***********************]  1 of 1 completed


Error al descargar datos para ANSS: No objects to concatenateError al descargar datos para AON: No objects to concatenate

Error al descargar datos para ANET: No objects to concatenate
Error al descargar datos para APD: No objects to concatenate


404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/ATVI?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=ATVI&crumb=U9X8kwlqtcw
[*********************100%***********************]  1 of 1 completed

[*********************100%***********************]  1 of 1 completed







[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed







[*********************100%***********************]  1 of 1 completed








[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Error al descargar datos para CAH: No objects to concatenate


[*********************100%***********************]  1 of 1 completed


404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/CDAY?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=CDAY&crumb=U9X8kwlqtcw
[*********************100%***********************]  1 of 1 completed




[*********************100%***********************]  1 of 1 completed









[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed








[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed



[*********************100%***********************]  1 of 1 completed
[*****************

Error al descargar datos para EA: No objects to concatenateError al descargar datos para EL: No objects to concatenate
Error al descargar datos para DXCM: No objects to concatenate
Error al descargar datos para EFX: No objects to concatenate
Error al descargar datos para EMN: No objects to concatenate
Error al descargar datos para ED: No objects to concatenate



[*********************100%***********************]  1 of 1 completed

[*********************100%***********************]  1 of 1 completed







[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


[*********************100%***********************]  1 of 1 completed





[*********************100%***********************]  1 of 1 completed

Error al descargar datos para EVRG: No objects to concatenateError al descargar datos para F: No objects to concatenate
Error al descargar datos para FANG: No objects to concatenate






[*********************100%***********************]  1 of 1 completed





[*********************100%***********************]  1 of 1 completed

404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/FLT?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=FLT&crumb=U9X8kwlqtcw
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed








[*********************100%***********************]  1 of 1 completed









[*********************100%***********************]  1 of 1 completed

[*********************100%***********************]  1 of 1 completed


[*********************100%***********************]  1 of 1 completed



[*********************100%***********************]  1 of 1 completed


Error al descargar datos para HBAN: dictionary changed size during iteration
Error al descargar datos para HES: No objects to concatenate


[*********************100%***********************]  1 of 1 completed



[*********************100%***********************]  1 of 1 completed

[*********************100%***********************]  1 of 1 completed




Error al descargar datos para HIG: No objects to concatenate
Error al descargar datos para HSIC: No objects to concatenate
Error al descargar datos para HII: No objects to concatenate
Error al descargar datos para HLT: No objects to concatenate


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed






[*********************100%***********************]  1 of 1 completed

Error al descargar datos para IEX: No objects to concatenateError al descargar datos para IDXX: No objects to concatenate
Error al descargar datos para IBM: No objects to concatenate
Error al descargar datos para HSY: No objects to concatenate
Error al descargar datos para HWM: No objects to concatenate

Error al descargar datos para HST: No objects to concatenate
Error al descargar datos para ICE: No objects to concatenate


[*********************100%***********************]  1 of 1 completed


[*********************100%***********************]  1 of 1 completed









[*********************100%***********************]  1 of 1 completed

[*********************100%***********************]  1 of 1 completed







[*********************100%***********************]  1 of 1 completed





[*********************100%***********************]  1 of 1 completed


[*********************100%***********************]  1 of 1 completed



Error al descargar datos para KEYS: No objects to concatenate
Error al descargar datos para KLAC: No objects to concatenate
Error al descargar datos para KMB: No objects to concatenate
Error al descargar datos para KIM: No objects to concatenate
Error al descargar datos para KEY: No objects to concatenate
Error al descargar datos para KHC: No objects to concatenate
Error al descargar datos para KO: 'DVN'
Error al descargar datos para LDOS: No objects to concatenate


[*********************100%***********************]  1 of 1 completed






Error al descargar datos para LEG: 'DUK'
Error al descargar datos para LHX: 'DUK'


[*********************100%***********************]  1 of 1 completed


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed





[*********************100%***********************]  1 of 1 completed

Error al descargar datos para LOW: No objects to concatenate
Error al descargar datos para LNT: No objects to concatenate
Error al descargar datos para LNC: No objects to concatenate





[*********************100%***********************]  1 of 1 completed






[*********************100%***********************]  1 of 1 completed



Error al descargar datos para MCD: No objects to concatenateError al descargar datos para MET: No objects to concatenate
Error al descargar datos para MAS: No objects to concatenate
Error al descargar datos para MCHP: No objects to concatenate
Error al descargar datos para MCO: No objects to concatenate
Error al descargar datos para MAR: No objects to concatenate

Error al descargar datos para MDLZ: No objects to concatenate
Error al descargar datos para MCK: No objects to concatenate
Error al descargar datos para MGM: No objects to concatenate


[*********************100%***********************]  1 of 1 completed







[*********************100%***********************]  1 of 1 completed


Error al descargar datos para MKTX: No objects to concatenate
Error al descargar datos para MHK: No objects to concatenate
Error al descargar datos para MLM: No objects to concatenate
Error al descargar datos para MOS: No objects to concatenate


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed








[*********************100%***********************]  1 of 1 completed








[*********************100%***********************]  1 of 1 completed


Error al descargar datos para MU: No objects to concatenateError al descargar datos para NEE: No objects to concatenate

Error al descargar datos para NDSN: No objects to concatenate
Error al descargar datos para NI: No objects to concatenate


[*********************100%***********************]  1 of 1 completed









[*********************100%***********************]  1 of 1 completed




Error al descargar datos para NOV: dictionary changed size during iterationError al descargar datos para NRG: dictionary changed size during iteration

Error al descargar datos para NTRS: dictionary changed size during iteration
Error al descargar datos para NVR: No objects to concatenate
Error al descargar datos para O: No objects to concatenate
Error al descargar datos para NXPI: No objects to concatenate



[*********************100%***********************]  1 of 1 completed





[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

[*********************100%***********************]  1 of 1 completed



404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/PEAK?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=PEAK&crumb=U9X8kwlqtcw
[*********************100%***********************]  1 of 1 completed

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed







Error al descargar datos para PAYC: No objects to concatenateError al descargar datos para PCAR: No objects to concatenate



[*********************100%***********************]  1 of 1 completed

1 Failed download:


1 Failed download:
['FLT']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')
['FLT']: YFTzMissingError('$%ticker%: possibly delisted; no timezone found')
[*********************100%***********************]  1 of 1 completed




[*********************100%***********************]  1 of 1 completed





Error al descargar datos para PNW: No objects to concatenateError al descargar datos para POOL: No objects to concatenate

Error al descargar datos para PKI: No objects to concatenate


[*********************100%***********************]  1 of 1 completed

1 Failed download:
[*********************100%***********************]  1 of 1 completed


Error al descargar datos para PRU: No objects to concatenate







404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/PXD?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=PXD&crumb=U9X8kwlqtcw
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

[*********************100%***********************]  1 of 1 completed

404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/RE?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=RE&crumb=U9X8kwlqtcw
[*********************100%***********************]  1 of 1 completed









[*********************100%***********************]  1 of 1 completed


[*********************100%***********************]  1 of 1 completed





[*********************100%*****************

Error al descargar datos para ROP: No objects to concatenate
Error al descargar datos para ROST: No objects to concatenate
Error al descargar datos para SCHW: No objects to concatenate
Error al descargar datos para SBAC: No objects to concatenate
Error al descargar datos para SBUX: No objects to concatenate
Error al descargar datos para RTX: No objects to concatenate
Error al descargar datos para RSG: No objects to concatenate
Error al descargar datos para SEE: No objects to concatenate





[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed





Error al descargar datos para SJM: No objects to concatenate


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed








[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed




[*********************100%***********************]  1 of 1 completed

Error al descargar datos para TAP: No objects to concatenate
Error al descargar datos para TDY: dictionary changed size during iteration


[*********************100%***********************]  1 of 1 completed

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed




[*********************100%***********************]  1 of 1 completed

Error al descargar datos para TFX: No objects to concatenateError al descargar datos para TMO: No objects to concatenate






[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed










Error al descargar datos para UAL: dictionary changed size during iteration
Error al descargar datos para UHS: dictionary changed size during iteration


[*********************100%***********************]  1 of 1 completed









[*********************100%***********************]  1 of 1 completed

[*********************100%***********************]  1 of 1 completed











Error al descargar datos para VRSN: dictionary changed size during iteration
Error al descargar datos para WAB: dictionary changed size during iteration


[*********************100%***********************]  1 of 1 completed




404 Client Error: Not Found for url: https://query2.finance.yahoo.com/v10/finance/quoteSummary/WRK?modules=financialData%2CquoteType%2CdefaultKeyStatistics%2CassetProfile%2CsummaryDetail&corsDomain=finance.yahoo.com&formatted=false&symbol=WRK&crumb=U9X8kwlqtcw
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed







[*********************100%***********************]  1 of 1 completed







Error al descargar datos para ZION: No objects to concatenate
Error al descargar datos para XOM: No objects to concatenate
Error al descargar datos para XYL: No objects to concatenate
Error al descargar datos para ZBH: No objects to concatenate
Error al descargar datos para YUM: No objects to concatenate
Error al descargar datos para ZTS: No objects to concatenate


[*********************100%***********************]  1 of 1 completed



Price   Adj Close      Close       High        Low       Open      Volume  \
Ticker       ABBV       ABBV       ABBV       ABBV       ABBV        ABBV   
0       21.629181  35.119999  35.400002  34.099998  34.919998  13767900.0   
1       21.450579  34.830002  35.000000  34.160000  35.000000  16739300.0   
2       21.179594  34.389999  34.889999  34.250000  34.619999  21372100.0   
3       21.222706  34.459999  35.450001  34.150002  34.150002  17897100.0   
4       20.760809  33.709999  34.639999  33.360001  34.290001  17863300.0   

Price  Ticker                            Company_ID    Company_Name  \
Ticker                                                                
0        ADSK  cefb5ce8-37a1-512d-9436-191fd0731666  Autodesk, Inc.   
1        ADSK  cefb5ce8-37a1-512d-9436-191fd0731666  Autodesk, Inc.   
2        ADSK  cefb5ce8-37a1-512d-9436-191fd0731666  Autodesk, Inc.   
3        ADSK  cefb5ce8-37a1-512d-9436-191fd0731666  Autodesk, Inc.   
4        ADSK  cefb5ce8-37a1-512d-

In [14]:
data.head()

Price,Adj Close,Close,High,Low,Open,Volume,Ticker,Company_ID,Company_Name,Extraction_Timestamp,...,Close,Close,High,High,Low,Low,Open,Open,Volume,Volume
Ticker,ABBV,ABBV,ABBV,ABBV,ABBV,ABBV,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,...,JPM,KEY,JPM,KEY,JPM,KEY,JPM,KEY,JPM,KEY
0,21.629181,35.119999,35.400002,34.099998,34.919998,13767900.0,ADSK,cefb5ce8-37a1-512d-9436-191fd0731666,"Autodesk, Inc.",2024-10-27 18:52:20.632560,...,,,,,,,,,,
1,21.450579,34.830002,35.0,34.16,35.0,16739300.0,ADSK,cefb5ce8-37a1-512d-9436-191fd0731666,"Autodesk, Inc.",2024-10-27 18:52:20.632560,...,,,,,,,,,,
2,21.179594,34.389999,34.889999,34.25,34.619999,21372100.0,ADSK,cefb5ce8-37a1-512d-9436-191fd0731666,"Autodesk, Inc.",2024-10-27 18:52:20.632560,...,,,,,,,,,,
3,21.222706,34.459999,35.450001,34.150002,34.150002,17897100.0,ADSK,cefb5ce8-37a1-512d-9436-191fd0731666,"Autodesk, Inc.",2024-10-27 18:52:20.632560,...,,,,,,,,,,
4,20.760809,33.709999,34.639999,33.360001,34.290001,17863300.0,ADSK,cefb5ce8-37a1-512d-9436-191fd0731666,"Autodesk, Inc.",2024-10-27 18:52:20.632560,...,,,,,,,,,,
