# Imports and API key

In [None]:
import pandas as pd
import numpy as np
import earningscall
import requests
import os
from pathlib import Path
from bs4 import BeautifulSoup
from datetime import datetime
import json
from earningscall import get_company
import earningscall

# Prefer a key supplied through the EARNINGSCALL_API_KEY environment variable so
# the secret does not have to be stored in the notebook; the literal below is
# kept as the fallback when the variable is not set.
earningscall.api_key = os.environ.get("EARNINGSCALL_API_KEY", "***")

# Company Class

In [None]:
class Company:
    """
    Wraps an earningscall company handle and saves its transcripts and audio to disk.

    Files are written under ``<base_path>/<SYMBOL>/<year>/<quarter>/``: the
    transcript as ``text.json`` and, for the bulk download, the recording as
    ``audio.mp3``.
    """

    def __init__(self, company_code: str):
        """
        Stores the lower-cased ticker symbol and looks up the company handle.
        """
        self.company_code = company_code.lower()
        self.company = self._initialize_company()

    def _initialize_company(self) -> "earningscall.company.Company":
        """
        Initializes the company object using the given company code.
        """
        # NOTE(review): get_company presumably returns None for an unknown
        # symbol -- confirm; in that case the fetch methods below would fail
        # with AttributeError.
        return get_company(self.company_code)

    def _event_dir(self, base_path: str, year: int, quarter: int) -> str:
        """
        Builds the output directory for one earnings call.
        """
        return f'{base_path}/{self.company_code.upper()}/{year}/{quarter}'

    def get_and_save_one_transcript(self, base_path: str, year: int, quarter: int, level=3) -> None:
        """
        Fetches and saves a single earnings call transcript for the specified year and quarter.

        The outcome is reported on stdout; "[OK]" is printed only when the
        transcript was actually written to disk.
        """
        print(f"Fetching transcript for {self.company_code.upper()} Q{quarter} {year}..")
        transcript = self.company.get_transcript(year=year, quarter=quarter, level=level)

        if not transcript:
            print(f"No transcript found. Q{quarter} {year}. [ERROR]")
            return

        path = self._event_dir(base_path, year, quarter)
        if self._try_save_transcript(transcript, path):
            print(f"Transcript found and loaded. Q{quarter} {year}. [OK]")
        else:
            print(f"Transcript found but could not be saved. Q{quarter} {year}. [ERROR]")

    def get_and_save_all_transcripts_and_audio(self, base_path: str, level=3) -> None:
        """
        Fetches and saves every available earnings call transcript and its audio recording.

        Events whose conference date lies in the future are skipped. The
        outcome of each event is reported on stdout.
        """
        print(f"Fetching all transcripts for {self.company_code.upper()}..")

        for event in self.company.events():
            # Skip future events. An event without a conference date cannot be
            # classified, so it is attempted rather than crashing on None.
            conference_date = event.conference_date
            if conference_date is not None and datetime.now().timestamp() < conference_date.timestamp():
                print(f"* {self.company.company_info.symbol} Q{event.quarter} {event.year} -- skipping, conference date in the future")
                continue

            transcript = self.company.get_transcript(event=event, level=level)

            if not transcript:
                print(f"No transcript found. Q{event.quarter} {event.year}. [ERROR]")
                continue

            path = self._event_dir(base_path, event.year, event.quarter)
            saved = self._try_save_transcript(transcript, path)
            # Assumes download_audio_file returns the written file name, or
            # None when no recording is available -- confirm against the
            # earningscall documentation.
            audio_file = self.company.download_audio_file(event=event, file_name=path + '/audio.mp3')

            if saved and audio_file:
                print(f"Transcript and audio found and loaded. Q{event.quarter} {event.year}. [OK]")
            else:
                transcript_state = "saved" if saved else "not saved"
                audio_state = "saved" if audio_file else "not available"
                print(
                    f"Incomplete download (transcript {transcript_state}, audio {audio_state}). "
                    f"Q{event.quarter} {event.year}. [ERROR]"
                )
        print("-" * 150)

    @staticmethod
    def _try_save_transcript(transcript, path: str) -> bool:
        """
        Writes ``transcript.to_dict()`` to ``<path>/text.json``.

        Returns True when the file was written, False when serialising or
        writing failed (the error is printed, not raised).
        """
        # Ensure the directory exists
        Path(path).mkdir(parents=True, exist_ok=True)
        file_path = os.path.join(path, 'text.json')

        try:
            with open(file_path, "w", encoding="utf-8") as out_file:
                json.dump(transcript.to_dict(), out_file, indent=4, ensure_ascii=False)
        except Exception as e:
            # Deliberately best-effort: one bad transcript must not abort a
            # long batch download, but the caller is told about the failure.
            print(f"Failed to save transcript: {e}")
            return False
        return True

    @staticmethod
    def save_transcript(transcript, path: str) -> None:
        """
        Saves the transcript dictionary to a JSON file.

        Failures are printed rather than raised.
        """
        Company._try_save_transcript(transcript, path)


# Get S&P500 Data

In [3]:
# The commented-out code below fetched the S&P 500 holdings table from the web
# and wrote it to 'S&P500_Data.csv'; it is kept for reference, and the cell now
# only loads that CSV file.

# # URL of the web page
# url = 'https://earningscall.biz/sp-500-holdings'

# # Make the HTTP request
# response = requests.get(url)
# response.raise_for_status()  # Check that the request succeeded

# # Parse the HTML content
# soup = BeautifulSoup(response.text, 'html.parser')

# table = soup.find('table')
# # Extract the table headers
# headers = [header.text.strip() for header in table.find_all('th')]

# # Extract the table rows
# rows = []
# for row in table.find_all('tr')[1:]:  # Skip the first 'tr', which holds the headers
#     cols = [col.text.strip() for col in row.find_all('td')]
#     rows.append(cols)

# # Build a pandas DataFrame from the data
# df = pd.DataFrame(rows, columns=headers)

# df.to_csv('S&P500_Data.csv')
# Load the previously saved holdings table from disk.
df = pd.read_csv('S&P500_Data.csv')

In [4]:
# Give the "#" column a descriptive label (modifies df in place).
df.rename(columns={"#": 'RANKING_POTITION'}, inplace=True)

# Keep the first three rows of each sector, in their original order, and
# renumber the rows from zero.
# NOTE(review): this is the "top three per sector" only if the CSV rows are
# already ordered by rank -- confirm.
SP500_data = (
    df
    .groupby("Sector")
    .head(3)
    .reset_index(drop=True)
)

# Get transcripts

In [5]:
# Report the (rows, columns) size of the selection, then show its first five rows.
print(SP500_data.shape)
SP500_data.head()

(33, 7)


Unnamed: 0.1,Unnamed: 0,RANKING_POTITION,Company,Symbol,Sector,Stock Price,Employees
0,0,1,NVIDIA Corporation,NVDA,Technology,145.78,29600
1,1,2,Apple Inc.,AAPL,Technology,227.36,164000
2,2,3,Microsoft Corporation,MSFT,Technology,415.61,228000
3,3,4,"Amazon.com, Inc.",AMZN,Consumer Cyclical,202.04,1551000
4,4,5,"Meta Platforms, Inc.",META,Communication Services,565.68,72404


In [None]:
# Root directory that receives one sub-folder per company symbol.
base_path = 'companies'

# Download every available transcript and audio file for each selected symbol.
for company_code in SP500_data['Symbol']:
    company = Company(company_code)
    company.get_and_save_all_transcripts_and_audio(base_path)

Fetching all transcripts for NVDA..
No transcript found. Q3 2025. [ERROR]
Transcript and audio found and loaded. Q2 2025. [OK]
Transcript and audio found and loaded. Q1 2025. [OK]
Transcript and audio found and loaded. Q4 2024. [OK]
Transcript and audio found and loaded. Q3 2024. [OK]
Transcript and audio found and loaded. Q2 2024. [OK]
Transcript and audio found and loaded. Q1 2024. [OK]
Transcript and audio found and loaded. Q4 2023. [OK]
Transcript and audio found and loaded. Q3 2023. [OK]


KeyboardInterrupt: 