# Obtaining SEC Financial Data

Change the names of the companies below to select the ones to be analyzed

In [1]:
import pandas as pd
import numpy as np
import datetime
import json
from secfsdstools.update import update
from secfsdstools.c_index.companyindexreading import CompanyIndexReader
from secfsdstools.c_index.searching import IndexSearch
from secfsdstools.e_collector.reportcollecting import SingleReportCollector
from secfsdstools.e_filter.rawfiltering import ReportPeriodRawFilter
from secfsdstools.e_presenter.presenting import StandardStatementPresenter

#Prevent logging message prints
import logging

from sqlalchemy import column

logging.getLogger('secfsdstools').setLevel(logging.ERROR)

#Update DB
print("Updating SEC DB...")
update()
print("---Done.")

#Get CIK for Each of Companies
companyNames = [
    "Apple Inc",
    "Johnson & Johnson",
    "JPMorgan Chase",
    "Exxon",
    "Lockheed Martin",
    "NVIDIA CORP"
]

Updating SEC DB...
No rapid-api-key is set: 
If you are interested in daily updates, please have a look at https://rapidapi.com/hansjoerg.wingeier/api/daily-sec-financial-statement-dataset
---Done.


### Company Class, used to store all information associated with company

In [2]:
## Company Class: Stores information from a given CIK
class Company:
    def __init__(self, cik):
        self.cik = cik
        self.report_reader = CompanyIndexReader.get_company_index_reader(cik=self.cik)
        self.consolidated_filings = pd.DataFrame(columns=['tag'])
        self.secLinks = dict()
        self.ticker = ""
        self.transcripts = {}

    def get_cik(self):
        return self.cik

    def get_report_reader(self):
        return self.report_reader

    def getAvailableReports(self):
        return list(self.report_reader.get_all_company_reports_df()['form'].unique())

    def getFilingList(self, reportType, startDate, endDate):
        if reportType == 'All':
            unfilteredDF = self.report_reader.get_all_company_reports_df()
        else:
            unfilteredDF = self.report_reader.get_all_company_reports_df(forms=reportType)

        filteredDF = unfilteredDF[(unfilteredDF.period >= startDate) & (unfilteredDF.period <= endDate)]
        return filteredDF

    def getConsolidatedFilings(self):
        return self.consolidated_filings

    def appendFilings(self, df, filingDate):
        """
        Appends new filings data to the consolidated_filings DataFrame from a dataframe input.
        Includes both tag and stmt columns.

        Parameters:
        - df: pandas DataFrame containing 'tag', 'stmt', and 'merged' columns
        - filingDate: str representing the filing date to be used as column name
        """
        # Get list of tags from input DataFrame
        new_tags = df[['tag', 'stmt']].copy()

        # If this is the first data being added, initialize with both columns
        if len(self.consolidated_filings) == 0:
            self.consolidated_filings = pd.DataFrame(columns=['tag', 'stmt'])

        # Convert existing tag-stmt combinations to set for efficient comparison
        existing_combinations = set(zip(self.consolidated_filings['tag'], self.consolidated_filings['stmt']))
        new_combinations = set(zip(new_tags['tag'], new_tags['stmt']))
        combinations_to_add = new_combinations - existing_combinations

        # Add new tag-stmt combinations if any
        if combinations_to_add:
            new_rows = pd.DataFrame(list(combinations_to_add), columns=['tag', 'stmt'])
            self.consolidated_filings = pd.concat([self.consolidated_filings, new_rows], ignore_index=True)

        # Create the filing date column if it doesn't exist
        if filingDate not in self.consolidated_filings.columns:
            self.consolidated_filings[filingDate] = None

        # Update values for all tags in the input DataFrame
        for _, row in df.iterrows():
            mask = (self.consolidated_filings['tag'] == row['tag']) & (self.consolidated_filings['stmt'] == row['stmt'])
            # Update the value in the filing date column
            self.consolidated_filings.loc[mask, filingDate] = row['merged']

        # Replace all NaN with None
        self.consolidated_filings = self.consolidated_filings.replace({np.nan: None})

        return

    def addSecFormLink(self, period, url):
        self.secLinks[period] = url
        return

    def getSecLink(self):
        return self.secLinks

    def addTicker(self, ticker):
        self.ticker = ticker
        return

    def getTicker(self):
        return self.ticker

    def add_transcript(self, date, transcript_text):
        self.transcripts[date] = transcript_text

    def get_transcript(self, date):
       return self.transcripts.get(date)

Create Company objects for all of the companies to be analyzed

In [3]:
#Determine Company CIK from Name
companyObjDict = dict()
index_search = IndexSearch.get_index_search()
for c in companyNames:
    results = index_search.find_company_by_name(c)
    if len(results) == 1:
        print("CIK for {} : {}".format(c, results.iloc[0]['cik']))
        companyObjDict[c] = Company(cik=results.iloc[0]['cik'])
    else:
        print("-------------------------------------------------")
        print("Multiple CIK for company name {} found:".format(c))
        for index, row in results.iterrows():
            print(index, row['cik'], row['name'])
        selectedIndex = int(input("Select company index from list: "))
        companyObjDict[results.iloc[selectedIndex]['name']] = Company(cik=results.iloc[selectedIndex]['cik'])


CIK for Apple Inc : 320193
CIK for Johnson & Johnson : 200406
CIK for JPMorgan Chase : 19617
CIK for Exxon : 34088
CIK for Lockheed Martin : 936468
CIK for NVIDIA CORP : 1045810


Function used to select only the current period data from a given report

In [4]:
def select_value(row):
    # Get non-NaN values and their indices
    non_nan = [(i, val) for i, val in enumerate(row) if pd.notna(val)]
    if len(non_nan) == 1:
        return non_nan[0][1]
    elif len(non_nan) == 2:
        return non_nan[-1][1]
    return None

Obtain all 10Q/10K reports for each of the companies, we will be focusing only on the income statement as the purpose of this paper is to determine the impact of revenue drivers

In [5]:
import pandas as pd
from pathlib import Path
import datetime
import os

def get_latest_filing_date(filings_df):
    if filings_df.empty:
        return None
    return filings_df['period'].max()

def should_update_filings(csv_path, latest_filing_date):
    if not os.path.exists(csv_path):
        return True
    csv_date = str(csv_path).split('_')[-1].replace('.csv', '')
    return csv_date < latest_filing_date

def process_company_filings(companyObjDict):
    for name, obj in companyObjDict.items():
        # Get latest filings
        filingList = obj.getFilingList(
            reportType=['10-K','10-Q'],
            startDate=0,
            endDate=int(datetime.date.today().strftime('%Y%m%d'))
        ).sort_values('period', ascending=True)

        latest_filing_date = str(filingList['period'].max())
        csv_filename = Path("filings") / f"{name}_consolidated_filings_{latest_filing_date}.csv"
        csv_filename.parent.mkdir(exist_ok=True)  # Add here


        # Check if we need to update
        if not should_update_filings(csv_filename, latest_filing_date):
            print(f"Using cached data for {name} from {csv_filename}")
            obj.consolidated_filings = pd.read_csv(csv_filename, index_col=0)
            continue

        print(f"Processing {filingList.shape[0]} reports for {name}...")

        for row in filingList.itertuples():
            collector = SingleReportCollector.get_report_by_adsh(adsh=row.adsh, stmt_filter=['IS'])
            rawdatabag = collector.collect()
            df = (rawdatabag.filter(ReportPeriodRawFilter())
                           .join()
                           .present(StandardStatementPresenter()))

            cols_after_inpth = df.loc[:, df.columns[df.columns.get_loc('inpth') + 1:]]
            df['merged'] = cols_after_inpth.apply(select_value, axis=1)

            filing_date = datetime.datetime.strptime(str(row.period), '%Y%m%d')
            obj.appendFilings(df, filing_date)
            obj.addSecFormLink(row.period, row.url)

        # Save to CSV with latest filing date in filename
        obj.consolidated_filings.to_csv(csv_filename, index=True)
        print(f"Saved {name} filings to {csv_filename}")


In [6]:
process_company_filings(companyObjDict)

Using cached data for Apple Inc from filings/Apple Inc_consolidated_filings_20240630.csv
Using cached data for Johnson & Johnson from filings/Johnson & Johnson_consolidated_filings_20240630.csv
Using cached data for JPMorgan Chase from filings/JPMorgan Chase_consolidated_filings_20240630.csv
Using cached data for Exxon from filings/Exxon_consolidated_filings_20240630.csv
Using cached data for Lockheed Martin from filings/Lockheed Martin_consolidated_filings_20240630.csv
Using cached data for NVIDIA CORP from filings/NVIDIA CORP_consolidated_filings_20240731.csv


#### Note:

Sales and Revenue numbers are somestimes not reported under the same tag use the below to investigrate.
exampleFilings = companyObjDict[companyNames[0]].getConsolidatedFilings()
exampleFilings[exampleFilings['tag'].str.contains('revenue', case=False, na=False)]

exampleFilings[exampleFilings['tag'].str.contains('sales', case=False, na=False)]

secLinks = companyObjDict[companyNames[0]].getSecLink()
secLinks[20181231]

## Printout of Income Statement for first company

In [7]:
companyObjDict[companyNames[0]].getConsolidatedFilings()

Unnamed: 0,tag,stmt,2009-06-30 00:00:00,2009-09-30 00:00:00,2009-12-31 00:00:00,2010-03-31 00:00:00,2010-06-30 00:00:00,2010-09-30 00:00:00,2010-12-31 00:00:00,2011-03-31 00:00:00,...,2022-03-31 00:00:00,2022-06-30 00:00:00,2022-09-30 00:00:00,2022-12-31 00:00:00,2023-03-31 00:00:00,2023-06-30 00:00:00,2023-09-30 00:00:00,2023-12-31 00:00:00,2024-03-31 00:00:00,2024-06-30 00:00:00
0,OperatingExpenses,IS,1351000000.0,5482000000.0,1686000000.0,1646000000.0,1902000000.0,7299000000.0,2471000000.0,2344000000.0,...,12580000000.0,12809000000.0,51345000000.0,14316000000.0,13658000000.0,13415000000.0,54847000000.0,14482000000.0,14371000000.0,14326000000.0
1,WeightedAverageNumberOfSharesOutstandingBasic,IS,893712000.0,893016000.0,903542000.0,907548000.0,912197000.0,909461000.0,919294000.0,923196000.0,...,16278800000.0,16162940000.0,16215960000.0,15892720000.0,15787150000.0,15697610000.0,15744230000.0,15509760000.0,15405860000.0,15287520000.0
2,SellingGeneralAndAdministrativeExpense,IS,1010000000.0,4149000000.0,1288000000.0,1220000000.0,1438000000.0,5517000000.0,1896000000.0,1763000000.0,...,6193000000.0,6012000000.0,25094000000.0,6607000000.0,6201000000.0,5973000000.0,24932000000.0,6786000000.0,6468000000.0,6320000000.0
3,IncomeTaxExpenseBenefit,IS,503000000.0,2280000000.0,1380000000.0,955000000.0,1039000000.0,4527000000.0,1959000000.0,1913000000.0,...,5129000000.0,3624000000.0,19300000000.0,5625000000.0,4222000000.0,2852000000.0,16741000000.0,6407000000.0,4422000000.0,4046000000.0
4,OperatingIncomeLoss,IS,1672000000.0,7658000000.0,4725000000.0,3979000000.0,4234000000.0,18385000000.0,7827000000.0,7874000000.0,...,29979000000.0,23076000000.0,119437000000.0,36016000000.0,28318000000.0,22998000000.0,114301000000.0,40373000000.0,27900000000.0,25352000000.0
5,NetIncomeLoss,IS,1229000000.0,5704000000.0,3378000000.0,3074000000.0,3253000000.0,4308000000.0,6004000000.0,5987000000.0,...,25010000000.0,19442000000.0,99803000000.0,29998000000.0,24160000000.0,19881000000.0,96995000000.0,33916000000.0,23636000000.0,21448000000.0
6,WeightedAverageNumberOfDilutedSharesOutstanding,IS,909160000.0,907005000.0,919783000.0,922878000.0,927361000.0,924712000.0,933154000.0,935944000.0,...,16403320000.0,16262200000.0,16325820000.0,15955720000.0,15847050000.0,15775020000.0,15812550000.0,15576640000.0,15464710000.0,15348180000.0
7,NonoperatingIncomeExpense,IS,60000000.0,326000000.0,33000000.0,50000000.0,58000000.0,155000000.0,136000000.0,26000000.0,...,160000000.0,-10000000.0,-334000000.0,-393000000.0,64000000.0,-265000000.0,-565000000.0,-50000000.0,158000000.0,142000000.0
8,SalesRevenueNet,IS,8337000000.0,36537000000.0,15683000000.0,13499000000.0,15700000000.0,20343000000.0,26741000000.0,24667000000.0,...,,,,,,,,,,
9,EarningsPerShareBasic,IS,1.38,6.39,3.74,3.39,3.57,4.71,6.53,6.49,...,1.54,1.2,6.15,1.89,1.53,1.27,6.16,2.19,1.53,1.4


# Obtaining Macroeconomic indicators for years of company reports

The key OECD macroeconomic indicators that significantly impact company revenues:

1. GDP Growth Rate
- Direct impact on consumer spending and business investment
- Leading indicator of market expansion/contraction

2. Interest Rates
- Affects borrowing costs and investment decisions
- Influences consumer spending on credit-dependent purchases

3. Inflation Rate (CPI)
- Impacts purchasing power and consumer spending
- Influences pricing strategies and profit margins

4. Employment Rate
- Drives consumer confidence and spending power
- Affects labor costs and workforce availability

5. Industrial Production Index
- Indicates manufacturing sector health
- Supply chain implications

6. Consumer Confidence Index
- Predicts spending patterns
- Early indicator of demand changes

7. Trade Balance
- International market opportunities
- Exchange rate implications

8. Retail Sales
- Direct measure of consumer spending
- Sector-specific growth indicators


In [50]:
import pandas as pd
import numpy as np

def load_oecd_data(filepath):
    """
    Load and transform OECD data into a structured dataframe with metrics as rows
    and time periods as columns.
    """
    # Read the CSV file
    df = pd.read_csv(filepath)

    # Convert TIME_PERIOD to datetime for proper sorting
    df['TIME_PERIOD'] = pd.to_datetime(df['TIME_PERIOD'].str.replace('Q', ''), format='%Y-%m') + pd.offsets.QuarterEnd()

    # Create multi-index for rows
    df_pivot = df.pivot_table(
        index=['INDICATOR', 'SUBJECT', 'MEASURE'],
        columns='TIME_PERIOD',
        values='OBS_VALUE',
        aggfunc='mean'
    )

    # Sort columns in ascending order
    df_pivot = df_pivot[sorted(df_pivot.columns)]

    # Create more readable index labels
    indicator_map = {
        'CPI': 'Consumer Price Index',
        'EMP': 'Employment',
        'INDPROD': 'Industrial Production',
        'LTINT': 'Long-term Interest Rate',
        'STINT': 'Short-term Interest Rate'
    }

    subject_map = {
        'ENRG': 'Energy',
        'FOOD': 'Food',
        'TOT': 'Total',
        'TOT_FOODENRG': 'Core (excl. Food & Energy)',
        'MEN': 'Men',
        'WOMEN': 'Women',
        'CONSTR': 'Construction',
        'MFG': 'Manufacturing'
    }

    measure_map = {
        'AGRWTH': 'Annual Growth Rate (%)',
        'IDX2015': 'Index (2015=100)',
        'PC_WKGPOP': '% of Working Age Population',
        'THND_PER': 'Thousand Persons',
        'PC_PA': '% per Annum'
    }

    df_pivot.index = pd.MultiIndex.from_tuples(
        [(indicator_map.get(i, i), subject_map.get(s, s), measure_map.get(m, m))
         for i, s, m in df_pivot.index],
        names=['Indicator', 'Subject', 'Measure']
    )

    # Sort index
    df_pivot = df_pivot.sort_index()

    return df_pivot

def format_oecd_dataframe(df):
    """
    Format the OECD dataframe by rounding numbers and handling NaN values.
    """
    formatted_df = df.copy()
    formatted_df = formatted_df.round(2)
    formatted_df = formatted_df.fillna('-')
    return formatted_df

In [51]:
# Load the data
df = load_oecd_data('OECD,DF_DP_LIVE,+USA.STINT+LTINT+INDPROD+EMP+CPI...Q.csv')

# For better display, use the formatting function
formatted_df = format_oecd_dataframe(df)
formatted_df

Unnamed: 0_level_0,Unnamed: 1_level_0,TIME_PERIOD,2009-03-31,2009-06-30,2010-03-31,2010-06-30,2011-03-31,2011-06-30,2012-03-31,2012-06-30,2013-03-31,2013-06-30,...,2018-06-30,2019-03-31,2019-06-30,2020-03-31,2020-06-30,2021-03-31,2021-06-30,2022-03-31,2022-06-30,2023-03-31
Indicator,Subject,Measure,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
Consumer Price Index,Core (excl. Food & Energy),Annual Growth Rate (%),1.69,1.74,1.04,0.73,1.49,2.16,2.17,1.94,1.78,1.71,...,2.18,2.16,2.3,1.73,1.63,3.1,4.98,6.21,5.98,5.06
Consumer Price Index,Core (excl. Food & Energy),Index (2015=100),90.34,90.97,91.28,91.64,92.64,93.62,94.65,95.44,96.33,97.06,...,106.95,108.4,109.41,110.28,111.19,113.69,116.72,120.75,123.71,126.85
Consumer Price Index,Energy,Annual Growth Rate (%),-23.67,2.17,11.08,5.84,16.78,11.0,0.81,1.63,-0.15,-2.32,...,3.9,-2.63,-0.56,-8.26,-8.54,18.12,30.84,29.78,12.67,-5.0
Consumer Price Index,Energy,Index (2015=100),93.75,99.48,103.86,105.29,121.33,116.87,122.12,118.77,121.94,116.02,...,106.39,106.22,105.8,97.17,96.77,114.41,126.61,148.46,142.64,140.54
Consumer Price Index,Food,Annual Growth Rate (%),1.6,-2.68,-0.09,1.63,4.38,6.05,2.92,1.17,1.0,0.6,...,0.38,0.88,0.91,3.39,3.84,2.6,6.09,11.19,12.09,6.19
Consumer Price Index,Food,Index (2015=100),89.04,88.02,88.96,89.45,92.85,94.86,95.55,95.98,96.51,96.56,...,98.95,99.78,99.86,103.17,103.69,105.84,110.01,117.71,123.31,124.9
Consumer Price Index,Total,Annual Growth Rate (%),-0.94,1.44,1.77,1.27,3.11,3.29,2.13,1.89,1.54,1.23,...,2.2,1.74,2.03,1.24,1.24,4.03,6.69,8.31,7.1,4.43
Consumer Price Index,Total,Index (2015=100),90.29,91.2,91.88,92.36,94.74,95.4,96.76,97.2,98.25,98.4,...,106.34,107.65,108.51,108.98,109.85,113.37,117.2,122.79,125.52,128.21
Employment,Men,% of Working Age Population,72.29,70.74,71.06,70.91,71.22,71.73,72.12,72.56,72.51,72.58,...,76.11,76.44,76.68,71.77,72.65,73.89,75.46,76.04,76.37,76.44
Employment,Men,Thousand Persons,73963.45,72810.0,73317.22,73499.66,74088.0,74930.0,75380.55,76100.66,76300.11,76549.34,...,82960.66,83312.11,83900.66,78281.78,79458.34,80332.11,82346.66,84053.33,84665.34,85490.45


# Obtaining earnings call summaries

We will obtain a complete dataset of earnings calls

In [9]:
import requests

def cik_to_ticker(cik):
   json_file = 'company_tickers.json'

   # Download and save JSON if file doesn't exist
   if not os.path.exists(json_file):
       headers = {
           'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
       }
       url = 'https://www.sec.gov/files/company_tickers.json'
       response = requests.get(url, headers=headers)
       with open(json_file, 'w') as f:
           f.write(response.text)

   # Read from saved file
   df = pd.read_json(json_file).T
   cik = str(cik).zfill(10)

   if int(cik) in df['cik_str'].values:
       return df[df['cik_str'] == int(cik)]['ticker'].iloc[0]
   return None

In [10]:
import os
from pathlib import Path
import json
from datetime import datetime
from llama_index.readers.earnings_call_transcript import EarningsCallTranscript
import time
from requests.exceptions import RetryError
from typing import Optional

def fetch_transcript(ticker: str, year: int, quarter: str, max_retries: int = 3, retry_delay: int = 60) -> Optional[dict]:
    """
    Fetch transcript with better error handling and retry logic.
    Returns transcript data dictionary if successful, None otherwise.
    """
    for attempt in range(max_retries):
        try:
            loader = EarningsCallTranscript(ticker=ticker, year=year, quarter=quarter)
            docs = loader.load_data()

            if not docs or not hasattr(docs, 'text'):
                print(f"No transcript data found for {ticker} {year} {quarter}")
                return None

            return {
                'date': datetime(year, ((int(quarter[1])-1)*3)+1, 1).strftime('%Y%m%d'),
                'year': year,
                'quarter': quarter,
                'text': docs.text
            }

        except RetryError:
            if attempt < max_retries - 1:
                print(f"Rate limit hit for {ticker} {year} {quarter}. Waiting {retry_delay} seconds... (Attempt {attempt+1}/{max_retries})")
                time.sleep(retry_delay)
            else:
                print(f"Failed to retrieve transcript for {ticker} {year} {quarter} after {max_retries} attempts")
                return None
        except Exception as e:
            print(f"Error processing transcript for {ticker} {year} {quarter}: {type(e).__name__}: {str(e)}")
            if attempt < max_retries - 1:
                time.sleep(retry_delay)
            else:
                return None

    return None

def process_transcripts(companyObjDict, max_retries=3, retry_delay=60):
    """Main processing function with improved error handling"""
    for name, obj in companyObjDict.items():
        ticker = obj.getTicker() or cik_to_ticker(cik=obj.cik)
        obj.addTicker(ticker)

        transcript_dir = Path(f"transcripts/{ticker}")
        transcript_dir.mkdir(parents=True, exist_ok=True)

        # Get existing transcripts
        existing_transcripts = {f.stem.split('_')[1] for f in transcript_dir.glob("*.json")}

        for col in obj.getConsolidatedFilings().columns[2:]:
            date = datetime.strptime(str(col), '%Y-%m-%d %H:%M:%S')
            year = date.year
            quarter = f"Q{(date.month-1)//3 + 1}"
            date_str = date.strftime('%Y%m%d')

            transcript_path = transcript_dir / f"{ticker}_{date_str}.json"

            # Handle existing transcripts
            if date_str in existing_transcripts:
                print(f"Transcript already exists for {ticker} {year} {quarter}")
                try:
                    with open(transcript_path, 'r', encoding='utf-8') as f:
                        transcript_data = json.load(f)
                        obj.add_transcript(date, transcript_data['text'])
                    continue
                except Exception as e:
                    print(f"Error loading existing transcript {transcript_path}: {type(e).__name__}: {str(e)}")
                    # If we can't load existing transcript, try to fetch it again

            # Fetch new transcript
            transcript_data = fetch_transcript(ticker, year, quarter, max_retries, retry_delay)

            if transcript_data:
                try:
                    with open(transcript_path, 'w', encoding='utf-8') as f:
                        json.dump(transcript_data, f, ensure_ascii=False, indent=2)
                    obj.add_transcript(date, transcript_data['text'])
                    print(f"Saved transcript for {ticker} {year} {quarter}")
                except Exception as e:
                    print(f"Error saving transcript to {transcript_path}: {type(e).__name__}: {str(e)}")

            time.sleep(5)  # Rate limiting between requests

In [11]:
# Usage
process_transcripts(companyObjDict, max_retries=3, retry_delay=60)

Transcript already exists for AAPL 2009 Q2
Transcript already exists for AAPL 2009 Q3
Transcript already exists for AAPL 2009 Q4
Transcript already exists for AAPL 2010 Q1
Transcript already exists for AAPL 2010 Q2
Transcript already exists for AAPL 2010 Q3
Transcript already exists for AAPL 2010 Q4
Transcript already exists for AAPL 2011 Q1
Transcript already exists for AAPL 2011 Q2
Transcript already exists for AAPL 2011 Q3
Transcript already exists for AAPL 2011 Q4
Transcript already exists for AAPL 2012 Q1
Transcript already exists for AAPL 2012 Q2
Transcript already exists for AAPL 2012 Q3
Transcript already exists for AAPL 2012 Q4
Transcript already exists for AAPL 2013 Q1
Transcript already exists for AAPL 2013 Q2
Transcript already exists for AAPL 2013 Q3
Transcript already exists for AAPL 2013 Q4
Transcript already exists for AAPL 2014 Q1
Transcript already exists for AAPL 2014 Q2
Transcript already exists for AAPL 2014 Q3
Transcript already exists for AAPL 2014 Q4
Transcript 

# Running experiments utilizing collected data

In [None]:
import os

os.environ['OPENAI_API_KEY'] = "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"