In [3]:
import yfinance as yf
import pandas as pd
from datetime import datetime
from pandas.tseries.offsets import QuarterEnd
import pytz
import time
import os

In [55]:
#1) Read CSV

In [47]:
df = pd.read_csv("step_2_table.csv")
df = df[["quarter","ticker","cik"]]
df.head()

Unnamed: 0,quarter,ticker,cik
0,Q1-19,A,1090872
1,Q1-19,AAL,6201
2,Q1-19,AAP,1158449
3,Q1-19,AAPL,320193
4,Q1-19,ABBV,1551152


In [54]:
#2) get sector and industry
# unique ids to not get blocked

In [57]:
unique_tickers = df['ticker'].unique()
unique_tickers.shape

(632,)

In [62]:
"""tickers = df['ticker'].unique()

# Function to split list into batches
def batch_list(lst, batch_size):
    for i in range(0, len(lst), batch_size):
        yield lst[i:i + batch_size]

# Storage for metadata
sector_industry_map = {}

In [65]:
for batch in batch_list(tickers, 100):  # adjust size if needed
    print(f"Processing batch: {batch[0]} to {batch[-1]}")
    for ticker in batch:
        try:
            info = yf.Ticker(ticker).info
            sector_industry_map[ticker] = {
                'sector': info.get('sector', None),
                'industry': info.get('industry', None)
            }
        except Exception as e:
            print(f"Failed to fetch {ticker}: {e}")
            sector_industry_map[ticker] = {'sector': None, 'industry': None}
        time.sleep(0.1)  # slight pause to be polite to Yahoo servers

Processing batch: A to COST
Processing batch: COTY to HRB
Processing batch: HRL to NOC
Processing batch: NOV to UDR
Processing batch: UHS to FDS
Processing batch: SEDG to KSU
Failed to fetch HCP: HTTP Error 404: 
Processing batch: LLL to SBNY


In [69]:
#map back
df['sector'] = df['ticker'].map(lambda t: sector_industry_map.get(t, {}).get('sector'))
df['industry'] = df['ticker'].map(lambda t: sector_industry_map.get(t, {}).get('industry'))
df.head()

Unnamed: 0,quarter,ticker,cik,quarter_end,sector,industry
0,Q1-19,A,1090872,2019-03-31,Healthcare,Diagnostics & Research
1,Q1-19,AAL,6201,2019-03-31,Industrials,Airlines
2,Q1-19,AAP,1158449,2019-03-31,Consumer Cyclical,Auto Parts
3,Q1-19,AAPL,320193,2019-03-31,Technology,Consumer Electronics
4,Q1-19,ABBV,1551152,2019-03-31,Healthcare,Drug Manufacturers - General


In [111]:
#saving just in case
industry_df = pd.DataFrame.from_dict(sector_industry_map, orient='index')
industry_df.reset_index(inplace=True)
industry_df.columns = ['ticker', 'sector', 'industry']
industry_df.to_csv('ticker_sector_industry.csv', index=False)

In [112]:
industry_df = pd.read_csv('ticker_sector_industry.csv')
industry_df.replace('None', pd.NA, inplace=True)

In [113]:
missing_industry = industry_df[industry_df['sector'].isna() | industry_df['industry'].isna()]

In [114]:
len(missing_industry)

69

In [115]:
#trying wiki

In [119]:
df_wiki = pd.read_csv("step_2_table.csv")
df_wiki = df_wiki[["ticker", "company_name"]]
df_wiki = df_wiki.drop_duplicates(subset=['ticker', 'company_name'])

In [120]:
wiki_df = missing_industry.merge(df_wiki, on='ticker', how='left')

In [123]:
import wikipedia

def get_wikipedia_industry(name):
    try:
        # Search using company name
        summary = wikipedia.summary(name, auto_suggest=False)
        # Try to extract industry manually from summary (very naive)
        for line in summary.split('.'):
            if 'industry' in line.lower():
                return line.strip()
        return None
    except Exception as e:
        print(f"Wiki lookup failed for {name}: {e}")
        return None

In [None]:
#3) get quarter end exact date for price

In [126]:
df = pd.read_csv("step_2_table.csv")
df = df[["quarter","ticker","cik"]]
df.head()

Unnamed: 0,quarter,ticker,cik
0,Q1-19,A,1090872
1,Q1-19,AAL,6201
2,Q1-19,AAP,1158449
3,Q1-19,AAPL,320193
4,Q1-19,ABBV,1551152


In [127]:
def get_quarter_end(quarter_str):
    q, y = quarter_str.split('-')
    year = int('20' + y)
    quarter_end_dates = {
        'Q1': datetime(year, 3, 31),
        'Q2': datetime(year, 6, 30),
        'Q3': datetime(year, 9, 30),
        'Q4': datetime(year, 12, 31),
    }
    return quarter_end_dates[q]

In [128]:
get_quarter_end("Q1-19")

datetime.datetime(2019, 3, 31, 0, 0)

In [129]:
df['quarter_end'] = df['quarter'].apply(get_quarter_end)

In [130]:
df.shape

(11596, 4)

In [136]:
df

Unnamed: 0,quarter,ticker,cik,quarter_end
0,Q1-19,A,1090872,2019-03-31
1,Q1-19,AAL,6201,2019-03-31
2,Q1-19,AAP,1158449,2019-03-31
3,Q1-19,AAPL,320193,2019-03-31
4,Q1-19,ABBV,1551152,2019-03-31
...,...,...,...,...
11591,Q2-24,CTLT,1596783,2024-06-30
11592,Q2-24,MRO,101778,2024-06-30
11593,Q2-24,WRK,1732845,2024-06-30
11594,Q3-24,CTLT,1596783,2024-09-30


In [140]:
master_filename = "quarterly_prices_all.csv"

In [141]:
# Load existing results if continuing from previous run
if os.path.exists(master_filename):
    all_results = pd.read_csv(master_filename, parse_dates=['quarter_end'])
    completed_quarters = set(all_results['quarter'].unique())
    print(f"📄 Loaded existing results for {len(completed_quarters)} quarters.")
else:
    all_results = pd.DataFrame()
    completed_quarters = set()

In [142]:
# Load existing results if continuing from previous run
if os.path.exists(master_filename):
    all_results = pd.read_csv(master_filename, parse_dates=['quarter_end'])
    completed_quarters = set(all_results['quarter'].unique())
    print(f"📄 Loaded existing results for {len(completed_quarters)} quarters.")
else:
    all_results = pd.DataFrame()
    completed_quarters = set()

# --- Loop through each quarter ---
for q in sorted(df['quarter'].unique()):
    if q in completed_quarters:
        print(f"✅ {q} already processed — skipping.")
        continue

    q_end = get_quarter_end(q)
    tickers = df[df['quarter'] == q]['ticker'].unique().tolist()
    start_date = q_end - timedelta(days=7)
    end_date = q_end + timedelta(days=1)

    print(f"\n🔄 Fetching {len(tickers)} tickers for {q} (ending {q_end.date()})")

    try:
        data = yf.download(tickers, start=start_date, end=end_date, group_by='ticker', progress=False)
    except Exception as e:
        print(f"❌ Error downloading for {q}: {e}")
        continue

    quarter_results = []

    for ticker in tickers:
        try:
            if len(tickers) == 1:
                closes = data['Close'].dropna()
            else:
                closes = data[ticker]['Close'].dropna()

            if not closes.empty:
                last_close_date = closes.index[closes.index <= pd.Timestamp(q_end)].max()
                last_price = closes.loc[last_close_date]
            else:
                last_price = float('nan')
        except Exception as e:
            print(f"⚠️  Error processing {ticker}: {e}")
            last_price = float('nan')

        quarter_results.append({
            'ticker': ticker,
            'quarter': q,
            'quarter_end': q_end,
            'Q_End_Price': last_price
        })

    # --- Append and overwrite master CSV ---
    new_df = pd.DataFrame(quarter_results)
    all_results = pd.concat([all_results, new_df], ignore_index=True)
    all_results.to_csv(master_filename, index=False)
    print(f"💾 Appended {len(new_df)} results to {master_filename}")


🔄 Fetching 505 tickers for Q1-19 (ending 2019-03-31)



66 Failed downloads:
['NBL', 'CELG', 'FISV', 'CXO', 'DISCK', 'UTX', 'ABMD', 'BLL', 'ETFC', 'FBHS', 'WLTW', 'WRK', 'FLT', 'WCG', 'ATVI', 'RTN', 'FRC', 'AGN', 'PBCT', 'BHGE', 'XLNX', 'VIAB', 'BBT', 'ANTM', 'DISH', 'TSS', 'RE', 'COG', 'HRS', 'CTXS', 'VAR', 'DWDP', 'GPS', 'MXIM', 'APC', 'JEC', 'TWTR', 'ALXN', 'ADS', 'CERN', 'FB', 'MRO', 'ABC', 'ARNC', 'SIVB', 'TMK', 'TIF', 'FLIR', 'HCP', 'HFC', 'DRE', 'CBS', 'KSU', 'PKI', 'SYMC', 'CTL', 'MYL', 'NLSN', 'DISCA', 'LLL', 'XEC', 'RHT', 'PXD']: YFTzMissingError('possibly delisted; no timezone found')
['INFO', 'STI', 'LB']: YFPricesMissingError('possibly delisted; no price data found  (1d 2019-03-24 00:00:00 -> 2019-04-01 00:00:00) (Yahoo error = "Data doesn\'t exist for startDate = 1553400000, endDate = 1554091200")')


💾 Appended 505 results to quarterly_prices_all.csv

🔄 Fetching 505 tickers for Q1-20 (ending 2020-03-31)



51 Failed downloads:
['NBL', 'FISV', 'UTX', 'DISCK', 'CXO', 'BLL', 'ETFC', 'FLT', 'FBHS', 'WLTW', 'WRK', 'ATVI', 'AGN', 'RTN', 'FRC', 'ABMD', 'PBCT', 'XLNX', 'ANTM', 'DISH', 'RE', 'CTXS', 'VAR', 'COG', 'MXIM', 'TWTR', 'ALXN', 'ADS', 'GPS', 'CERN', 'MRO', 'FB', 'ARNC', 'SIVB', 'TIF', 'ABC', 'FLIR', 'PEAK', 'MYL', 'HFC', 'DRE', 'KSU', 'PKI', 'NLSN', 'NLOK', 'CTL', 'DISCA', 'VIAC', 'PXD']: YFTzMissingError('possibly delisted; no timezone found')
['INFO', 'LB']: YFPricesMissingError('possibly delisted; no price data found  (1d 2020-03-24 00:00:00 -> 2020-04-01 00:00:00) (Yahoo error = "Data doesn\'t exist for startDate = 1585022400, endDate = 1585713600")')


💾 Appended 505 results to quarterly_prices_all.csv

🔄 Fetching 505 tickers for Q1-21 (ending 2021-03-31)



41 Failed downloads:
['FISV', 'DISCK', 'ABMD', 'BLL', 'FBHS', 'WRK', 'FLT', 'WLTW', 'ATVI', 'FRC', 'PBCT', 'XLNX', 'ANTM', 'DISH', 'RE', 'COG', 'CTXS', 'VAR', 'GPS', 'MXIM', 'ALXN', 'CERN', 'MRO', 'TWTR', 'ABC', 'FB', 'SIVB', 'FLIR', 'PEAK', 'HFC', 'DRE', 'KSU', 'PKI', 'NLSN', 'NLOK', 'CTLT', 'VIAC', 'PXD', 'DISCA']: YFTzMissingError('possibly delisted; no timezone found')
['INFO', 'LB']: YFPricesMissingError('possibly delisted; no price data found  (1d 2021-03-24 00:00:00 -> 2021-04-01 00:00:00) (Yahoo error = "Data doesn\'t exist for startDate = 1616558400, endDate = 1617249600")')


💾 Appended 505 results to quarterly_prices_all.csv

🔄 Fetching 505 tickers for Q1-22 (ending 2022-03-31)



30 Failed downloads:
['FISV', 'DISCK', 'ABMD', 'BLL', 'FBHS', 'FLT', 'WRK', 'ATVI', 'FRC', 'PBCT', 'ANTM', 'DISH', 'RE', 'CTXS', 'TWTR', 'MRO', 'CERN', 'FB', 'ABC', 'SIVB', 'PEAK', 'DRE', 'CDAY', 'PKI', 'NLOK', 'NLSN', 'DISCA', 'CTLT', 'PXD']: YFTzMissingError('possibly delisted; no timezone found')
['SBNY']: YFPricesMissingError('possibly delisted; no price data found  (1d 2022-03-24 00:00:00 -> 2022-04-01 00:00:00) (Yahoo error = "Data doesn\'t exist for startDate = 1648094400, endDate = 1648785600")')


💾 Appended 505 results to quarterly_prices_all.csv

🔄 Fetching 503 tickers for Q1-23 (ending 2023-03-31)



14 Failed downloads:
['FISV', 'FLT', 'WRK', 'ATVI', 'DISH', 'RE', 'FRC', 'MRO', 'ABC', 'PEAK', 'CDAY', 'PKI', 'CTLT', 'PXD']: YFTzMissingError('possibly delisted; no timezone found')


💾 Appended 503 results to quarterly_prices_all.csv

🔄 Fetching 503 tickers for Q1-24 (ending 2024-03-31)



4 Failed downloads:
['WRK', 'MRO', 'CTLT', 'PXD']: YFTzMissingError('possibly delisted; no timezone found')


💾 Appended 503 results to quarterly_prices_all.csv

🔄 Fetching 505 tickers for Q2-19 (ending 2019-06-30)



64 Failed downloads:
['NBL', 'CELG', 'FISV', 'CXO', 'DISCK', 'UTX', 'ABMD', 'BLL', 'ETFC', 'FLT', 'WLTW', 'WRK', 'FBHS', 'WCG', 'ATVI', 'FRC', 'RTN', 'PBCT', 'BHGE', 'VIAB', 'XLNX', 'BBT', 'ANTM', 'DISH', 'RE', 'TSS', 'COG', 'CTXS', 'AGN', 'GPS', 'MXIM', 'APC', 'JEC', 'TWTR', 'ALXN', 'ADS', 'MRO', 'CERN', 'FB', 'ABC', 'ARNC', 'TMK', 'SIVB', 'VAR', 'TIF', 'FLIR', 'HCP', 'MYL', 'HFC', 'DRE', 'CBS', 'KSU', 'PKI', 'SYMC', 'NLSN', 'CTL', 'DISCA', 'XEC', 'LLL', 'RHT', 'PXD']: YFTzMissingError('possibly delisted; no timezone found')
['INFO', 'STI', 'LB']: YFPricesMissingError('possibly delisted; no price data found  (1d 2019-06-23 00:00:00 -> 2019-07-01 00:00:00) (Yahoo error = "Data doesn\'t exist for startDate = 1561262400, endDate = 1561953600")')


💾 Appended 505 results to quarterly_prices_all.csv

🔄 Fetching 505 tickers for Q2-20 (ending 2020-06-30)



46 Failed downloads:
['FISV', 'CXO', 'DISCK', 'ABMD', 'BLL', 'FBHS', 'ETFC', 'NBL', 'WLTW', 'FLT', 'WRK', 'ATVI', 'FRC', 'PBCT', 'XLNX', 'ANTM', 'DISH', 'RE', 'COG', 'VAR', 'CTXS', 'GPS', 'MXIM', 'TWTR', 'ALXN', 'CERN', 'MRO', 'FB', 'ABC', 'SIVB', 'TIF', 'FLIR', 'MYL', 'PEAK', 'HFC', 'DRE', 'KSU', 'PKI', 'NLOK', 'NLSN', 'CTL', 'DISCA', 'PXD', 'VIAC']: YFTzMissingError('possibly delisted; no timezone found')
['INFO', 'LB']: YFPricesMissingError('possibly delisted; no price data found  (1d 2020-06-23 00:00:00 -> 2020-07-01 00:00:00) (Yahoo error = "Data doesn\'t exist for startDate = 1592884800, endDate = 1593576000")')


💾 Appended 505 results to quarterly_prices_all.csv

🔄 Fetching 505 tickers for Q2-21 (ending 2021-06-30)



38 Failed downloads:
['FISV', 'DISCK', 'BLL', 'ABMD', 'FLT', 'FBHS', 'WRK', 'WLTW', 'ATVI', 'FRC', 'PBCT', 'XLNX', 'ANTM', 'DISH', 'RE', 'COG', 'CTXS', 'GPS', 'TWTR', 'ALXN', 'CERN', 'MRO', 'MXIM', 'FB', 'ABC', 'SIVB', 'PEAK', 'DRE', 'KSU', 'PKI', 'NLOK', 'NLSN', 'DISCA', 'CTLT', 'VIAC', 'PXD']: YFTzMissingError('possibly delisted; no timezone found')
['INFO', 'LB']: YFPricesMissingError('possibly delisted; no price data found  (1d 2021-06-23 00:00:00 -> 2021-07-01 00:00:00) (Yahoo error = "Data doesn\'t exist for startDate = 1624420800, endDate = 1625112000")')


💾 Appended 505 results to quarterly_prices_all.csv

🔄 Fetching 503 tickers for Q2-22 (ending 2022-06-30)



23 Failed downloads:
['FISV', 'ABMD', 'FLT', 'FBHS', 'WRK', 'ATVI', 'FRC', 'DISH', 'CTXS', 'TWTR', 'RE', 'MRO', 'ABC', 'SIVB', 'PEAK', 'DRE', 'PKI', 'CDAY', 'NLOK', 'NLSN', 'CTLT', 'PXD']: YFTzMissingError('possibly delisted; no timezone found')
['SBNY']: YFPricesMissingError('possibly delisted; no price data found  (1d 2022-06-23 00:00:00 -> 2022-07-01 00:00:00) (Yahoo error = "Data doesn\'t exist for startDate = 1655956800, endDate = 1656648000")')


💾 Appended 503 results to quarterly_prices_all.csv

🔄 Fetching 503 tickers for Q2-23 (ending 2023-06-30)



9 Failed downloads:
['FLT', 'WRK', 'ATVI', 'MRO', 'ABC', 'PEAK', 'CDAY', 'CTLT', 'PXD']: YFTzMissingError('possibly delisted; no timezone found')


💾 Appended 503 results to quarterly_prices_all.csv

🔄 Fetching 503 tickers for Q2-24 (ending 2024-06-30)



3 Failed downloads:
['WRK', 'MRO', 'CTLT']: YFTzMissingError('possibly delisted; no timezone found')


💾 Appended 503 results to quarterly_prices_all.csv

🔄 Fetching 505 tickers for Q3-19 (ending 2019-09-30)



59 Failed downloads:
['NBL', 'CELG', 'FISV', 'DISCK', 'UTX', 'ABMD', 'BLL', 'ETFC', 'FLT', 'FBHS', 'WCG', 'WRK', 'WLTW', 'ATVI', 'FRC', 'RTN', 'AGN', 'PBCT', 'BHGE', 'XLNX', 'VIAB', 'BBT', 'ANTM', 'DISH', 'CXO', 'RE', 'COG', 'VAR', 'CTXS', 'GPS', 'MXIM', 'JEC', 'TWTR', 'ALXN', 'ADS', 'CERN', 'MRO', 'FB', 'ABC', 'ARNC', 'SIVB', 'TIF', 'FLIR', 'HCP', 'MYL', 'DRE', 'CBS', 'KSU', 'CTL', 'SYMC', 'NLSN', 'XEC', 'DISCA', 'HFC', 'PXD', 'PKI']: YFTzMissingError('possibly delisted; no timezone found')
['INFO', 'STI', 'LB']: YFPricesMissingError('possibly delisted; no price data found  (1d 2019-09-23 00:00:00 -> 2019-10-01 00:00:00) (Yahoo error = "Data doesn\'t exist for startDate = 1569211200, endDate = 1569902400")')


💾 Appended 505 results to quarterly_prices_all.csv

🔄 Fetching 505 tickers for Q3-20 (ending 2020-09-30)



46 Failed downloads:
['NBL', 'FISV', 'CXO', 'DISCK', 'ABMD', 'BLL', 'ETFC', 'FBHS', 'FLT', 'WRK', 'WLTW', 'FRC', 'ATVI', 'PBCT', 'XLNX', 'ANTM', 'DISH', 'RE', 'COG', 'CTXS', 'VAR', 'GPS', 'MXIM', 'TWTR', 'ALXN', 'MRO', 'CERN', 'ABC', 'SIVB', 'TIF', 'FB', 'FLIR', 'PEAK', 'MYL', 'HFC', 'DRE', 'KSU', 'NLOK', 'PKI', 'NLSN', 'DISCA', 'CTLT', 'VIAC', 'PXD']: YFTzMissingError('possibly delisted; no timezone found')
['INFO', 'LB']: YFPricesMissingError('possibly delisted; no price data found  (1d 2020-09-23 00:00:00 -> 2020-10-01 00:00:00) (Yahoo error = "Data doesn\'t exist for startDate = 1600833600, endDate = 1601524800")')


💾 Appended 505 results to quarterly_prices_all.csv

🔄 Fetching 505 tickers for Q3-21 (ending 2021-09-30)



36 Failed downloads:
['FISV', 'DISCK', 'ABMD', 'BLL', 'FBHS', 'FLT', 'WLTW', 'WRK', 'ATVI', 'FRC', 'PBCT', 'XLNX', 'ANTM', 'DISH', 'RE', 'COG', 'CTXS', 'GPS', 'TWTR', 'CERN', 'MRO', 'FB', 'ABC', 'SIVB', 'PEAK', 'DRE', 'KSU', 'CDAY', 'PKI', 'NLOK', 'NLSN', 'DISCA', 'CTLT', 'VIAC', 'PXD']: YFTzMissingError('possibly delisted; no timezone found')
['INFO']: YFPricesMissingError('possibly delisted; no price data found  (1d 2021-09-23 00:00:00 -> 2021-10-01 00:00:00) (Yahoo error = "Data doesn\'t exist for startDate = 1632369600, endDate = 1633060800")')


💾 Appended 505 results to quarterly_prices_all.csv

🔄 Fetching 503 tickers for Q3-22 (ending 2022-09-30)



23 Failed downloads:
['FISV', 'ABMD', 'FBHS', 'WRK', 'FLT', 'ATVI', 'FRC', 'DISH', 'CTXS', 'RE', 'TWTR', 'MRO', 'ABC', 'SIVB', 'PEAK', 'DRE', 'PKI', 'CDAY', 'NLOK', 'NLSN', 'CTLT', 'PXD']: YFTzMissingError('possibly delisted; no timezone found')
['SBNY']: YFPricesMissingError('possibly delisted; no price data found  (1d 2022-09-23 00:00:00 -> 2022-10-01 00:00:00) (Yahoo error = "Data doesn\'t exist for startDate = 1663905600, endDate = 1664596800")')


💾 Appended 503 results to quarterly_prices_all.csv

🔄 Fetching 503 tickers for Q3-23 (ending 2023-09-30)



8 Failed downloads:
['FLT', 'WRK', 'ATVI', 'MRO', 'PEAK', 'CDAY', 'CTLT', 'PXD']: YFTzMissingError('possibly delisted; no timezone found')


💾 Appended 503 results to quarterly_prices_all.csv

🔄 Fetching 504 tickers for Q3-24 (ending 2024-09-30)



2 Failed downloads:
['MRO', 'CTLT']: YFTzMissingError('possibly delisted; no timezone found')


💾 Appended 504 results to quarterly_prices_all.csv

🔄 Fetching 505 tickers for Q4-19 (ending 2019-12-31)



53 Failed downloads:
['NBL', 'UTX', 'FISV', 'CXO', 'BLL', 'FBHS', 'WLTW', 'FLT', 'ETFC', 'WRK', 'WCG', 'ATVI', 'FRC', 'AGN', 'RTN', 'DISCK', 'PBCT', 'XLNX', 'ANTM', 'DISH', 'ABMD', 'RE', 'COG', 'CTXS', 'VAR', 'GPS', 'MXIM', 'TWTR', 'ALXN', 'ADS', 'MRO', 'CERN', 'FB', 'ABC', 'ARNC', 'SIVB', 'TIF', 'FLIR', 'PEAK', 'MYL', 'HFC', 'DRE', 'KSU', 'PKI', 'NLOK', 'NLSN', 'CTL', 'XEC', 'DISCA', 'VIAC', 'PXD']: YFTzMissingError('possibly delisted; no timezone found')
['INFO', 'LB']: YFPricesMissingError('possibly delisted; no price data found  (1d 2019-12-24 00:00:00 -> 2020-01-01 00:00:00) (Yahoo error = "Data doesn\'t exist for startDate = 1577163600, endDate = 1577854800")')


💾 Appended 505 results to quarterly_prices_all.csv

🔄 Fetching 505 tickers for Q4-20 (ending 2020-12-31)



43 Failed downloads:
['FISV', 'CXO', 'DISCK', 'BLL', 'ABMD', 'FBHS', 'FLT', 'WRK', 'WLTW', 'ATVI', 'FRC', 'PBCT', 'XLNX', 'ANTM', 'DISH', 'RE', 'COG', 'CTXS', 'VAR', 'GPS', 'MXIM', 'TWTR', 'ALXN', 'MRO', 'FB', 'ABC', 'SIVB', 'TIF', 'FLIR', 'PEAK', 'HFC', 'DRE', 'KSU', 'PKI', 'NLSN', 'DISCA', 'CTLT', 'VIAC', 'PXD', 'CERN', 'NLOK']: YFTzMissingError('possibly delisted; no timezone found')
['INFO', 'LB']: YFPricesMissingError('possibly delisted; no price data found  (1d 2020-12-24 00:00:00 -> 2021-01-01 00:00:00) (Yahoo error = "Data doesn\'t exist for startDate = 1608786000, endDate = 1609477200")')


💾 Appended 505 results to quarterly_prices_all.csv

🔄 Fetching 505 tickers for Q4-21 (ending 2021-12-31)



35 Failed downloads:
['FISV', 'DISCK', 'ABMD', 'BLL', 'WLTW', 'FBHS', 'FLT', 'WRK', 'ATVI', 'FRC', 'ANTM', 'DISH', 'RE', 'CTXS', 'GPS', 'PBCT', 'XLNX', 'TWTR', 'CERN', 'MRO', 'FB', 'ABC', 'SIVB', 'PEAK', 'DRE', 'CDAY', 'NLOK', 'PKI', 'NLSN', 'DISCA', 'CTLT', 'VIAC', 'PXD']: YFTzMissingError('possibly delisted; no timezone found')
['SBNY', 'INFO']: YFPricesMissingError('possibly delisted; no price data found  (1d 2021-12-24 00:00:00 -> 2022-01-01 00:00:00) (Yahoo error = "Data doesn\'t exist for startDate = 1640322000, endDate = 1641013200")')


💾 Appended 505 results to quarterly_prices_all.csv

🔄 Fetching 503 tickers for Q4-22 (ending 2022-12-31)



16 Failed downloads:
['FISV', 'FLT', 'WRK', 'ATVI', 'FRC', 'DISH', 'RE', 'ABC', 'MRO', 'PEAK', 'CDAY', 'PKI', 'CTLT', 'SIVB', 'PXD']: YFTzMissingError('possibly delisted; no timezone found')
['SBNY']: YFPricesMissingError('possibly delisted; no price data found  (1d 2022-12-24 00:00:00 -> 2023-01-01 00:00:00) (Yahoo error = "Data doesn\'t exist for startDate = 1671858000, endDate = 1672549200")')


💾 Appended 503 results to quarterly_prices_all.csv

🔄 Fetching 503 tickers for Q4-24 (ending 2024-12-31)
💾 Appended 503 results to quarterly_prices_all.csv


In [4]:
df = pd.read_csv("quarterly_prices_all.csv")

In [5]:
df.head()

Unnamed: 0,ticker,quarter,quarter_end,Q_End_Price
0,A,Q1-19,31/03/2019,76.857788
1,AAL,Q1-19,31/03/2019,31.339823
2,AAP,Q1-19,31/03/2019,152.464035
3,AAPL,Q1-19,31/03/2019,45.441719
4,ABBV,Q1-19,31/03/2019,61.163391


In [6]:
import pandas as pd
import yfinance as yf
from functools import lru_cache
import swifter  # Optional but faster if installed
import os

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
# ---------- 1. Cache yf.Ticker ----------
@lru_cache(maxsize=1000)
def get_stock(ticker):
    return yf.Ticker(ticker)

# ---------- 2. Combined earnings + previous close logic ----------
def process_row(ticker, quarter_end):
    try:
        quarter_end = pd.to_datetime(quarter_end, dayfirst=True)
        stock = get_stock(ticker)

        earnings_df = stock.get_earnings_dates(limit=12)
        if earnings_df is None or earnings_df.empty:
            return None, None

        earnings_df = earnings_df.copy()
        earnings_df.index = pd.to_datetime(earnings_df.index, errors='coerce')
        earnings_df.dropna(inplace=True)

        valid_dates = earnings_df[earnings_df.index <= quarter_end]
        if valid_dates.empty:
            return None, None

        earnings_date = valid_dates.index.max()

        prev_day = earnings_date - pd.Timedelta(days=1)
        start_date = prev_day - pd.Timedelta(days=5)
        history = stock.history(start=start_date.strftime('%Y-%m-%d'), end=earnings_date.strftime('%Y-%m-%d'))

        for i in range(5):
            lookup_day = prev_day - pd.Timedelta(days=i)
            if lookup_day in history.index:
                return earnings_date, history.loc[lookup_day, 'Close']

    except Exception as e:
        print(f"Error for {ticker}: {e}")

    return None, None

In [12]:
print(process_row("AAPL", "31/03/2019"))  # Should return a real date + float close
print(process_row("ZZZZ", "31/03/2019"))

Error for AAPL: 'NoneType' object is not subscriptable
(None, None)
Error for ZZZZ: 'NoneType' object is not subscriptable
(None, None)


In [None]:
# ---------- 3. Main loop ----------
df['quarter_end'] = pd.to_datetime(df['quarter_end'], dayfirst=True)
output_file = "earnings_with_prices.csv"
df_master = pd.DataFrame()
batch_size = 500

for i in range(0, len(df), batch_size):
    print(f"Processing rows {i} to {i + batch_size - 1}")
    df_batch = df.iloc[i:i+batch_size].copy()

    df_batch[['earnings_date', 'prev_day_close']] = df_batch.swifter.apply(
        lambda row: pd.Series(process_row(row['ticker'], row['quarter_end'])),
        axis=1
    )

    df_master = pd.concat([df_master, df_batch], ignore_index=True)

    # Save after each batch (overwrite)
    df_master.to_csv(output_file, index=False)
    print(f"Batch saved: {output_file}")


Processing rows 0 to 499


Pandas Apply:   1%|▏                            | 3/500 [00:00<00:35, 14.17it/s]

Error for A: 'NoneType' object is not subscriptable
Error for AAL: 'NoneType' object is not subscriptable


Pandas Apply:   1%|▎                            | 5/500 [00:00<00:42, 11.72it/s]

Error for AAP: 'NoneType' object is not subscriptable
Error for AAPL: 'NoneType' object is not subscriptable
Error for ABBV: 'NoneType' object is not subscriptable


Pandas Apply:   2%|▌                            | 9/500 [00:00<00:45, 10.84it/s]

Error for ABT: 'NoneType' object is not subscriptable
Error for ACN: 'NoneType' object is not subscriptable
Error for ADBE: 'NoneType' object is not subscriptable


Pandas Apply:   2%|▌                           | 11/500 [00:01<00:47, 10.37it/s]

Error for ADI: 'NoneType' object is not subscriptable
Error for ADM: 'NoneType' object is not subscriptable


Pandas Apply:   3%|▋                           | 13/500 [00:01<00:45, 10.65it/s]

Error for ADP: 'NoneType' object is not subscriptable
Error for ADSK: 'NoneType' object is not subscriptable
Error for AEE: 'NoneType' object is not subscriptable


Pandas Apply:   3%|▊                           | 15/500 [00:01<00:46, 10.51it/s]

Error for AEP: 'NoneType' object is not subscriptable
Error for AES: 'NoneType' object is not subscriptable


Pandas Apply:   4%|█                           | 19/500 [00:01<00:48,  9.98it/s]

Error for AFL: 'NoneType' object is not subscriptable
Error for AIG: 'NoneType' object is not subscriptable
Error for AIV: 'NoneType' object is not subscriptable


Pandas Apply:   4%|█▏                          | 21/500 [00:01<00:46, 10.29it/s]

Error for AIZ: 'NoneType' object is not subscriptable
Error for AJG: 'NoneType' object is not subscriptable
Error for AKAM: 'NoneType' object is not subscriptable


Pandas Apply:   5%|█▍                          | 25/500 [00:02<00:45, 10.51it/s]

Error for ALB: 'NoneType' object is not subscriptable
Error for ALGN: 'NoneType' object is not subscriptable
Error for ALK: 'NoneType' object is not subscriptable


Pandas Apply:   5%|█▌                          | 27/500 [00:02<00:44, 10.57it/s]

Error for ALL: 'NoneType' object is not subscriptable
Error for ALLE: 'NoneType' object is not subscriptable


Pandas Apply:   6%|█▌                          | 29/500 [00:02<00:46, 10.13it/s]

Error for AMAT: 'NoneType' object is not subscriptable
Error for AMD: 'NoneType' object is not subscriptable
Error for AME: 'NoneType' object is not subscriptable


Pandas Apply:   7%|█▊                          | 33/500 [00:03<00:45, 10.37it/s]

Error for AMG: 'NoneType' object is not subscriptable
Error for AMGN: 'NoneType' object is not subscriptable
Error for AMP: 'NoneType' object is not subscriptable


Pandas Apply:   7%|█▉                          | 35/500 [00:03<00:46, 10.03it/s]

Error for AMT: 'NoneType' object is not subscriptable
Error for AMZN: 'NoneType' object is not subscriptable


Pandas Apply:   7%|██                          | 37/500 [00:03<00:47,  9.72it/s]

Error for ANET: 'NoneType' object is not subscriptable
Error for ANSS: 'NoneType' object is not subscriptable
Error for AON: 'NoneType' object is not subscriptable


Pandas Apply:   8%|██▏                         | 39/500 [00:03<00:45, 10.04it/s]

Error for AOS: 'NoneType' object is not subscriptable
Error for APA: 'NoneType' object is not subscriptable


Pandas Apply:   8%|██▎                         | 41/500 [00:03<00:46,  9.96it/s]

Error for APD: 'NoneType' object is not subscriptable
Error for APH: 'NoneType' object is not subscriptable


Pandas Apply:   9%|██▍                         | 44/500 [00:04<00:48,  9.49it/s]

Error for APTV: 'NoneType' object is not subscriptable
Error for ARE: 'NoneType' object is not subscriptable


Pandas Apply:   9%|██▌                         | 46/500 [00:04<00:48,  9.42it/s]

Error for ATO: 'NoneType' object is not subscriptable
Error for AVB: 'NoneType' object is not subscriptable
Error for AVGO: 'NoneType' object is not subscriptable


Pandas Apply:  10%|██▊                         | 50/500 [00:04<00:43, 10.37it/s]

Error for AVY: 'NoneType' object is not subscriptable
Error for AWK: 'NoneType' object is not subscriptable
Error for AXP: 'NoneType' object is not subscriptable


Pandas Apply:  10%|██▉                         | 52/500 [00:05<00:42, 10.47it/s]

Error for AZO: 'NoneType' object is not subscriptable
Error for BA: 'NoneType' object is not subscriptable
Error for BAC: 'NoneType' object is not subscriptable


Pandas Apply:  11%|███                         | 55/500 [00:05<00:49,  8.94it/s]

Error for BAX: 'NoneType' object is not subscriptable
Error for BBY: 'NoneType' object is not subscriptable


Pandas Apply:  11%|███▏                        | 57/500 [00:05<00:47,  9.31it/s]

Error for BDX: 'NoneType' object is not subscriptable
Error for BEN: 'NoneType' object is not subscriptable
Error for BF-B: 'NoneType' object is not subscriptable


Pandas Apply:  12%|███▍                        | 61/500 [00:06<00:42, 10.39it/s]

Error for BHF: 'NoneType' object is not subscriptable
Error for BIIB: 'NoneType' object is not subscriptable
Error for BK: 'NoneType' object is not subscriptable


Pandas Apply:  13%|███▌                        | 63/500 [00:06<00:43,  9.97it/s]

Error for BKNG: 'NoneType' object is not subscriptable
Error for BLK: 'NoneType' object is not subscriptable


Pandas Apply:  13%|███▋                        | 65/500 [00:06<00:43, 10.08it/s]

Error for BMY: 'NoneType' object is not subscriptable
Error for BR: 'NoneType' object is not subscriptable
Error for BRK-B: 'NoneType' object is not subscriptable


Pandas Apply:  14%|███▊                        | 69/500 [00:06<00:41, 10.33it/s]

Error for BSX: 'NoneType' object is not subscriptable
Error for BWA: 'NoneType' object is not subscriptable
Error for BXP: 'NoneType' object is not subscriptable


Pandas Apply:  14%|███▉                        | 71/500 [00:07<00:42, 10.15it/s]

Error for C: 'NoneType' object is not subscriptable
Error for CAG: 'NoneType' object is not subscriptable
Error for CAH: 'NoneType' object is not subscriptable


Pandas Apply:  15%|████▏                       | 75/500 [00:07<00:39, 10.73it/s]

Error for CAT: 'NoneType' object is not subscriptable
Error for CB: 'NoneType' object is not subscriptable
Error for CBOE: 'NoneType' object is not subscriptable


Pandas Apply:  15%|████▎                       | 77/500 [00:07<00:39, 10.68it/s]

Error for CBRE: 'NoneType' object is not subscriptable
Error for CCI: 'NoneType' object is not subscriptable
Error for CCL: 'NoneType' object is not subscriptable


Pandas Apply:  16%|████▌                       | 81/500 [00:07<00:40, 10.46it/s]

Error for CDNS: 'NoneType' object is not subscriptable
Error for CE: 'NoneType' object is not subscriptable
Error for CF: 'NoneType' object is not subscriptable


Pandas Apply:  17%|████▋                       | 83/500 [00:08<00:40, 10.39it/s]

Error for CFG: 'NoneType' object is not subscriptable
Error for CHD: 'NoneType' object is not subscriptable
Error for CHRW: 'NoneType' object is not subscriptable


Pandas Apply:  17%|████▊                       | 87/500 [00:08<00:38, 10.86it/s]

Error for CHTR: 'NoneType' object is not subscriptable
Error for CI: 'NoneType' object is not subscriptable
Error for CINF: 'NoneType' object is not subscriptable


Pandas Apply:  18%|████▉                       | 89/500 [00:08<00:39, 10.46it/s]

Error for CL: 'NoneType' object is not subscriptable
Error for CLX: 'NoneType' object is not subscriptable


Pandas Apply:  18%|█████                       | 91/500 [00:08<00:42,  9.69it/s]

Error for CMA: 'NoneType' object is not subscriptable
Error for CMCSA: 'NoneType' object is not subscriptable


Pandas Apply:  19%|█████▏                      | 93/500 [00:09<00:46,  8.68it/s]

Error for CME: 'NoneType' object is not subscriptable
Error for CMG: 'NoneType' object is not subscriptable


Pandas Apply:  19%|█████▎                      | 95/500 [00:09<00:43,  9.32it/s]

Error for CMI: 'NoneType' object is not subscriptable
Error for CMS: 'NoneType' object is not subscriptable
Error for CNC: 'NoneType' object is not subscriptable


Pandas Apply:  20%|█████▍                      | 98/500 [00:09<00:42,  9.43it/s]

Error for CNP: 'NoneType' object is not subscriptable
Error for COF: 'NoneType' object is not subscriptable


Pandas Apply:  20%|█████▍                     | 100/500 [00:09<00:39, 10.03it/s]

Error for COO: 'NoneType' object is not subscriptable
Error for COP: 'NoneType' object is not subscriptable
Error for COST: 'NoneType' object is not subscriptable


Pandas Apply:  21%|█████▌                     | 104/500 [00:10<00:37, 10.47it/s]

Error for COTY: 'NoneType' object is not subscriptable
Error for CPB: 'NoneType' object is not subscriptable
Error for CPRI: 'NoneType' object is not subscriptable


Pandas Apply:  21%|█████▋                     | 106/500 [00:10<00:38, 10.25it/s]

Error for CPRT: 'NoneType' object is not subscriptable
Error for CRM: 'NoneType' object is not subscriptable


Pandas Apply:  22%|█████▊                     | 108/500 [00:10<00:37, 10.56it/s]

Error for CSCO: 'NoneType' object is not subscriptable
Error for CSX: 'NoneType' object is not subscriptable
Error for CTAS: 'NoneType' object is not subscriptable


Pandas Apply:  22%|█████▉                     | 110/500 [00:10<00:36, 10.63it/s]

Error for CTSH: 'NoneType' object is not subscriptable
Error for CVS: 'NoneType' object is not subscriptable


Pandas Apply:  22%|██████                     | 112/500 [00:11<00:37, 10.21it/s]

Error for CVX: 'NoneType' object is not subscriptable
Error for D: 'NoneType' object is not subscriptable


Pandas Apply:  23%|██████▏                    | 115/500 [00:11<00:39,  9.68it/s]

Error for DAL: 'NoneType' object is not subscriptable
Error for DE: 'NoneType' object is not subscriptable
Error for DFS: 'NoneType' object is not subscriptable


Pandas Apply:  24%|██████▍                    | 119/500 [00:11<00:41,  9.10it/s]

Error for DG: 'NoneType' object is not subscriptable
Error for DGX: 'NoneType' object is not subscriptable
Error for DHI: 'NoneType' object is not subscriptable


Pandas Apply:  24%|██████▌                    | 121/500 [00:12<00:39,  9.72it/s]

Error for DHR: 'NoneType' object is not subscriptable
Error for DIS: 'NoneType' object is not subscriptable
Error for DLR: 'NoneType' object is not subscriptable


Pandas Apply:  25%|██████▊                    | 125/500 [00:12<00:38,  9.75it/s]

Error for DLTR: 'NoneType' object is not subscriptable
Error for DOV: 'NoneType' object is not subscriptable
Error for DRI: 'NoneType' object is not subscriptable


Pandas Apply:  26%|██████▉                    | 128/500 [00:12<00:36, 10.16it/s]

Error for DTE: 'NoneType' object is not subscriptable
Error for DUK: 'NoneType' object is not subscriptable
Error for DVA: 'NoneType' object is not subscriptable


Pandas Apply:  26%|███████                    | 130/500 [00:12<00:35, 10.43it/s]

Error for DVN: 'NoneType' object is not subscriptable
Error for DXC: 'NoneType' object is not subscriptable
Error for EA: 'NoneType' object is not subscriptable


Pandas Apply:  27%|███████▏                   | 134/500 [00:13<00:35, 10.46it/s]

Error for EBAY: 'NoneType' object is not subscriptable
Error for ECL: 'NoneType' object is not subscriptable
Error for ED: 'NoneType' object is not subscriptable


Pandas Apply:  27%|███████▎                   | 136/500 [00:13<00:37,  9.73it/s]

Error for EFX: 'NoneType' object is not subscriptable
Error for EIX: 'NoneType' object is not subscriptable
Error for EL: 'NoneType' object is not subscriptable


Exception ignored from cffi callback <function buffer_callback at 0x109e4b560>:
Traceback (most recent call last):
  File "/Users/chloecurtis/.pyenv/versions/3.12.9/envs/corporate-sentiment-tracker/lib/python3.12/site-packages/curl_cffi/curl.py", line 67, in buffer_callback
    @ffi.def_extern()
    
KeyboardInterrupt: 
Pandas Apply:  28%|███████▍                   | 138/500 [00:13<00:32, 11.01it/s]

Error for EMN: Failed to perform, curl: (23) Failure writing output to destination, passed 13 returned 0. See https://curl.se/libcurl/c/libcurl-errors.html first for more details.
Error for EMR: 'NoneType' object is not subscriptable


Pandas Apply:  28%|███████▋                   | 142/500 [00:14<00:35, 10.23it/s]

Error for EOG: 'NoneType' object is not subscriptable
Error for EQIX: 'NoneType' object is not subscriptable
Error for EQR: 'NoneType' object is not subscriptable


Pandas Apply:  29%|███████▊                   | 144/500 [00:14<00:34, 10.45it/s]

Error for ES: 'NoneType' object is not subscriptable
Error for ESS: 'NoneType' object is not subscriptable
Error for ETN: 'NoneType' object is not subscriptable


Pandas Apply:  30%|███████▉                   | 148/500 [00:14<00:32, 10.89it/s]

Error for ETR: 'NoneType' object is not subscriptable
Error for EVRG: 'NoneType' object is not subscriptable
Error for EW: 'NoneType' object is not subscriptable


Pandas Apply:  30%|████████                   | 150/500 [00:14<00:32, 10.87it/s]

Error for EXC: 'NoneType' object is not subscriptable
Error for EXPD: 'NoneType' object is not subscriptable
Error for EXPE: 'NoneType' object is not subscriptable


Pandas Apply:  31%|████████▎                  | 154/500 [00:15<00:31, 10.95it/s]

Error for EXR: 'NoneType' object is not subscriptable
Error for F: 'NoneType' object is not subscriptable
Error for FANG: 'NoneType' object is not subscriptable


Exception ignored from cffi callback <function buffer_callback at 0x109e4b560>:
Traceback (most recent call last):
  File "/Users/chloecurtis/.pyenv/versions/3.12.9/envs/corporate-sentiment-tracker/lib/python3.12/site-packages/curl_cffi/curl.py", line 67, in buffer_callback
    @ffi.def_extern()
    
KeyboardInterrupt: 
Pandas Apply:  31%|████████▍                  | 156/500 [00:15<00:32, 10.56it/s]

Error for FAST: 'NoneType' object is not subscriptable
Error for FCX: Failed to perform, curl: (23) Failure writing output to destination, passed 13 returned 0. See https://curl.se/libcurl/c/libcurl-errors.html first for more details.
Error for FDX: 'NoneType' object is not subscriptable


Pandas Apply:  32%|████████▋                  | 160/500 [00:15<00:31, 10.85it/s]

Error for FE: 'NoneType' object is not subscriptable
Error for FFIV: 'NoneType' object is not subscriptable
Error for FIS: 'NoneType' object is not subscriptable


Pandas Apply:  32%|████████▋                  | 162/500 [00:15<00:30, 11.05it/s]

Error for FITB: 'NoneType' object is not subscriptable
Error for FL: 'NoneType' object is not subscriptable
Error for FLR: 'NoneType' object is not subscriptable


Pandas Apply:  33%|████████▊                  | 164/500 [00:16<00:30, 10.89it/s]

Error for FLS: 'NoneType' object is not subscriptable
Error for FMC: 'NoneType' object is not subscriptable


Pandas Apply:  33%|████████▉                  | 166/500 [00:16<00:31, 10.45it/s]

Error for FOX: 'NoneType' object is not subscriptable
Error for FOXA: 'NoneType' object is not subscriptable


Pandas Apply:  34%|█████████▏                 | 170/500 [00:16<00:31, 10.48it/s]

Error for FRT: 'NoneType' object is not subscriptable
Error for FTI: 'NoneType' object is not subscriptable
Error for FTNT: 'NoneType' object is not subscriptable


Pandas Apply:  34%|█████████▎                 | 172/500 [00:16<00:31, 10.36it/s]

Error for FTV: 'NoneType' object is not subscriptable
Error for GD: 'NoneType' object is not subscriptable
Error for GE: 'NoneType' object is not subscriptable


Pandas Apply:  35%|█████████▍                 | 174/500 [00:17<00:30, 10.74it/s]

Error for GILD: 'NoneType' object is not subscriptable
Error for GIS: 'NoneType' object is not subscriptable


Pandas Apply:  36%|█████████▌                 | 178/500 [00:17<00:31, 10.31it/s]

Error for GLW: 'NoneType' object is not subscriptable
Error for GM: 'NoneType' object is not subscriptable
Error for GOOG: 'NoneType' object is not subscriptable


Pandas Apply:  36%|█████████▋                 | 180/500 [00:17<00:32,  9.97it/s]

Error for GOOGL: 'NoneType' object is not subscriptable
Error for GPC: 'NoneType' object is not subscriptable
Error for GPN: 'NoneType' object is not subscriptable


Pandas Apply:  37%|█████████▉                 | 184/500 [00:18<00:30, 10.42it/s]

Error for GRMN: 'NoneType' object is not subscriptable
Error for GS: 'NoneType' object is not subscriptable
Error for GWW: 'NoneType' object is not subscriptable


Pandas Apply:  37%|██████████                 | 186/500 [00:18<00:30, 10.35it/s]

Error for HAL: 'NoneType' object is not subscriptable
Error for HAS: 'NoneType' object is not subscriptable
Error for HBAN: 'NoneType' object is not subscriptable


Pandas Apply:  38%|██████████▎                | 190/500 [00:18<00:29, 10.39it/s]

Error for HBI: 'NoneType' object is not subscriptable
Error for HCA: 'NoneType' object is not subscriptable
Error for HD: 'NoneType' object is not subscriptable


Pandas Apply:  38%|██████████▎                | 192/500 [00:18<00:30, 10.08it/s]

Error for HES: 'NoneType' object is not subscriptable
Error for HIG: 'NoneType' object is not subscriptable


Pandas Apply:  39%|██████████▍                | 194/500 [00:19<00:29, 10.21it/s]

Error for HII: 'NoneType' object is not subscriptable
Error for HLT: 'NoneType' object is not subscriptable
Error for HOG: 'NoneType' object is not subscriptable


Pandas Apply:  40%|██████████▋                | 198/500 [00:19<00:28, 10.50it/s]

Error for HOLX: 'NoneType' object is not subscriptable
Error for HON: 'NoneType' object is not subscriptable
Error for HP: 'NoneType' object is not subscriptable


Pandas Apply:  40%|██████████▊                | 200/500 [00:19<00:27, 10.75it/s]

Error for HPE: 'NoneType' object is not subscriptable
Error for HPQ: 'NoneType' object is not subscriptable
Error for HRB: 'NoneType' object is not subscriptable


Pandas Apply:  41%|███████████                | 204/500 [00:19<00:27, 10.82it/s]

Error for HRL: 'NoneType' object is not subscriptable
Error for HSIC: 'NoneType' object is not subscriptable
Error for HST: 'NoneType' object is not subscriptable


Pandas Apply:  41%|███████████                | 206/500 [00:20<00:27, 10.58it/s]

Error for HSY: 'NoneType' object is not subscriptable
Error for HUM: 'NoneType' object is not subscriptable


Pandas Apply:  42%|███████████▏               | 208/500 [00:20<00:30,  9.68it/s]

Error for IBM: 'NoneType' object is not subscriptable
Error for ICE: 'NoneType' object is not subscriptable


Pandas Apply:  42%|███████████▎               | 210/500 [00:20<00:29,  9.96it/s]

Error for IDXX: 'NoneType' object is not subscriptable
Error for IFF: 'NoneType' object is not subscriptable


Pandas Apply:  42%|███████████▍               | 212/500 [00:20<00:30,  9.42it/s]

Error for ILMN: 'NoneType' object is not subscriptable
Error for INCY: 'NoneType' object is not subscriptable


Pandas Apply:  43%|███████████▌               | 214/500 [00:21<00:30,  9.34it/s]

Error for INTC: 'NoneType' object is not subscriptable
Error for INTU: 'NoneType' object is not subscriptable


Pandas Apply:  43%|███████████▋               | 217/500 [00:21<00:28,  9.79it/s]

Error for IP: 'NoneType' object is not subscriptable
Error for IPG: 'NoneType' object is not subscriptable
Error for IPGP: 'NoneType' object is not subscriptable


Exception ignored from cffi callback <function buffer_callback at 0x109e4b560>:
Traceback (most recent call last):
  File "/Users/chloecurtis/.pyenv/versions/3.12.9/envs/corporate-sentiment-tracker/lib/python3.12/site-packages/curl_cffi/curl.py", line 67, in buffer_callback
    @ffi.def_extern()
    
KeyboardInterrupt: 
Pandas Apply:  44%|███████████▉               | 221/500 [00:21<00:24, 11.48it/s]

Error for IQV: 'NoneType' object is not subscriptable
Error for IR: Failed to perform, curl: (23) Failure writing output to destination, passed 13 returned 0. See https://curl.se/libcurl/c/libcurl-errors.html first for more details.
Error for IRM: 'NoneType' object is not subscriptable
Error for ISRG: 'NoneType' object is not subscriptable


Pandas Apply:  45%|████████████               | 223/500 [00:21<00:24, 11.14it/s]

Error for IT: 'NoneType' object is not subscriptable
Error for ITW: 'NoneType' object is not subscriptable
Error for IVZ: 'NoneType' object is not subscriptable


Pandas Apply:  45%|████████████▏              | 225/500 [00:21<00:24, 11.09it/s]

Error for JBHT: 'NoneType' object is not subscriptable
Error for JCI: 'NoneType' object is not subscriptable


Pandas Apply:  46%|████████████▎              | 229/500 [00:22<00:25, 10.59it/s]

Error for JEF: 'NoneType' object is not subscriptable
Error for JKHY: 'NoneType' object is not subscriptable
Error for JNJ: 'NoneType' object is not subscriptable


Pandas Apply:  46%|████████████▍              | 231/500 [00:22<00:26, 10.28it/s]

Error for JNPR: 'NoneType' object is not subscriptable
Error for JPM: 'NoneType' object is not subscriptable
Error for JWN: 'NoneType' object is not subscriptable


Exception ignored from cffi callback <function buffer_callback at 0x109e4b560>:
Traceback (most recent call last):
  File "/Users/chloecurtis/.pyenv/versions/3.12.9/envs/corporate-sentiment-tracker/lib/python3.12/site-packages/curl_cffi/curl.py", line 67, in buffer_callback
    @ffi.def_extern()
    
KeyboardInterrupt: 
Pandas Apply:  47%|████████████▌              | 233/500 [00:22<00:24, 10.73it/s]

Error for K: Failed to perform, curl: (23) Failure writing output to destination, passed 13 returned 0. See https://curl.se/libcurl/c/libcurl-errors.html first for more details.
Error for KEY: 'NoneType' object is not subscriptable


Pandas Apply:  47%|████████████▋              | 236/500 [00:23<00:28,  9.28it/s]

Error for KEYS: 'NoneType' object is not subscriptable
Error for KHC: 'NoneType' object is not subscriptable


Pandas Apply:  48%|████████████▊              | 238/500 [00:23<00:27,  9.39it/s]

Error for KIM: 'NoneType' object is not subscriptable
Error for KLAC: 'NoneType' object is not subscriptable


Pandas Apply:  48%|████████████▉              | 240/500 [00:23<00:27,  9.45it/s]

Error for KMB: 'NoneType' object is not subscriptable
Error for KMI: 'NoneType' object is not subscriptable


Pandas Apply:  49%|█████████████              | 243/500 [00:23<00:25, 10.06it/s]

Error for KMX: 'NoneType' object is not subscriptable
Error for KO: 'NoneType' object is not subscriptable
Error for KR: 'NoneType' object is not subscriptable


Pandas Apply:  49%|█████████████▏             | 245/500 [00:24<00:24, 10.48it/s]

Error for KSS: 'NoneType' object is not subscriptable
Error for L: 'NoneType' object is not subscriptable
Error for LB: 'NoneType' object is not subscriptable


Pandas Apply:  50%|█████████████▍             | 249/500 [00:24<00:24, 10.44it/s]

Error for LEG: 'NoneType' object is not subscriptable
Error for LEN: 'NoneType' object is not subscriptable
Error for LH: 'NoneType' object is not subscriptable


Pandas Apply:  50%|█████████████▌             | 251/500 [00:24<00:23, 10.74it/s]

Error for LIN: 'NoneType' object is not subscriptable
Error for LKQ: 'NoneType' object is not subscriptable
Error for LLY: 'NoneType' object is not subscriptable


Pandas Apply:  51%|█████████████▊             | 255/500 [00:24<00:22, 10.99it/s]

Error for LMT: 'NoneType' object is not subscriptable
Error for LNC: 'NoneType' object is not subscriptable
Error for LNT: 'NoneType' object is not subscriptable


Pandas Apply:  51%|█████████████▉             | 257/500 [00:25<00:23, 10.15it/s]

Error for LOW: 'NoneType' object is not subscriptable
Error for LRCX: 'NoneType' object is not subscriptable


Pandas Apply:  52%|█████████████▉             | 259/500 [00:25<00:25,  9.38it/s]

Error for LUV: 'NoneType' object is not subscriptable
Error for LW: 'NoneType' object is not subscriptable
Error for LYB: 'NoneType' object is not subscriptable


Pandas Apply:  52%|██████████████             | 261/500 [00:25<00:24,  9.75it/s]

Error for M: 'NoneType' object is not subscriptable
Error for MA: 'NoneType' object is not subscriptable


Pandas Apply:  53%|██████████████▎            | 265/500 [00:26<00:25,  9.08it/s]

Error for MAA: 'NoneType' object is not subscriptable
Error for MAC: 'NoneType' object is not subscriptable
Error for MAR: 'NoneType' object is not subscriptable


Pandas Apply:  53%|██████████████▍            | 267/500 [00:26<00:24,  9.65it/s]

Error for MAS: 'NoneType' object is not subscriptable
Error for MAT: 'NoneType' object is not subscriptable
Error for MCD: 'NoneType' object is not subscriptable


Pandas Apply:  54%|██████████████▋            | 271/500 [00:26<00:23,  9.93it/s]

Error for MCHP: 'NoneType' object is not subscriptable
Error for MCK: 'NoneType' object is not subscriptable
Error for MCO: 'NoneType' object is not subscriptable


Pandas Apply:  55%|██████████████▋            | 273/500 [00:26<00:24,  9.38it/s]

Error for MDLZ: 'NoneType' object is not subscriptable
Error for MDT: 'NoneType' object is not subscriptable


Pandas Apply:  55%|██████████████▉            | 276/500 [00:27<00:22,  9.96it/s]

Error for MET: 'NoneType' object is not subscriptable
Error for MGM: 'NoneType' object is not subscriptable
Error for MHK: 'NoneType' object is not subscriptable


Pandas Apply:  56%|███████████████            | 278/500 [00:27<00:22,  9.94it/s]

Error for MKC: 'NoneType' object is not subscriptable
Error for MLM: 'NoneType' object is not subscriptable
Error for MMC: 'NoneType' object is not subscriptable


Pandas Apply:  56%|███████████████            | 280/500 [00:27<00:21, 10.35it/s]

Error for MMM: 'NoneType' object is not subscriptable
Error for MNST: 'NoneType' object is not subscriptable
Error for MO: 'NoneType' object is not subscriptable


Pandas Apply:  57%|███████████████▎           | 284/500 [00:27<00:21, 10.02it/s]

Error for MOS: 'NoneType' object is not subscriptable
Error for MPC: 'NoneType' object is not subscriptable


Pandas Apply:  57%|███████████████▍           | 286/500 [00:28<00:21, 10.01it/s]

Error for MRK: 'NoneType' object is not subscriptable
Error for MS: 'NoneType' object is not subscriptable
Error for MSCI: 'NoneType' object is not subscriptable


Pandas Apply:  58%|███████████████▋           | 290/500 [00:28<00:20, 10.34it/s]

Error for MSFT: 'NoneType' object is not subscriptable
Error for MSI: 'NoneType' object is not subscriptable
Error for MTB: 'NoneType' object is not subscriptable


Pandas Apply:  58%|███████████████▊           | 292/500 [00:28<00:19, 10.53it/s]

Error for MTD: 'NoneType' object is not subscriptable
Error for MU: 'NoneType' object is not subscriptable
Error for NCLH: 'NoneType' object is not subscriptable


Pandas Apply:  59%|███████████████▉           | 294/500 [00:28<00:19, 10.59it/s]

Error for NDAQ: 'NoneType' object is not subscriptable
Error for NEE: 'NoneType' object is not subscriptable


Pandas Apply:  60%|████████████████           | 298/500 [00:29<00:19, 10.46it/s]

Error for NEM: 'NoneType' object is not subscriptable
Error for NFLX: 'NoneType' object is not subscriptable
Error for NI: 'NoneType' object is not subscriptable


Pandas Apply:  60%|████████████████▏          | 300/500 [00:29<00:19, 10.30it/s]

Error for NKE: 'NoneType' object is not subscriptable
Error for NKTR: 'NoneType' object is not subscriptable
Error for NOC: 'NoneType' object is not subscriptable


Pandas Apply:  61%|████████████████▍          | 304/500 [00:29<00:18, 10.63it/s]

Error for NOV: 'NoneType' object is not subscriptable
Error for NRG: 'NoneType' object is not subscriptable
Error for NSC: 'NoneType' object is not subscriptable


Pandas Apply:  61%|████████████████▌          | 306/500 [00:30<00:18, 10.32it/s]

Error for NTAP: 'NoneType' object is not subscriptable
Error for NTRS: 'NoneType' object is not subscriptable
Error for NUE: 'NoneType' object is not subscriptable


Pandas Apply:  62%|████████████████▋          | 310/500 [00:30<00:17, 10.77it/s]

Error for NVDA: 'NoneType' object is not subscriptable
Error for NWL: 'NoneType' object is not subscriptable
Error for NWS: 'NoneType' object is not subscriptable


Pandas Apply:  62%|████████████████▊          | 312/500 [00:30<00:17, 10.84it/s]

Error for NWSA: 'NoneType' object is not subscriptable
Error for O: 'NoneType' object is not subscriptable
Error for OKE: 'NoneType' object is not subscriptable


Pandas Apply:  63%|█████████████████          | 316/500 [00:30<00:16, 11.20it/s]

Error for OMC: 'NoneType' object is not subscriptable
Error for ORCL: 'NoneType' object is not subscriptable
Error for ORLY: 'NoneType' object is not subscriptable


Pandas Apply:  64%|█████████████████▏         | 318/500 [00:31<00:16, 11.23it/s]

Error for OXY: 'NoneType' object is not subscriptable
Error for PAYX: 'NoneType' object is not subscriptable
Error for PCAR: 'NoneType' object is not subscriptable


Pandas Apply:  64%|█████████████████▍         | 322/500 [00:31<00:15, 11.15it/s]

Error for PEG: 'NoneType' object is not subscriptable
Error for PEP: 'NoneType' object is not subscriptable
Error for PFE: 'NoneType' object is not subscriptable


Pandas Apply:  65%|█████████████████▍         | 324/500 [00:31<00:16, 10.98it/s]

Error for PFG: 'NoneType' object is not subscriptable
Error for PG: 'NoneType' object is not subscriptable


Pandas Apply:  65%|█████████████████▌         | 326/500 [00:31<00:17, 10.17it/s]

Error for PGR: 'NoneType' object is not subscriptable
Error for PH: 'NoneType' object is not subscriptable
Error for PHM: 'NoneType' object is not subscriptable


Pandas Apply:  66%|█████████████████▋         | 328/500 [00:32<00:17,  9.95it/s]

Error for PKG: 'NoneType' object is not subscriptable
Error for PLD: 'NoneType' object is not subscriptable


Pandas Apply:  66%|█████████████████▊         | 331/500 [00:32<00:17,  9.68it/s]

Error for PM: 'NoneType' object is not subscriptable
Error for PNC: 'NoneType' object is not subscriptable
Error for PNR: 'NoneType' object is not subscriptable


Pandas Apply:  67%|█████████████████▉         | 333/500 [00:32<00:16, 10.10it/s]Exception ignored from cffi callback <function buffer_callback at 0x109e4b560>:
Traceback (most recent call last):
  File "/Users/chloecurtis/.pyenv/versions/3.12.9/envs/corporate-sentiment-tracker/lib/python3.12/site-packages/curl_cffi/curl.py", line 67, in buffer_callback
    @ffi.def_extern()
    
KeyboardInterrupt: 
Pandas Apply:  67%|██████████████████         | 335/500 [00:32<00:14, 11.35it/s]

Error for PNW: 'NoneType' object is not subscriptable
Error for PPG: Failed to perform, curl: (23) Failure writing output to destination, passed 13 returned 0. See https://curl.se/libcurl/c/libcurl-errors.html first for more details.
Error for PPL: 'NoneType' object is not subscriptable


Pandas Apply:  67%|██████████████████▏        | 337/500 [00:32<00:15, 10.66it/s]

Error for PRGO: 'NoneType' object is not subscriptable
Error for PRU: 'NoneType' object is not subscriptable


Pandas Apply:  68%|██████████████████▎        | 339/500 [00:33<00:15, 10.27it/s]

Error for PSA: 'NoneType' object is not subscriptable
Error for PSX: 'NoneType' object is not subscriptable


Pandas Apply:  68%|██████████████████▍        | 341/500 [00:33<00:16,  9.87it/s]

Error for PVH: 'NoneType' object is not subscriptable
Error for PWR: 'NoneType' object is not subscriptable
Error for PYPL: 'NoneType' object is not subscriptable


Pandas Apply:  69%|██████████████████▋        | 345/500 [00:33<00:15,  9.99it/s]

Error for QCOM: 'NoneType' object is not subscriptable
Error for QRVO: 'NoneType' object is not subscriptable
Error for RCL: 'NoneType' object is not subscriptable


Pandas Apply:  69%|██████████████████▋        | 347/500 [00:33<00:15, 10.12it/s]

Error for REG: 'NoneType' object is not subscriptable
Error for REGN: 'NoneType' object is not subscriptable
Error for RF: 'NoneType' object is not subscriptable


Pandas Apply:  70%|██████████████████▉        | 351/500 [00:34<00:13, 10.76it/s]

Error for RHI: 'NoneType' object is not subscriptable
Error for RJF: 'NoneType' object is not subscriptable
Error for RL: 'NoneType' object is not subscriptable


Pandas Apply:  71%|███████████████████        | 353/500 [00:34<00:13, 10.52it/s]

Error for RMD: 'NoneType' object is not subscriptable
Error for ROK: 'NoneType' object is not subscriptable
Error for ROL: 'NoneType' object is not subscriptable


Pandas Apply:  71%|███████████████████▎       | 357/500 [00:34<00:13, 10.60it/s]

Error for ROP: 'NoneType' object is not subscriptable
Error for ROST: 'NoneType' object is not subscriptable
Error for RSG: 'NoneType' object is not subscriptable


Pandas Apply:  72%|███████████████████▍       | 359/500 [00:35<00:13, 10.40it/s]

Error for SBAC: 'NoneType' object is not subscriptable
Error for SBUX: 'NoneType' object is not subscriptable
Error for SCHW: 'NoneType' object is not subscriptable


Pandas Apply:  73%|███████████████████▌       | 363/500 [00:35<00:12, 10.72it/s]

Error for SEE: 'NoneType' object is not subscriptable
Error for SHW: 'NoneType' object is not subscriptable
Error for SJM: 'NoneType' object is not subscriptable


Pandas Apply:  73%|███████████████████▋       | 365/500 [00:35<00:12, 10.78it/s]

Error for SLB: 'NoneType' object is not subscriptable
Error for SLG: 'NoneType' object is not subscriptable


Pandas Apply:  73%|███████████████████▊       | 367/500 [00:35<00:13, 10.06it/s]

Error for SNA: 'NoneType' object is not subscriptable
Error for SNPS: 'NoneType' object is not subscriptable


Pandas Apply:  74%|███████████████████▉       | 369/500 [00:36<00:13,  9.69it/s]

Error for SO: 'NoneType' object is not subscriptable
Error for SPG: 'NoneType' object is not subscriptable


Pandas Apply:  74%|████████████████████       | 372/500 [00:36<00:13,  9.80it/s]

Error for SPGI: 'NoneType' object is not subscriptable
Error for SRE: 'NoneType' object is not subscriptable
Error for STI: 'NoneType' object is not subscriptable


Pandas Apply:  75%|████████████████████▏      | 374/500 [00:36<00:13,  9.65it/s]

Error for STT: 'NoneType' object is not subscriptable
Error for STX: 'NoneType' object is not subscriptable


Pandas Apply:  75%|████████████████████▎      | 376/500 [00:36<00:12, 10.01it/s]

Error for STZ: 'NoneType' object is not subscriptable
Error for SWK: 'NoneType' object is not subscriptable
Error for SWKS: 'NoneType' object is not subscriptable


Pandas Apply:  76%|████████████████████▌      | 380/500 [00:37<00:11, 10.10it/s]

Error for SYF: 'NoneType' object is not subscriptable
Error for SYK: 'NoneType' object is not subscriptable
Error for SYY: 'NoneType' object is not subscriptable


In [13]:
import yfinance as yf
import pandas as pd

ticker = "AAPL"
stock = yf.Ticker(ticker)
earnings_df = stock.get_earnings_dates(limit=12)

print("earnings_df type:", type(earnings_df))
print("earnings_df content:")
print(earnings_df)

print("earnings_df index:")
print(earnings_df.index)
print("earnings_df.index type:", type(earnings_df.index))

TypeError: 'NoneType' object is not subscriptable

In [None]:
# Add columns for earnings date and time
df["EarningsDate"] = None
df["EarningsTime"] = None

for idx, row in df.iterrows():
    ticker = row["ticker"]
    quarter = row["quarter"]

    try:
        start_date, end_date = get_quarter_date_range(quarter)

        # Fetch earnings dates
        ticker_obj = yf.Ticker(ticker)
        earnings_df = ticker_obj.get_earnings_dates(limit=40)

        if earnings_df is not None and not earnings_df.empty:
            # Localize range to match earnings_df index timezone
            tz = earnings_df.index.tz or pytz.timezone("America/New_York")
            start_date = tz.localize(start_date)
            end_date = tz.localize(end_date)

            # Filter earnings within the range
            filtered = earnings_df[(earnings_df.index >= start_date) & (earnings_df.index <= end_date)]

            if not filtered.empty:
                full_dt = filtered.index[0]
                df.at[idx, "EarningsDate"] = full_dt  # full timestamp
                df.at[idx, "EarningsTime"] = full_dt.time()  # just time (HH:MM:SS)
            else:
                print(f"No earnings data for {ticker} in {quarter}")
    except Exception as e:
        print(f"Error fetching earnings for {ticker}: {e}")

In [92]:
df = pd.DataFrame(data, columns=["Quarter", "Ticker", "CIK", "Company"])

In [93]:
def get_close_price(ticker, earnings_dt, session):
    try:
        ticker_obj = yf.Ticker(ticker)
        earnings_dt = pd.Timestamp(earnings_dt)

        if session == "BMO":
            price_date = earnings_dt
        elif session == "AMC":
            price_date = earnings_dt + pd.Timedelta(days=1)
        else:
            return None
            
        price_data = ticker_obj.history(start=price_date, end=price_date + pd.Timedelta(days=2))

        if price_date in price_data.index:
            return price_data.loc[price_date]["Close"]
        else:
            return price_data["Close"].iloc[0] if not price_data.empty else None

        # Calculate CoD (Change on Day %)
        try:
                if session == "BMO":
                    prev_day = full_dt.date() - pd.Timedelta(days=1)
                    price_data = ticker_obj.history(start=prev_day, end=full_dt.date() + pd.Timedelta(days=1))
                    if prev_day in price_data.index and full_dt.date() in price_data.index:
                        prev_close = price_data.loc[prev_day]["Close"]
                        today_close = price_data.loc[full_dt.date()]["Close"]
                        cod = ((today_close - prev_close) / prev_close) * 100                            
                        df.at[idx, "CoD"] = round(cod, 2)
                elif session == "AMC":
                    next_day = full_dt.date() + pd.Timedelta(days=1)
                    price_data = ticker_obj.history(start=full_dt.date(), end=next_day + pd.Timedelta(days=1))
                    if full_dt.date() in price_data.index and next_day in price_data.index:
                        today_close = price_data.loc[full_dt.date()]["Close"]
                        next_close = price_data.loc[next_day]["Close"]
                        cod = ((next_close - today_close) / today_close) * 100
                        df.at[idx, "CoD"] = round(cod, 2)
                    
        except Exception as e:
            print(f"Error getting close price for {ticker} on {earnings_dt}: {e}")

# Initialize columns
df["EarningsDate"] = None
df["EarningsTime"] = None
df["EarningsSession"] = None
df["ClosePriceEarnings"] = None
df["Sector"] = None
df["CoD"] = None

# Process each row
for idx, row in df.iterrows():
    ticker = row["Ticker"]
    quarter = row["Quarter"]

    try:
        start_date, end_date = get_quarter_date_range(quarter)
        ticker_obj = yf.Ticker(ticker)

        earnings_df = ticker_obj.get_earnings_dates(limit=30)

        if earnings_df is not None and not earnings_df.empty:
            tz = earnings_df.index.tz or pytz.timezone("America/New_York")
            start_date = tz.localize(start_date)
            end_date = tz.localize(end_date)

            filtered = earnings_df[(earnings_df.index >= start_date) & (earnings_df.index <= end_date)]

            if not filtered.empty:
                full_dt = filtered.index[0]
                df.at[idx, "EarningsDate"] = full_dt.date()
                df.at[idx, "EarningsTime"] = full_dt.time()

                # Determine session (AMC or BMO)
                session = "AMC" if full_dt.time() >= datetime.strptime("15:00:00", "%H:%M:%S").time() else "BMO"
                df.at[idx, "EarningsSession"] = session

                # Get close price
                close_price = get_close_price(ticker, full_dt.date(), session)
                df.at[idx, "ClosePriceEarnings"] = close_price

        # Get sector info
        info = ticker_obj.info
        df.at[idx, "Sector"] = info.get("sector", "N/A")
        
    except Exception as e:
        print(f"Error processing {ticker}: {e}")

df

SyntaxError: expected 'except' or 'finally' block (828948841.py, line 43)

In [80]:
from datetime import time

# Define cutoff time
cutoff = time(12, 0)  # 3:00 PM

df["EarningsSession"] = df["EarningsTime"].apply(
    lambda t: "AMC" if pd.notnull(t) and t > cutoff else ("BMO" if pd.notnull(t) else None)
)

In [81]:
df

Unnamed: 0,Quarter,Ticker,CIK,Company,EarningsDate,EarningsTime,EarningsSession,ClosePriceEarnings,Sector,CoD
0,Q1-19,A,1090872,"AGILENT TECHNOLOGIES, INC.",2019-02-20,16:05:00,AMC,74.476906,Healthcare,
1,Q1-19,AAL,6201,American Airlines Group Inc.,2019-01-24,07:30:00,BMO,33.124378,Industrials,
2,Q1-19,AAP,1158449,ADVANCE AUTO PARTS INC,2019-02-19,06:30:00,BMO,149.37854,Consumer Cyclical,
3,Q1-19,AAPL,320193,Apple Inc.,2019-01-29,16:30:00,AMC,39.363914,Technology,
4,Q1-19,ABBV,1551152,AbbVie Inc.,2019-01-25,07:47:00,BMO,61.125439,Healthcare,


KeyError: 'ticker'

In [16]:
def get_quarter_date_range(qtr_str):
    qtr, yr = qtr_str.split('-')
    year_full = int("20" + yr) if len(yr) == 2 else int(yr)
    start_date = pd.to_datetime(f"{year_full}-{quarter_months[qtr][0]}")
    end_date = pd.to_datetime(f"{year_full}-{quarter_months[qtr][1]}")
    return start_date, end_date

def get_earnings_date(ticker, start_date, end_date):
    try:
        ticker_obj = yf.Ticker(ticker)
        # Try to get earnings dates DataFrame
        earnings_dates_df = ticker_obj.get_earnings_dates(limit=10)  # last 10 earnings dates

        if earnings_dates_df is None or earnings_dates_df.empty:
            return None

        # Inspect columns
        # Typical columns: 'EPS Estimate', 'Reported EPS', 'Surprise(%)', 'EPS Report Date'
        # Look for date columns:
        date_col_candidates = [col for col in earnings_dates_df.columns if 'date' in col.lower()]
        if not date_col_candidates:
            return None
        
        date_col = date_col_candidates[0]
        
        earnings_dates_df[date_col] = pd.to_datetime(earnings_dates_df[date_col], errors='coerce')
        filtered = earnings_dates_df[
            (earnings_dates_df[date_col] >= start_date) & 
            (earnings_dates_df[date_col] <= end_date)
        ]

        if filtered.empty:
            return None
        
        # Return earliest earnings date in that quarter
        return filtered[date_col].iloc[0].date()
    except Exception as e:
        print(f"Error fetching earnings for {ticker}: {e}")
        return None

earnings_dates = []
for idx, row in df.iterrows():
    start_date, end_date = get_quarter_date_range(row['Quarter'])
    earnings_date = get_earnings_date(row['Ticker'], start_date, end_date)
    earnings_dates.append(earnings_date)

df['EarningsDate'] = earnings_dates
print(df)

  Quarter Ticker         CIK                       Company EarningsDate
0   Q1-19      A  0001090872    AGILENT TECHNOLOGIES, INC.         None
1   Q1-19    AAL  0000006201  American Airlines Group Inc.         None
2   Q1-19    AAP  0001158449        ADVANCE AUTO PARTS INC         None
3   Q1-19   AAPL  0000320193                    Apple Inc.         None
4   Q1-19   ABBV  0001551152                   AbbVie Inc.         None


In [13]:
df

Unnamed: 0,Quarter,Ticker,CIK,Company,EarningsDateRange,EarningsDate
0,Q1-19,A,1090872,"AGILENT TECHNOLOGIES, INC.",2019-01-01 to 2019-03-31,
1,Q1-19,AAL,6201,American Airlines Group Inc.,2019-01-01 to 2019-03-31,
2,Q1-19,AAP,1158449,ADVANCE AUTO PARTS INC,2019-01-01 to 2019-03-31,
3,Q1-19,AAPL,320193,Apple Inc.,2019-01-01 to 2019-03-31,
4,Q1-19,ABBV,1551152,AbbVie Inc.,2019-01-01 to 2019-03-31,


In [26]:
import pandas as pd
import yfinance as yf
from datetime import datetime

# Sample data (replace with your full DataFrame)
data = {
    "Quarter": ["Q1-19", "Q1-19", "Q2-19"],
    "Ticker": ["AAPL", "AAPL", "AAPL"],
}
df = pd.DataFrame(data)

# Helper to convert quarter string to date range
def quarter_to_dates(quarter_str):
    q, y = quarter_str.split("-")
    year = int("20" + y)  # convert '19' -> 2019
    if q == "Q1":
        return (datetime(year, 1, 1), datetime(year, 3, 31))
    elif q == "Q2":
        return (datetime(year, 4, 1), datetime(year, 6, 30))
    elif q == "Q3":
        return (datetime(year, 7, 1), datetime(year, 9, 30))
    elif q == "Q4":
        return (datetime(year, 10, 1), datetime(year, 12, 31))

# Add a new column for earnings date(s)
df["EarningsDate"] = None

for idx, row in df.iterrows():
    ticker = row["Ticker"]
    quarter = row["Quarter"]
    start_date, end_date = quarter_to_dates(quarter)

    ticker_obj = yf.Ticker(ticker)

    try:
        earnings_df = ticker_obj.get_earnings_dates(limit=30)
        if earnings_df.empty:
            print(f"No earnings data for {ticker}")
            continue

        # earnings_df.index contains earnings dates as pandas Timestamp
        # Filter rows where the earnings date falls within quarter range
        mask = (earnings_df.index >= pd.Timestamp(start_date)) & (earnings_df.index <= pd.Timestamp(end_date))
        filtered = earnings_df.loc[mask]

        if not filtered.empty:
            # Store earnings dates as string joined by comma (if multiple)
            earnings_dates_str = ", ".join(date.strftime("%Y-%m-%d") for date in filtered.index)
            df.at[idx, "EarningsDate"] = earnings_dates_str
        else:
            df.at[idx, "EarningsDate"] = None
            print(f"No earnings in quarter {quarter} for {ticker}")

    except Exception as e:
        print(f"Error fetching earnings for {ticker}: {e}")
        df.at[idx, "EarningsDate"] = None

print(df)


Error fetching earnings for AAPL: Invalid comparison between dtype=datetime64[ns, America/New_York] and Timestamp
Error fetching earnings for AAPL: Invalid comparison between dtype=datetime64[ns, America/New_York] and Timestamp
Error fetching earnings for AAPL: Invalid comparison between dtype=datetime64[ns, America/New_York] and Timestamp
  Quarter Ticker EarningsDate
0   Q1-19   AAPL         None
1   Q1-19   AAPL         None
2   Q2-19   AAPL         None


In [18]:
import yfinance as yf
import pandas as pd

ticker = "AAPL"
quarter = "Q1-19"

# Helper to get quarter date range
def get_quarter_date_range(qtr_str):
    quarter_months = {
        "Q1": ("01-01", "03-31"),
        "Q2": ("04-01", "06-30"),
        "Q3": ("07-01", "09-30"),
        "Q4": ("10-01", "12-31"),
    }
    qtr, yr = qtr_str.split('-')
    year_full = int("20" + yr)
    start_date = pd.to_datetime(f"{year_full}-{quarter_months[qtr][0]}")
    end_date = pd.to_datetime(f"{year_full}-{quarter_months[qtr][1]}")
    return start_date, end_date

start_date, end_date = get_quarter_date_range(quarter)
print(f"Looking for earnings dates between {start_date.date()} and {end_date.date()}")

ticker_obj = yf.Ticker(ticker)

try:
    # Try get_earnings_dates
    earnings_dates_df = ticker_obj.get_earnings_dates(limit=10)
    print("Earnings Dates DataFrame:")
    print(earnings_dates_df)

    if earnings_dates_df is None or earnings_dates_df.empty:
        print("No earnings dates data found.")
    else:
        # Identify possible date columns
        date_cols = [col for col in earnings_dates_df.columns if 'date' in col.lower()]
        print("Date columns found:", date_cols)
        
        if date_cols:
            date_col = date_cols[0]
            earnings_dates_df[date_col] = pd.to_datetime(earnings_dates_df[date_col], errors='coerce')
            filtered = earnings_dates_df[
                (earnings_dates_df[date_col] >= start_date) & 
                (earnings_dates_df[date_col] <= end_date)
            ]
            print(f"Earnings dates within the quarter {quarter}:")
            print(filtered)
        else:
            print("No date columns found in earnings data.")
except Exception as e:
    print(f"Error fetching earnings dates: {e}")


Looking for earnings dates between 2019-01-01 and 2019-03-31
Earnings Dates DataFrame:
                           EPS Estimate  Reported EPS  Surprise(%)
Earnings Date                                                     
2026-04-29 06:59:00-04:00           NaN           NaN          NaN
2026-01-28 16:00:00-05:00           NaN           NaN          NaN
2025-10-29 06:59:00-04:00           NaN           NaN          NaN
2025-07-30 06:59:00-04:00           NaN           NaN          NaN
2025-05-01 16:30:00-04:00          1.63          1.65         1.41
2025-01-30 16:31:00-05:00          2.35          2.40         2.15
2024-10-31 16:31:00-04:00          1.60          1.64         2.35
2024-08-01 16:30:00-04:00          1.35          1.40         3.99
2024-05-02 16:31:00-04:00          1.50          1.53         1.97
2024-02-01 16:00:00-05:00          2.10          2.18         3.90
Date columns found: []
No date columns found in earnings data.


In [24]:
#changings
tickers = "WMT"
start_year = 2014
end_year = 2024

ticker = yf.Ticker(tickers)

# Historical Prices
price_data = ticker.history(start=f"{start_year}-01-01", end=f"{end_year}-12-31")

In [25]:
# Earnings Dates
earnings_df = ticker.get_earnings_dates(limit=100)
earnings_dates = earnings_df.loc[
    earnings_df.index.to_series().dt.year.between(start_year, end_year)
].index.normalize()

# Shares Outstanding
shares_outstanding = ticker.info.get("sharesOutstanding")

# --- 1) Market Cap on Earnings Dates ---
market_caps = []
for date in earnings_dates:
    date = pd.to_datetime(date) #normalize()

    # Shift to previous available trading day if it's not in the price data
    while date not in price_data.index:
        date -= pd.Timedelta(days=1)

    close_price = price_data.loc[date]['Close']
    market_cap = close_price * shares_outstanding
    market_caps.append({
        'Date': date.strftime('%Y-%m-%d'),
        'Close Price': round(close_price, 2),
        'Estimated Market Cap': round(market_cap)
    })

market_cap_df = pd.DataFrame(market_caps)

# closing price df
closing_price_df = price_data[['Close']].copy()
closing_price_df.columns = [tickers]
closing_price_df.index = closing_price_df.index.normalize()
closing_price_df.index = closing_price_df.index.date

# --- 3) Latest Market Cap ---
latest_price = price_data['Close'].iloc[-1]
latest_market_cap = latest_price * shares_outstanding

info = ticker.info
sector = info.get("sector", "N/A")
industry = info.get("industry", "N/A")
long_name = info.get("longName", "N/A")
short_name = info.get("shortName", "N/A")

latest_market_cap_df = pd.DataFrame([{
    'Ticker': tickers,
    'Short Name': short_name,
    'Long Name': long_name,
    'Latest Price': round(latest_price, 2),
    'Latest Market Cap': round(latest_market_cap),
    'Sector': sector,
    'Industry': industry
}])

#Market Cap on Earnings Dates for singular ticker
print(market_cap_df)

#Daily Closing Prices - can add column for each ticker
print(closing_price_df.head(n=10))

#Last Market Caps - can add row for each ticker
latest_market_cap_df

          Date  Close Price  Estimated Market Cap
0   2024-11-19        85.96          687789142137
1   2024-08-15        72.44          579553715562
2   2024-05-16        63.36          506931258636
3   2024-02-20        57.63          461073039406
4   2023-11-16        50.94          407594740495
5   2023-08-17        50.83          406680576322
6   2023-05-18        49.28          394255885126
7   2023-02-21        47.55          380469378651
8   2022-11-15        47.41          379323468733
9   2022-08-16        44.82          358561546563
10  2022-05-17        42.05          336462836311
11  2022-02-17        44.13          353097407349
12  2021-11-16        45.31          362544777662
13  2021-08-17        47.70          381612816371
14  2021-05-18        44.75          358036707917
15  2021-02-18        43.06          344516832659
16  2020-11-17        46.55          372461652097
17  2020-08-18        41.98          335906225332
18  2020-05-19        38.78          310293513415


Unnamed: 0,Ticker,Short Name,Long Name,Latest Price,Latest Market Cap,Sector,Industry
0,WMT,Walmart Inc.,Walmart Inc.,90.1,720913126468,Consumer Defensive,Discount Stores
