In [1]:
#!pip install yfinance
import yfinance as yf
import pandas as pd
import csv
import glob
import os
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime, timedelta

Open the CSV file and load it into a dictionary.

In [2]:
csv_file = 'filtered_stock.csv'

# Rebuild the dictionary stored in the CSV: each row has a 'Key' column and a
# 'Values' column holding a ', '-separated list, which is split back into a
# Python list. A key that appears more than once keeps its last row.
with open(csv_file, newline='') as csvfile:
    data = {
        record['Key']: record['Values'].split(', ')
        for record in csv.DictReader(csvfile)
    }

Fetch the earnings call dates for each company.

In [3]:
# Map each company (the entries of data[inds]) to the list of dates read from
# the files found under ../sectors/<inds>/<cmp>/.
earn_dict = {}

for inds in data:
    file_loc = '../sectors/' + inds
    for cmp in data[inds]:
        # Filter into a new list instead of calling remove() while iterating:
        # removing during iteration skips the entry that follows each removed
        # one. The 'names' check looks at the file name only, so a directory
        # component containing 'names' cannot discard every file. Entries that
        # are not regular files are skipped because they cannot be opened, and
        # the result is sorted so the date order is reproducible between runs.
        folder_names = sorted(
            name
            for name in glob.glob(file_loc + '/' + cmp + '/*')
            if 'names' not in os.path.basename(name) and os.path.isfile(name)
        )

        earn_dates = []
        for earn_logs in folder_names:
            # encoding='utf8' is explicit because the OS default encoding is
            # not UTF-8 (Korean locale, per the original author's note).
            # The with-block closes the file even if reading fails.
            with open(earn_logs, encoding='utf8') as file:
                earn_log = csv.reader(file)
                # The date is taken from the first cell of the third row;
                # the first two rows are skipped.
                next(earn_log, None)
                next(earn_log, None)
                earn_date = next(earn_log, None)

            if not earn_date:
                # Fewer than three rows, or an empty third row: nothing to read.
                print('skipping ' + earn_logs + ': no date found in the third row')
                continue

            # Keep only the part before the first space (drops any time part).
            earn_dates.append(str(earn_date[0]).split(' ')[0])
        earn_dict[cmp] = earn_dates



KeyboardInterrupt: 

Write the earnings call date dictionary to a CSV file.

In [72]:
csv_file = 'earning_call_dates.csv'

# Persist earn_dict using a 'Key' column for the dictionary key and a 'Values'
# column holding the dates joined with ', '.
with open(csv_file, 'w', newline='') as csvfile:
    fieldnames = ['Key', 'Values']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(
        {'Key': ticker, 'Values': ', '.join(dates)}
        for ticker, dates in earn_dict.items()
    )

Create a folder for each industry.

In [66]:
# Create one output folder per industry in the current working directory.
# exist_ok=True makes the cell safe to re-run: os.mkdir raises FileExistsError
# as soon as a folder from a previous run is already there.
for inds in data:
    os.makedirs(inds, exist_ok=True)

Create a DataFrame for each company with its stock prices on several dates after each earnings call.

In [None]:
date_format = '%Y-%m-%d'
currwd = os.getcwd()

# Row offsets into the downloaded price history for each tracked day. Rows are
# the rows returned by yf.download, so "+7" is the 6th row and "+28" the 21st.
DAY_OFFSETS = {'Day +1': 1, 'Day +2': 2, 'Day +7': 5, 'Day +28': 20}
IMPACT_COLUMNS = list(DAY_OFFSETS)
PRICE_COLUMNS = ['Day 0'] + IMPACT_COLUMNS

# Upper bound on how many calendar days the download start is moved back when
# looking for a first row on or before the earnings date.
MAX_LOOKBACK_DAYS = 10


def _first_price_date(yfdata):
    """Return the date of the first row of ``yfdata`` as a datetime (time of day dropped)."""
    return datetime.strptime(str(yfdata.index[0]).split(' ')[0], date_format)


def _download_from_earnings_date(cmp, earn_date):
    """Download prices for ``cmp`` so that the first row is on or before ``earn_date``.

    Parameters:
        cmp: ticker symbol passed to ``yf.download``.
        earn_date: earnings call date as a ``'%Y-%m-%d'`` string.

    Returns:
        The downloaded DataFrame, or ``None`` when no usable history is found
        (empty download, no row on or before the earnings date within
        ``MAX_LOOKBACK_DAYS`` days, or too few rows to read every offset in
        ``DAY_OFFSETS``).
    """
    datetime_earn_date = datetime.strptime(earn_date, date_format)
    yfdata = yf.download(cmp, earn_date)

    # When the first downloaded row is later than the earnings date, move the
    # start back one more day on every pass. The previous version recomputed
    # "earnings date - 1 day" each time, so it never terminated when a single
    # step back was not enough.
    lookback = 0
    while len(yfdata) > 0 and datetime_earn_date < _first_price_date(yfdata):
        lookback += 1
        if lookback > MAX_LOOKBACK_DAYS:
            return None
        start_date = datetime_earn_date - timedelta(days=lookback)
        yfdata = yf.download(cmp, start_date.strftime(date_format))

    # The largest offset must be a valid row position.
    if len(yfdata) <= max(DAY_OFFSETS.values()):
        return None
    return yfdata


for inds in data:

    # Per-industry records, one entry per company:
    # average absolute price gap and average absolute percentage change.
    impc_price_rows = {}
    impc_perc_rows = {}

    for cmp in data[inds]:
        # Per-company records keyed by earnings date: raw prices, price gaps
        # and percentage changes.
        price_rows = {}
        gap_rows = {}
        perc_rows = {}

        # .get() tolerates tickers missing from earn_dict (for example when
        # the cell that builds it was interrupted).
        for earn_date in earn_dict.get(cmp, []):
            yfdata = _download_from_earnings_date(cmp, earn_date)
            if yfdata is None:
                print('skipping ' + cmp + ' ' + earn_date + ': not enough price data')
                continue

            # Day 0 is the close of the first downloaded row (the earnings date
            # itself or the nearest earlier row found by the look-back); the
            # other days use the open of the row at the configured offset.
            df_row = {'Day 0': yfdata.iloc[0].at['Close']}
            for label, offset in DAY_OFFSETS.items():
                df_row[label] = yfdata.iloc[offset].at['Open']
            price_rows[earn_date] = df_row

            # Gap between each later open and the Day 0 close.
            gap_rows[earn_date] = {
                label: df_row[label] - df_row['Day 0'] for label in IMPACT_COLUMNS
            }

            # Percentage change relative to the Day 0 close. int() truncates
            # toward zero; kept so the results match earlier runs.
            perc_rows[earn_date] = {
                label: int((df_row[label] / df_row['Day 0']) * 100 - 100)
                for label in IMPACT_COLUMNS
            }

        # Build each frame once from the collected records instead of growing
        # it one row at a time.
        price_df = pd.DataFrame.from_dict(price_rows, orient='index', columns=PRICE_COLUMNS)
        price_gap = pd.DataFrame.from_dict(gap_rows, orient='index', columns=IMPACT_COLUMNS)
        price_perc = pd.DataFrame.from_dict(perc_rows, orient='index', columns=IMPACT_COLUMNS)

        # Number of earnings dates that produced a row.
        count = len(price_gap)
        if count == 0:
            # Nothing to average; the previous version divided by zero here.
            print('skipping ' + cmp + ': no usable earnings dates')
            continue

        # Average of the absolute values per day column.
        impc_price_rows[cmp] = price_gap.abs().mean().to_dict()
        impc_perc_rows[cmp] = price_perc.abs().mean().to_dict()

    earn_impc_price = pd.DataFrame.from_dict(impc_price_rows, orient='index', columns=IMPACT_COLUMNS)
    earn_impc_perc = pd.DataFrame.from_dict(impc_perc_rows, orient='index', columns=IMPACT_COLUMNS)

    temp_perc = earn_impc_perc

    # Save the per-company averages of this industry for further analysis.
    earn_impc_price.to_csv(os.path.join(currwd, inds, 'price_gap.csv'))
    earn_impc_perc.to_csv(os.path.join(currwd, inds, 'price_perc.csv'))
    

In [76]:
# NOTE(review): `temp` is not assigned in any visible cell (the cell above
# assigns `temp_perc`), so this fails on a fresh kernel. Presumably it was a
# DataFrame read back from one of the saved CSV files - confirm and define it.
temp

Unnamed: 0,Day +1,Day +2,Day +7,Day +28
SB,0.852941,4.029412,5.823529,14.617647


With this data, we further analyze the impact of earnings calls on each industry.

In [None]:
# Mean 'Day +1' value per industry, collected while looping and turned into a
# DataFrame afterwards.
industry_means = {}

for inds in data:
    csv_file = os.path.join(inds, 'price_perc.csv')
    if not os.path.exists(csv_file):
        print('skipping ' + inds + ': ' + csv_file + ' not found')
        continue

    # pd.read_csv returns a DataFrame that can be indexed by column name; the
    # csv.reader object used before cannot. The first CSV column holds the
    # row labels written by to_csv.
    perc_gap = pd.read_csv(csv_file, index_col=0)
    day1_change = perc_gap['Day +1'].dropna()
    if day1_change.empty:
        print('skipping ' + inds + ': no Day +1 values')
        continue

    # Histogram of the per-company average percentage change within the industry.
    plt.hist(day1_change)
    plt.title(inds + ': Day +1 average percentage change')
    plt.xlabel('average absolute percentage change')
    plt.ylabel('number of companies')
    plt.show()

    # Boxplot of the same values to show the distribution and outliers.
    plt.boxplot(day1_change)
    plt.title(inds + ': Day +1 average percentage change')
    plt.show()

    # Mean of the per-company averages, used to represent the industry.
    loc_mean = day1_change.mean()
    # f-string formatting: concatenating a str with a float raises TypeError.
    print(f'avg percentage change in {inds} industry on day +1 after earnings call = {loc_mean}')

    industry_means[inds] = loc_mean

# One row per industry and a single 'Day +1' column. The previous version added
# one column per industry and then read the non-existent key 'Day + 1'.
earn_impc_inds = pd.DataFrame({'Day +1': pd.Series(industry_means, dtype=float)})

y_pos = np.arange(len(earn_impc_inds))

# Bar graph of the average percentage change after the earnings call per industry.
plt.bar(y_pos, earn_impc_inds['Day +1'])
plt.xticks(y_pos, earn_impc_inds.index)
plt.ylabel('average Day +1 percentage change')
plt.show()

fig = plt.figure()