In [1]:
from yahoo_fin.stock_info import get_data
import pandas as pd
# !pip install pandas_market_calendars
import pandas_market_calendars as mcal
from datetime import datetime
import matplotlib.pyplot as plt
from tqdm import tqdm  # progress bar on loops
import seaborn as sns
from datetime import timedelta

In [2]:
# load files
transactions = pd.read_csv('../../data/inputs/transactions.csv')

In [3]:
# read from text file
file = open('../../data/inputs/bad_tickers.txt', 'r')
# get the list of bad tickers from file, separated by a comma
bad_tickers = file.read().split(',')
file.close()

# drop the bad_tickers from the transactions
transactions = transactions[~transactions['ticker'].isin(bad_tickers)]

# drop all the transactions happens before 2014-01-01
transactions = transactions[transactions['transaction_date'] >= '2014-01-01']

In [4]:
# since we are only focusing on 3 sectors (Technology, Financial Services, and Health Care), 
# we will drop all the other sectors
transactions = transactions[transactions['sector'].isin(['Technology', 'Finance', 'Health Care'])]

In [6]:
def get_stock_data(ticker_list, start_date, end_date):
    data = pd.DataFrame()
    bad_tickers = []
    for ticker in ticker_list:
        try: 
            df = get_data(ticker=ticker, start_date=start_date, end_date=end_date, 
                          index_as_date=False, interval='1d')
            df = df[['date', 'ticker', 'adjclose']]
            data = pd.concat([data, df], axis=0)
        except:
            bad_tickers.append(ticker)
            continue
    # data['date'] = pd.to_datetime(data['date'])
    return data

# data = get_stock_data(['amzn'], '04/29/2014', '04/28/2024')
# data

In [7]:
# generate all the trading dates from 2014-12-01 to 2024-04-29
temp_df = get_stock_data(['AAPL'], '12/01/2014', '04/29/2024')
trading_dates = temp_df[['date']]
# turn it into a list
trading_dates = trading_dates['date'].tolist()
# strip the time from the date
trading_dates = [str(date).split()[0] for date in trading_dates]

In [10]:
# parameter: %Y-%m-%d'  
def is_trading_day(date):
    return date in trading_dates

#  return the date itself if it is a trading day, otherwise return the next trading day
# param: %Y-%m-%d'
def get_next_trading_date(date):
    # check if it's in trading_dates
    if is_trading_day(date):
        return date
    else:
        while not is_trading_day(date):
            date = datetime.strptime(date, '%Y-%m-%d') + timedelta(days=1)
            date = date.strftime('%Y-%m-%d')
        return date

# parameter: %Y-%m-%d'
def get_trading_dates(disclosure_date, num_days_before, num_days_after):
    disclosure_date = get_next_trading_date(disclosure_date)
    # locate the index of the disclosure date
    disclosure_date_index = trading_dates.index(disclosure_date)
    # get the trading dates before the disclosure date
    trading_dates_before = trading_dates[disclosure_date_index - num_days_before:disclosure_date_index]
    # get the trading dates after the disclosure date
    trading_dates_after = trading_dates[disclosure_date_index:disclosure_date_index + 2 + num_days_after]
    results = trading_dates_before + trading_dates_after
    # format the date to %Y/%m/%d
    results = [datetime.strptime(date, '%Y-%m-%d').strftime('%Y/%m/%d') for date in results]
    
    return results

In [11]:
# get stock daily return before and after the disclosure date 
# ticker | daily_return | trading_days_before_after | disclosure_date | sector
# trading_days_before_after = -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7
# only trading days

# iterate over the transactions table
# for each transaction, get the daily return before and after the disclosure date
# append the result to a new dataframe

columns = ['ticker', 'daily_return', 'trading_days_before_after', 'disclosure_date', 'sector', 'cum_return']
purchase_returns_df = pd.DataFrame(columns=columns)
sell_returns_df = pd.DataFrame(columns=columns)

for index in tqdm(range(len(transactions))):
    row = transactions.iloc[index]
    disclosure_date = row['disclosure_date']
    dates = get_trading_dates(disclosure_date, 7, 7)
    # get the stock data, stock data consists of date, ticker, and adjclose
    stock_data = get_stock_data([row['ticker']], dates[0], dates[-1])
    # check if stock_data is empty
    if stock_data.empty:
        continue
    # calculate the daily return
    stock_data['daily_return'] = stock_data['adjclose'].pct_change()
    stock_data['sector'] = row['sector']
    stock_data['disclosure_date'] = disclosure_date
    stock_data['trading_days_before_after'] = list(range(-7, 8))
    # fill the missing values with 0
    stock_data['daily_return'] = stock_data['daily_return'].fillna(0)
    # calculate the cumulative return
    stock_data['cum_return'] = (1 + stock_data['daily_return']).cumprod() - 1
    # separate the purchase and sell transactions
    if row['type'] == 'purchase':
        purchase_returns_df = pd.concat([purchase_returns_df, stock_data], axis=0)
    else:
        sell_returns_df = pd.concat([sell_returns_df, stock_data], axis=0)
            

  0%|          | 0/7030 [00:00<?, ?it/s]

  sell_returns_df = pd.concat([sell_returns_df, stock_data], axis=0)
  purchase_returns_df = pd.concat([purchase_returns_df, stock_data], axis=0)
100%|██████████| 7030/7030 [20:37<00:00,  5.68it/s]


In [12]:
purchase_returns_df.head()

Unnamed: 0,ticker,daily_return,trading_days_before_after,disclosure_date,sector,cum_return,date,adjclose
0,AXP,0.0,-7,2021-12-01,Finance,0.0,2021-11-19,167.757706
1,AXP,-0.015213,-6,2021-12-01,Finance,-0.015213,2021-11-22,165.205688
2,AXP,-0.000292,-5,2021-12-01,Finance,-0.0155,2021-11-23,165.157394
3,AXP,0.004448,-4,2021-12-01,Finance,-0.011121,2021-11-24,165.892059
4,AXP,-0.086184,-3,2021-12-01,Finance,-0.096346,2021-11-26,151.594849


In [13]:
# save the data to csv
purchase_returns_df.to_csv('../../data/outputs/purchase_impact.csv', index=False)
sell_returns_df.to_csv('../../data/outputs/sell_impact.csv', index=False)