In [249]:
import yfinance as yf
from yahoo_fin.stock_info import get_data
import pandas as pd

In [254]:
tickers = ['AAPL', 'AMZN', 'TSLA', 'GOOGL', 'PFE']
hist_data = []

for ticker in tickers:
    hist_data.append(get_data(ticker, start_date = '1/1/2012', 
                              end_date = '12/31/2022', index_as_date = True))

open_price = []
close_price = []
for i in range(len(hist_data)):
    open_price.append(hist_data[i]['open'])
    close_price.append(hist_data[i]['adjclose'])
    
open_data = pd.DataFrame(open_price, tickers)
open_data = open_data.transpose()
open_data = open_data.reindex(sorted(open_data.columns), axis=1)

close_data = pd.DataFrame(close_price, tickers)
close_data = close_data.transpose()
close_data = close_data.reindex(sorted(close_data.columns), axis=1)

In [255]:
daytime_return = []
daily_return = pd.DataFrame()
overnight_return = []

for ticker in open_data:
    daytime_return.append((close_data[ticker] / open_data[ticker]) - 1)
    
    daily = []
    for j in range(1, len(close_data[ticker])):
        daily.append((1 - (close_data[ticker][j-1]  / close_data[ticker][j])) * 100)
    daily_return[ticker] = daily

daytime_return = pd.DataFrame(daytime_return, tickers)
daytime_return = daytime_return.transpose()

daily_return['Date'] = (close_data.index.tolist())[1:]
daily_return = daily_return.set_index('Date')

for ticker in open_data:
    overnight_return.append(((1 + daily_return[ticker]) / (1 + daytime_return[ticker])) - 1)
    
overnight_return = pd.DataFrame(overnight_return, tickers)
overnight_return = overnight_return.transpose()

reversals = pd.DataFrame()

for ticker in overnight_return:
    rev = []
    for j in range(len(overnight_return[ticker])):
        if (overnight_return[ticker][j] > 0 and daytime_return[ticker][j] < 0):
            rev.append("Negative")
        elif (overnight_return[ticker][j] < 0 and daytime_return[ticker][j] > 0):
            rev.append("Positive")
        else:
            rev.append(False)
    reversals[ticker] = rev

reversals['Date'] = (daytime_return.index.tolist())
reversals = reversals.set_index('Date')
reversals = reversals.reset_index()
reversals['year_month'] = reversals['Date'].dt.to_period('M')
reversals['lag'] = reversals['year_month'].shift(1)

pos_ratio = pd.DataFrame()
neg_ratio = pd.DataFrame()

for ticker in reversals:
    
    pos_rat = []
    neg_rat = []
    
    neg_count = 0
    pos_count = 0
    tot = 0
    for j in range(len(reversals[ticker])):
        if (reversals['year_month'][j]) == (reversals['lag'][j]) or type(reversals['lag'][j]) == pd._libs.tslibs.nattype.NaTType:
            if reversals[ticker][j] == 'Negative':
                neg_count += 1
            elif reversals[ticker][j] == 'Positive':
                pos_count += 1
            tot += 1
        else:
            pos_rat.append(pos_count / tot)
            neg_rat.append(neg_count / tot)
            neg_count = 0
            pos_count = 0
            tot = 0 
        if j == (len(reversals[ticker])-1):
            pos_rat.append(pos_count / tot)
            neg_rat.append(neg_count / tot)
    pos_ratio[ticker] = pos_rat
    neg_ratio[ticker] = neg_rat
    
pos_ratio['Date'] = reversals['year_month'].unique()
neg_ratio['Date'] = reversals['year_month'].unique()

pos_ratio = pos_ratio.set_index('Date')
neg_ratio = neg_ratio.set_index('Date')
reversals = reversals.set_index('Date')

pos_ratio = pos_ratio.drop(['year_month', 'lag'], axis = 1)
neg_ratio = neg_ratio.drop(['year_month', 'lag'], axis = 1)
reversals = reversals.drop(['year_month', 'lag'], axis = 1)

In [256]:
neg_windows = neg_ratio.rolling(12) 
neg_rolling_avg = neg_windows.mean()[12:]

pos_windows = pos_ratio.rolling(12) 
pos_rolling_avg = pos_windows.mean()[12:]

ab_pos_ratio = (pos_ratio * pos_rolling_avg)[12:]
ab_neg_ratio = (neg_ratio * neg_rolling_avg)[12:]

In [253]:
daytime_return.to_csv('daytime_returns.csv')
daily_return.to_csv('daily_returns.csv')
overnight_return.to_csv('overnight_returns.csv')
pos_ratio[12:].to_csv('positive_ratio.csv')
neg_ratio[12:].to_csv('negative_ratio.csv')
ab_pos_ratio.to_csv('abnormal_positive_ratio.csv')
ab_neg_ratio.to_csv('abnormal_negative_ratio.csv')

In [259]:
ab_neg_ratio

Unnamed: 0_level_0,AAPL,AMZN,TSLA,GOOGL,PFE
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2013-01,0.183871,0.005471,0.033670,0.308987,0.003739
2013-02,0.248418,0.003053,0.021838,0.225847,0.004168
2013-03,0.285707,0.015263,0.055198,0.354117,0.003971
2013-04,0.296676,0.003224,0.022478,0.248643,0.003155
2013-05,0.303479,0.000000,0.023611,0.200426,0.014887
...,...,...,...,...,...
2022-08,0.022885,0.002834,0.000000,0.196353,0.017443
2022-09,0.021840,0.000000,0.003756,0.193656,0.004589
2022-10,0.019340,0.006235,0.007511,0.270861,0.014391
2022-11,0.024010,0.006235,0.000000,0.248320,0.004380
