In [89]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy.stats import ttest_1samp

import preprocessing

In [91]:
def ols(y, x):
    """Fit the simple linear regression ``y = a + b*x + e`` by ordinary least squares.

    Parameters
    ----------
    y : array-like
        Dependent variable (callers in this notebook pass portfolio returns).
    x : array-like
        Single regressor (callers in this notebook pass market returns).

    Returns
    -------
    tuple
        ``((hat_a, hat_b), results)`` where ``hat_a`` is the estimated
        intercept, ``hat_b`` the estimated slope and ``results`` the fitted
        statsmodels results object.
    """
    # Add the intercept column so that `a` is estimated alongside `b`.
    x = sm.add_constant(x)

    model = sm.OLS(y, x)   # build the OLS model
    results = model.fit()  # estimate a and b

    # `params` is a pandas Series for pandas inputs but a plain ndarray for
    # NumPy inputs; `.values` only exists on the former, np.asarray handles both.
    hat_a, hat_b = np.asarray(results.params)
    return (hat_a, hat_b), results

In [100]:
import scipy.stats as stats
from scipy.stats import wilcoxon

def test_aers(aers, alpha=0.05):
    # 표본의 크기 > 30
    print(f'검정 표본 크기 : {len(aers)}')
    
    # 정규성 검정 (Shapiro-Wilk test)
    stat, p_value = stats.shapiro(aers)

    print(f"정규성 검정: Statistic = {stat}, p-value = {np.round(p_value, 4)}")
    print(f'-> 정규성 만족 여부 : {p_value > 0.05}')


    # Wilcoxon Signed Rank Test 수행
    statistic, p_value = wilcoxon(aers - 0, alternative='greater')
    print(f"Wilcoxon Signed Rank 검정: Statistic = {statistic}, p-value = {np.round(p_value, 4)}")
    res_wilcoxon = p_value < 0.05
    print(f"-> 검정 결과 {'H0 기각, H1 채택(평균AER > 0)' if res_wilcoxon else 'H0 기각 불가'}")

    # 일표본 t-검정(단측검정)
    # H1 : mean(초과수익률) > 0 (pvalue < 0.05)
    result_ttest = ttest_1samp(aers, 0, alternative='greater')
    return result_ttest

In [101]:
def model_market(
        data,
        year_start = 2000,
        year_end = 2021,
        col_year = '귀속년도',
        col_rtn = 'rtn',
        col_event = 'event',
        val_event = 'santa',
        _print=True
    ):
    """Run a market-model event study and test the abnormal returns.

    Steps:
      1. Fit ``portfolio = alpha + beta * market`` on the rows whose
         ``col_event`` value differs from ``val_event`` (estimation window).
      2. Apply the fitted line to the rows where ``col_event == val_event``
         (event window) and compute abnormal returns as actual minus fitted.
      3. Pass the abnormal returns to ``test_aers``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``<col_rtn>_portfolio``, ``<col_rtn>_market``,
        ``col_event`` and ``col_year``.
    year_start, year_end : int
        Currently unused by this function; kept so existing callers keep working.
    col_year : str
        Column used to group abnormal returns by year in the printed report.
    col_rtn : str
        Prefix of the two return columns.
    col_event : str
        Column holding the window label of each row.
    val_event : str
        Label that marks event-window rows.
    _print : bool
        When True, print the OLS summary, the yearly mean abnormal returns and
        the raw t-test result. The final verdict line (and whatever
        ``test_aers`` prints) is emitted regardless of this flag.

    Returns
    -------
    tuple
        ``(data_event, (ols_summary, ttest_result))`` where ``data_event`` is a
        copy of the event-window rows with ``hat_portfolio`` and ``aers``
        columns added.
    """
    col_portfolio = col_rtn + '_portfolio'
    col_market = col_rtn + '_market'

    # (1) Estimate the normal expected-return model on the estimation window.
    data_before = data[data[col_event] != val_event]

    # Realised returns in the estimation window.
    y = data_before[col_portfolio]  # portfolio daily return
    x = data_before[col_market]     # market daily return

    # Estimate alpha and beta.
    (hat_alpha, hat_beta), result_ols = ols(y, x)

    if _print:
        print("시장모형 추정 결과")
        print(result_ols.summary())
        print()


    # (2) Apply the fitted model to the event window to get abnormal returns.
    # Take an explicit copy: adding columns to a boolean-mask slice of `data`
    # raises SettingWithCopyWarning and leaves it ambiguous whether `data`
    # itself is modified.
    data_event = data[data[col_event] == val_event].copy()

    # Realised returns in the event window.
    rtns_portfolio = data_event[col_portfolio]  # portfolio daily return
    rtns_market = data_event[col_market]        # market daily return

    # Normal (expected) return implied by the fitted market model.
    data_event['hat_portfolio'] = hat_alpha + hat_beta * rtns_market

    # Abnormal return = realised return - expected normal return.
    data_event['aers'] = rtns_portfolio - data_event['hat_portfolio']

    if _print:
        print("연도별 초과수익률 평균")
        print(data_event[[col_year, 'aers']].groupby(col_year).mean())
        print()


    # (3) Significance test of the abnormal returns.
    result_test = test_aers(data_event['aers'])

    if _print:
        print("초과수익률 유의성 검정 (T-Test) 결과")
        print(result_test)
    # The verdict is printed even when _print is False.
    if result_test.pvalue < 0.05:
        print(f"유의수준 0.05하에서 T-Test 검정 결과, pvalue={np.round(result_test.pvalue,4)}로 초과수익률이 유의함(초과수익률>0)")
    else:
        print("Event Study 결과, 초과수익률이 유의하지 않음")

    return data_event, (result_ols.summary(), result_test)

In [102]:
def study(
    symbol_portfolio,
    symbol_market = '^KS11',
    col_rtn = 'rtn',
    col_event = 'event',
    _print=False
):
    """Load market and portfolio data and run the market-model event study.

    Parameters
    ----------
    symbol_portfolio : str
        Ticker passed to ``preprocessing.get_data`` for the portfolio.
    symbol_market : str, default '^KS11'
        Ticker passed to ``preprocessing.get_data`` for the market benchmark.
    col_rtn : str
        Name of the return column in the frames returned by ``get_data``.
    col_event : str
        Name of the window-label column in the market frame.
    _print : bool
        Forwarded to ``model_market``; also controls the ticker banner lines.

    Returns
    -------
    tuple
        ``(data, data_event, results)``: the merged and filtered frame, plus
        the two values returned by ``model_market``.
    """
    # Market daily returns, together with the window label and the year column.
    if _print:
        print(f"Market : {symbol_market}")
    data_market = preprocessing.get_data(symbol_market)[[col_rtn, col_event, '귀속년도']]

    # Portfolio daily returns.
    if _print:
        print(f"Portfolio : {symbol_portfolio}")
    data_portfolio = preprocessing.get_data(symbol_portfolio)[[col_rtn]]

    # Merge on the index, keeping every portfolio row; the shared return column
    # is disambiguated as <col_rtn>_portfolio / <col_rtn>_market.
    data = pd.merge(
        data_portfolio, data_market,
        left_index=True, right_index=True,
        how='left',
        suffixes=('_portfolio', '_market')
    )

    # Keep only event-window ('santa') and estimation-window ('before') rows.
    data = data[data[col_event].isin(['santa', 'before'])]

    # Forward the column names so that non-default col_rtn / col_event values
    # reach model_market instead of silently falling back to its defaults.
    data_event, results = model_market(
        data,
        col_rtn=col_rtn,
        col_event=col_event,
        _print=_print,
    )

    return data, data_event, results

In [105]:
# Portfolio under study: KODEX Semiconductor
symbol = '091160.KS'

# Alternatives that can be swapped in:
#   KOSDAQ              -> symbol = '^KQ11'
#   Samsung Electronics -> symbol = '005930.KS'

data, data_event, results = study(symbol_portfolio=symbol, _print=False)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
검정 표본 크기 : 105
정규성 검정: Statistic = 0.9945512413978577, p-value = 0.9541
-> 정규성 만족 여부 : True
Wilcoxon Signed Rank 검정: Statistic = 3962.0, p-value = 0.0001
-> 검정 결과 H0 기각, H1 채택(평균AER > 0)
유의수준 0.05하에서 T-Test 검정 결과, pvalue=0.0로 초과수익률이 유의함(초과수익률>0)


In [76]:
# Column means per '귀속년도' value, split by the 'event' label.
data.pivot_table(index='귀속년도', columns='event', aggfunc='mean')

Unnamed: 0_level_0,rtn_market,rtn_market,rtn_portfolio,rtn_portfolio
event,before,santa,before,santa
귀속년도,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2000.0,0.445201,-0.46022,1.364686,-0.722707
2001.0,-0.104465,1.331945,0.396138,2.265756
2002.0,0.152695,-0.96916,-0.242229,-0.491605
2003.0,0.24932,0.34954,-0.324807,0.500093
2004.0,0.459975,0.164505,0.761273,0.410662
2005.0,0.156277,0.459638,0.32938,0.665263
2006.0,0.526659,-0.326851,0.307823,0.058164
2007.0,-0.463131,0.074807,-1.325274,-0.133683
2008.0,0.339921,-0.057134,0.147944,-0.096996
2009.0,-0.010503,0.302017,-0.067714,0.873711


In [77]:
# Show the event-window frame, including the hat_portfolio and aers columns.
data_event

Unnamed: 0_level_0,rtn_portfolio,rtn_market,event,귀속년도,hat_portfolio,aers
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-12-19,-0.842659,-1.570648,santa,2000.0,-1.845042,1.002383
2000-12-20,-5.382490,-3.125465,santa,2000.0,-3.638663,-1.743827
2000-12-21,-2.994004,-0.449238,santa,2000.0,-0.551395,-2.442609
2000-12-22,-3.086411,-2.207460,santa,2000.0,-2.579662,-0.506749
2000-12-26,0.636949,0.803034,santa,2000.0,0.893214,-0.256265
...,...,...,...,...,...,...
2023-12-26,0.922266,0.118487,santa,2023.0,0.103527,0.818739
2023-12-27,1.827676,0.419194,santa,2023.0,0.450421,1.377256
2023-12-28,0.641026,1.598624,santa,2023.0,1.810998,-1.169973
2024-01-02,1.401274,0.547213,santa,2023.0,0.598101,0.803173


In [78]:
# Column means of the event-window frame per '귀속년도' value.
data_event.pivot_table(index='귀속년도', columns='event', aggfunc='mean')

Unnamed: 0_level_0,aers,hat_portfolio,rtn_market,rtn_portfolio
event,santa,santa,santa,santa
귀속년도,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
2000.0,-0.158644,-0.564063,-0.46022,-0.722707
2001.0,0.762396,1.50336,1.331945,2.265756
2002.0,0.659567,-1.151172,-0.96916,-0.491605
2003.0,0.130025,0.370068,0.34954,0.500093
2004.0,0.254049,0.156613,0.164505,0.410662
2005.0,0.168187,0.497076,0.459638,0.665263
2006.0,0.468374,-0.41021,-0.326851,0.058164
2007.0,-0.186822,0.053138,0.074807,-0.133683
2008.0,0.002072,-0.099068,-0.057134,-0.096996
2009.0,0.558465,0.315246,0.302017,0.873711


In [79]:
# OLS regression fit summary
# const -> alpha, rtn_market -> beta
results[0] 

0,1,2,3
Dep. Variable:,rtn_portfolio,R-squared:,0.564
Model:,OLS,Adj. R-squared:,0.561
Method:,Least Squares,F-statistic:,214.4
Date:,"Fri, 05 Jan 2024",Prob (F-statistic):,1.06e-31
Time:,17:36:43,Log-Likelihood:,-272.08
No. Observations:,168,AIC:,548.2
Df Residuals:,166,BIC:,554.4
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-0.0332,0.096,-0.347,0.729,-0.222,0.156
rtn_market,1.1536,0.079,14.642,0.000,0.998,1.309

0,1,2,3
Omnibus:,27.922,Durbin-Watson:,1.825
Prob(Omnibus):,0.0,Jarque-Bera (JB):,69.834
Skew:,0.678,Prob(JB):,6.85e-16
Kurtosis:,5.853,Cond. No.,1.26


In [80]:
# Significance test of the abnormal returns (t-test)
results[1]

TtestResult(statistic=2.462035775633172, pvalue=0.007415410426320338, df=167)