# Import data

In [1]:
import pandas as pd
import numpy as np
import warnings
import eventstudy as es
from tqdm import tqdm
import itertools
import eventstudy
import matplotlib.pyplot as plt
import yfinance as yf
warnings.filterwarnings("ignore")

In [2]:
# read data
cn = pd.read_csv("../../1223/data/concentration/2020_20230814.csv").dropna()
q = pd.read_csv("../../1223/data/quote/2020_20230814.csv").dropna()
rf = pd.read_csv("../../1223/data/rf/rf.csv").dropna().rename(columns={'日期':'date'})
df = pd.merge(q, cn, on=['日期', '股號'], how='left')
df = df.groupby('股號').apply(lambda x: x.sort_values('日期')).reset_index(drop=True)

In [3]:
# get marketret data
stock_code = "^TWII"

start_date = "2020-01-01"
end_date = "2023-12-31" 

twii_data = yf.download(stock_code, start=start_date, end=end_date)
twii_data['date'] =  pd.to_datetime(twii_data.index)
twii_data['ret'] = twii_data['Adj Close'].pct_change()

[*********************100%***********************]  1 of 1 completed


# Mutate

In [4]:
# mutate col
df = pd.merge(q, cn, on=['日期', '股號'], how='left')
df = df.groupby('股號').apply(lambda x: x.sort_values('日期')).reset_index(drop=True)


# 量起
df['成交量_1'] = df.groupby('股號')['成交量'].shift(-1)

def divide_two_cols(df_sub):
    df_sub['volume_delta_1'] = df_sub['成交量_1'] / df_sub['成交量']
    return df_sub

df = df.groupby('股號').apply(divide_two_cols)


# 價揚
# shift 1 假設是成交量出現異常後的下一根進場
df['ret'] = df.groupby('股號')['收盤價'].pct_change()
df['ret_2'] = df.groupby('股號')['ret'].shift(-1)
df = df.dropna()

# Prepare Input

In [5]:
def create_ret_df(df):

    uq_id = df['股號'].unique()
    res_by_ticker = []
    res = None

    for ticker_id in tqdm(uq_id[:len(uq_id)]):
        ticker_df = df[df['股號']==ticker_id].reset_index(drop=True)

        date = ticker_df['日期'].tolist()
        ret = ticker_df['ret'].tolist()

        ticker_ret_df = pd.DataFrame(list(zip(date, ret)),
               columns =['date', f'{ticker_id}'])
        
        res_by_ticker.append(ticker_ret_df)
    for i in tqdm(range(len(res_by_ticker))):
        if i == 0:
            res = res_by_ticker[i]

        else:
            res = pd.merge(res, res_by_ticker[i], how='left', on='date')

    return res


In [6]:
return_df = create_ret_df(df)

100%|██████████| 1350/1350 [03:50<00:00,  5.85it/s]
100%|██████████| 1350/1350 [03:56<00:00,  5.70it/s]


In [7]:
rf["date"] = pd.to_datetime(rf['date'])
return_df["date"] =  pd.to_datetime(return_df["date"])

main_df_4analysis = pd.merge(return_df, rf, how='left', on='date')
main_df_4analysis = pd.merge(main_df_4analysis, twii_data, how='left', on='date')


In [15]:
# 這行記得
uq_id = df['股號'].unique()

for tick in tqdm(uq_id[:]):
    main_df_4analysis = main_df_4analysis.dropna(subset=[str(tick), 'ret'])
    returns_df = main_df_4analysis.iloc[:,[main_df_4analysis.columns.get_loc(c) for c in ['date', f'{tick}']]] 
    returns_df.to_csv(f'./df/returns_{tick}.csv')

100%|██████████| 1350/1350 [00:21<00:00, 62.49it/s] 


In [9]:
# fama df
famadf = main_df_4analysis.iloc[:,[main_df_4analysis.columns.get_loc(c) for c in ['date', '收市', 'ret']]].rename(columns={'收市':'RF', 'ret':'MktRt'})
famadf['Mkt-RF'] = famadf['MktRt'] - famadf['RF']
famadf['SMB'] = 0
famadf['HML'] = 0
famadf.to_csv('./df/fama.csv')

# Event Study

In [16]:
uq_id = df['股號'].unique()
final_res = pd.DataFrame(columns=['AR', 'Std. E. AR', 'CAR', 'Std. E. CAR', 'T-stat', 'P-value',
       'evnet_window', 'EventDate', 'symbol'])

for tick in tqdm(uq_id[:]):
    

    es.Single.import_returns(path=f'./df/returns_{tick}.csv')
    es.Single.import_FamaFrench(path='./df/fama.csv', date_format = '%Y-%m-%d %H:%M:%S.%f')

    # df tick
    df_tick = df[df['股號']==tick].reset_index(drop=True)

    # get event date
    vol_increase_id = df_tick[(df_tick['成交量']>500)&(df_tick['volume_delta_1']>2)].index
    event_date = df_tick['日期'][vol_increase_id].to_list()
    # print(tick, event_date)

    for ed in event_date:
        try:
            # print(tick, ed)
            event = es.Single.FamaFrench_3factor(
                security_ticker = str(tick),
                event_date = np.datetime64(str(ed)),
                event_window = (-2,+10), 
                estimation_size = 10, # 注意這個
                buffer_size = 30,
                keep_model=True
            )
            tick_res = event.results(decimals=[3,5,3,5,2,2])
            tick_res['evnet_window'] = tick_res.index
            tick_res['EventDate'] =  np.datetime64(ed)
            tick_res['symbol'] = tick
            tick_res = tick_res.reset_index(drop=True)
            final_res = final_res.append(tick_res, ignore_index=True)
        except:
            continue

    # print(final_res)
    

100%|██████████| 1350/1350 [06:35<00:00,  3.41it/s]


In [17]:
final_res

Unnamed: 0,AR,Std. E. AR,CAR,Std. E. CAR,T-stat,P-value,evnet_window,EventDate,symbol
0,-0.045,0.00674,-0.045 ***,0.00674,-6.66,0.0,-2,2020-03-16,1101
1,-0.027,0.00674,-0.072 ***,0.00953,-7.60,0.0,-1,2020-03-16,1101
2,-0.052,0.00674,-0.124 ***,0.01167,-10.62,0.0,0,2020-03-16,1101
3,-0.046,0.00674,-0.17 ***,0.01347,-12.61,0.0,1,2020-03-16,1101
4,-0.048,0.00674,-0.218 ***,0.01506,-14.49,0.0,2,2020-03-16,1101
...,...,...,...,...,...,...,...,...,...
141435,-0.051,0.01583,-0.461 ***,0.04748,-9.70,0.0,6,2023-06-06,2431
141436,-0.043,0.01583,-0.504 ***,0.05005,-10.07,0.0,7,2023-06-06,2431
141437,-0.025,0.01583,-0.529 ***,0.05249,-10.08,0.0,8,2023-06-06,2431
141438,-0.034,0.01583,-0.563 ***,0.05483,-10.27,0.0,9,2023-06-06,2431
