# Import data

In [1]:
import pandas as pd
import numpy as np
import warnings
import eventstudy as es
from tqdm import tqdm
import itertools
import pymannkendall as mk
import matplotlib.pyplot as plt 
import yfinance as yf
from scipy.stats import wilcoxon
from multiprocessing import cpu_count
from concurrent.futures import ThreadPoolExecutor
import os, sys
from pathlib import Path
import pickle
sys.path.append(str(Path(os.getcwd()).parent.parent / '1226' / 'utils'))
import XD

warnings.filterwarnings("ignore")

In [2]:
q = pd.read_csv("../../1223/data/concentration/quote/2020_20230814.csv", index_col=False).dropna()
brk = pd.read_csv("../../1223/data/concentration/brk/2022_BrkNetAmt.csv", index_col=False).dropna()

In [3]:
df = pd.merge(q, brk, on=['日期', '股號'], how='left')
df = df.groupby('股號').apply(lambda x: x.sort_values('日期')).reset_index(drop=True)
df = df.rename(columns={'漲跌幅(%)':'ret'})

# Event by breaker

In [4]:
def process_bk(bk, df):
    if bk:
        signal_id = df[((df['買賣超金額'].notnull()) | df['買賣超金額'].notna()) & (df['分點'] == f'{bk}')].index
        df[f'indicator_{bk}'] = df.index.isin(signal_id).astype(int)

    return df
        
bk_uq_id = df['分點'].unique()

# Number of processes to use (adjust as needed)
num_processes = min(cpu_count(), len(bk_uq_id))

with ThreadPoolExecutor(max_workers=cpu_count()) as executor:
    list(tqdm(executor.map(process_bk, [bk for bk in bk_uq_id], [df for _ in range(len(bk_uq_id))]), total=len(bk_uq_id)))

df = df.drop(columns=['indicator_nan'])

100%|██████████| 548/548 [00:16<00:00, 33.13it/s]


## Iterate by broker

In [5]:
N_P = 10

In [6]:
stock_uq_id = df['股號'].unique()
g_df = df.groupby('股號')
res_d = {}
bug_li = []

for bk in tqdm(bk_uq_id[:]):
    
    res_tick = {}
    for tick in tqdm(stock_uq_id[:]):
        tmp = g_df.get_group(tick)
        try:
            # calculate cumulative event related return 
            c, cp = XD.get_indcum(col_ret = tmp['ret'], col_abnormal=tmp[f'indicator_{bk}'], num_period=N_P)
            tmp[f'cumret_{bk}_{N_P}'] = c
            
            # perform test
            cp = list(k for k,_ in itertools.groupby(cp))
            result_trend = {}
            
            # date list
            date_list = []
            
            for i, l in enumerate(cp):
                if len(l)>1:
                    if sum(l) == 0:
                        continue
                    else:
                        trend, h, p, z, Tau, s, var_s, slope, intercept =  mk.original_test(l)
                        result_trend[f'{i}_trend'] = trend
                        
                        # date index
                        date_index = tmp[tmp[f'indicator_{bk}']!=0].index[i]
                        date_list.append(tmp['日期'][date_index])
            

            if len(result_trend)!=0:
                res_tick[f'{tick}'] = [result_trend, date_list] 
            
        except KeyError:
            break
        res_d[bk] = res_tick

  0%|          | 0/1804 [00:00<?, ?it/s]
100%|██████████| 1804/1804 [01:46<00:00, 16.91it/s]
100%|██████████| 1804/1804 [01:30<00:00, 19.85it/s]
100%|██████████| 1804/1804 [01:29<00:00, 20.15it/s]
100%|██████████| 1804/1804 [01:29<00:00, 20.18it/s]
100%|██████████| 1804/1804 [01:29<00:00, 20.22it/s]
100%|██████████| 1804/1804 [01:31<00:00, 19.74it/s]
100%|██████████| 1804/1804 [01:28<00:00, 20.48it/s]
100%|██████████| 1804/1804 [01:28<00:00, 20.46it/s]
100%|██████████| 1804/1804 [01:29<00:00, 20.22it/s]
100%|██████████| 1804/1804 [01:28<00:00, 20.41it/s]
100%|██████████| 1804/1804 [01:28<00:00, 20.39it/s]
100%|██████████| 1804/1804 [01:29<00:00, 20.20it/s]
100%|██████████| 1804/1804 [01:28<00:00, 20.47it/s]
100%|██████████| 1804/1804 [01:28<00:00, 20.41it/s]
100%|██████████| 1804/1804 [01:28<00:00, 20.38it/s]
100%|██████████| 1804/1804 [01:28<00:00, 20.34it/s]
100%|██████████| 1804/1804 [01:28<00:00, 20.47it/s]
100%|██████████| 1804/1804 [01:28<00:00, 20.47it/s]
100%|██████████| 1804/1

In [7]:
file_path = 'brk_tick_res.pkl'
with open(file_path, 'wb') as file:
    pickle.dump(res_d, file)

In [None]:
res_d

In [None]:
# def get_res_by_tick(stock_uq_id, bk, g_df):
#     res_tick = {}
#     for tick in tqdm(stock_uq_id[:]):
#         tmp = g_df.get_group(tick)
#         try:
#             # calculate cumulative event related return 
#             c, cp = XD.get_indcum(col_ret = tmp['ret'], col_abnormal=tmp[f'indicator_{bk}'], num_period=N_P)
#             tmp[f'cumret_{bk}_{N_P}'] = c
            
#             # perform test
#             cp = list(k for k,_ in itertools.groupby(cp))
#             result_trend = {}
            
#             for i, l in enumerate(cp):
#                 if len(l)>1:
#                     if sum(l) == 0:
#                         continue
#                     else:
#                         trend, h, p, z, Tau, s, var_s, slope, intercept =  mk.original_test(l)
#                         result_trend[f'{i}_trend'] = trend

#             if len(result_trend)!=0:

#                 res_tick[f'{tick}'] = result_trend 
            
#         except KeyError:
#             break
        
#     return res_tick

In [None]:
# stock_uq_id = df['股號'].unique()
# g_df = df.groupby('股號')
# res_d = {}
# bug_li = []

# for bk in tqdm(bk_uq_id[:3]):
#     res_d[bk] = get_res_by_tick(stock_uq_id=stock_uq_id, g_df=g_df, bk=bk)

In [None]:
file_path = 'brk_tick_res.pkl'
with open(file_path, 'wb') as file:
    pickle.dump(res_d, file)

In [9]:
res_d['922H']

{'1101': [{'0_trend': 'no trend'}, ['2022-03-15']],
 '1102': [{'0_trend': 'decreasing'}, ['2022-07-22']],
 '1304': [{'0_trend': 'no trend'}, ['2022-07-22']],
 '1402': [{'0_trend': 'no trend', '1_trend': 'increasing'},
  ['2022-07-22', '2022-10-12']],
 '1477': [{'0_trend': 'no trend', '1_trend': 'no trend'},
  ['2022-08-05', '2022-10-07']],
 '1513': [{'0_trend': 'decreasing'}, ['2022-11-23']],
 '1536': [{'0_trend': 'no trend'}, ['2022-09-26']],
 '1590': [{'0_trend': 'no trend'}, ['2022-01-19']],
 '1605': [{'0_trend': 'no trend'}, ['2022-08-16']],
 '1760': [{'0_trend': 'increasing'}, ['2022-12-26']],
 '1795': [{'0_trend': 'no trend'}, ['2022-09-22']],
 '2002': [{'0_trend': 'increasing', '1_trend': 'increasing'},
  ['2022-03-01', '2022-08-02']],
 '2301': [{'0_trend': 'no trend'}, ['2022-10-12']],
 '2303': [{'0_trend': 'no trend', '1_trend': 'increasing'},
  ['2022-03-25', '2022-11-07']],
 '2308': [{'0_trend': 'no trend'}, ['2022-09-06']],
 '2317': [{'0_trend': 'decreasing'}, ['2022-11-01'