# 1. Load data

In [22]:
import pandas as pd
import numpy as np
import warnings
from utils import XD
from scipy.stats import wilcoxon
import pymannkendall as mk
from tqdm import tqdm
import itertools
warnings.filterwarnings("ignore")

In [23]:
# Load the two input CSV files and keep only rows without missing values.
# `cn` comes from the "concentration" folder and `q` from the "quote" folder.
cn, q = (
    pd.read_csv(csv_path).dropna()
    for csv_path in (
        "../1223/data/concentration/concentration/2020_20230814.csv",
        "../1223/data/concentration/quote/2020_20230814.csv",
    )
)

# 2. Process

In [24]:
## config
# Tunable parameters of the event study; they are read by the cells below.

# Window size: passed as `num_period` to XD.get_indcum and embedded in the
# name of the output CSV.
N_P = 10 

# Minimum traded volume: a row can only be flagged as a volume signal when
# its 成交量 is strictly greater than this value.
VOL_TRES = 1000

# Significance level: a Wilcoxon p-value below this counts as significant.
CRITICAL = 0.05

# Volume-ratio threshold: a row can only be flagged as a volume signal when
# its `volume_delta_1` is strictly greater than this value.
VOLUME_MULTIPLIER = 1.3

In [25]:
# Build the per-stock feature table used by the event study.

# Attach the concentration columns to every quote row, then order the rows by
# stock and, within each stock, by date so that the shift / pct_change calls
# below operate on consecutive observations of the same stock.
# NOTE(review): assumes 日期 sorts chronologically as stored — confirm.
df = pd.merge(q, cn, on=['日期', '股號'], how='left')
df = df.sort_values(['股號', '日期']).reset_index(drop=True)


# Volume surge (量起)
# Previous row's volume within the same stock (NaN on each stock's first row).
df['成交量_1'] = df.groupby('股號')['成交量'].shift(1)


def divide_two_cols(df_sub):
    """Add the ``volume_delta_1`` column and return the same frame.

    ``volume_delta_1`` is the current volume divided by the previous row's
    volume, so a value above 1 means volume went up. (The ratio used to be
    previous / current, which made the "volume increase" filter select
    volume *drops*.)

    Args:
        df_sub: frame holding the 成交量 and 成交量_1 columns; modified in place.

    Returns:
        ``df_sub`` with the extra ``volume_delta_1`` column.
    """
    df_sub['volume_delta_1'] = df_sub['成交量'] / df_sub['成交量_1']
    return df_sub


# The ratio is computed row by row, so no per-stock grouping is required.
df = divide_two_cols(df)


# Price rise (價揚)
# `ret` is the per-stock return versus the previous row; `ret_1` is the
# following row's return of the same stock (shift(-1)).
df['ret'] = df.groupby('股號')['收盤價'].pct_change()
df['ret_1'] = df.groupby('股號')['ret'].shift(-1)

# Drop rows with any missing value (this includes each stock's first and last
# row, whose lagged / leading columns are NaN).
df = df.dropna()

In [26]:
# Per-stock event study.
#
# For every stock: flag the rows that pass the volume filter, mark the rows
# where the flag switches from 0 to 1 ("abnormal"), let XD.get_indcum build the
# event windows, and run a Wilcoxon signed-rank test and a Mann-Kendall trend
# test on each window.
#
# res_d maps stock id -> [Wilcoxon p-values (array), trend labels (array),
#                         number of tested windows, dates of all flagged rows]
# bug_li collects the stock ids for which UnboundLocalError was raised.
g_df = df.groupby('股號')

stock_uq_id = df['股號'].unique()

res_d = {}
bug_li = []

for stock_id in tqdm(stock_uq_id):
    # Work on a copy so the helper columns added below never write into a
    # slice of `df`.
    tmp = g_df.get_group(stock_id).copy()

    # Rows passing both the absolute-volume and the volume-ratio threshold.
    passes_filter = (tmp['成交量'] > VOL_TRES) & (tmp['volume_delta_1'] > VOLUME_MULTIPLIER)
    vol_increase_id = tmp[passes_filter].index
    tmp['indicator'] = tmp.index.isin(vol_increase_id).astype(int)
    # True only on the first row of a run of flagged rows (previous row 0,
    # current row 1).
    tmp['abnormal'] = (tmp['indicator'].shift(1) == 0) & (tmp['indicator'] == 1)
    tmp = tmp.dropna()

    try:
        # NOTE(review): XD.get_indcum is a project helper; from its use here
        # `c` is column-like and `cp` is a sequence of per-event return
        # sequences — confirm against utils.XD.
        c, cp = XD.get_indcum(col_ret=tmp['ret_1'], col_abnormal=tmp['abnormal'], num_period=N_P)
        tmp[f'cumret_{N_P}'] = c

        # Collapse runs of consecutive identical windows into one.
        cp = [window for window, _ in itertools.groupby(cp)]

        result_wilcoxon = []
        result_trend = []

        for window in cp:
            # Skip windows with fewer than two values or whose values sum to
            # exactly zero.
            if len(window) <= 1 or sum(window) == 0:
                continue

            _, p_value_wilcoxon = wilcoxon(window)
            # Only the first field of the Mann-Kendall result (the trend
            # label) is used.
            trend = mk.original_test(window)[0]
            result_wilcoxon.append(p_value_wilcoxon)
            result_trend.append(trend)

        # The dates cover every flagged row, not only the "abnormal" rows.
        res_d[stock_id] = [
            np.array(result_wilcoxon),
            np.array(result_trend),
            len(result_wilcoxon),
            tmp.loc[vol_increase_id, '日期'],
        ]

    except UnboundLocalError:
        # NOTE(review): presumably raised inside XD.get_indcum for some
        # stocks — confirm. The stock is recorded and skipped.
        bug_li.append(stock_id)
   

100%|██████████| 1350/1350 [02:05<00:00, 10.74it/s]


# 3. Result analysis

In [33]:
# For every stock, count the significant windows and append
# [wilcoxon count, wilcoxon share, trend count, trend share] to its entry.
# Stocks with zero tested windows get nothing appended.
for key, entry in tqdm(res_d.items()):

    p_values, trend_labels, n_tested = entry[0], entry[1], entry[2]

    # number of windows whose Wilcoxon p-value is below the critical level
    wk_hits = int(np.count_nonzero(p_values < CRITICAL))
    # number of windows for which the trend test reported a trend
    trend_hits = int(np.count_nonzero(trend_labels != 'no trend'))

    if n_tested == 0:
        continue

    entry.append([wk_hits, wk_hits / n_tested, trend_hits, trend_hits / n_tested])

100%|██████████| 1349/1349 [00:00<00:00, 150409.81it/s]


In [34]:
# filter condition
# Thresholds used to select stocks for the final result. All three
# comparisons are strict (">").

## 1. Minimum number of tested event windows: a stock is kept only when it
##    has more than N_SIGNAL of them.
N_SIGNAL = 10

## 2. Minimum share (fraction, 0-1) of windows with a significant Wilcoxon
##    test. NOTE: despite the "MK" in the name, this threshold is compared
##    against the Wilcoxon share, not the Mann-Kendall one.
MK_TEST_SIGNIFICANT_PERCENT = 0.7

## 3. Minimum share (fraction, 0-1) of windows for which the trend test
##    reported a trend.
TREND_TEST_SIGNIFICANT_PERCENT = 0.7

In [35]:
# Keep the stocks that satisfy all three filter conditions.
#
# Each res_d entry is expected to be
#   [p-values, trend labels, n_tested, signal dates, [wk_count, wk_share,
#    trend_count, trend_share]]
# where the last element only exists for stocks with at least one tested
# window.
final_res = {}
for key, entry in tqdm(res_d.items()):

    # Entries without the statistics list are skipped explicitly instead of
    # relying on a swallowed IndexError.
    if len(entry) < 5 or entry[2] == 0:
        continue

    n_tested = entry[2]
    wilcoxon_share = entry[4][1]
    trend_share = entry[4][3]

    enough_windows = n_tested > N_SIGNAL
    # MK_TEST_SIGNIFICANT_PERCENT is applied to the Wilcoxon share; the
    # 'mk_sig_perc' key below therefore also holds the Wilcoxon share.
    wilcoxon_ok = wilcoxon_share > MK_TEST_SIGNIFICANT_PERCENT
    trend_ok = trend_share > TREND_TEST_SIGNIFICANT_PERCENT

    if enough_windows and wilcoxon_ok and trend_ok:
        final_res[key] = {
            'num_sig': n_tested,
            'mk_sig_perc': wilcoxon_share,
            'trend_sig_perc': trend_share,
            'Date': entry[3],
        }

100%|██████████| 1349/1349 [00:00<00:00, 896832.48it/s]


In [36]:
# Flatten final_res into two parallel lists: one (ticker, date) pair per
# signal date of every selected stock.
ticker = []
date = []
for stock_key, info in final_res.items():
    signal_dates = info['Date'].values.tolist()
    date.extend(signal_dates)
    ticker.extend([stock_key] * len(signal_dates))


In [37]:
# One row per (ticker, signal date) pair.
signal_rows = list(zip(ticker, date))
res_df = pd.DataFrame(signal_rows, columns=['ticker', 'date'])

In [38]:
# Write the result table; the file name encodes the window size and the
# volume threshold. The output directory is created first so the write does
# not fail on a fresh checkout.
import os

out_path = f"./res/wd{N_P}_vol{VOL_TRES}.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
res_df.to_csv(out_path)