In [2]:
import pandas as pd
import numpy as np
import warnings
import eventstudy as es
from tqdm import tqdm
import itertools
import pymannkendall as mk
import matplotlib.pyplot as plt 
import yfinance as yf
from scipy.stats import wilcoxon
from multiprocessing import cpu_count
from concurrent.futures import ThreadPoolExecutor
import os, sys
from pathlib import Path
import pickle
sys.path.append(str(Path(os.getcwd()).parent / '1226' / 'utils'))
import XD
from utils import process

warnings.filterwarnings("ignore")

In [3]:
q = pd.read_csv("../1223/data/concentration/quote/2020_20230814.csv", index_col=False).dropna()
brk = pd.read_csv("../1223/data/concentration/brk/2022_BrkNetAmt.csv", index_col=False).dropna()

In [4]:
df = pd.merge(q, brk, on=['日期', '股號'], how='left')
df = df.groupby('股號').apply(lambda x: x.sort_values('日期')).reset_index(drop=True)
df = df.rename(columns={'漲跌幅(%)':'ret'})

In [5]:
bk_uq_id = df['分點'].unique()

# Number of processes to use (adjust as needed)
num_processes = min(cpu_count(), len(bk_uq_id))


with ThreadPoolExecutor(max_workers=cpu_count()) as executor:
    list(tqdm(executor.map(process.process_bk, [bk for bk in bk_uq_id], [df for _ in range(len(bk_uq_id))]), total=len(bk_uq_id)))
    
df = df.drop(columns=['indicator_nan'])

100%|██████████| 548/548 [00:16<00:00, 33.24it/s]


In [6]:
# cols_index
query_cols=['ret','日期', '股號', 'indicator_922H']
cols_index = df.columns.get_indexer(query_cols)
print(cols_index)

[ 3  0  1 13]


In [7]:
np_df = df.to_numpy()
np_df_tick = np.array(np.split(np_df, np.unique(np_df[:, 1], return_index=True)[1][1:]), dtype=object)

In [8]:
vec_t = np.vectorize(process.transpose_element)
np_df_tick = vec_t(np_df_tick)
# broad cast "np_df_tick"
np_df_tick[0]
# broad cast "np_df_tick[0]"

array([['2020-01-02', '2020-01-03', '2020-01-06', ..., '2023-08-10',
        '2023-08-11', '2023-08-14'],
       [1101, 1101, 1101, ..., 1101, 1101, 1101],
       [44.1, 43.95, 43.45, ..., 36.85, 37.4, 36.85],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=object)

In [9]:
ret_2d = np.tile(np_df_tick[0][3], np_df_tick[0][8:].shape)
signal_2d = np_df_tick[0][8:]
combined_array = np.array([[a, b] for a, b in zip(signal_2d, ret_2d)], dtype=object)

In [21]:
def test(ret1darr,ind1darr, num_period=10):
    res = np.array([])
    res_test = np.array([], dtype=object)
    i = 0
    while i <= len(ind1darr)-num_period+1:
        tmp_sig = ind1darr[i:i+num_period]
        tmp_ret = ret1darr[i:i+num_period]
        
        if tmp_sig[0]==True:
            res = np.append(res, np.cumprod(1 + tmp_ret) - 1)
            trend, h, p, z, Tau, s, var_s, slope, intercept =  mk.original_test(np.cumprod(1 + tmp_ret) - 1)
            res_test = np.append(res_test, trend)
            i += num_period
            next

        else: 
            res = np.append(res, tmp_ret[0])
            i += 1
            
    if i > len(ind1darr)-num_period+1:
        if tmp_sig[0]==True:
            res = np.append(res, np.cumprod(1 + ind1darr[i:]) - 1)
            trend, h, p, z, Tau, s, var_s, slope, intercept =  mk.original_test(np.cumprod(1 + ind1darr[i:]) - 1)
            res_test = np.append(res_test, trend)
        else:
            res = np.append(res, ret1darr[i:])
            
    return res, res_test
    # return res.tolist()
    # return res.tolist()

In [22]:
get_indcum_vec = np.vectorize(test)
res, res_test = get_indcum_vec(combined_array[:,[1]], combined_array[:,[0]])
# res = np.array(list(map(process._2list,res.tolist())))

In [24]:
res_test

array([[array(['no trend'], dtype=object)],
       [array(['decreasing'], dtype=object)],
       [array([], dtype=object)],
       [array([], dtype=object)],
       [array([], dtype=object)],
       [array(['no trend'], dtype=object)],
       [array(['no trend'], dtype=object)],
       [array([], dtype=object)],
       [array([], dtype=object)],
       [array(['no trend', 'no trend', 'no trend', 'decreasing', 'decreasing',
               'no trend', 'no trend', 'no trend', 'no trend'], dtype=object) ],
       [array([], dtype=object)],
       [array([], dtype=object)],
       [array([], dtype=object)],
       [array([], dtype=object)],
       [array([], dtype=object)],
       [array([], dtype=object)],
       [array([], dtype=object)],
       [array([], dtype=object)],
       [array([], dtype=object)],
       [array([], dtype=object)],
       [array([], dtype=object)],
       [array([], dtype=object)],
       [array([], dtype=object)],
       [array([], dtype=object)],
       [array([]

In [None]:
stock_uq_id = df['股號'].unique()
g_df = df.groupby('股號')
res_d = {}
bug_li = []

for bk in tqdm(bk_uq_id[:]):
    
    res_tick = {}
    for tick in tqdm(stock_uq_id[:]):
        tmp = g_df.get_group(tick)
        try:
            # calculate cumulative event related return 
            c, cp = XD.get_indcum(ret1darr = tmp['ret'], ind1darr=tmp[f'indicator_{bk}'], num_period=N_P)
            tmp[f'cumret_{bk}_{N_P}'] = c
            
            # perform test
            cp = list(k for k,_ in itertools.groupby(cp))
            result_trend = {}
            
            # date list
            date_list = []
            
            for i, l in enumerate(cp):
                if len(l)>1:
                    if sum(l) == 0:
                        continue
                    else:
                        trend, h, p, z, Tau, s, var_s, slope, intercept =  mk.original_test(l)
                        result_trend[f'{i}_trend'] = trend
                        
                        # date index
                        date_index = tmp[tmp[f'indicator_{bk}']!=0].index[i]
                        date_list.append(tmp['日期'][date_index])
            

            if len(result_trend)!=0:
                res_tick[f'{tick}'] = [result_trend, date_list] 
            
        except KeyError:
            break
        res_d[bk] = res_tick