In [24]:
from IPython.core.interactiveshell import InteractiveShell
from IPython.display import Markdown as md
from ipynb.fs.full import functions
import pandas as pd

InteractiveShell.ast_node_interactivity = "all"

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [25]:
import numpy as np
from scipy.stats import ks_1samp, norm

In [26]:
formation_filename = 'output_formation.xlsx'
formation_excel_book = pd.read_excel(io=formation_filename, header=[0,1], index_col=0, sheet_name=None)

In [27]:
def symbolize(p_value: float) -> str:
  if p_value <= .001:
    return '***'
  elif p_value <= .01:
    return '**'
  elif p_value <= .05:
    return '*'
  else:
    return ''

In [28]:
input_t_test_dict = {
    'PRO': {
        'winner': [],
        'loser': [],
    },
    'CON': {
        'winner': [],
        'loser': [],
    },
}

winner_loser_list = []

for case in formation_excel_book.keys():
    pro_or_con = case.split(sep='-')[0]

    formation_df = formation_excel_book[case]

    factor = int(len(formation_df.columns.levels[0]) / 5)
    ticker_list = formation_df.columns.unique(level=0).to_list()
    winner_ticker_list = ticker_list[:factor]
    loser_ticker_list = ticker_list[-factor:]

    winner_loser_list.append({
        'case': case,
        'winner': ', '.join(winner_ticker_list),
        'loser': ', '.join(loser_ticker_list),
    })

    winner_car = formation_df.copy()[[(i, 'param3_cu') for i in winner_ticker_list]].mean(axis=1).iloc[-1]
    loser_car = formation_df.copy()[[(i, 'param3_cu') for i in loser_ticker_list]].mean(axis=1).iloc[-1]

    input_t_test_dict[pro_or_con]['winner'].append(winner_car)
    input_t_test_dict[pro_or_con]['loser'].append(loser_car)
    
formation_t_test_list = []
for pro_or_con in input_t_test_dict.keys():
    for winner_or_loser in input_t_test_dict[pro_or_con].keys():
        car_list = input_t_test_dict[pro_or_con][winner_or_loser]
        acar = np.mean(car_list)
        t_stat, p_value = ks_1samp(x=car_list, cdf=norm.cdf)
        symbol = symbolize(p_value=p_value)
        formation_t_test_list.append({
            'pro_or_con': pro_or_con,
            'winner_or_loser': winner_or_loser,
            'acar': acar,
            't_stat': t_stat,
            'p_value': p_value,
            'symbol': symbol,
        })

winner_loser_df = pd.DataFrame(winner_loser_list)
formation_t_test_df = pd.DataFrame(formation_t_test_list)

md('**Winner-Loser List**')
winner_loser_df

md('**Formation T-Test Result**')
formation_t_test_df

**Winner-Loser List**

Unnamed: 0,case,winner,loser
0,PRO-1,"DWGL, ALKA, ANTM, TPIA, ARTO, KBLM, TGKA, JMAS...","BBKP, BSSR, BRAM, KRAS, SOSS, WIIM, INKP, META..."
1,PRO-2,"YPAS, ARTO, DWGL, JRPT, TBIG, FIRE, BRPT, PCAR...","MYOH, MIDI, INAI, MEDC, RODA, BMTR, MTWI, PDES..."
2,PRO-3,"DAYA, INTD, SQMI, BPII, ALKA, JIHD, ESSA, DNAR...","HRTA, CASA, ISAT, MOLI, SSMS, BDMN, HOKI, WOOD..."
3,PRO-4,"ERTX, WAPO, BYAN, INCF, SAME, ITMA, PANR, SDPC...","LEAD, GOOD, PNBS, MSKY, LMPI, AUTO, SSIA, TNCA..."
4,CON-1,"PGLI, BOGA, BRAM, NICK, MFMI, BRPT, ALKA, MPOW...","WAPO, MAIN, PTRO, GPRA, DNAR, HMSP, APII, SMRA..."
5,CON-2,"CITY, KOIN, KIOS, TPMA, AGII, FILM, BBHI, KRAS...","HMSP, TUGU, BTPS, PZZA, GOOD, FAST, IPCC, BBMD..."
6,CON-3,"TRIS, YPAS, BSIM, BKSW, SKRN, BYAN, MARI, WAPO...","LPPS, PNBN, INKP, SMCB, ESTI, INRU, PNLF, MERK..."
7,CON-4,"FMII, MTSM, MFIN, MOLI, MITI, PYFA, CFIN, PORT...","KOIN, KBLM, LTLS, CITA, SIDO, BSSR, ABBA, KOBX..."


**Formation T-Test Result**

Unnamed: 0,pro_or_con,winner_or_loser,acar,t_stat,p_value,symbol
0,PRO,winner,0.086265,0.527674,0.145931,
1,PRO,loser,-0.100991,0.536952,0.133485,
2,CON,winner,0.149699,0.54445,0.123961,
3,CON,loser,-0.121813,0.528292,0.145079,


In [29]:
with pd.ExcelWriter('output_formation_winner-loser_ticker_list.xlsx') as excel_writer:
    winner_loser_df.to_excel(excel_writer=excel_writer, index=None)

with pd.ExcelWriter('output_formation_t-test_result.xlsx') as excel_writer:
    formation_t_test_df.to_excel(excel_writer=excel_writer, index=None)