In [1]:
import numpy as np
import pandas as pd
from itertools import combinations

from processor.dart import DART_FUNDAMENTAL_PROCESSOR
from processor.fdr import FDR_INFO_PROCESSOR, FDR_OHLCV_PREPROCESSOR

from processor.factor.fundamental import FUNDAMENTAL_FACTOR_PROCESSOR
from processor.factor.parameter import FACTOR_PARAMETER_PROCESSOR

from analyser.factor import FACTOR_ANALYSER

In [2]:
# Notebook-wide configuration.
#   analysis_date          - cutoff date used to split the OHLCV history into past / future rows.
#   FUNDAMENTAL_FACTOR_CFG - options handed to the fundamental factor processor.
#   FACTOR_ANALYSIS_CFG    - options handed to the factor analyser.
CFG = dict(
    analysis_date="2023-11-01",
    FUNDAMENTAL_FACTOR_CFG=dict(ohlcv_recent_n=10),
    FACTOR_ANALYSIS_CFG=dict(profit_recent_n=20),
)

In [3]:
# DART fundamentals: read the CSV (first column becomes the index) and wrap it in its processor.
dart_fundamental_df = pd.read_csv(
    filepath_or_buffer="./data/dart_fundamental_df.csv",
    index_col=0,
)
dart_fundamental_processor = DART_FUNDAMENTAL_PROCESSOR(dart_fundamental_df)

In [4]:
# FDR stock info: read the CSV (first column becomes the index) and wrap it in its processor.
fdr_info_df = pd.read_csv(
    filepath_or_buffer="./data/fdr_info_df.csv",
    index_col=0,
)
fdr_info_processor = FDR_INFO_PROCESSOR(fdr_info_df)

In [5]:
# FDR OHLCV: read the raw CSV (first column becomes the index), then let the
# preprocessor produce the preprocessed frame used by the rest of the notebook.
fdr_ohlcv_df = pd.read_csv(
    filepath_or_buffer="./data/fdr_ohlcv_df.csv",
    index_col=0,
)
fdr_ohlcv_preprocessor = FDR_OHLCV_PREPROCESSOR(fdr_ohlcv_df)
pps_fdr_ohlcv_df = fdr_ohlcv_preprocessor.get_pps_fdr_ohlcv_df()

In [6]:
# Split the preprocessed OHLCV rows around CFG["analysis_date"]:
#   future_ohlcv_df - rows dated strictly after the analysis date
#   ohlcv_df        - rows dated strictly before the analysis date
# NOTE(review): both comparisons are strict, so rows dated exactly on the
# analysis date end up in neither frame - confirm this gap is intended.
future_ohlcv_df = pps_fdr_ohlcv_df.loc[
    pps_fdr_ohlcv_df["Date"] > CFG["analysis_date"]
].copy()

ohlcv_df = pps_fdr_ohlcv_df.loc[
    pps_fdr_ohlcv_df["Date"] < CFG["analysis_date"]
].copy()

In [8]:
# Build the fundamental factor table from the pre-analysis-date OHLCV rows
# together with the DART fundamentals and FDR info processors.
fundamental_factor_processor = FUNDAMENTAL_FACTOR_PROCESSOR(
    ohlcv_df,
    dart_fundamental_processor,
    fdr_info_processor,
)
fundamental_factors_df = fundamental_factor_processor.get_fundamental_factor_df(
    CFG["FUNDAMENTAL_FACTOR_CFG"]
)

In [9]:
# Analysis: create the factor analyser over the fundamental factor table;
# it is queried once per factor pair in the parameter search further down.
factor_analyser = FACTOR_ANALYSER(fundamental_factors_df)

In [10]:
# Every column except the "StockCode" identifier is treated as a factor;
# factor_combs holds every unordered pair of those factors.
factors = [name for name in fundamental_factors_df if name != "StockCode"]
factor_combs = [*combinations(factors, 2)]

In [11]:
# Parameter search: for every factor pair, build its 2-D profit table and
# record the best arguments found for that pair together with their value.
# params_dict maps get_best_args() -> get_best_value() for each pair.
params_dict = {}

for factor_comb in factor_combs:
    profit_analysis_2d_df = factor_analyser.get_profit_analysis_2d_df(
        future_ohlcv_df, CFG["FACTOR_ANALYSIS_CFG"], factor_comb
    )
    # Fill missing cells with the mean of the per-column means. The filled
    # frame is rebound rather than written with inplace=True, so the object
    # handed back by the analyser is never mutated.
    profit_analysis_2d_df = profit_analysis_2d_df.fillna(
        profit_analysis_2d_df.mean().mean()
    )

    # NOTE(review): the two literal 3s are opaque constructor arguments -
    # presumably the search-window size along each factor axis; confirm
    # against FACTOR_PARAMETER_PROCESSOR and consider moving them into CFG.
    factor_parameter_processor = FACTOR_PARAMETER_PROCESSOR(profit_analysis_2d_df, 3, 3)
    best_args = factor_parameter_processor.get_best_args()
    best_value = factor_parameter_processor.get_best_value()
    params_dict[best_args] = best_value

In [12]:
# One row per factor pair: the index holds the best-argument key from
# params_dict and the single "Value" column holds the value stored for it.
# from_dict is a classmethod, so it is called on the class itself instead of
# on a throwaway empty DataFrame instance.
params_df = pd.DataFrame.from_dict(params_dict, orient="index", columns=['Value'])

In [13]:
# Display the five rows with the highest "Value".
params_df.nlargest(n=5, columns="Value")

Unnamed: 0,Value
"((TEPP, 0.0, 0.3), (CAPP, 0.7, 1.0))",0.128517
"((TAPP, 0.7, 1.0), (TEPP, 0.2, 0.5))",0.107244
"((TAPP, 0.6, 0.9), (CAPP, 0.0, 0.3))",0.092093
"((CLR, 0.2, 0.5), (TLR, 0.6, 0.9))",0.089866
"((TLR, 0.0, 0.3), (CEPP, 0.3, 0.6))",0.088302


In [14]:
# Index labels (the best-argument keys) of the five highest-"Value" rows, as a plain list.
[*params_df.nlargest(n=5, columns="Value").index]

[(('TEPP', 0.0, 0.3), ('CAPP', 0.7, 1.0)),
 (('TAPP', 0.7, 1.0), ('TEPP', 0.2, 0.5)),
 (('TAPP', 0.6, 0.9), ('CAPP', 0.0, 0.3)),
 (('CLR', 0.2, 0.5), ('TLR', 0.6, 0.9)),
 (('TLR', 0.0, 0.3), ('CEPP', 0.3, 0.6))]