In [None]:
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

import kquant as kq
from tqdm import tqdm

In [2]:
# set API account
def set_api_account():
    kq.set_api("KRX2308020", "EQDkUcyI3dK6oIAXqAR8BXOK4bKxHHmH")
    return None

In [3]:
set_api_account()

In [4]:
class DATA_LOADER:
    class STOCK_TODAY_LOADER:
        @staticmethod
        def load_stock_today_df():
            stock_today_df = kq.rank_stocks()
            return stock_today_df

        @staticmethod
        def get_symbol_price_dict(stock_today_df):
            symbol_price_dict = stock_today_df.set_index("SYMBOL")["CLOSE"].to_dict()
            return symbol_price_dict

        def __call__(self):
            stock_today_df = self.load_stock_today_df()
            symbol_price_dict = self.get_symbol_price_dict(stock_today_df)
            return symbol_price_dict

    class SYMBOL_LOADER:
        @staticmethod
        def load_symbols_df():
            symbols_df = kq.symbol_stock()
            return symbols_df

        class SYMBOL_FILTER:
            @staticmethod
            def filter__market(symbols_df):
                filtered_symbols_df = symbols_df[
                    (symbols_df["MARKET"].isin(["코스닥", "유가증권"]))
                ].copy()
                return filtered_symbols_df

            @staticmethod
            def filter__admin_issue(symbols_df):
                filtered_symbols_df = symbols_df[
                    (symbols_df["ADMIN_ISSUE"] == 0)
                ].copy()
                return filtered_symbols_df

            @staticmethod
            def filter_sec_type(symbols_df):
                filtered_symbols_df = symbols_df[
                    symbols_df["SEC_TYPE"].isin(["ST", "EF", "EN"])
                ].copy()
                return filtered_symbols_df

        def filter_symbols_df(self, symbols_df):
            symbol_filter = self.SYMBOL_FILTER()
            filtered_symbols_df = symbol_filter.filter__market(symbols_df)
            filtered_symbols_df = symbol_filter.filter__admin_issue(filtered_symbols_df)
            filtered_symbols_df = symbol_filter.filter_sec_type(filtered_symbols_df)
            return filtered_symbols_df

        @staticmethod
        def get_symbols(symbols_df):
            symbols = sorted(set(symbols_df["SYMBOL"]))
            return symbols

        # SYMBOL_LOADER PIPELINE
        def __call__(self):
            symbols_df = self.load_symbols_df()
            filtered_symbols_df = self.filter_symbols_df(symbols_df)
            symbols = self.get_symbols(filtered_symbols_df)
            return symbols

    class STOCK_TECHNICAL_LOADER:
        def __init__(self, symbols) -> None:
            self.symbols = symbols

        @staticmethod
        def load_stock_data_df(symbols):
            stock_data_df_list = list()
            for symbol in tqdm(symbols):
                _stock_data_df = kq.daily_stock(symbol)
                stock_data_df_list.append(_stock_data_df)
            stock_data_df = pd.concat(stock_data_df_list, axis=0)
            return stock_data_df

        class STOCK_FILTER:
            @staticmethod
            def filter__zero_row(stock_data_df):
                columns = ["VOLUME", "OPEN", "HIGH", "LOW", "CLOSE"]
                filtered_stock_data_df = stock_data_df[
                    (stock_data_df.loc[:, columns].all(axis=1))
                ].copy()
                return filtered_stock_data_df

            @staticmethod
            def filter__na_row(stock_data_df):
                columns = ["VOLUME", "OPEN", "HIGH", "LOW", "CLOSE"]
                filtered_stock_data_df = stock_data_df[
                    ~(stock_data_df[columns].isna().any(axis=1))
                ].copy()
                return filtered_stock_data_df

            @staticmethod
            def filter__cnt(stock_data_df):
                stock_cnt_series = stock_data_df.groupby("SYMBOL").count()["CLOSE"]
                stock_mode_cnt = stock_cnt_series.mode()[0]

                cnt_filtered_series = stock_cnt_series[
                    stock_cnt_series == stock_mode_cnt
                ]
                cnt_filtered_stocks = cnt_filtered_series.index

                filtered_stock_data_df = stock_data_df[
                    stock_data_df["SYMBOL"].isin(cnt_filtered_stocks)
                ].copy()
                return filtered_stock_data_df

        def filter_stock_data_df(self, stock_data_df):
            stock_filter = self.STOCK_FILTER()
            filtered_stock_data_df = stock_filter.filter__zero_row(stock_data_df)
            filtered_stock_data_df = stock_filter.filter__na_row(filtered_stock_data_df)
            filtered_stock_data_df = stock_filter.filter__cnt(filtered_stock_data_df)
            return filtered_stock_data_df

        def __call__(self):
            stock_data_df = self.load_stock_data_df(self.symbols)
            filtered_stock_data_df = self.filter_stock_data_df(stock_data_df)
            return filtered_stock_data_df

    class STOCK_FUNDAMENTAL_LOADER:
        def __init__(self, symbols) -> None:
            self.symbols = symbols

        @staticmethod
        def get_coef_array_list(array_list):
            try:
                lr = LinearRegression()
                lr.fit(
                    np.arange(len(array_list)).reshape(-1, 1),
                    np.array(array_list).reshape(-1, 1),
                )
                coef = lr.coef_[0][0]
            except:
                coef = 0

            return coef

        def load_net_profit_related(self, symbol):
            """당기순이익 관련 정보"""
            stock_net_profit_df = kq.account_history(symbol, "122700")
            stock_net_profit_df.sort_values("YEARMONTH", inplace=True)
            stock_net_profit_array_list = stock_net_profit_df["VALUE"].values
            stock_net_profit_coef = self.get_coef_array_list(
                stock_net_profit_array_list
            )
            stock_net_profit_latest_df = stock_net_profit_df.tail(1)
            stock_net_profit = stock_net_profit_latest_df["VALUE"].values[0]

            return {
                "net_profit": stock_net_profit,
                "net_profit_coef": stock_net_profit_coef,
            }

        def load_total_capital_related(self, symbol):
            """총 자본 관련 정보"""
            stock_total_capital_df = kq.account_history(symbol, "115000")
            stock_total_capital_df.sort_values("YEARMONTH", inplace=True)
            stock_total_capital_array_list = stock_total_capital_df["VALUE"].values
            stock_total_capital_coef = self.get_coef_array_list(
                stock_total_capital_array_list
            )
            stock_total_capital_latest_df = stock_total_capital_df.tail(1)
            stock_total_capital = stock_total_capital_latest_df["VALUE"].values[0]
            return {
                "total_capital": stock_total_capital,
                "total_capital_coef": stock_total_capital_coef,
            }

        class SYMBOL_RECENT_DATA:
            def __init__(self) -> None:
                rank_df = kq.rank_stocks()
                rank_df.set_index("SYMBOL", inplace=True)
                self.rank_df = rank_df

            def get_symbol_marketcap_dict(self):
                symbol_marketcap_dict = self.rank_df["MARKETCAP"].to_dict()
                return symbol_marketcap_dict

            def get_symbol_price_dict(self):
                symbol_price_dict = self.rank_df["CLOSE"].to_dict()
                return symbol_price_dict

            def get_symbol_marketshares_dict(self):
                symbol_marketshares_dict = (
                    self.rank_df["MARKETCAP"] / self.rank_df["CLOSE"]
                ).to_dict()
                return symbol_marketshares_dict

        def __call__(self):
            symbols = self.symbols
            symbol_results = list()
            for symbol in tqdm(symbols):
                try:
                    net_profit_related = self.load_net_profit_related(symbol)
                    total_capital_related = self.load_total_capital_related(symbol)
                    symbol_result = {
                        **net_profit_related,
                        **total_capital_related,
                        "SYMBOL": symbol,
                    }
                    symbol_results.append(symbol_result)
                except:
                    pass
                    # print(symbol)
            stock_fundamental_df = pd.DataFrame.from_dict(symbol_results)
            symbol_recent_data = self.SYMBOL_RECENT_DATA()

            symbol_marketcap_dict = symbol_recent_data.get_symbol_marketcap_dict()
            stock_fundamental_df["MARKETCAP"] = stock_fundamental_df["SYMBOL"].map(
                symbol_marketcap_dict
            )

            symbol_price_dict = symbol_recent_data.get_symbol_price_dict()
            stock_fundamental_df["PRICE"] = stock_fundamental_df["SYMBOL"].map(
                symbol_price_dict
            )

            symbol_marketshares_dict = symbol_recent_data.get_symbol_marketshares_dict()
            stock_fundamental_df["MARKETSHARES"] = stock_fundamental_df["SYMBOL"].map(
                symbol_marketshares_dict
            )

            return stock_fundamental_df

In [5]:
data_loader = DATA_LOADER()

stock_today_loader = data_loader.STOCK_TODAY_LOADER()
symbol_price_dict = stock_today_loader()

symbol_loader = data_loader.SYMBOL_LOADER()
symbols = symbol_loader()

In [6]:
import random

symbols = random.sample(symbols, 50)

In [7]:
stock_fundamental_loader = data_loader.STOCK_FUNDAMENTAL_LOADER(symbols)
stock_fundamental_df = stock_fundamental_loader()

100%|██████████| 50/50 [00:14<00:00,  3.36it/s]


In [92]:
# tmp = stock_fundamental_df[(stock_fundamental_df["net_profit_coef"] > 0)& (stock_fundamental_df["total_capital_coef"] > 0)]
tmp = stock_fundamental_df.copy()

tmp["per"] = tmp["MARKETCAP"] / tmp["net_profit"]
tmp["per_score"] = tmp["per"].rank(method="first", ascending=False)

tmp["pbr"] = tmp["MARKETCAP"] / (tmp["total_capital"])
tmp["pbr_score"] = tmp["pbr"].rank(method="first", ascending=False)

In [93]:
fundametnal_analysised_df = tmp.set_index("SYMBOL").loc[:, ["per_score", "pbr_score"]]

In [94]:
fundametnal_analysised_symbols = fundametnal_analysised_df.index

In [95]:
stock_technical_loader = data_loader.STOCK_TECHNICAL_LOADER(fundametnal_analysised_symbols)
stock_technical_df = stock_technical_loader()

100%|██████████| 27/27 [00:12<00:00,  2.24it/s]


In [96]:
# 기술적 분석
# SIMILARITY MODEL


class SIMILARITY_MODEL:
    def __init__(self, stock_df, CFG, n) -> None:
        self.stock_df = stock_df
        self.CFG = CFG
        self.n = n

        return None

    def get_x_y_dataset(self, array_ist):
        CFG = self.CFG
        i_window = CFG["input_window"]
        o_window = CFG["output_window"]

        x_dataset = list()
        y_dataset = list()

        for idx in range(len(array_ist) - i_window - o_window + 1):
            _x = array_ist[idx : idx + i_window]
            _y = array_ist[idx + i_window : idx + i_window + o_window]
            x_dataset.append(_x)
            y_dataset.append(_y)

        x_dataset = np.array(x_dataset)
        y_dataset = np.array(y_dataset).sum(axis=1)
        final_x = array_ist[-i_window:]

        return (final_x, x_dataset, y_dataset)

    class SIMILARITY_MODEL_MAIN:
        def get_similarity_main_df(self, x_dataset, y_dataset, final_x, n):
            def get_cosine_similarity(array_1, array_2):
                cosine_similarity = np.dot(array_1, array_2) / (
                    np.linalg.norm(array_1) * np.linalg.norm(array_2)
                )
                return cosine_similarity

            similarity_results = list()
            for x_data, y_data in zip(x_dataset, y_dataset):
                _similarity_score = get_cosine_similarity(x_data, final_x)
                similarity_results.append(
                    {
                        "similarity_score": _similarity_score,
                        "actual_y": y_data,
                    }
                )
            similarity_df = pd.DataFrame(similarity_results)
            similarity_main_df = similarity_df.nlargest(n, "similarity_score")
            return similarity_main_df

        @staticmethod
        def get_pred_y(similarity_df):
            pred_y = (
                similarity_df["similarity_score"] * similarity_df["actual_y"]
            ).mean()
            return pred_y

        def __call__(self, final_x, x_dataset, y_dataset, n):
            similarity_main_df = self.get_similarity_main_df(
                x_dataset, y_dataset, final_x, n
            )
            pred_y = self.get_pred_y(similarity_main_df)
            return pred_y

    def __call__(self):
        stock_df = self.stock_df
        array_list = stock_df["CHG_PCT"].values
        final_x, x_dataset, y_dataset = self.get_x_y_dataset(array_list)

        similarity_model_main = self.SIMILARITY_MODEL_MAIN()
        pred_y = similarity_model_main(final_x, x_dataset, y_dataset, self.n)
        return pred_y

In [97]:
CFG = {
    "dataset_window": 200,
    "input_window": 15,
    "output_window": 5,
}

In [98]:
technical_analysis_symbols = sorted(set(stock_technical_df["SYMBOL"]))

technical_analysis_result_dict = dict()
for symbol in technical_analysis_symbols:
    stock_df = stock_technical_df[stock_technical_df["SYMBOL"] == symbol]
    similarity_model = SIMILARITY_MODEL(stock_df, CFG, 5)
    pred_y = similarity_model()
    technical_analysis_result_dict[symbol] = pred_y

In [99]:
analised_df = fundametnal_analysised_df.copy()

In [100]:
analised_df["technical_pred_y"] = analised_df.index.map(technical_analysis_result_dict)

In [101]:
ssc = StandardScaler()

In [102]:
analised_df.loc[:,:] = ssc.fit_transform(analised_df)

In [103]:
analised_df["total"] = (
    analised_df["per_score"] * 0.3
    + analised_df["pbr_score"] * 0.7
)

In [104]:
main_df = analised_df.nlargest(5, "total")

In [105]:
main_df['price'] = main_df.index.map(symbol_price_dict)

In [106]:
main_df["cnt"] = 10000000 // main_df["price"]

In [107]:
order = list(main_df["cnt"].to_dict().items())

In [108]:
order

[('034810', 1251),
 ('088790', 4494),
 ('034220', 761),
 ('002170', 202),
 ('006370', 778)]

In [109]:
import datetime as dt
import logging

In [110]:
def trade_func(
    date: dt.date,
    dict_df_result: dict[str, pd.DataFrame],
    dict_df_position: dict[str, pd.DataFrame],
    logger: logging.Logger,
) -> list[tuple[str, int]]:
    r"""주식매매 지시함수

    주식매매 지시함수에 대한 설명

    :param dt.date date: 매매일 날짜
    :param dict[str, pd.DataFrame] dict_df_result: 매매일까지의 주문 및 체결 정보
    :param dict[str, pd.DataFrame] dict_df_position: 매매일의 주식 보유 정보
    :param logging.Logger logger: 로거
    :return list[tuple[str, int]]: 주식매매 지시
    """

    # 본 예제코드는 코드가 실제로 실행되는 것을 보여주기 위해
    # 심사 투자기간이 2023년 1월 2일부터 시작된다고 가정하고 있습니다.
    # 실제 제출코드에서는 실제 심사 투자기간을 사용해야 합니다.

    # 시가총액 상위 5개 주식

    if date == dt.date(2023, 1, 2):  # 투자 시작일
        # 각 종목을 10주씩 매수
        symbols_and_orders = order
    else:
        symbols_and_orders = []

    return symbols_and_orders

In [111]:
import kquant as kq

# backtest_stock_port_daily 함수를 사용하는 경우
dict_df_result = kq.backtest_stock_port_daily(
    trade_func,
    "2023-01-02",  # 실제 심사에서는 투자기간 시작일
    "2023-08-30",  # 실제 심사에서는 투자기간 종료일
    init_cash=1_000_000_000,  # 10억원
)

[2023-01-02] 종목: 034810, 주문전 보유수량:      0 주문수량:  1,251, 매매수량:  1,251, 주문후 보유수량:  1,251
[2023-01-02] 종목: 088790, 주문전 보유수량:      0 주문수량:  4,494, 매매수량:  4,494, 주문후 보유수량:  4,494
[2023-01-02] 종목: 034220, 주문전 보유수량:      0 주문수량:    761, 매매수량:    761, 주문후 보유수량:    761
[2023-01-02] 종목: 002170, 주문전 보유수량:      0 주문수량:    202, 매매수량:    202, 주문후 보유수량:    202
[2023-01-02] 종목: 006370, 주문전 보유수량:      0 주문수량:    778, 매매수량:    778, 주문후 보유수량:    778


In [112]:
dict_df_result['TOTAL']

Unnamed: 0,DATE,SYMBOL,PRICE,ORDER,QTY,TRADE_PRICE,POSITION,AVG_PRICE,FEE,TRADE_TAX,SLIPPAGE,CASHFLOW,CASH,HIST_VALUE,STOCK_VALUE,TOTAL_VALUE,REAL_PROFIT,UNREAL_PROFIT,PROFIT,HIGHWATERMARK,DRAWDOWN
0,2023-01-02,TOTAL,0,0,0,0,7486,0.0000,0,0,0,-51721040,948278960,51721040,51721040,1000000000,0,0,0,1000000000,0
1,2023-01-03,TOTAL,0,0,0,0,7486,0.0000,0,0,0,0,948278960,51721040,51976990,1000255950,0,255950,255950,1000255950,0
2,2023-01-04,TOTAL,0,0,0,0,7486,0.0000,0,0,0,0,948278960,51721040,52484110,1000763070,0,763070,763070,1000763070,0
3,2023-01-05,TOTAL,0,0,0,0,7486,0.0000,0,0,0,0,948278960,51721040,52708080,1000987040,0,987040,987040,1000987040,0
4,2023-01-06,TOTAL,0,0,0,0,7486,0.0000,0,0,0,0,948278960,51721040,52897350,1001176310,0,1176310,1176310,1001176310,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160,2023-08-24,TOTAL,0,0,0,0,7486,0.0000,0,0,0,0,948278960,51721040,49418700,997697660,0,-2302340,-2302340,1005503220,7805560
161,2023-08-25,TOTAL,0,0,0,0,7486,0.0000,0,0,0,0,948278960,51721040,50392870,998671830,0,-1328170,-1328170,1005503220,6831390
162,2023-08-28,TOTAL,0,0,0,0,7486,0.0000,0,0,0,0,948278960,51721040,50588860,998867820,0,-1132180,-1132180,1005503220,6635400
163,2023-08-29,TOTAL,0,0,0,0,7486,0.0000,0,0,0,0,948278960,51721040,50741800,999020760,0,-979240,-979240,1005503220,6482460


In [None]:

# backtest_update_stock_port_daily 함수를 사용하는 경우
dict_df_result, dict_df_position = kq.backtest_update_stock_port_daily(
   trade_func, 
   "2023-01-02",
   init_cash=1_000_000_000,
)
dict_df_result, dict_df_position = kq.backtest_update_stock_port_daily(
    trade_func, 
   "2023-01-03",
   dict_df_result, 
   dict_df_position,
)   