In [1]:
import kquant as kq
from tqdm import tqdm

In [2]:
# set API account
def set_api_account():
    kq.set_api("KRX2308020", "EQDkUcyI3dK6oIAXqAR8BXOK4bKxHHmH")
    return None

In [3]:
set_api_account()

In [4]:
class DATA_LOADER:
    class SYMBOL_LOADER:
        @staticmethod
        def load_symbols_df():
            symbols_df = kq.symbol_stock()
            return symbols_df

        class SYMBOL_FILTER:
            @staticmethod
            def filter__market(symbols_df):
                filtered_symbols_df = symbols_df[
                    (symbols_df["MARKET"].isin(["코스닥", "유가증권"]))
                ].copy()
                return filtered_symbols_df

            @staticmethod
            def filter__admin_issue(symbols_df):
                filtered_symbols_df = symbols_df[
                    (symbols_df["ADMIN_ISSUE"] == 0)
                ].copy()
                return filtered_symbols_df

            @staticmethod
            def filter_sec_type(symbols_df):
                filtered_symbols_df = symbols_df[
                    symbols_df["SEC_TYPE"].isin(["ST", "EF", "EN"])
                ].copy()
                return filtered_symbols_df

        def filter_symbols_df(self, symbols_df):
            symbol_filter = self.SYMBOL_FILTER()
            filtered_symbols_df = symbol_filter.filter__market(symbols_df)
            filtered_symbols_df = symbol_filter.filter__admin_issue(filtered_symbols_df)
            filtered_symbols_df = symbol_filter.filter_sec_type(filtered_symbols_df)
            return filtered_symbols_df

        @staticmethod
        def get_symbols(symbols_df):
            symbols = sorted(set(symbols_df["SYMBOL"]))
            return symbols

        # SYMBOL_LOADER PIPELINE
        def __call__(self):
            symbols_df = self.load_symbols_df()
            filtered_symbols_df = self.filter_symbols_df(symbols_df)
            symbols = self.get_symbols(filtered_symbols_df)
            return symbols

    class STOCK_LOADER:
        def __init__(self, symbols) -> None:
            self.symbols = symbols

        @staticmethod
        def load_stock_data_df(symbols):
            stock_data_df_list = list()
            for symbol in tqdm(symbols):
                _stock_data_df = kq.daily_stock(symbol)
                stock_data_df_list.append(_stock_data_df)
            stock_data_df = pd.concat(stock_data_df_list, axis=0)
            return stock_data_df

        class STOCK_FILTER:
            @staticmethod
            def filter__zero_row(stock_data_df):
                columns = ["VOLUME", "OPEN", "HIGH", "LOW", "CLOSE"]
                filtered_stock_data_df = stock_data_df[
                    (stock_data_df.loc[:, columns].all(axis=1))
                ].copy()
                return filtered_stock_data_df

            @staticmethod
            def filter__na_row(stock_data_df):
                columns = ["VOLUME", "OPEN", "HIGH", "LOW", "CLOSE"]
                filtered_stock_data_df = stock_data_df[
                    ~(stock_data_df[columns].isna().any(axis=1))
                ].copy()
                return filtered_stock_data_df

            @staticmethod
            def filter__cnt(stock_data_df):
                stock_cnt_series = stock_data_df.groupby("SYMBOL").count()["CLOSE"]
                stock_mode_cnt = stock_cnt_series.mode()[0]

                cnt_filtered_series = stock_cnt_series[
                    stock_cnt_series == stock_mode_cnt
                ]
                cnt_filtered_stocks = cnt_filtered_series.index

                filtered_stock_data_df = stock_data_df[
                    stock_data_df["SYMBOL"].isin(cnt_filtered_stocks)
                ].copy()
                return filtered_stock_data_df

        def filter_stock_data_df(self, stock_data_df):
            stock_filter = self.STOCK_FILTER()
            filtered_stock_data_df = stock_filter.filter__zero_row(stock_data_df)
            filtered_stock_data_df = stock_filter.filter__na_row(filtered_stock_data_df)
            filtered_stock_data_df = stock_filter.filter__cnt(filtered_stock_data_df)
            return filtered_stock_data_df

        def __call__(self):
            stock_data_df = self.load_stock_data_df(self.symbols)
            filtered_stock_data_df = self.filter_stock_data_df(stock_data_df)
            return filtered_stock_data_df

In [5]:
data_loader = DATA_LOADER()

symbol_loader = data_loader.SYMBOL_LOADER()
symbols = symbol_loader()

In [6]:
## TEMPOLARY
symbols = symbols[:30]

In [7]:
stock_loader = data_loader.STOCK_LOADER(symbols)
stocks_df = stock_loader()

100%|██████████| 30/30 [00:14<00:00,  2.01it/s]


In [8]:
symbols = sorted(set(stocks_df["SYMBOL"]))

In [9]:
symbol = symbols[0]

In [10]:
stock_df = stocks_df[stocks_df["SYMBOL"] == symbol]

In [11]:
# 기술적 분석
# SIMILARITY MODEL

class SIMILARITY_MODEL:
    """
    def __init__(self, stock_df, CFG, n) -> None:
        self.stock_df = stock_df
        self.CFG = CFG
        self.n = n

        return None
    """

    def __init__(self, stock_df, CFG, n) -> None:
        self.stock_df = stock_df
        self.CFG = CFG
        self.n = n

        return None

    def get_x_y_dataset(self, array_ist):
        CFG = self.CFG
        i_window = CFG["input_window"]
        o_window = CFG["output_window"]

        x_dataset = list()
        y_dataset = list()

        for idx in range(len(array_ist) - i_window - o_window + 1):
            _x = array_ist[idx : idx + i_window]
            _y = array_ist[idx + i_window : idx + i_window + o_window]
            x_dataset.append(_x)
            y_dataset.append(_y)

        x_dataset = np.array(x_dataset)
        y_dataset = np.array(y_dataset).sum(axis=1)
        final_x = array_ist[-i_window:]

        return (final_x, x_dataset, y_dataset)

    class SIMILARITY_MODEL_MAIN:
        def get_similarity_main_df(self, x_dataset, y_dataset, final_x, n):
            def get_cosine_similarity(array_1, array_2):
                cosine_similarity = np.dot(array_1, array_2) / (
                    np.linalg.norm(array_1) * np.linalg.norm(array_2)
                )
                return cosine_similarity

            similarity_results = list()
            for x_data, y_data in zip(x_dataset, y_dataset):
                _similarity_score = get_cosine_similarity(x_data, final_x)
                similarity_results.append(
                    {
                        "similarity_score": _similarity_score,
                        "actual_y": y_data,
                    }
                )
            similarity_df = pd.DataFrame(similarity_results)
            similarity_main_df = similarity_df.nlargest(n, "similarity_score")
            return similarity_main_df

        @staticmethod
        def get_pred_y(similarity_df):
            pred_y = (
                similarity_df["similarity_score"] * similarity_df["actual_y"]
            ).mean()
            return pred_y

        def __call__(self, final_x, x_dataset, y_dataset, n):
            similarity_main_df = self.get_similarity_main_df(
                x_dataset, y_dataset, final_x, n
            )
            pred_y = self.get_pred_y(similarity_main_df)
            return pred_y

    def __call__(self):
        stock_df = self.stock_df
        array_list = stock_df["CHG_PCT"].values
        final_x, x_dataset, y_dataset = self.get_x_y_dataset(array_list)

        similarity_model_main = self.SIMILARITY_MODEL_MAIN()
        pred_y = similarity_model_main(final_x, x_dataset, y_dataset, self.n)
        return pred_y

In [12]:
CFG = {
    "dataset_window": 200,
    "input_window": 15,
    "output_window": 5,
}

In [13]:
similarity_model = SIMILARITY_MODEL(stock_df, CFG, 5)
pred_y = similarity_model()
pred_y

0.7378725327341311

In [14]:
# 당기순이익
stock_net_profit_df = kq.account_history(symbol, "122700")
stock_net_profit_df.sort_values("YEARMONTH", inplace=True)
stock_net_profit_latest_df = stock_net_profit_df.tail(1)

In [15]:
stock_net_profit_array_list = stock_net_profit_df["VALUE"].values
lr.fit(
    np.arange(len(stock_net_profit_array_list)).reshape(-1, 1),
    np.array(stock_net_profit_array_list).reshape(-1, 1),
)
stock_net_profit_coef = lr.coef_

NameError: name 'lr' is not defined

In [16]:
# 총자본 (총자산 - 총부채)
stock_total_capitial_df = kq.account_history(symbol, "115000")
stock_total_capitial_df.sort_values("YEARMONTH", inplace=True)
stock_total_capitial_latest_df = stock_total_capitial_df.tail(1)

In [161]:
stock_total_capital_array_list = stock_total_capitial_df["VALUE"].values
lr.fit(
    np.arange(len(stock_total_capital_array_list)).reshape(-1, 1),
    np.array(stock_total_capital_array_list).reshape(-1, 1),
)
stock_total_capital_coef = lr.coef_

In [169]:
stock_latest_df = stock_df.tail(1)

# 시가총액
stock_marketcap = stock_latest_df["MARKETCAP"].values[0]
# 총 주식 수
stock_shares = stock_latest_df["SHARES"].values[0]
# 한 주의 가격
stock_price = stock_latest_df["CLOSE"].values[0]
# 당기 순이익
stock_net_profit = stock_net_profit_latest_df["VALUE"].values[0]

# 총 자본(총 자산 - 총 부채)
stock_total_capital = stock_total_capitial_latest_df["VALUE"].values[0]

In [170]:
stock_eps = stock_net_profit / stock_shares
stock_per = stock_marketcap / stock_net_profit
stock_pbr = stock_price / (stock_total_capital / stock_shares)

In [171]:
stock_price / (stock_total_capital / stock_shares)

744.9901650037334

In [172]:
stock_marketcap / stock_net_profit

13065.865553621126

In [173]:
stock_eps

0.7730065764530116

In [174]:
stock_per

13065.865553621126

In [175]:
stock_pbr

744.9901650037334