In [30]:
import datetime as dt
import numpy as np
import pandas as pd

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

import kquant as kq
from tqdm import tqdm

In [12]:
# set API account
def set_api_account():
    kq.set_api("KRX2308020", "EQDkUcyI3dK6oIAXqAR8BXOK4bKxHHmH")
    return None

In [21]:
class DATA_LOADER:
    class STOCK_TODAY_LOADER:
        @staticmethod
        def load_stock_today_df():
            stock_today_df = kq.rank_stocks()
            return stock_today_df

        @staticmethod
        def get_symbol_price_dict(stock_today_df):
            symbol_price_dict = stock_today_df.set_index("SYMBOL")["CLOSE"].to_dict()
            return symbol_price_dict

        def __call__(self):
            stock_today_df = self.load_stock_today_df()
            symbol_price_dict = self.get_symbol_price_dict(stock_today_df)
            return symbol_price_dict

    class SYMBOL_LOADER:
        @staticmethod
        def load_symbols_df():
            symbols_df = kq.symbol_stock()
            return symbols_df

        class SYMBOL_FILTER:
            @staticmethod
            def filter__market(symbols_df):
                filtered_symbols_df = symbols_df[
                    (symbols_df["MARKET"].isin(["코스닥", "유가증권"]))
                ].copy()
                return filtered_symbols_df

            @staticmethod
            def filter__admin_issue(symbols_df):
                filtered_symbols_df = symbols_df[
                    (symbols_df["ADMIN_ISSUE"] == 0)
                ].copy()
                return filtered_symbols_df

            @staticmethod
            def filter_sec_type(symbols_df):
                filtered_symbols_df = symbols_df[
                    symbols_df["SEC_TYPE"].isin(["ST", "EF", "EN"])
                ].copy()
                return filtered_symbols_df

        def filter_symbols_df(self, symbols_df):
            symbol_filter = self.SYMBOL_FILTER()
            filtered_symbols_df = symbol_filter.filter__market(symbols_df)
            filtered_symbols_df = symbol_filter.filter__admin_issue(filtered_symbols_df)
            filtered_symbols_df = symbol_filter.filter_sec_type(filtered_symbols_df)
            return filtered_symbols_df

        @staticmethod
        def get_symbols(symbols_df):
            symbols = sorted(set(symbols_df["SYMBOL"]))
            return symbols

        # SYMBOL_LOADER PIPELINE
        def __call__(self):
            symbols_df = self.load_symbols_df()
            filtered_symbols_df = self.filter_symbols_df(symbols_df)
            symbols = self.get_symbols(filtered_symbols_df)
            return symbols

    class STOCK_TECHNICAL_LOADER:
        def __init__(self, symbols, date=None) -> None:
            self.symbols = symbols
            self.date = date

        def load_stock_data_df(self, symbols):
            stock_data_df_list = list()
            for symbol in tqdm(symbols):
                if self.date:
                    _stock_data_df = kq.daily_stock(symbol, end_date=self.date)
                else:
                    _stock_data_df = kq.daily_stock(symbol)
                stock_data_df_list.append(_stock_data_df)
            stock_data_df = pd.concat(stock_data_df_list, axis=0)
            return stock_data_df

        class STOCK_FILTER:
            @staticmethod
            def filter__zero_row(stock_data_df):
                columns = ["VOLUME", "OPEN", "HIGH", "LOW", "CLOSE"]
                filtered_stock_data_df = stock_data_df[
                    (stock_data_df.loc[:, columns].all(axis=1))
                ].copy()
                return filtered_stock_data_df

            @staticmethod
            def filter__na_row(stock_data_df):
                columns = ["VOLUME", "OPEN", "HIGH", "LOW", "CLOSE"]
                filtered_stock_data_df = stock_data_df[
                    ~(stock_data_df[columns].isna().any(axis=1))
                ].copy()
                return filtered_stock_data_df

            @staticmethod
            def filter__cnt(stock_data_df):
                stock_cnt_series = stock_data_df.groupby("SYMBOL").count()["CLOSE"]
                stock_mode_cnt = stock_cnt_series.mode()[0]

                cnt_filtered_series = stock_cnt_series[
                    stock_cnt_series == stock_mode_cnt
                ]
                cnt_filtered_stocks = cnt_filtered_series.index

                filtered_stock_data_df = stock_data_df[
                    stock_data_df["SYMBOL"].isin(cnt_filtered_stocks)
                ].copy()
                return filtered_stock_data_df

        def filter_stock_data_df(self, stock_data_df):
            stock_filter = self.STOCK_FILTER()
            filtered_stock_data_df = stock_filter.filter__zero_row(stock_data_df)
            filtered_stock_data_df = stock_filter.filter__na_row(filtered_stock_data_df)
            filtered_stock_data_df = stock_filter.filter__cnt(filtered_stock_data_df)
            return filtered_stock_data_df

        def __call__(self):
            stock_data_df = self.load_stock_data_df(self.symbols)
            filtered_stock_data_df = self.filter_stock_data_df(stock_data_df)
            return filtered_stock_data_df

    class STOCK_FUNDAMENTAL_LOADER:
        def __init__(self, symbols) -> None:
            self.symbols = symbols

        @staticmethod
        def get_coef_array_list(array_list):
            try:
                lr = LinearRegression()
                lr.fit(
                    np.arange(len(array_list)).reshape(-1, 1),
                    np.array(array_list).reshape(-1, 1),
                )
                coef = lr.coef_[0][0]
            except:
                coef = 0

            return coef

        def load_net_profit_related(self, symbol):
            """당기순이익 관련 정보"""
            stock_net_profit_df = kq.account_history(symbol, "122700")
            stock_net_profit_df.sort_values("YEARMONTH", inplace=True)
            stock_net_profit_array_list = stock_net_profit_df["VALUE"].values
            stock_net_profit_coef = self.get_coef_array_list(
                stock_net_profit_array_list
            )
            stock_net_profit_latest_df = stock_net_profit_df.tail(1)
            stock_net_profit = stock_net_profit_latest_df["VALUE"].values[0]

            return {
                "net_profit": stock_net_profit,
                "net_profit_coef": stock_net_profit_coef,
            }

        def load_total_capital_related(self, symbol):
            """총 자본 관련 정보"""
            stock_total_capital_df = kq.account_history(symbol, "115000")
            stock_total_capital_df.sort_values("YEARMONTH", inplace=True)
            stock_total_capital_array_list = stock_total_capital_df["VALUE"].values
            stock_total_capital_coef = self.get_coef_array_list(
                stock_total_capital_array_list
            )
            stock_total_capital_latest_df = stock_total_capital_df.tail(1)
            stock_total_capital = stock_total_capital_latest_df["VALUE"].values[0]
            return {
                "total_capital": stock_total_capital,
                "total_capital_coef": stock_total_capital_coef,
            }

        class SYMBOL_RECENT_DATA:
            def __init__(self) -> None:
                rank_df = kq.rank_stocks()
                rank_df.set_index("SYMBOL", inplace=True)
                self.rank_df = rank_df

            def get_symbol_marketcap_dict(self):
                symbol_marketcap_dict = self.rank_df["MARKETCAP"].to_dict()
                return symbol_marketcap_dict

            def get_symbol_price_dict(self):
                symbol_price_dict = self.rank_df["CLOSE"].to_dict()
                return symbol_price_dict

            def get_symbol_marketshares_dict(self):
                symbol_marketshares_dict = (
                    self.rank_df["MARKETCAP"] / self.rank_df["CLOSE"]
                ).to_dict()
                return symbol_marketshares_dict

        def __call__(self):
            symbols = self.symbols
            symbol_results = list()
            for symbol in tqdm(symbols):
                try:
                    net_profit_related = self.load_net_profit_related(symbol)
                    total_capital_related = self.load_total_capital_related(symbol)
                    symbol_result = {
                        **net_profit_related,
                        **total_capital_related,
                        "SYMBOL": symbol,
                    }
                    symbol_results.append(symbol_result)
                except:
                    pass
                    # print(symbol)
            stock_fundamental_df = pd.DataFrame.from_dict(symbol_results)
            symbol_recent_data = self.SYMBOL_RECENT_DATA()

            symbol_marketcap_dict = symbol_recent_data.get_symbol_marketcap_dict()
            stock_fundamental_df["MARKETCAP"] = stock_fundamental_df["SYMBOL"].map(
                symbol_marketcap_dict
            )

            symbol_price_dict = symbol_recent_data.get_symbol_price_dict()
            stock_fundamental_df["PRICE"] = stock_fundamental_df["SYMBOL"].map(
                symbol_price_dict
            )

            symbol_marketshares_dict = symbol_recent_data.get_symbol_marketshares_dict()
            stock_fundamental_df["MARKETSHARES"] = stock_fundamental_df["SYMBOL"].map(
                symbol_marketshares_dict
            )

            return stock_fundamental_df

In [22]:
set_api_account()

In [23]:
data_loader = DATA_LOADER()

stock_today_loader = data_loader.STOCK_TODAY_LOADER()
symbol_price_dict = stock_today_loader()

symbol_loader = data_loader.SYMBOL_LOADER()
symbols = symbol_loader()

In [24]:
import random

symbols = random.sample(symbols, 300)

In [25]:
stock_fundamental_loader = data_loader.STOCK_FUNDAMENTAL_LOADER(symbols)
stock_fundamental_df = stock_fundamental_loader()

100%|██████████| 50/50 [00:09<00:00,  5.33it/s]


In [26]:
# tmp = stock_fundamental_df[(stock_fundamental_df["net_profit_coef"] > 0)& (stock_fundamental_df["total_capital_coef"] > 0)]
tmp = stock_fundamental_df.copy()

tmp["per"] = tmp["MARKETCAP"] / tmp["net_profit"]
tmp["per_score"] = tmp["per"].rank(method="first", ascending=False)

tmp["pbr"] = tmp["MARKETCAP"] / (tmp["total_capital"])
tmp["pbr_score"] = tmp["pbr"].rank(method="first", ascending=False)

In [27]:
fundametnal_analysised_df = tmp.set_index("SYMBOL").loc[:, ["per_score", "pbr_score"]]

In [28]:
fundametnal_analysised_symbols = fundametnal_analysised_df.index

In [31]:
stock_technical_loader = data_loader.STOCK_TECHNICAL_LOADER(
    symbols=fundametnal_analysised_symbols, date="2023-05-05"
)
stock_technical_df = stock_technical_loader()

  0%|          | 0/32 [00:00<?, ?it/s]


ValueError: 데이터 없음

In [None]:
analised_df = fundametnal_analysised_df.copy()

In [None]:
ssc = StandardScaler()

analised_df.loc[:,:] = ssc.fit_transform(analised_df)

analised_df["total"] = (
    analised_df["per_score"] * 0.3
    + analised_df["pbr_score"] * 0.7
)