### 01. 데이터 불러오기 및 셋팅

In [1]:
# 데이터 불러오기
import pandas as pd
dataset = pd.read_csv('./total_price.csv')

# Del Unnamed: 0
del dataset['Unnamed: 0']

# l1~l4 line Setting
for step in ['l1', 'l2', 'l3', 'l4'] : 
    dataset[step] = dataset[step].apply(lambda x : str(x)).apply(lambda x : x[1:]).apply(lambda x : float(x))
    
# Warning Message
import warnings
warnings.filterwarnings(action='ignore') 

# 작업하는 모든 코드 리스트
code_list = dataset.code.unique()

### 02. 학습 진행

In [2]:
# 오늘의 예측 데이터프레임
today_df = pd.DataFrame()

# 진행중 계산
step = 1

# 모든 코드 진행
for code in code_list[:200] : 
    
    # 코드 변환
    code_1 = code
    code_2 = str(code).rjust(6, '0')
    
    # 진행 결과문 표시
    if step % 250 == 1 :
        print('현재 {:4}번째 종목 진행중...({:5.2f}%)' .format(step, 100*step/len(code_list)))
    step += 1
    
    # 코드에 해당하는 데이터만 추출
    data = dataset[dataset['code'] == code_1]
    
    data['open'] = data['open'].apply(lambda x : float(x))
    data['high'] = data['high'].apply(lambda x : float(x))
    data['low'] = data['low'].apply(lambda x : float(x))
    data['close'] = data['close'].apply(lambda x : float(x))
    
    # 과거 5일간의 종가
    data['close_1'] = data['close'].shift(1)
    data['close_2'] = data['close'].shift(2)
    data['close_3'] = data['close'].shift(3)
    data['close_4'] = data['close'].shift(4)
    data['close_5'] = data['close'].shift(5)

    # 내일의 변동율 계산
    data['target'] = data['close'].shift(-1)
    data = data.dropna(axis=0)
    
    # 입력과 출력 지정
    X = data[data.columns[1:-2]]
    y = data[data.columns[-1]]

    # 학습과 검증 데이터로 분리
    from sklearn.model_selection import train_test_split
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.20, random_state=1234)

    # LightGBM을 위한 데이터셋으로 변경
    import lightgbm as lgb
    train_ds = lgb.Dataset(X_train, label=y_train) 
    valid_ds = lgb.Dataset(X_valid, label=y_valid)
    
    # Hypter Parameter Setting
    params = {'learning_rate': 0.1, 
              'max_depth': 10, 
              'boosting' : 'gbdt', 
              'objective' : 'regression', 
              'metric' : 'mse', 
              'is_training_metric' : True, 
              'num_leaves' : 10000, 
              'feature_fraction' : 0.9, 
              'bagging_fraction' : 0.7, 
              'bagging_freq': 10, 
              'seed' : 1234}

    # Model 학습
    model = lgb.train(params, train_ds, 1000, valid_ds, verbose_eval=0, early_stopping_rounds=100)
    
    # ssl Setting
    import ssl
    ssl._create_default_https_context = ssl._create_unverified_context

    # BeautifulSoup
    from urllib.request import urlopen
    from bs4 import BeautifulSoup
    import re

    # Test Date
    start_date = '200301'
    end_date = '200313'

    # url and html
    url = "https://lab.donutz.co/krx/products/" + code_2 + "?sdate=" + start_date + "&edate=" + end_date
    html = urlopen(url)  

    # Crawling
    bsObject = BeautifulSoup(html, "html.parser") 
    soup_string = str(bsObject)

    # 데이터 정규식
    def clean(data):
        text = re.sub('[a-zA-Z-=+#/\?^$@*\"※~&%ㆍ!_』:\\‘|\(\)\[\]\<\>`\'{}…》]', '', data)
        return text

    # Text Split
    text = re.split('[,]+',clean(soup_string))
    information = []
    for i in text:
        information.append(i)

    # Test Dataset Handling
    import numpy as np
    test_data = pd.DataFrame(np.array(information).reshape(-1,15))
    test_data.columns = ['date','open','high','low','close','trading_volume',\
                         'score','index','probability','l1','l2','l3','l4','lgap','lrate']
    test_data = test_data.set_index('date')
    test_data['open'] = test_data['open'].apply(lambda x : float(x))
    test_data['high'] = test_data['high'].apply(lambda x : float(x))
    test_data['low'] = test_data['low'].apply(lambda x : float(x))
    test_data['close'] = test_data['close'].apply(lambda x : float(x))
    test_data['close_1'] = test_data['close'].shift(1)
    test_data['close_2'] = test_data['close'].shift(2)
    test_data['close_3'] = test_data['close'].shift(3)
    test_data['close_4'] = test_data['close'].shift(4)
    test_data['close_5'] = test_data['close'].shift(5)
    test_data['trading_volume'] = test_data['trading_volume'].apply(lambda x : float(x))
    test_data['score'] = test_data['score'].apply(lambda x : float(x))
    test_data['index'] = test_data['index'].apply(lambda x : float(x))
    test_data['probability'] = test_data['probability'].apply(lambda x : float(x))
    test_data['l1'] = test_data['l1'].apply(lambda x : str(x)).apply(lambda x : x[1:]).apply(lambda x : float(x))
    test_data['l2'] = test_data['l2'].apply(lambda x : str(x)).apply(lambda x : x[1:]).apply(lambda x : float(x))
    test_data['l3'] = test_data['l3'].apply(lambda x : str(x)).apply(lambda x : x[1:]).apply(lambda x : float(x))
    test_data['l4'] = test_data['l4'].apply(lambda x : str(x)).apply(lambda x : x[1:]).apply(lambda x : float(x))
    test_data['lgap'] = test_data['lgap'].apply(lambda x : float(x))
    test_data['lrate'] = test_data['lrate'].apply(lambda x : float(x))

    # DataFrame Setting
    X_test = pd.DataFrame(test_data)

    # Predict rate

    ccc = 100 * (np.array(X_test.close.shift(-1)) - np.array(X_test.close)) / np.array(X_test.close)
    ddd = 100 * (model.predict(X_test) - np.array(X_test.close)) / np.array(X_test.close)

    # DataFrame Result
    predict = pd.DataFrame({'오늘 실제 종가' : np.array(X_test.close),
                            '내일 실제 종가' : np.array(X_test.close.shift(-1)),
                            '내일 예측 종가' : model.predict(X_test),
                            '실제 내일 변동(%)' : ccc,
                            '모델 예상 내일 변동(%)' : ddd})
    # Result
    predict = predict[-1:]
    predict.index = [code_2]
    today_df = today_df.append(predict)

현재    1번째 종목 진행중...( 0.05%)


In [3]:
# 가장 높은 수익률 예상 종목
today_df = today_df.sort_values('모델 예상 내일 변동(%)', ascending=False)

# 이상값 제거 (가격 변동폭은 -30 ~ +30)
today_df = today_df[today_df['모델 예상 내일 변동(%)'] < 10][today_df['모델 예상 내일 변동(%)'] > -10]

# 필요한 변수만 추출
today_df = today_df[['오늘 실제 종가', '내일 예측 종가', '모델 예상 내일 변동(%)']]

# 형식 변경
today_df['오늘 실제 종가'] = today_df['오늘 실제 종가'].apply(lambda x : int(x))
today_df['내일 예측 종가'] = today_df['내일 예측 종가'].apply(lambda x : int(x))
today_df['모델 예상 내일 변동(%)'] = today_df['모델 예상 내일 변동(%)'].apply(lambda x : round(x,2))

# 수익률 예상 상위 10개 종목 추천
today_df = today_df[-10:]

# 최종 결과
today_df

Unnamed: 0,오늘 실제 종가,내일 예측 종가,모델 예상 내일 변동(%)
1067,38650,37107,-3.99
1530,64400,61415,-4.63
145,10300,9797,-4.88
725,85700,81421,-4.99
250,40650,38573,-5.11
2210,9690,9172,-5.34
547,15900,15044,-5.38
227,16500,15519,-5.94
2070,936,876,-6.37
2100,7280,6589,-9.48


### 결과쏴서 트래킹

In [6]:
# import requests
# import json

# def send_predicts(predict):
#     payload = json.dumps(predict)
#     print(payload)
#     url = "https://lab.donutz.co/tensors"
#     headers = {'Content-Type': 'application/json; charset=utf-8'}
#     #response = requests.get(url, params=paramDict)
#     response = requests.post(url, data=payload, headers=headers, verify=False)
#     code = response.status_code
#     res = response.text
#     return (code, res)

# predict = [
#     {
#         "model": "Test_PSH_LightGBM",
#         "seed": 1234,
#         "type":"close",
#         "code": "002100",
#         "date": "20-03-13 00:00:00",
#         "prdate": 0,
#         "value": 6589
#     },
#     {
#         "model": "Test_PSH_LightGBM",
#         "seed": 1234,
#         "type":"close",
#         "code": "002070",
#         "date": "20-03-13 00:00:00",
#         "prdate": 0,
#         "value": 876
#     },
#     {
#         "model": "Test_PSH_LightGBM",
#         "seed": 1234,
#         "type":"close",
#         "code": "000227",
#         "date": "20-03-13 00:00:00",
#         "prdate": 0,
#         "value": 15519
#     },
#     {
#         "model": "Test_PSH_LightGBM",
#         "seed": 1234,
#         "type":"close",
#         "code": "000547",
#         "date": "20-03-13 00:00:00",
#         "prdate": 0,
#         "value": 15044
#     },
#     {
#         "model": "Test_PSH_LightGBM",
#         "seed": 1234,
#         "type":"close",
#         "code": "002210",
#         "date": "20-03-13 00:00:00",
#         "prdate": 0,
#         "value": 9172
#     },
#     {
#         "model": "Test_PSH_LightGBM",
#         "seed": 1234,
#         "type":"close",
#         "code": "000250",
#         "date": "20-03-13 00:00:00",
#         "prdate": 0,
#         "value": 38573
#     },
#     {
#         "model": "Test_PSH_LightGBM",
#         "seed": 1234,
#         "type":"close",
#         "code": "000725",
#         "date": "20-03-13 00:00:00",
#         "prdate": 0,
#         "value": 81421
#     },
#     {
#         "model": "Test_PSH_LightGBM",
#         "seed": 1234,
#         "type":"close",
#         "code": "000145",
#         "date": "20-03-13 00:00:00",
#         "prdate": 0,
#         "value": 9797
#     },
#     {
#         "model": "Test_PSH_LightGBM",
#         "seed": 1234,
#         "type":"close",
#         "code": "001530",
#         "date": "20-03-13 00:00:00",
#         "prdate": 0,
#         "value": 61415
#     },
#     {
#         "model": "Test_PSH_LightGBM",
#         "seed": 1234,
#         "type":"close",
#         "code": "001067",
#         "date": "20-03-13 00:00:00",
#         "prdate": 0,
#         "value": 37107
#     }
# ]
# print(send_predicts(predict))

[{"model": "Test_PSH_LightGBM", "seed": 1234, "type": "close", "code": "002100", "date": "20-03-13 00:00:00", "prdate": 0, "value": 6589}, {"model": "Test_PSH_LightGBM", "seed": 1234, "type": "close", "code": "002070", "date": "20-03-13 00:00:00", "prdate": 0, "value": 876}, {"model": "Test_PSH_LightGBM", "seed": 1234, "type": "close", "code": "000227", "date": "20-03-13 00:00:00", "prdate": 0, "value": 15519}, {"model": "Test_PSH_LightGBM", "seed": 1234, "type": "close", "code": "000547", "date": "20-03-13 00:00:00", "prdate": 0, "value": 15044}, {"model": "Test_PSH_LightGBM", "seed": 1234, "type": "close", "code": "002210", "date": "20-03-13 00:00:00", "prdate": 0, "value": 9172}, {"model": "Test_PSH_LightGBM", "seed": 1234, "type": "close", "code": "000250", "date": "20-03-13 00:00:00", "prdate": 0, "value": 38573}, {"model": "Test_PSH_LightGBM", "seed": 1234, "type": "close", "code": "000725", "date": "20-03-13 00:00:00", "prdate": 0, "value": 81421}, {"model": "Test_PSH_LightGBM",