### 00. 데이터 불러오기

In [1]:
import pandas as pd
dataset = pd.read_csv('./total_price.csv')

In [2]:
del dataset['Unnamed: 0']

In [3]:
dataset['l1'] = dataset['l1'].apply(lambda x : str(x)).apply(lambda x : x[1:]).apply(lambda x : float(x))
dataset['l2'] = dataset['l2'].apply(lambda x : str(x)).apply(lambda x : x[1:]).apply(lambda x : float(x))
dataset['l3'] = dataset['l3'].apply(lambda x : str(x)).apply(lambda x : x[1:]).apply(lambda x : float(x))
dataset['l4'] = dataset['l4'].apply(lambda x : str(x)).apply(lambda x : x[1:]).apply(lambda x : float(x))

### 01. 예측하고 싶은 종목

In [4]:
code = 47310

### 02. 예측하고 싶은 종목에 대하여 모델 생성 및 예측

In [5]:
import warnings
warnings.filterwarnings(action='ignore') 

In [6]:
data = dataset[dataset['code'] == code]
data['target'] = data['close'].shift(-1)
data = data.dropna(axis=0)
X = data[data.columns[1:-2]]
y = data[data.columns[-1]]

In [7]:
from sklearn.model_selection import train_test_split
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.10, random_state=1234)

In [8]:
import lightgbm as lgb
train_ds = lgb.Dataset(X_train, label = y_train) 
valid_ds = lgb.Dataset(X_valid, label = y_valid) 

In [9]:
params = {'learning_rate': 0.01, 
          'max_depth': 5, 
          'boosting': 'gbdt', 
          'objective': 'regression', 
          'metric': 'mse', 
          'is_training_metric': True, 
          'num_leaves': 144, 
          'feature_fraction': 0.9, 
          'bagging_fraction': 0.7, 
          'bagging_freq': 5, 
          'seed':1234}

In [10]:
model = lgb.train(params, train_ds, 1000, valid_ds, verbose_eval=100, early_stopping_rounds=100)

Training until validation scores don't improve for 100 rounds
[100]	valid_0's l2: 771457
[200]	valid_0's l2: 148528
[300]	valid_0's l2: 65120.1
[400]	valid_0's l2: 54412.2
[500]	valid_0's l2: 54012.5
[600]	valid_0's l2: 52740.1
[700]	valid_0's l2: 53296.8
Early stopping, best iteration is:
[600]	valid_0's l2: 52740.1


In [11]:
predict_train = model.predict(X_train)
predict_test = model.predict(X_valid)

In [12]:
from sklearn.metrics import mean_squared_error, r2_score
mse = mean_squared_error(y_valid, predict_test)
r2 = r2_score(y_valid, predict_test)

In [13]:
print('Mean squared error: ', mse)
print('Root Mean squared error: ', mse**.5)
print('R2 score: ', r2)

Mean squared error:  52740.06164579561
Root Mean squared error:  229.65204472374205
R2 score:  0.9902407331715533


In [14]:
import numpy as np
aaa = 100 * (np.array(y_valid) - np.array(X_valid.close)) / np.array(X_valid.close)
bbb = 100 * (model.predict(X_valid) - np.array(X_valid.close)) / np.array(X_valid.close)

In [15]:
predict = pd.DataFrame({'오늘 실제 종가' : np.array(X_valid.close),
                        '내일 실제 종가' : np.array(y_valid), 
                        '내일 예측 종가' : model.predict(X_valid),
                        '실제 내일 변동(%)' : aaa,
                        '모델 예상 내일 변동(%)' : bbb})
round(predict, 2)

Unnamed: 0,오늘 실제 종가,내일 실제 종가,내일 예측 종가,실제 내일 변동(%),모델 예상 내일 변동(%)
0,6000,5640.0,5903.73,-6.00,-1.60
1,5230,5450.0,5227.57,4.21,-0.05
2,3985,3965.0,4008.38,-0.50,0.59
3,5840,5730.0,5905.05,-1.88,1.11
4,4770,4780.0,4757.06,0.21,-0.27
...,...,...,...,...,...
86,5550,5450.0,5535.27,-1.80,-0.27
87,7480,7460.0,7634.27,-0.27,2.06
88,5740,5770.0,5762.98,0.52,0.40
89,4405,4275.0,4477.44,-2.95,1.64


### 03. Test  데이터 예시

In [16]:
# https://lab.donutz.co/krx/products/047310?sdate=200226&edate=200305

In [27]:
ttt = [{"date":"2020-02-03","open":8680,"high":9120,"low":8570,"close":9100,"trading_volume":1123451,"score":7510,"index":19,"probability":79.7237569060774,"l1":9385,"l2":9110,"l3":8835,"l4":8560,"lgap":825,"lrate":9},{"date":"2020-02-04","open":9250,"high":9390,"low":9190,"close":9310,"trading_volume":621369,"score":6500,"index":74,"probability":79.9817418294687,"l1":9450,"l2":9350,"l3":9250,"l4":9150,"lgap":300,"lrate":3},{"date":"2020-02-05","open":9550,"high":9650,"low":9370,"close":9380,"trading_volume":846314,"score":6790,"index":7,"probability":78,"l1":9655,"l2":9515,"l3":9375,"l4":9235,"lgap":420,"lrate":4},{"date":"2020-02-06","open":9450,"high":9560,"low":9440,"close":9530,"trading_volume":526912,"score":6500,"index":74,"probability":79.9817418294687,"l1":9605,"l2":9545,"l3":9485,"l4":9425,"lgap":180,"lrate":2},{"date":"2020-02-07","open":9530,"high":9540,"low":9400,"close":9500,"trading_volume":376551,"score":6500,"index":74,"probability":79.9817418294687,"l1":9590,"l2":9520,"l3":9450,"l4":9380,"lgap":210,"lrate":2},{"date":"2020-02-10","open":9430,"high":9540,"low":9340,"close":9460,"trading_volume":456679,"score":7.35064935064935,"index":9.5,"probability":78.9041095890411,"l1":9600,"l2":9500,"l3":9400,"l4":9300,"lgap":300,"lrate":3},{"date":"2020-02-11","open":9570,"high":10050,"low":9550,"close":10000,"trading_volume":1489244,"score":10.8571428571429,"index":5.83333333333333,"probability":77.1451229183188,"l1":10275,"l2":10025,"l3":9775,"l4":9525,"lgap":750,"lrate":8},{"date":"2020-02-12","open":10300,"high":10350,"low":9990,"close":10150,"trading_volume":1096631,"score":15.5882352941176,"index":4.33333333333333,"probability":74.9438202247191,"l1":10430,"l2":10250,"l3":10070,"l4":9890,"lgap":540,"lrate":5},{"date":"2020-02-13","open":10200,"high":10200,"low":9910,"close":9980,"trading_volume":822296,"score":8.82894736842105,"index":0.777777777777778,"probability":17.6923076923077,"l1":10235,"l2":10090,"l3":9945,"l4":9800,"lgap":435,"lrate":4},{"date":"2020-02-14","open":10100,"high":10400,"low":10000,"close":10300,"trading_volume":1327010,"score":10.3670886075949,"index":5.66666666666667,"probability":76.9798657718121,"l1":10550,"l2":10350,"l3":10150,"l4":9950,"lgap":600,"lrate":6},{"date":"2020-02-17","open":10400,"high":10650,"low":10300,"close":10450,"trading_volume":1364588,"score":10.4025974025974,"index":5.83333333333333,"probability":77.1451229183188,"l1":10725,"l2":10550,"l3":10375,"l4":10200,"lgap":525,"lrate":5},{"date":"2020-02-18","open":10500,"high":10500,"low":10050,"close":10150,"trading_volume":747270,"score":10.6623376623377,"index":5.83333333333333,"probability":77.1451229183188,"l1":10550,"l2":10325,"l3":10100,"l4":9875,"lgap":675,"lrate":7},{"date":"2020-02-19","open":10250,"high":10300,"low":9820,"close":10100,"trading_volume":966653,"score":15.0909090909091,"index":3.4,"probability":72.0382165605096,"l1":10440,"l2":10200,"l3":9960,"l4":9720,"lgap":720,"lrate":7},{"date":"2020-02-20","open":10200,"high":10400,"low":9960,"close":10050,"trading_volume":957884,"score":13.265625,"index":4.625,"probability":75.5338450802512,"l1":10445,"l2":10225,"l3":10005,"l4":9785,"lgap":660,"lrate":7},{"date":"2020-02-21","open":9840,"high":10100,"low":9810,"close":9830,"trading_volume":704262,"score":6150,"index":67,"probability":79.9777282850779,"l1":10110,"l2":9965,"l3":9820,"l4":9675,"lgap":435,"lrate":4},{"date":"2020-02-24","open":9490,"high":9610,"low":9410,"close":9510,"trading_volume":368950,"score":11.92,"index":10.5,"probability":79.1011235955056,"l1":9660,"l2":9560,"l3":9460,"l4":9360,"lgap":300,"lrate":3},{"date":"2020-02-25","open":9240,"high":9640,"low":9150,"close":9640,"trading_volume":721133,"score":11.92,"index":10.5,"probability":79.1011235955056,"l1":9885,"l2":9640,"l3":9395,"l4":9150,"lgap":735,"lrate":8},{"date":"2020-02-26","open":9410,"high":9640,"low":9350,"close":9510,"trading_volume":572010,"score":6080,"index":65,"probability":79.9763369616659,"l1":9720,"l2":9575,"l3":9430,"l4":9285,"lgap":435,"lrate":5},{"date":"2020-02-27","open":9520,"high":9760,"low":8880,"close":8990,"trading_volume":1131178,"score":7150,"index":15,"probability":79.5575221238938,"l1":9815,"l2":9375,"l3":8935,"l4":8495,"lgap":1320,"lrate":15},{"date":"2020-02-28","open":8680,"high":8850,"low":8470,"close":8520,"trading_volume":1001769,"score":6430,"index":73,"probability":79.9812382739212,"l1":8875,"l2":8685,"l3":8495,"l4":8305,"lgap":570,"lrate":7},{"date":"2020-03-02","open":8520,"high":9050,"low":8420,"close":9050,"trading_volume":756360,"score":8420,"index":15,"probability":79.5575221238938,"l1":9365,"l2":9050,"l3":8735,"l4":8420,"lgap":945,"lrate":10},{"date":"2020-03-03","open":9400,"high":9410,"low":8960,"close":9070,"trading_volume":668249,"score":8420,"index":15,"probability":79.5575221238938,"l1":9465,"l2":9240,"l3":9015,"l4":8790,"lgap":675,"lrate":7},{"date":"2020-03-04","open":8950,"high":9290,"low":8930,"close":9240,"trading_volume":503286,"score":8620,"index":15,"probability":79.5575221238938,"l1":9445,"l2":9265,"l3":9085,"l4":8905,"lgap":540,"lrate":6}]

In [28]:
test = pd.DataFrame(ttt)

In [29]:
test[test.columns[1:]]

Unnamed: 0,open,high,low,close,trading_volume,score,index,probability,l1,l2,l3,l4,lgap,lrate
0,8680,9120,8570,9100,1123451,7510.0,19.0,79.723757,9385,9110,8835,8560,825,9
1,9250,9390,9190,9310,621369,6500.0,74.0,79.981742,9450,9350,9250,9150,300,3
2,9550,9650,9370,9380,846314,6790.0,7.0,78.0,9655,9515,9375,9235,420,4
3,9450,9560,9440,9530,526912,6500.0,74.0,79.981742,9605,9545,9485,9425,180,2
4,9530,9540,9400,9500,376551,6500.0,74.0,79.981742,9590,9520,9450,9380,210,2
5,9430,9540,9340,9460,456679,7.350649,9.5,78.90411,9600,9500,9400,9300,300,3
6,9570,10050,9550,10000,1489244,10.857143,5.833333,77.145123,10275,10025,9775,9525,750,8
7,10300,10350,9990,10150,1096631,15.588235,4.333333,74.94382,10430,10250,10070,9890,540,5
8,10200,10200,9910,9980,822296,8.828947,0.777778,17.692308,10235,10090,9945,9800,435,4
9,10100,10400,10000,10300,1327010,10.367089,5.666667,76.979866,10550,10350,10150,9950,600,6


In [30]:
ccc = 100 * (np.array(test[test.columns[1:]].close.shift(-1)) - np.array(test[test.columns[1:]].close))\
                      /np.array(test[test.columns[1:]].close)
ddd = 100 * (model.predict(test[test.columns[1:]]) - np.array(test[test.columns[1:]].close))\
                      /np.array(test[test.columns[1:]].close)

In [31]:
predict = pd.DataFrame({'오늘 실제 종가' : np.array(test[test.columns[1:]].close),
                        '내일 실제 종가' : np.array(test[test.columns[1:]].close.shift(-1)),
                        '내일 예측 종가' : model.predict(test[test.columns[1:]]),
                        '실제 내일 변동(%)' : ccc,
                        '모델 예상 내일 변동(%)' : ddd})
round(predict, 2)

Unnamed: 0,오늘 실제 종가,내일 실제 종가,내일 예측 종가,실제 내일 변동(%),모델 예상 내일 변동(%)
0,9100,9310.0,8979.18,2.31,-1.33
1,9310,9380.0,9237.27,0.75,-0.78
2,9380,9530.0,9747.42,1.6,3.92
3,9530,9500.0,9573.05,-0.31,0.45
4,9500,9460.0,9562.67,-0.42,0.66
5,9460,10000.0,9631.66,5.71,1.81
6,10000,10150.0,10090.94,1.5,0.91
7,10150,9980.0,10041.04,-1.67,-1.07
8,9980,10300.0,10010.1,3.21,0.3
9,10300,10450.0,10357.07,1.46,0.55
