# NextPrice

## 종목코드

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('./database/stock_code.csv', dtype={'종목코드':str})

In [3]:
df.head(1)

Unnamed: 0,회사명,종목코드,업종,주요제품,상장일,결산월,대표자명,홈페이지,지역
0,DL,210,기타 금융업,지주회사,1976-02-02,12월,전병욱,http://www.dlholdings.co.kr,서울특별시


In [4]:
firm_name = 'HLB'

In [5]:
firm_code = df[df['회사명'] == firm_name]['종목코드'].to_string(index=False)

In [6]:
print(firm_code)

028300


In [7]:
import FinanceDataReader as fdr

In [8]:
df = fdr.DataReader(symbol=firm_code)
df.tail(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-01-19,30600,31500,30200,31200,326462,0.014634
2023-01-20,31000,31400,30900,31050,209051,-0.004808
2023-01-25,31450,31850,31000,31150,274455,0.003221


In [9]:
def moving_average(
    df,
    windows = [5,20,60,120],
    change=False
):
    """
    
    This is a moving average of price.
    
    Args:
      windows: list. 
      change: float. True tries changing the MA of price. (Close - MA) / Close.
      
    """
    
    for window in windows:
        MA = 'MA' + str(window)
        df[MA] = df['Close'].rolling(window=window, min_periods=1).mean()
        if change:
            df[MA] = (df['Close'] - df[MA]) / df['Close']
        else:
            pass
        
    return df

In [10]:
df = moving_average(df, change=True)
df.tail(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Change,MA5,MA20,MA60,MA120
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-01-19,30600,31500,30200,31200,326462,0.014634,-0.008654,0.031721,-0.046228,-0.186588
2023-01-20,31000,31400,30900,31050,209051,-0.004808,-0.007729,0.026915,-0.04733,-0.190394
2023-01-25,31450,31850,31000,31150,274455,0.003221,0.001605,0.027207,-0.040108,-0.184352


In [11]:
def changing_price(
    df
):
    """
    
    (Open - Close) / Close
    (High - Close) / Close
    (Low - Close) / Close
    
    (Volume - Volume) / Volume
    
    """
    
    df['Open'] = (df['Open'] - df['Close']) / df['Close']
    df['High'] = (df['High'] - df['Close']) / df['Close']
    df['Low'] = (df['Low'] - df['Close']) / df['Close']

    return df

In [12]:
df = changing_price(df)
df.tail(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Change,MA5,MA20,MA60,MA120
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-01-19,-0.019231,0.009615,-0.032051,31200,326462,0.014634,-0.008654,0.031721,-0.046228,-0.186588
2023-01-20,-0.00161,0.011272,-0.004831,31050,209051,-0.004808,-0.007729,0.026915,-0.04733,-0.190394
2023-01-25,0.009631,0.022472,-0.004815,31150,274455,0.003221,0.001605,0.027207,-0.040108,-0.184352


In [13]:
def y_label(df):
    
    """
    
    y = 1 or 0
    
    """
    
    df['y'] = df['Change'] > 0
    df['y'] = df['y'].astype(int) 
    
    return df

In [14]:
df = y_label(df)
df.tail(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Change,MA5,MA20,MA60,MA120,y
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-01-19,-0.019231,0.009615,-0.032051,31200,326462,0.014634,-0.008654,0.031721,-0.046228,-0.186588,1
2023-01-20,-0.00161,0.011272,-0.004831,31050,209051,-0.004808,-0.007729,0.026915,-0.04733,-0.190394,0
2023-01-25,0.009631,0.022472,-0.004815,31150,274455,0.003221,0.001605,0.027207,-0.040108,-0.184352,1


In [15]:
def slice_year(df,year):
    
    """
    
    after year
    
    """
    
    year = str(year)
    
    df = df[year:]
    
    return df

In [16]:
df = slice_year(df,'2015')
df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Change,MA5,MA20,MA60,MA120,y
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-01-17,0.025518,0.036683,-0.007974,31350,466629,-0.023364,-0.011164,0.039738,-0.048339,-0.184804,0
2023-01-18,0.019512,0.022764,-0.001626,30750,336558,-0.019139,-0.026667,0.019954,-0.064953,-0.205934,0
2023-01-19,-0.019231,0.009615,-0.032051,31200,326462,0.014634,-0.008654,0.031721,-0.046228,-0.186588,1
2023-01-20,-0.00161,0.011272,-0.004831,31050,209051,-0.004808,-0.007729,0.026915,-0.04733,-0.190394,0
2023-01-25,0.009631,0.022472,-0.004815,31150,274455,0.003221,0.001605,0.027207,-0.040108,-0.184352,1


In [17]:
def split_x_y(df):
    
    """
    
    Prepare X and y
    
    """
    
    X = df[['MA5', 'MA20', 'MA60', 'MA120', 'Volume', 'Change', 'Open', 'High', 'Low']]
    y = df['y']
    
    X = X[:-1]
    y = y[1:]
    
    return X, y

In [18]:
X, y = split_x_y(df)

In [19]:
X.tail(3)

Unnamed: 0_level_0,MA5,MA20,MA60,MA120,Volume,Change,Open,High,Low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-01-18,-0.026667,0.019954,-0.064953,-0.205934,336558,-0.019139,0.019512,0.022764,-0.001626
2023-01-19,-0.008654,0.031721,-0.046228,-0.186588,326462,0.014634,-0.019231,0.009615,-0.032051
2023-01-20,-0.007729,0.026915,-0.04733,-0.190394,209051,-0.004808,-0.00161,0.011272,-0.004831


In [20]:
y.tail(3)

Date
2023-01-19    1
2023-01-20    0
2023-01-25    1
Name: y, dtype: int32

In [21]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

## Scaler

In [22]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## PCA

In [23]:
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
pca.fit(X_train)
X_train = pca.transform(X_train)
X_test = pca.transform(X_test)

## Model

In [24]:
from keras.optimizers import Adagrad
from keras.models import Sequential
from keras.layers import Dropout
from keras.layers import Dense

In [25]:
model = Sequential()
model.add(Dense(12, input_dim=2, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(12, activation='relu'))
model.add(Dropout(0.1))
model.add(Dense(1, activation='sigmoid'))
optimizer = Adagrad(learning_rate=0.2)

In [26]:
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 12)                36        
                                                                 
 dropout (Dropout)           (None, 12)                0         
                                                                 
 dense_1 (Dense)             (None, 12)                156       
                                                                 
 dropout_1 (Dropout)         (None, 12)                0         
                                                                 
 dense_2 (Dense)             (None, 1)                 13        
                                                                 
Total params: 205
Trainable params: 205
Non-trainable params: 0
_________________________________________________________________


In [27]:
model.fit(X_train, y_train, epochs=40, batch_size=20)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x1a545336dc0>

In [28]:
_, accuracy = model.evaluate(X_test, y_test)
print(accuracy)

0.5584677457809448


In [29]:
y_pred = model.predict(X_test)



In [30]:
from sklearn import metrics
import numpy as np

In [31]:
confusion_matrix = metrics.confusion_matrix(y_test, np.rint(y_pred))
confusion_matrix

array([[247,  26],
       [193,  30]], dtype=int64)

## Save

In [32]:
import joblib
joblib.dump(scaler, './database/nextprice_scaler.pkl')

['./database/nextprice_scaler.pkl']

In [33]:
import pickle
pickle.dump(pca, open('./database/nextprice_pca.pkl','wb'))

In [34]:
from tensorflow import keras
model.save('./database/nextprice_model')

INFO:tensorflow:Assets written to: ./database/nextprice_model\assets


## Load

In [35]:
import joblib
scaler = joblib.load('./database/nextprice_scaler.pkl')

In [36]:
import pickle
pca = pickle.load(open('./database/nextprice_pca.pkl','rb'))

In [37]:
from tensorflow import keras
model = keras.models.load_model('./database/nextprice_model')

## Prediction

In [38]:
print(firm_code)

028300


In [39]:
df = fdr.DataReader(symbol=firm_code)
df.tail(1)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-01-25,31450,31850,31000,31150,274455,0.003221


In [40]:
df = moving_average(df, change=True)
df.tail(1)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Change,MA5,MA20,MA60,MA120
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-01-25,31450,31850,31000,31150,274455,0.003221,0.001605,0.027207,-0.040108,-0.184352


In [41]:
df = changing_price(df)
df.tail(1)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Change,MA5,MA20,MA60,MA120
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2023-01-25,0.009631,0.022472,-0.004815,31150,274455,0.003221,0.001605,0.027207,-0.040108,-0.184352


In [42]:
df = df[['MA5', 'MA20', 'MA60', 'MA120', 'Volume', 'Change', 'Open', 'High', 'Low']]
df.tail(1)

Unnamed: 0_level_0,MA5,MA20,MA60,MA120,Volume,Change,Open,High,Low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-01-25,0.001605,0.027207,-0.040108,-0.184352,274455,0.003221,0.009631,0.022472,-0.004815


In [43]:
df = df[-1:]
df

Unnamed: 0_level_0,MA5,MA20,MA60,MA120,Volume,Change,Open,High,Low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2023-01-25,0.001605,0.027207,-0.040108,-0.184352,274455,0.003221,0.009631,0.022472,-0.004815


In [44]:
df = scaler.transform(df)
df

array([[ 0.03253001,  0.21189946, -0.13651748, -0.57171108, -0.61342763,
         0.02770151,  0.17620208, -0.23751169,  0.61068636]])

In [45]:
df = pca.transform(df)
df

array([[-0.35860043,  0.30462101]])

In [46]:
prediction = model.predict(df)
print(prediction)

[[0.41868126]]
