In [76]:
# Silence every Python-level warning for the rest of the session.
# NOTE(review): a blanket 'ignore' filter also hides deprecation notices from the
# libraries used below.
import warnings
warnings.filterwarnings('ignore')

# Restrict TensorFlow's v1-compat logger to ERROR-level messages.
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

In [77]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

In [78]:
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.utils import np_utils

In [79]:
# Column labels feat_0, feat_1, ..., feat_72 (73 in total) for the data file.
names = [f'feat_{idx}' for idx in range(73)]

In [80]:
# Read the data file, labelling its columns with the 73 generated names.
# In the displayed frame the date strings (e.g. 1/1/1998) form the index rather than a
# feat_* column — presumably because each row carries one more field than `names`;
# TODO confirm against the raw file.
# Some cells contain the literal '?' (see the 1/5/1998 row in the output).
df = pd.read_csv('datasets/eighthr.data', names=names)
df

Unnamed: 0,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,...,feat_63,feat_64,feat_65,feat_66,feat_67,feat_68,feat_69,feat_70,feat_71,feat_72
1/1/1998,0.8,1.8,2.4,2.1,2,2.1,1.5,1.7,1.9,2.3,...,0.15,10.67,-1.56,5795,-12.1,17.9,10330,-55,0,0.0
1/2/1998,2.8,3.2,3.3,2.7,3.3,3.2,2.9,2.8,3.1,3.4,...,0.48,8.39,3.84,5805,14.05,29,10275,-55,0,0.0
1/3/1998,2.9,2.8,2.6,2.1,2.2,2.5,2.5,2.7,2.2,2.5,...,0.6,6.94,9.8,5790,17.9,41.3,10235,-40,0,0.0
1/4/1998,4.7,3.8,3.7,3.8,2.9,3.1,2.8,2.5,2.4,3.1,...,0.49,8.73,10.54,5775,31.15,51.7,10195,-40,2.08,0.0
1/5/1998,2.6,2.1,1.6,1.4,0.9,1.5,1.2,1.4,1.3,1.4,...,?,?,?,?,?,?,?,?,0.58,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12/27/2004,0.3,0.4,0.5,0.5,0.2,0.3,0.4,0.4,1.3,2.2,...,0.07,7.93,-4.41,5800,-25.6,21.8,10295,65,0,0.0
12/28/2004,1,1.4,1.1,1.7,1.5,1.7,1.8,1.5,2.1,2.4,...,0.04,5.95,-1.14,5845,-19.4,19.1,10310,15,0,0.0
12/29/2004,0.8,0.8,1.2,0.9,0.4,0.6,0.8,1.1,1.5,1.5,...,0.06,7.8,-0.64,5845,-9.6,35.2,10275,-35,0,0.0
12/30/2004,1.3,0.9,1.5,1.2,1.6,1.8,1.1,1,1.9,2,...,0.25,7.72,-0.89,5845,-19.6,34.2,10245,-30,0.05,0.0


In [81]:
# Every column reports 2534 non-null values with dtype object: the '?' placeholders
# are presumably read as ordinary strings, so they are not counted as missing here.
df.info() # check for missing data

<class 'pandas.core.frame.DataFrame'>
Index: 2534 entries, 1/1/1998 to 12/31/2004
Data columns (total 73 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   feat_0   2534 non-null   object 
 1   feat_1   2534 non-null   object 
 2   feat_2   2534 non-null   object 
 3   feat_3   2534 non-null   object 
 4   feat_4   2534 non-null   object 
 5   feat_5   2534 non-null   object 
 6   feat_6   2534 non-null   object 
 7   feat_7   2534 non-null   object 
 8   feat_8   2534 non-null   object 
 9   feat_9   2534 non-null   object 
 10  feat_10  2534 non-null   object 
 11  feat_11  2534 non-null   object 
 12  feat_12  2534 non-null   object 
 13  feat_13  2534 non-null   object 
 14  feat_14  2534 non-null   object 
 15  feat_15  2534 non-null   object 
 16  feat_16  2534 non-null   object 
 17  feat_17  2534 non-null   object 
 18  feat_18  2534 non-null   object 
 19  feat_19  2534 non-null   object 
 20  feat_20  2534 non-null   object 
 21  feat_2

In [82]:
# Clean up the invalid '?' entries.
# errors='coerce' does not raise on a value that cannot be parsed as a number;
# it replaces that value with NaN.
df = df.apply(lambda column: pd.to_numeric(column, errors='coerce'))
df

Unnamed: 0,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,...,feat_63,feat_64,feat_65,feat_66,feat_67,feat_68,feat_69,feat_70,feat_71,feat_72
1/1/1998,0.8,1.8,2.4,2.1,2.0,2.1,1.5,1.7,1.9,2.3,...,0.15,10.67,-1.56,5795.0,-12.10,17.90,10330.0,-55.0,0.00,0.0
1/2/1998,2.8,3.2,3.3,2.7,3.3,3.2,2.9,2.8,3.1,3.4,...,0.48,8.39,3.84,5805.0,14.05,29.00,10275.0,-55.0,0.00,0.0
1/3/1998,2.9,2.8,2.6,2.1,2.2,2.5,2.5,2.7,2.2,2.5,...,0.60,6.94,9.80,5790.0,17.90,41.30,10235.0,-40.0,0.00,0.0
1/4/1998,4.7,3.8,3.7,3.8,2.9,3.1,2.8,2.5,2.4,3.1,...,0.49,8.73,10.54,5775.0,31.15,51.70,10195.0,-40.0,2.08,0.0
1/5/1998,2.6,2.1,1.6,1.4,0.9,1.5,1.2,1.4,1.3,1.4,...,,,,,,,,,0.58,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12/27/2004,0.3,0.4,0.5,0.5,0.2,0.3,0.4,0.4,1.3,2.2,...,0.07,7.93,-4.41,5800.0,-25.60,21.80,10295.0,65.0,0.00,0.0
12/28/2004,1.0,1.4,1.1,1.7,1.5,1.7,1.8,1.5,2.1,2.4,...,0.04,5.95,-1.14,5845.0,-19.40,19.10,10310.0,15.0,0.00,0.0
12/29/2004,0.8,0.8,1.2,0.9,0.4,0.6,0.8,1.1,1.5,1.5,...,0.06,7.80,-0.64,5845.0,-9.60,35.20,10275.0,-35.0,0.00,0.0
12/30/2004,1.3,0.9,1.5,1.2,1.6,1.8,1.1,1.0,1.9,2.0,...,0.25,7.72,-0.89,5845.0,-19.60,34.20,10245.0,-30.0,0.05,0.0


In [83]:
# Discard every row that contains at least one NaN.
df = df.dropna()
df

Unnamed: 0,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,...,feat_63,feat_64,feat_65,feat_66,feat_67,feat_68,feat_69,feat_70,feat_71,feat_72
1/1/1998,0.8,1.8,2.4,2.1,2.0,2.1,1.5,1.7,1.9,2.3,...,0.15,10.67,-1.56,5795.0,-12.10,17.90,10330.0,-55.0,0.00,0.0
1/2/1998,2.8,3.2,3.3,2.7,3.3,3.2,2.9,2.8,3.1,3.4,...,0.48,8.39,3.84,5805.0,14.05,29.00,10275.0,-55.0,0.00,0.0
1/3/1998,2.9,2.8,2.6,2.1,2.2,2.5,2.5,2.7,2.2,2.5,...,0.60,6.94,9.80,5790.0,17.90,41.30,10235.0,-40.0,0.00,0.0
1/4/1998,4.7,3.8,3.7,3.8,2.9,3.1,2.8,2.5,2.4,3.1,...,0.49,8.73,10.54,5775.0,31.15,51.70,10195.0,-40.0,2.08,0.0
1/7/1998,3.7,3.2,3.8,5.1,6.0,7.0,6.3,6.4,6.3,5.4,...,0.84,6.86,25.60,5695.0,26.75,48.45,10040.0,-80.0,0.18,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12/27/2004,0.3,0.4,0.5,0.5,0.2,0.3,0.4,0.4,1.3,2.2,...,0.07,7.93,-4.41,5800.0,-25.60,21.80,10295.0,65.0,0.00,0.0
12/28/2004,1.0,1.4,1.1,1.7,1.5,1.7,1.8,1.5,2.1,2.4,...,0.04,5.95,-1.14,5845.0,-19.40,19.10,10310.0,15.0,0.00,0.0
12/29/2004,0.8,0.8,1.2,0.9,0.4,0.6,0.8,1.1,1.5,1.5,...,0.06,7.80,-0.64,5845.0,-9.60,35.20,10275.0,-35.0,0.00,0.0
12/30/2004,1.3,0.9,1.5,1.2,1.6,1.8,1.1,1.0,1.9,2.0,...,0.25,7.72,-0.89,5845.0,-19.60,34.20,10245.0,-30.0,0.05,0.0


In [84]:
# Summary statistics of the cleaned frame; the count row shows how many rows remain
# after dropping NaNs (1847 in the output below).
df.describe()

Unnamed: 0,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,...,feat_63,feat_64,feat_65,feat_66,feat_67,feat_68,feat_69,feat_70,feat_71,feat_72
count,1847.0,1847.0,1847.0,1847.0,1847.0,1847.0,1847.0,1847.0,1847.0,1847.0,...,1847.0,1847.0,1847.0,1847.0,1847.0,1847.0,1847.0,1847.0,1847.0,1847.0
mean,1.62902,1.570384,1.536979,1.520087,1.515268,1.533676,1.632431,2.037899,2.525772,2.843151,...,0.300087,9.821191,0.647201,5822.425555,10.680401,37.689334,10165.476448,-0.836492,0.358787,0.069302
std,1.253261,1.243717,1.218822,1.19488,1.187541,1.158722,1.137623,1.154328,1.17236,1.208258,...,0.244579,9.342806,7.352023,75.711087,20.170554,11.007448,52.056467,34.134815,1.262573,0.254035
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.1,...,0.01,-14.92,-25.99,5480.0,-56.7,-10.1,9995.0,-135.0,0.0,0.0
25%,0.6,0.6,0.6,0.6,0.6,0.7,0.8,1.2,1.7,2.0,...,0.09,2.705,-3.99,5775.0,-2.75,33.05,10130.0,-20.0,0.0,0.0
50%,1.3,1.3,1.2,1.3,1.3,1.3,1.4,1.9,2.4,2.8,...,0.22,9.22,0.26,5835.0,14.7,41.35,10160.0,0.0,0.0,0.0
75%,2.4,2.3,2.2,2.2,2.2,2.1,2.2,2.8,3.3,3.6,...,0.47,16.505,4.645,5880.0,27.825,45.15,10195.0,15.0,0.05,0.0
max,6.9,6.9,7.1,6.7,7.2,7.4,7.2,7.5,9.2,8.3,...,1.0,41.36,30.42,5965.0,42.05,59.15,10350.0,140.0,20.65,1.0


In [85]:
# Target labels: the last column, feat_72 (ranges from 0.0 to 1.0 per describe()).
Y = df.loc[:, 'feat_72']
Y

1/1/1998      0.0
1/2/1998      0.0
1/3/1998      0.0
1/4/1998      0.0
1/7/1998      0.0
             ... 
12/27/2004    0.0
12/28/2004    0.0
12/29/2004    0.0
12/30/2004    0.0
12/31/2004    0.0
Name: feat_72, Length: 1847, dtype: float64

In [86]:
# One-hot encode the labels into a two-column float array.
# num_classes is pinned to 2 so the width never depends on which label values happen
# to be present after cleaning; the later reshape(-1, 10, 2) and the Dense(2) output
# layer both assume exactly two columns.
Y = np_utils.to_categorical(Y, num_classes=2)
Y

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       ...,
       [1., 0.],
       [1., 0.],
       [1., 0.]], dtype=float32)

In [87]:
# Remove the label column so df holds only the 72 input features.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this cell
# re-runnable: executing it a second time is a no-op rather than a KeyError.
df = df.drop(columns=['feat_72'], errors='ignore')

In [88]:
# Training features: every row except the final 100, as an independent float64 array.
X_train = df.iloc[:-100].to_numpy(dtype=np.float64, copy=True)

In [89]:
# Test features: the final 100 rows, as an independent float64 array.
X_test = df.iloc[-100:].to_numpy(dtype=np.float64, copy=True)

In [90]:
# Split the one-hot labels at the same position as the features:
# everything but the last 100 rows for training, the last 100 rows for testing.
y_train, y_test = Y[:-100], Y[-100:]

In [91]:
# Shapes of the four arrays right after the 100-row hold-out split.
tuple(arr.shape for arr in (X_train, y_train, X_test, y_test))

((1747, 72), (1747, 2), (100, 72), (100, 2))

In [92]:
# Keep only the last 1700 training rows (i.e. discard the first 47 of the 1747
# available) so the set divides evenly into windows of 10.
# Indexing from the end keeps the cell re-runnable: slicing with [47:] would throw
# away another 47 rows on every execution.
X_train = X_train[-1700:]

In [93]:
# Keep only the last 1700 training labels (i.e. discard the first 47 of 1747) so they
# stay aligned with the trimmed training features.
# Indexing from the end keeps the cell re-runnable: slicing with [47:] would throw
# away another 47 labels on every execution.
y_train = y_train[-1700:]

In [94]:
# Shapes of the four arrays after trimming the training set.
tuple(arr.shape for arr in (X_train, y_train, X_test, y_test))

((1700, 72), (1700, 2), (100, 72), (100, 2))

In [95]:
# Make the training data 3-D by grouping consecutive rows into non-overlapping
# windows of 10: (samples, 10 time steps, 72 features) for the inputs and
# (samples, 10 time steps, 2 classes) for the labels.
X_train = np.reshape(X_train, (-1, 10, 72))
y_train = np.reshape(y_train, (-1, 10, 2))

In [96]:
# Shapes of the four arrays after windowing the training set (test set still 2-D).
tuple(arr.shape for arr in (X_train, y_train, X_test, y_test))

((170, 10, 72), (170, 10, 2), (100, 72), (100, 2))

In [97]:
# Two stacked LSTM layers followed by a softmax layer.
# return_sequences=True makes each LSTM emit an output at every time step
# (many-to-many), so the final Dense layer yields a 2-class distribution for each of
# the 10 steps in a window — see the (None, 10, 2) output shape in the summary.
model = Sequential([
    LSTM(128, input_shape=(10, 72), return_sequences=True),
    LSTM(256, return_sequences=True),
    Dense(2, activation='softmax'),
])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 10, 128)           102912    
_________________________________________________________________
lstm_4 (LSTM)                (None, 10, 256)           394240    
_________________________________________________________________
dense_2 (Dense)              (None, 10, 2)             514       
Total params: 497,666
Trainable params: 497,666
Non-trainable params: 0
_________________________________________________________________


In [98]:
# Configure training: Adam optimizer, categorical cross-entropy on the one-hot
# labels, and accuracy as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [99]:
%%time
model.fit(X_train, y_train, epochs=10, batch_size=1, validation_split=0.1)

Train on 153 samples, validate on 17 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Wall time: 24.1 s


<keras.callbacks.History at 0x2906ab02288>

In [100]:
# Window the 100 test rows into sequences of 10 time steps x 72 features,
# matching the layout used for the training inputs.
X_test = np.reshape(X_test, (-1, 10, 72))

In [101]:
# Window the 100 test labels into sequences of 10 time steps x 2 classes,
# matching the layout used for the training labels.
y_test = np.reshape(y_test, (-1, 10, 2))

In [102]:
# Evaluate on the held-out test windows; with the compile settings used above the
# result is [loss, accuracy].
score = model.evaluate(X_test, y_test)
score
# [loss, accuracy] = [0.18845698237419128, 0.9800000190734863]
# NOTE(review): feat_72 has a mean of roughly 0.07 over the cleaned data, so always
# predicting class 0 would already give high accuracy — interpret 0.98 with care.



[0.18845698237419128, 0.9800000190734863]

In [104]:
## Samsung Electronics 주식 예측하기 