In [1]:
!pip install torch torchvision
!pip install -U finance-datareader


Collecting finance-datareader
  Downloading https://files.pythonhosted.org/packages/ed/10/1f819839aae5d08e6cc7b29425d278c10104a18e0155282aa2ce552e3fa2/finance_datareader-0.9.10-py3-none-any.whl
Collecting requests-file
  Downloading https://files.pythonhosted.org/packages/77/86/cdb5e8eaed90796aa83a6d9f75cfbd37af553c47a291cd47bc410ef9bdb2/requests_file-1.5.1-py2.py3-none-any.whl
Installing collected packages: requests-file, finance-datareader
Successfully installed finance-datareader-0.9.10 requests-file-1.5.1


In [2]:
import pandas as pd
import pandas_datareader as pdr
import FinanceDataReader as fdr
import datetime
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import numpy as np
import argparse
import time
from copy import deepcopy # Add Deepcopy for args
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import MinMaxScaler

In [3]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [60]:
x_frames = 7
start = datetime.datetime.now() - datetime.timedelta(days=(x_frames*3))
end = datetime.date.today()
df_du = pdr.DataReader('POILDUBUSDM', 'fred', start=datetime.datetime.now() - datetime.timedelta(days=(100)), end=end)#석유
df_gold = pdr.DataReader('GOLDAMGBD228NLBM', 'fred', start=start, end=end)#금값
df_kospi = fdr.DataReader('KS11', start, end)#코스피
df_dollar = fdr.DataReader('USD/KRW', start, end)#원달러환율

df_kospi['gold'] = df_gold
df_kospi['dollar'] = df_dollar['Close']
df_kospi['oil'] = df_du

#gold 채우기
df_kospi = df_kospi.fillna(value=-1)
for i,val in enumerate(df_kospi['gold']):
    if val == -1:
        df_kospi['gold'][i] = df_kospi['gold'][i-1]
df_kospi = df_kospi.tail(x_frames)

#oil 채우기
du = df_du.values[-1]
ddu = [du for i in range(x_frames)]

In [61]:
df_kospi

Unnamed: 0_level_0,Close,Open,High,Low,Volume,Change,gold,dollar,oil
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-09-24,2272.7,2295.62,2309.74,2268.88,714880000.0,-0.0259,1850.75,1169.1,-1.0
2020-09-25,2278.79,2295.19,2296.36,2267.63,697960000.0,0.0027,1870.05,1174.32,-1.0
2020-09-28,2308.08,2301.16,2314.81,2291.32,605980000.0,0.0129,1850.95,1166.85,-1.0
2020-09-29,2327.89,2331.51,2339.83,2320.04,646220000.0,0.0086,1882.4,1168.73,-1.0
2020-10-05,2358.0,2330.55,2364.73,2327.83,763620000.0,0.0129,1899.65,1157.12,-1.0
2020-10-06,2365.9,2369.17,2378.28,2355.39,861750000.0,0.0034,1912.5,1164.37,-1.0
2020-10-07,2386.94,2350.82,2387.45,2347.82,722840.0,0.0089,1912.5,1157.84,-1.0


In [62]:
df_du

Unnamed: 0_level_0,POILDUBUSDM
DATE,Unnamed: 1_level_1
2020-07-01,42.641304
2020-08-01,43.705238


In [38]:
class predict_Dataset(Dataset):
    
    def __init__(self, symbol, x_frames,gold,du,df_kospi,df_dollar):
        
        self.symbol = symbol
        self.x_frames = x_frames
        
        self.start = datetime.datetime.now() - datetime.timedelta(days=(x_frames*2))
        self.end = datetime.date.today()

        self.scaler = MinMaxScaler()
        self.data = fdr.DataReader(self.symbol, self.start, self.end)#주식 6가지 특징, 고,시,저,종,거,변
        
        if len(self.data.index) == 0:
            print(self.symbol,' is zero')
            return None
        self.data['dollar'] = df_dollar[['Close']]
        self.data['kospi'] = df_kospi[['Close']]
        self.data['oil'] = pd.DataFrame(data=ddu, index=self.data.index, columns=['oil'])
        self.data['gold'] = pd.DataFrame(data=gold, index=df_gold.index, columns=['gold'])
        self.data = pd.DataFrame(self.scaler.fit_transform(self.data))
        self.data = self.data.tail(x_frames)
        print(self.data)
        
    def __len__(self):
        if len(self.data.index) == 0:
            return 0
        return len(self.data) - self.x_frames + 1
    
    def __getitem__(self, idx):
        idx += self.x_frames
        data = self.data.iloc[idx-self.x_frames:idx]
        data = data.values
        X = data[:self.x_frames]
        
        return X

In [17]:
class GRU(nn.Module):
    
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, batch_size, dropout, use_bn):
        super(GRU, self).__init__()
        self.input_dim = input_dim 
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.num_layers = num_layers

        self.batch_size = batch_size
        self.dropout = dropout
        self.use_bn = use_bn 
        self.gru = nn.GRU(self.input_dim, self.hidden_dim*2, self.num_layers)
        self.gru2 = nn.GRU(self.hidden_dim*2, self.hidden_dim, self.num_layers)
        self.regressor = self.make_regressor()

    def make_regressor(self):
        layers = []
        if self.use_bn:
            layers.append(nn.BatchNorm1d(self.hidden_dim))
        layers.append(nn.Dropout(self.dropout))
        
        layers.append(nn.Linear(self.hidden_dim, self.hidden_dim//2))
        layers.append(nn.ReLU())
        layers.append(nn.Linear(self.hidden_dim//2, self.output_dim))
        regressor = nn.Sequential(*layers)
        return regressor
    
    def forward(self, x):
        x, _ = self.gru(x)
        x, _ = self.gru2(x)
        y_pred = self.regressor(x[-1].view(self.batch_size, -1))
        return y_pred

In [18]:
#주식정보들
Symbols = []
with open('/content/gdrive/My Drive/Symbols.txt', 'r') as f:
    data = f.readline()
    Symbols = data.split(',')
print(len(Symbols))

795


In [19]:
batch_size = 1
input_dim = 10
hidden_dim = 64
num_layers = 2
input_frame = 7
output_frame = 7
dropout = 1.0
use_bn = True

model = GRU(input_dim,hidden_dim,output_frame,num_layers,batch_size,dropout,use_bn)
modelPath = '/content/gdrive/My Drive/GRUmodel/GRU_Best_model.pt'
model.load_state_dict(torch.load(modelPath))
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model.to(device)

GRU(
  (gru): GRU(10, 128, num_layers=2)
  (gru2): GRU(128, 64, num_layers=2)
  (regressor): Sequential(
    (0): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (1): Dropout(p=1.0, inplace=False)
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
    (4): Linear(in_features=32, out_features=7, bias=True)
  )
)

In [28]:
gold

[1850.75, 1870.05, 1850.95, 1882.4, 1883.4, 1895.55, 1906.4, 1899.65, 1912.5]

In [40]:
df_gold

Unnamed: 0_level_0,GOLDAMGBD228NLBM
DATE,Unnamed: 1_level_1
2020-09-24,1850.75
2020-09-25,1870.05
2020-09-28,1850.95
2020-09-29,1882.4
2020-09-30,1883.4
2020-10-01,1895.55
2020-10-02,1906.4
2020-10-05,1899.65
2020-10-06,1912.5


In [27]:
df_kospi

Unnamed: 0_level_0,Close,Open,High,Low,Volume,Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-09-23,2333.24,2352.56,2354.46,2287.44,735040000.0,0.0003
2020-09-24,2272.7,2295.62,2309.74,2268.88,714880000.0,-0.0259
2020-09-25,2278.79,2295.19,2296.36,2267.63,697960000.0,0.0027
2020-09-28,2308.08,2301.16,2314.81,2291.32,605980000.0,0.0129
2020-09-29,2327.89,2331.51,2339.83,2320.04,646220000.0,0.0086
2020-10-05,2358.0,2330.55,2364.73,2327.83,763620000.0,0.0129
2020-10-06,2365.9,2369.17,2378.28,2355.39,861750000.0,0.0034
2020-10-07,2386.94,2350.82,2387.45,2347.82,722840.0,0.0089


In [39]:
predicts = {}
i = 0
for sb in Symbols:
    i += 1
    if i % 100 == 0:
        print(i)
    pset = predict_Dataset(sb,input_frame,gold,du,df_kospi,df_dollar)

    break
    if len(pset) == 0:
        continue
    model.eval()
    loader = DataLoader(pset,batch_size,shuffle=False)
    for X in loader:
        X = X.transpose(0, 1).float().to(device)
        y_pred = model(X)
        y_pred = y_pred.view(-1)
        predicts[sb] = y_pred[-1]


          0         1         2    3  ...         6         7    8         9
0  0.250000  0.250000  0.090909  0.0  ...  0.696512  0.000000  0.0  0.000000
1  0.000000  0.000000  0.000000  0.1  ...  1.000000  0.053309  0.0  0.312551
2  0.166667  0.666667  0.454545  1.0  ...  0.565698  0.309699  0.0  0.003239
3  0.833333  0.916667  1.000000  0.9  ...  0.675000  0.483106  0.0  0.512551
4  0.666667  0.666667  0.909091  1.0  ...  0.000000  0.746674  0.0  0.791903
5  1.000000  1.000000  0.909091  1.0  ...  0.421512  0.815826  0.0  1.000000
6  0.416667  0.500000  0.818182  0.9  ...  0.133721  1.000000  0.0       NaN

[7 rows x 10 columns]


In [23]:
print(predicts)

{'353200': tensor(nan, device='cuda:0', grad_fn=<SelectBackward>), '344820': tensor(nan, device='cuda:0', grad_fn=<SelectBackward>), '338100': tensor(nan, device='cuda:0', grad_fn=<SelectBackward>), '308170': tensor(nan, device='cuda:0', grad_fn=<SelectBackward>), '322000': tensor(nan, device='cuda:0', grad_fn=<SelectBackward>), '272210': tensor(nan, device='cuda:0', grad_fn=<SelectBackward>), '317400': tensor(nan, device='cuda:0', grad_fn=<SelectBackward>), '330590': tensor(nan, device='cuda:0', grad_fn=<SelectBackward>), '013890': tensor(nan, device='cuda:0', grad_fn=<SelectBackward>), '336370': tensor(nan, device='cuda:0', grad_fn=<SelectBackward>), '336260': tensor(nan, device='cuda:0', grad_fn=<SelectBackward>), '036420': tensor(nan, device='cuda:0', grad_fn=<SelectBackward>), '003670': tensor(nan, device='cuda:0', grad_fn=<SelectBackward>), '307950': tensor(nan, device='cuda:0', grad_fn=<SelectBackward>), '192650': tensor(nan, device='cuda:0', grad_fn=<SelectBackward>), '192080':

In [None]:
!pip install pymysql
import pymysql

In [None]:
conn = pymysql.connect(host='jukerdb.cwhsnjoqybdo.ap-northeast-2.rds.amazonaws.com', user='admin', password='',
                       db='WEB', charset='utf8')
 
# Connection 으로부터 Cursor 생성
curs = conn.cursor()
 

for sb in result:
    #table : stockpreidct
    #symbol, value, type, day
    # SQL문 실행
    sql = """INSERT INTO stockpredict VALUES(%s,%s,%s,NOW())"""
    val = (sb,result[sb],'L')
    curs.execute(sql,val)
    conn.commit()

sql = "select * from stockpredict"
curs.execute(sql)
rows = curs.fetchall()
print(rows)     # 전체 rows

# Connection 닫기
conn.close()