In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import pyupbit
import time
import math

import torch
import torch.nn as nn
import torch.nn.functional as F

# data loader

In [2]:
class dataloader() :
    def __init__(self,  ticker, interval) :
        self.norm = {"minmax" : self.MinMax,
                    "stand" : self.standarization,
                    "diff" : self.diff}
        
        self.ticker = ticker
        self.interval = interval
        
    def __call__(self,  to, count, norm='stand')  :
        
        self.original_data = pyupbit.get_ohlcv(ticker=self.ticker, 
                                    interval=self.interval,
                                    to=to, 
                                    count=count)

        self.data = self.preprocess(self.original_data , norm)
        return self.data
        
        
    def MinMax(self, df) :
        norm = MinMaxScaler()
        norm_dataset = norm.fit_transform(df)
        return pd.DataFrame(norm_dataset, columns=list(df.columns))
    
    
    def standarization(self, df) :
        for col in df:
            df[col] = (df[col] - df[col].mean()) / df[col].std()
        return df
    
    
    def diff(self, df) :
        for col in (df):
            log_y = np.log(df[col])
            df[col][1:] = np.diff(log_y)
        return df[1:]
    

    def add_label(self, dataset_df) :
        after10 = np.zeros_like(dataset_df['close'])
        for i in range(len(dataset_df['close']) - 1) :
            if dataset_df['close'][i + 1] > dataset_df['close'][i] :
                after10[i] = 1
            elif dataset_df['close'][i + 1] <= dataset_df['close'][i] : 
                print(dataset_df['close'][i + 1])
                print(dataset_df['close'][i])
                after10[i] = 0
            else :
                after10[i] = None
            
        return pd.DataFrame(after10,columns=['label'])
    
        
    def add_avgPrice(self, dataset_df) :
        return (dataset_df['high'] + dataset_df['low'] + 
                dataset_df['open'] + dataset_df['close']) // 4
    
    
    def drop_feature(self, dataset_df) :
        # index(시간) 제거
        dataset_df = dataset_df.reset_index(drop=True)
        # value 제거
#         dataset_df = dataset_df.drop(columns=['value'])
        return dataset_df
    
    
    def WindowDataGenerator(self, df_data, window_size=144, stride=6, norm="stand") :
        if norm == "diff" :
            num_sample = ((df_data.shape[0] - 1) - window_size) // stride + 1
            data = np.zeros([window_size - 1, df_data.shape[1], num_sample])
        else : 
            num_sample = (df_data.shape[0] - window_size) // stride + 1
            data = np.zeros([window_size, df_data.shape[1], num_sample])
            
        # labels = np.zeros([num_sample])

        for i in range(num_sample) :
            data_start = stride * i
            data_end = data_start + window_size
            tmp = {}
            for col in df_data.columns :
                tmp[col] = df_data[col][data_start : data_end].copy()
    
            data[:, :, i] = pd.DataFrame(tmp).values
            # labels[i] = df_label.values[data_end - 1]

        data = data.transpose((2, 0, 1))

        return torch.Tensor(data) #, torch.Tensor(labels)
       
    
    def preprocess(self, dataset, normalization) :
        
        # drop feature
        dataset_df = self.drop_feature(dataset)
        
        # avg_price 추가
        dataset_df['avg_price'] = self.add_avgPrice(dataset_df)
        
        # label 추가
#         if normalization == "diff" :
#             label = self.add_label(dataset_df)[1:-1]
#         else :
#             label = self.add_label(dataset_df)[:-1]

        norm_df = self.norm[normalization](dataset_df.copy())
#         data, label = self.WindowDataGenerator(norm_df, label)
        data = self.WindowDataGenerator(norm_df)
        
        
        return data
        
        

# Model

In [3]:
class Transformer2FC(nn.Module) :
    def __init__(self, input_shape, d_model, n_head, num_layer, dropout, num_class=2):
        super(Transformer2FC, self).__init__()
        
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=n_head, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layer)
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        
        self.Encoder = nn.Sequential(
            nn.Linear(input_shape[1], d_model//2),
            nn.ReLU(),
            nn.Linear(d_model//2, d_model)
        )
        
        self.linear = nn.Sequential(
            nn.Linear(d_model, d_model//2),
            nn.ReLU(),
            nn.Linear(d_model//2, 1)
        )

        self.linear2 = nn.Sequential(
            nn.Linear(input_shape[0], input_shape[0]//2),
            nn.ReLU(),
            nn.Linear(input_shape[0]//2, num_class)
        )
        
#         self.sigmoid = nn.Softmax()
    
    def generate_square_subsequent_mask(self, sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask
    
    def forward(self, x, masked_x) :
        # (batch, data, dim)
        x = self.Encoder(x)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x.transpose(0,1), masked_x).transpose(0, 1)
        x = self.linear(x)
        x = x.squeeze(2)
        x = self.linear2(x)
        x = x.squeeze(1)
        return x

class PositionalEncoding(nn.Module) :
    def __init__(self, d_model, dropout=0.1, max_len=5000) :
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        
        pe = torch.zeros(max_len, d_model)
        
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)
        
    def forward(self, x) :
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

def get_attention_mask(x) :
    mask = torch.eq(x, 0)
    return mask

In [4]:
def current_time() :
    return time.strftime('%Y-%m-%d %I:%M', time.localtime(time.time()))


In [5]:
ticker = 'KRW-BTC'
interval ='minute10'
to = f'2022-04-05 10:00' #current_time() 
count = 144 # minimum data 개수는 145개

processed_data =  dataloader(ticker, interval)
data = processed_data(to=current_time(), count=count, norm="stand")
display(data.shape)
display(processed_data.original_data)

torch.Size([1, 144, 7])

Unnamed: 0,open,high,low,close,volume,value
2022-04-06 12:00:00,55797000.0,55838000.0,55665000.0,55680000.0,70.036868,3.902847e+09
2022-04-06 12:10:00,55680000.0,55700000.0,55550000.0,55612000.0,40.976261,2.278618e+09
2022-04-06 12:20:00,55579000.0,55678000.0,55555000.0,55664000.0,42.167050,2.344531e+09
2022-04-06 12:30:00,55595000.0,55668000.0,55540000.0,55602000.0,51.541082,2.865317e+09
2022-04-06 12:40:00,55603000.0,55635000.0,55556000.0,55598000.0,23.692987,1.316882e+09
...,...,...,...,...,...,...
2022-04-07 11:10:00,53430000.0,53481000.0,53352000.0,53371000.0,37.450508,2.000400e+09
2022-04-07 11:20:00,53371000.0,53434000.0,53275000.0,53332000.0,35.428555,1.889983e+09
2022-04-07 11:30:00,53275000.0,53521000.0,53275000.0,53508000.0,41.104517,2.195348e+09
2022-04-07 11:40:00,53508000.0,53508000.0,53306000.0,53306000.0,35.685789,1.905776e+09


# model options & define

In [6]:
device = torch.device("cpu")

window_size = 24 * 6
feature_len = 7
d_model=512 
n_head=8
num_layer=4
dropout=0.3
num_class = 1

path = "./model/300E_stand_96_model.pt"

model = Transformer2FC(input_shape=(window_size, feature_len), 
                       d_model=d_model, 
                       n_head=n_head, 
                       num_layer=num_layer, 
                       dropout=dropout, 
                       num_class = num_class).to(device)

model.load_state_dict(torch.load(path, map_location=device))
model.eval()

Transformer2FC(
  (encoder_layer): TransformerEncoderLayer(
    (self_attn): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
    )
    (linear1): Linear(in_features=512, out_features=2048, bias=True)
    (dropout): Dropout(p=0.3, inplace=False)
    (linear2): Linear(in_features=2048, out_features=512, bias=True)
    (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
    (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
    (dropout1): Dropout(p=0.3, inplace=False)
    (dropout2): Dropout(p=0.3, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
        )
        (linear1): Linear(in_features=512, out_features=2048, bias=True)
        (dropout): Dropout(p=0.3, inplace=False)
        

In [7]:
def sigmoid(pred, threshold) :
    ans = F.sigmoid(pred)
    return 1 if ans >= threshold else 0

In [15]:
ticker = 'KRW-BTC'
interval ='minute1'
to = current_time() # f'2022-04-05 00:00'
count = 145 # minimum data 개수는 145개

condition = 1
break_num = 10
datasets = dataloader(ticker, interval)
data  = datasets(to, count, norm='stand').to(device)

seed_money = 1000000000 #10억
coin = 0

start_t = time.time()
while condition :
      
    src_mask = model.generate_square_subsequent_mask(data.shape[1]).to(device)
    pred = model(data, src_mask)
    pred = sigmoid(pred, threshold=0.5)
    
    current_price = datasets.original_data['close'][-1]
    
    print(f"\n\n10 minute later price will be : {'up' if pred == 1 else 'down'}")
    print(f"current time :{to}")
    print(f"current price :{current_price}")
    
    if pred == 1 :
        seed_money -= current_price
        coin += 1
    
    if pred != 1 and coin > 0 :
        seed_money += current_price
        coin -= 1    
    
    if condition == break_num :
        break
        
    condition += 1
    time.sleep(3)
    
    # === 10m later ===
    to = current_time()
    data  = datasets(to, count, norm='stand').to(device)

    later_price = current_price - datasets.original_data['close'][-1]
    
    print("== 10m later ==")
    print(f"current time :{to}")
    print(f"current price : {datasets.original_data['close'][-1]}")
    print(f"Price is {'up' if later_price > 0 else 'down'}")
    
    if later_price > 0 :
        seed_money += datasets.original_data['close'][-1]
        coin -= 1
        
    print("== Wallet ==")
    print(f"Current Money : {seed_money}")
    print(f"Current Coin : {coin}")
    
    if seed_money < 0 :
        break



10 minute later price will be : up
current time :2022-04-08 12:16
current price :53516000.0
== 10m later ==
current time :2022-04-08 12:16
current price : 53517000.0
Price is down
Current Money : 946484000.0


NameError: name 'Coin' is not defined

In [30]:
start = time.time()
time.sleep(1.5)
end = time.time()
print('time elapsed:', end - start)


time elapsed: 1.5130164623260498


In [18]:
tm = time.localtime(time.time())
tm

time.struct_time(tm_year=2022, tm_mon=4, tm_mday=7, tm_hour=22, tm_min=9, tm_sec=0, tm_wday=3, tm_yday=97, tm_isdst=0)

In [21]:
string = time.strftime('%Y-%m-%d %I:%M', tm)
string

'2022-04-07 10:09'