In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import random
random_seed = 8
torch.manual_seed(random_seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(random_seed)
random.seed(random_seed)
torch.cuda.manual_seed(random_seed)

In [2]:
df = pd.read_csv('qr.csv',index_col=0)
df.head(2)

Unnamed: 0,거래량,미결제약정,상대가격,잔존만기,rate,kvix,분류,anomaly_score,anomaly,이론하한,종가
0,6.511452e-05,0.007142,0.255172,0.008011,1.0,0.130817,1.0,0.45517,0,38.332852,0.172045
1,8.456431e-07,0.000377,0.218227,0.008011,1.0,0.130817,1.0,0.450472,0,18.342202,0.083333


In [3]:
X = df.drop(['종가','이론하한'],axis=1)
y = df[['종가']]

X['rate'] *= 0.01
X['kvix'] *= 0.01
scaler = MinMaxScaler()
scaler.fit(X[['거래량','미결제약정','anomaly_score']])
X[['거래량','미결제약정','anomaly_score']] = scaler.transform(X[['거래량','미결제약정','anomaly_score']])

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=0,shuffle=True)
z_train = X_train[['anomaly']]; X_train = X_train.drop('anomaly',axis=1)
z_test = X_test[['anomaly']]; X_test = X_test.drop('anomaly',axis=1)

In [4]:
X_test.sample(frac=1).head(2)

Unnamed: 0,거래량,미결제약정,상대가격,잔존만기,rate,kvix,분류,anomaly_score
125750,0.014087,0.04125,0.242525,0.006676,7.2e-05,0.002284,0.0,0.224755
52372,8.6e-05,0.003393,0.27079,0.076101,0.007381,0.000314,0.0,0.064314


In [5]:
y_test.sample(frac=1).head(2)

Unnamed: 0,종가
184071,0.011809
66569,0.027385


In [6]:
X_train = torch.from_numpy(X_train.values).float()
X_test = torch.from_numpy(X_test.values).float()
y_train = torch.from_numpy(y_train.values).float()
y_test = torch.from_numpy(y_test.values).float()
z_train = torch.from_numpy(z_train.values).float()
z_test = torch.from_numpy(z_test.values).float()

In [7]:
dataset_train = torch.utils.data.TensorDataset(X_train,y_train,z_train)
dataset_test = torch.utils.data.TensorDataset(X_test,y_test,z_test)

In [8]:
dataloader_train = torch.utils.data.DataLoader(dataset_train, batch_size=1000, 
                                               shuffle=True, num_workers=5, drop_last=True)
dataloader_test = torch.utils.data.DataLoader(dataset_test, batch_size=1000, 
                                              shuffle=False, num_workers=5, drop_last=True)

In [9]:
def loop(model_ano,dataloader,optimizer=None):
    
    ano_losses = []
    ano_losses_0 = []; ano_losses_1 = []
    m0 = 0; m1 = 0; m = 0
    for batch_idx, samples in enumerate(dataloader):
        
        if optimizer:
            optimizer.zero_grad()

        x_sample, y_sample, z_sample = samples
        x_sample = x_sample.cuda()
        y_sample = y_sample.cuda()
        z_sample = z_sample.cuda()
        
        n0 = (z_sample==0).sum().item()
        n1 = (z_sample==1).sum().item()
        idx0 = (z_sample==0).squeeze()
        idx1 = (z_sample==1).squeeze()
        
        m0 += n0
        m1 += n1
        m  += (n0+n1)
            
        pred = model_ano(x_sample)
        ano_loss_0 = torch.sum((pred[idx0]-y_sample[idx0])**2)
        ano_loss_1 = torch.sum((pred[idx1]-y_sample[idx1])**2)
        ano_losses_0.append( ano_loss_0.item() )
        ano_losses_1.append( ano_loss_1.item() )
       
        ano_loss = ano_loss_0+ano_loss_1
        ano_losses.append( (ano_loss_0+ano_loss_1).item() )
        if optimizer :
            ano_loss.backward()
            optimizer.step()
            
    return np.sum(ano_losses)/m,np.sum(ano_losses_0)/m0,np.sum(ano_losses_1)/m1

In [10]:
class Model_Ano(nn.Module):
    def __init__(self):
    
        super(Model_Ano, self).__init__()
        
        self.linear1b = nn.Linear(8,100)
        self.linear2b = nn.Linear(100,1)
        
        self.linear1 = nn.Linear(7,100)
        self.linear2 = nn.Linear(100,100)
        self.linear3 = nn.Linear(100,1)
        
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        
        e = self.linear1b(x)
        e = self.relu(e)
        e = self.linear2b(e)
        e = self.sigmoid(e)
    
        c = self.linear1(x[:,:-1])
        c = self.relu(c)
        c = self.linear2(c)
        c = self.relu(c)
        c = self.linear3(c)
        
        return c*e

model_ano = Model_Ano()
model_ano = model_ano.cuda()
    
optimizer = torch.optim.Adam(model_ano.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30,60,100], gamma=0.1, verbose=False)
ano_loss_fn = nn.MSELoss()

EPOCHS = 150
ano_loss_list = np.zeros((EPOCHS,2))
ano_loss_list_0 = np.zeros((EPOCHS,2))
ano_loss_list_1 = np.zeros((EPOCHS,2))

In [11]:
for epoch in range(EPOCHS):
    
    train_mse,train_mse_0,train_mse_1 = loop(model_ano,dataloader_train,optimizer)
    print('%-2d'%epoch,'train','%.6e'%train_mse,'%.6e'%train_mse_0,'%.6e'%train_mse_1,end=' ')
    
    with torch.no_grad() :
        test_mse,test_mse_0,test_mse_1 = loop(model_ano,dataloader_test)
        print('%-2d'%epoch,'test','%.6e'%test_mse,'%.6e'%test_mse_0,'%.6e'%test_mse_1)
    
    scheduler.step()
    
    ano_loss_list[epoch,0] = train_mse; ano_loss_list[epoch,1] = test_mse
    ano_loss_list_0[epoch,0] = train_mse_0; ano_loss_list_0[epoch,1] = test_mse_0
    ano_loss_list_1[epoch,0] = train_mse_1; ano_loss_list_1[epoch,1] = test_mse_1

0  train 2.393091e-03 1.123072e-03 1.085088e-02 0  test 6.576212e-05 5.562454e-05 1.322953e-04
1  train 4.163702e-05 3.222863e-05 1.042960e-04 1  test 3.743404e-05 2.604161e-05 1.122028e-04
2  train 3.066872e-05 2.291184e-05 8.232877e-05 2  test 2.485060e-05 1.680779e-05 7.763579e-05
3  train 3.800401e-05 3.042597e-05 8.848045e-05 3  test 6.710370e-05 5.643374e-05 1.371309e-04
4  train 3.359984e-05 2.539776e-05 8.822488e-05 4  test 2.299366e-05 1.520163e-05 7.413291e-05
5  train 2.455738e-05 1.803764e-05 6.797608e-05 5  test 2.228446e-05 1.576220e-05 6.509019e-05
6  train 3.109663e-05 2.359785e-05 8.104019e-05 6  test 5.479410e-05 3.372333e-05 1.930821e-04
7  train 2.856667e-05 2.007062e-05 8.514674e-05 7  test 2.011939e-05 1.316359e-05 6.577049e-05
8  train 2.295793e-05 1.664265e-05 6.502531e-05 8  test 2.047518e-05 1.329131e-05 6.762309e-05
9  train 2.531348e-05 1.843247e-05 7.114480e-05 9  test 1.824141e-05 1.218854e-05 5.796658e-05
10 train 2.692570e-05 1.947801e-05 7.652420e-05 10