In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import random
random_seed = 2
torch.manual_seed(random_seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(random_seed)
random.seed(random_seed)
torch.cuda.manual_seed(random_seed)

In [2]:
df = pd.read_csv('qr.csv',index_col=0)
df.head(2)

Unnamed: 0,거래량,미결제약정,상대가격,잔존만기,rate,kvix,분류,anomaly_score,anomaly,이론하한,종가
0,6.511452e-05,0.007142,0.255172,0.008011,1.0,0.130817,1.0,0.45517,0,38.332852,0.172045
1,8.456431e-07,0.000377,0.218227,0.008011,1.0,0.130817,1.0,0.450472,0,18.342202,0.083333


In [3]:
X = df.drop(['종가','이론하한'],axis=1)
y = df[['종가']]

X['rate'] *= 0.01
X['kvix'] *= 0.01
scaler = MinMaxScaler()
scaler.fit(X[['거래량','미결제약정']])
X[['거래량','미결제약정']] = scaler.transform(X[['거래량','미결제약정']])

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=0,shuffle=True)
z_train = X_train[['anomaly']]; X_train = X_train.drop('anomaly',axis=1)
z_test = X_test[['anomaly']]; X_test = X_test.drop('anomaly',axis=1)

In [4]:
X_test.sample(frac=1).head(2)

Unnamed: 0,거래량,미결제약정,상대가격,잔존만기,rate,kvix,분류,anomaly_score
126380,0.105997,0.071289,0.200273,0.004005,0.000917,0.002381,0.0,0.548559
46994,1.4e-05,0.002406,0.145396,0.073431,0.007556,0.000572,1.0,0.401421


In [5]:
y_test.sample(frac=1).head(2)

Unnamed: 0,종가
102734,3.4e-05
14846,0.005862


In [6]:
X_train = torch.from_numpy(X_train.values).float()
X_test = torch.from_numpy(X_test.values).float()
y_train = torch.from_numpy(y_train.values).float()
y_test = torch.from_numpy(y_test.values).float()
z_train = torch.from_numpy(z_train.values).float()
z_test = torch.from_numpy(z_test.values).float()

In [7]:
dataset_train = torch.utils.data.TensorDataset(X_train,y_train,z_train)
dataset_test = torch.utils.data.TensorDataset(X_test,y_test,z_test)

In [8]:
dataloader_train = torch.utils.data.DataLoader(dataset_train, batch_size=1000, 
                                               shuffle=True, num_workers=5, drop_last=True)
dataloader_test = torch.utils.data.DataLoader(dataset_test, batch_size=1000, 
                                              shuffle=False, num_workers=5, drop_last=True)

In [9]:
def loop(model_ano,dataloader,optimizer=None):
    
    ano_losses = []
    ano_losses_0 = []; ano_losses_1 = []
    m0 = 0; m1 = 0; m = 0
    for batch_idx, samples in enumerate(dataloader):
        
        if optimizer:
            optimizer.zero_grad()

        x_sample, y_sample, z_sample = samples
        x_sample = x_sample.cuda()
        y_sample = y_sample.cuda()
        z_sample = z_sample.cuda()
        
        n0 = (z_sample==0).sum().item()
        n1 = (z_sample==1).sum().item()
        idx0 = (z_sample==0).squeeze()
        idx1 = (z_sample==1).squeeze()
        
        m0 += n0
        m1 += n1
        m  += (n0+n1)
            
        pred = model_ano(x_sample)
        ano_loss_0 = torch.sum((pred[idx0]-y_sample[idx0])**2)
        ano_loss_1 = torch.sum((pred[idx1]-y_sample[idx1])**2)
        ano_losses_0.append( ano_loss_0.item() )
        ano_losses_1.append( ano_loss_1.item() )
       
        ano_loss = ano_loss_0+ano_loss_1
        ano_losses.append( (ano_loss_0+ano_loss_1).item() )
        if optimizer :
            ano_loss.backward()
            optimizer.step()
            
    return np.sum(ano_losses)/m,np.sum(ano_losses_0)/m0,np.sum(ano_losses_1)/m1

In [10]:
class Model_Ano(nn.Module):
    def __init__(self):
    
        super(Model_Ano, self).__init__()
        
        self.linear1b = nn.Linear(8,10)
        self.linear2b = nn.Linear(10,10)
        self.linear3b = nn.Linear(10,1)
        
        self.linear1 = nn.Linear(7,100)
        self.linear2 = nn.Linear(100,100)
        self.linear3 = nn.Linear(100,1)
        
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        
        e = self.linear1b(x)
        e = self.relu(e)
        e = self.linear2b(e)
        e = self.relu(e)
        e = self.linear3b(e)
        e = self.sigmoid(e)
    
        c = self.linear1(x[:,:-1])
        c = self.relu(c)
        c = self.linear2(c)
        c = self.relu(c)
        c = self.linear3(c)
        
        return c*e

model_ano = Model_Ano()
model_ano = model_ano.cuda()
    
optimizer = torch.optim.Adam(model_ano.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30,60,100], gamma=0.1, verbose=False)
ano_loss_fn = nn.MSELoss()

EPOCHS = 150
ano_loss_list = np.zeros((EPOCHS,2))
ano_loss_list_0 = np.zeros((EPOCHS,2))
ano_loss_list_1 = np.zeros((EPOCHS,2))

In [None]:
for epoch in range(EPOCHS):
    
    train_mse,train_mse_0,train_mse_1 = loop(model_ano,dataloader_train,optimizer)
    print('%-2d'%epoch,'train','%.6e'%train_mse,'%.6e'%train_mse_0,'%.6e'%train_mse_1,end=' ')
    
    with torch.no_grad() :
        test_mse,test_mse_0,test_mse_1 = loop(model_ano,dataloader_test)
        print('%-2d'%epoch,'test','%.6e'%test_mse,'%.6e'%test_mse_0,'%.6e'%test_mse_1)
    
    scheduler.step()
    
    ano_loss_list[epoch,0] = train_mse; ano_loss_list[epoch,1] = test_mse
    ano_loss_list_0[epoch,0] = train_mse_0; ano_loss_list_0[epoch,1] = test_mse_0
    ano_loss_list_1[epoch,0] = train_mse_1; ano_loss_list_1[epoch,1] = test_mse_1

0  train 1.575521e-03 6.923761e-04 7.457754e-03 0  test 4.117332e-05 2.785177e-05 1.286030e-04
1  train 4.218603e-05 3.021892e-05 1.218858e-04 1  test 6.091850e-05 4.560128e-05 1.614458e-04
2  train 3.322878e-05 2.345619e-05 9.831642e-05 2  test 3.286055e-05 2.535407e-05 8.212578e-05
3  train 2.933051e-05 2.184285e-05 7.920247e-05 3  test 2.755026e-05 1.879458e-05 8.501400e-05
4  train 2.467642e-05 1.808053e-05 6.860656e-05 4  test 1.900277e-05 1.189590e-05 6.564532e-05
5  train 2.594091e-05 1.991199e-05 6.609098e-05 5  test 2.137054e-05 1.359233e-05 7.241913e-05
6  train 2.174798e-05 1.605381e-05 5.966501e-05 6  test 3.162112e-05 2.686045e-05 6.286552e-05
7  train 2.274605e-05 1.715838e-05 5.996121e-05 7  test 2.678434e-05 1.884698e-05 7.887747e-05
8  train 1.975141e-05 1.459914e-05 5.406835e-05 8  test 1.943684e-05 1.340633e-05 5.901523e-05
9  train 2.033567e-05 1.537229e-05 5.339125e-05 9  test 1.709519e-05 1.070522e-05 5.903273e-05
10 train 2.010222e-05 1.494279e-05 5.446520e-05 10