In [0]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import roc_auc_score

In [0]:
train = pd.read_csv("train.csv").drop(columns=['opened_position_qty ', 'closed_position_qty'])
test = pd.read_csv("test.csv").drop(columns=['opened_position_qty ', 'closed_position_qty'])
df = pd.concat([train,test],sort=False)

In [0]:
bid_cols = ['bid1','bid2', 'bid3', 'bid4', 'bid5']
bid_vol_cols = ['bid1vol', 'bid2vol', 'bid3vol', 'bid4vol', 'bid5vol']
ask_cols = ['ask1', 'ask2', 'ask3', 'ask4', 'ask5',]
ask_vol_cols = ['ask1vol','ask2vol', 'ask3vol', 'ask4vol', 'ask5vol']

group_cols = {"bid_cols":bid_cols,"bid_vol_cols":bid_vol_cols,"ask_cols":ask_cols,"ask_vol_cols":ask_vol_cols}

In [0]:
for group in group_cols.keys():
    df[f"{group}_max"] = df[group_cols[group]].max(axis=1)
    df[f"{group}_min"] = df[group_cols[group]].min(axis=1)
    df[f"{group}_spread"] = df[f"{group}_max"].div(df[f"{group}_min"])
    
df["last_price_div__mid"] = df["last_price"].div(df["mid"])
df["d_opn_intrst_div_trnsctd_qty"] = df["d_open_interest"].div(df["transacted_qty"])
df["d_opn_intrst_div_trnsctd_qty"] = df["d_opn_intrst_div_trnsctd_qty"].fillna(0)


In [0]:
train = df.loc[~df.y.isna()]
test = df.loc[df.y.isna()].drop(columns=['id', 'y'])

train_y = train.y
train_x = train.drop(columns=['id', 'y'])

TRAIN_SIZE = 500000
perm = np.random.permutation(len(train_x))

df_train = TensorDataset(torch.tensor(train_x.iloc[perm[:TRAIN_SIZE]].to_numpy()), torch.tensor(train_y.iloc[perm[:TRAIN_SIZE]].to_numpy()))

val_x = torch.tensor(train_x.iloc[perm[TRAIN_SIZE:]].to_numpy())
val_y = train_y.iloc[perm[TRAIN_SIZE:]].to_numpy(dtype='double')

In [0]:
model = nn.Sequential(  
    nn.Linear(38, 300),
    nn.PReLU(),
    nn.Linear(300, 300),
    nn.PReLU(),
    nn.Linear(300, 40),
    nn.PReLU(),
    nn.Linear(40, 1)
)

In [0]:
optimizer = torch.optim.Adam(model.parameters(), lr=1e-6)
loss_fn = nn.BCEWithLogitsLoss()
train_loader = DataLoader(df_train, batch_size=128, shuffle=True)

for epoch in range(30):
    model.train()
    for i, (data, target) in enumerate(train_loader):
        optimizer.zero_grad()
        output = torch.squeeze(model(data.float()))
        loss = loss_fn(output, target.float())
        loss.backward()
        optimizer.step()

        if i % 55 == 0: print('.',end='')
    print('Train Epoch: %d  Loss: %.4f' % (epoch + 1,  loss.item()))

    # Validate
    model.eval()
    with torch.no_grad():
        print(model(val_x.float()))
        vscore = roc_auc_score(
                val_y,
                torch.sigmoid(torch.squeeze(model(val_x.float()))).numpy())
        
    # Track validation error each epoch
    print('Validation score: %.4f' % (vscore))

........................................................................Train Epoch: 1  Loss: 0.6211
tensor([[-0.6556],
        [-0.7549],
        [-0.8557],
        ...,
        [-0.8399],
        [-0.5085],
        [-0.6438]])
Validation score: 0.5495
........................................................................Train Epoch: 2  Loss: 0.7286
tensor([[-0.6235],
        [-0.7381],
        [-0.8626],
        ...,
        [-0.8003],
        [-0.4767],
        [-0.6259]])
Validation score: 0.5610
........................................................................Train Epoch: 3  Loss: 0.6124
tensor([[-0.4330],
        [-0.5692],
        [-0.7228],
        ...,
        [-0.6015],
        [-0.2861],
        [-0.4546]])
Validation score: 0.5727
........................................................................Train Epoch: 4  Loss: 0.6105
tensor([[-0.6110],
        [-0.7692],
        [-0.9499],
        ...,
        [-0.7778],
        [-0.4750],
        [-0.6662]])
Validatio

In [0]:
model.eval()
with torch.no_grad():
    test_x = torch.tensor(test.to_numpy())
    df_test = pd.read_csv('test.csv', index_col=0)
    df_test['Predicted'] = torch.sigmoid(torch.squeeze(model(test_x.float()))).numpy()
    df_test[['Predicted']].to_csv('submission.csv')