In [2]:
from sklearn.metrics import roc_auc_score

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl

from collections import Counter

Using backend: pytorch


In [8]:
# Load the graph archive and pull out the arrays used by this notebook.
# `data` is left open because later cells may still read from it.
data = np.load('./phase1_gdata.npz')

# Features and labels.
node_feat, node_label = data['x'], data['y']

# Train / test selectors stored alongside the features.
# NOTE(review): these are used below as index arrays into node_feat — confirm
# they hold integer indices rather than boolean masks.
train_mask, test_mask = data['train_mask'], data['test_mask']

In [9]:
class MLP(nn.Module):
    """Three-layer perceptron: Linear -> ReLU -> Dropout -> Linear -> ReLU -> Linear.

    Args:
        in_feat: size of each input feature vector.
        h_feat: width of both hidden layers.
        out_feat: size of the output vector (raw logits, no final activation).
    """

    def __init__(self, in_feat, h_feat, out_feat):
        super(MLP, self).__init__()
        self.linear1 = nn.Linear(in_feat, h_feat)
        self.linear2 = nn.Linear(h_feat, h_feat)
        self.linear3 = nn.Linear(h_feat, out_feat)
        # Registered as a submodule so model.train()/model.eval() toggles it.
        # A Dropout constructed inside forward() is always in training mode,
        # which keeps dropping activations during evaluation.
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Map inputs whose last dimension is in_feat to raw logits of size out_feat."""
        x = F.relu(self.linear1(x))
        x = self.dropout(x)
        x = F.relu(self.linear2(x))
        return self.linear3(x)

In [10]:
# Build the classifier (input width taken from the feature matrix, two hidden
# layers of width 128, a single output logit) and its Adam optimizer.
hidden_dim = 128
model = MLP(in_feat=node_feat.shape[1], h_feat=hidden_dim, out_feat=1)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

In [11]:
# Hold out the last 20% of a shuffled copy of the training indices for validation.
# A seeded permutation (instead of an in-place np.random.shuffle) leaves
# `train_mask` untouched, so re-running this cell reproduces the same split
# rather than moving rows the model has already trained on into validation.
# NOTE(review): assumes train_mask holds integer row indices, not a boolean
# mask — confirm against the dataset.
SPLIT_SEED = 42
shuffled_idx = np.random.default_rng(SPLIT_SEED).permutation(train_mask)

# Integer arithmetic avoids the float round-trip of int(len(...)/10*8).
n_train = len(shuffled_idx) * 8 // 10
train_idx = shuffled_idx[:n_train]
val_idx = shuffled_idx[n_train:]

In [12]:
batch_size = 2048
epochs = 10

for epoch in range(epochs):
    model.train()
    # Stride over the whole index array so the final partial batch is used too;
    # iterating len(train_idx)//batch_size times silently dropped those rows
    # in every epoch.
    for i, start in enumerate(range(0, len(train_idx), batch_size)):
        idx = train_idx[start : start + batch_size]

        sample = torch.as_tensor(node_feat[idx], dtype=torch.float32)
        labels = torch.as_tensor(node_label[idx], dtype=torch.float32)

        # Flatten the model output to 1-D so it lines up with `labels`.
        logit = model(sample).view(-1)
        loss = F.binary_cross_entropy_with_logits(logit, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 100 == 0:
            print("epoch {}, step {}, loss {}".format(epoch, i, loss.item()))

    # Score the held-out split once per epoch. ROC AUC depends only on the
    # ranking of the scores, so raw logits are passed without a sigmoid.
    model.eval()
    with torch.no_grad():
        val_sample = torch.as_tensor(node_feat[val_idx], dtype=torch.float32)
        val_logits = model(val_sample).view(-1).numpy()
        val_labels = torch.as_tensor(node_label[val_idx], dtype=torch.float32).numpy()
        val_auc = roc_auc_score(val_labels, val_logits)
        print("epoch {}, val_auc {}".format(epoch, val_auc))

epoch 0, step 0, loss 0.7024523019790649
epoch 0, step 100, loss 0.06569257378578186
epoch 0, step 200, loss 0.05373265594244003
epoch 0, step 300, loss 0.07308683544397354
epoch 0, val_auc 0.7119663306248294
epoch 1, step 0, loss 0.05062979459762573
epoch 1, step 100, loss 0.06650225073099136
epoch 1, step 200, loss 0.052907027304172516


KeyboardInterrupt: 

In [24]:
# Run the trained model over the test rows in eval mode with gradient tracking off.
model.eval()
with torch.no_grad():
    test_sample = torch.Tensor(node_feat[test_mask])
    # torch.sigmoid replaces the deprecated F.sigmoid (same values).
    # Despite its name, `test_logits` holds sigmoid probabilities, not raw logits.
    test_logits = torch.sigmoid(model(test_sample)).numpy()

In [41]:
# Assemble a two-column array [1 - p, p] from the sigmoid outputs and save it.
prob_pos = test_logits
prob_neg = 1 - prob_pos
res = np.concatenate([prob_neg, prob_pos], axis=1)
np.save("mlp.npy", res)

In [25]:
# Largest predicted probability in `logit` (torch.sigmoid replaces the
# deprecated F.sigmoid).
# NOTE(review): `logit` is presumably left over from the last training batch — confirm.
torch.max(torch.sigmoid(logit))

tensor(0.0680, grad_fn=<MaxBackward1>)

In [18]:
# Display `labels`. Presumably the tensor left over from the last training
# batch — confirm, since the cell execution counts are out of order.
labels

tensor([0., 0., 0.,  ..., 0., 0., 0.])

In [14]:
# Recompute the binary cross-entropy (on logits) for the current `logit` / `labels` pair.
loss = F.binary_cross_entropy_with_logits(input=logit, target=labels)

In [16]:
# Display the current value of `loss`.
loss

tensor(0.0668, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)