<a href="https://colab.research.google.com/github/erickuo5124/MLG_HW2/blob/main/GNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -q torch-scatter -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
!pip install -q torch-sparse -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
!pip install -q torch-geometric

In [2]:
import pandas as pd

train = pd.read_csv('/content/drive/MyDrive/MLG/hw2/hw2_data/dataset1/train.csv')
test = pd.read_csv('/content/drive/MyDrive/MLG/hw2/hw2_data/dataset1/test.csv')
content = pd.read_csv('/content/drive/MyDrive/MLG/hw2/hw2_data/dataset1/content.csv', delimiter='\t', header=None, index_col=0).sort_index()

In [3]:
import torch
from torch_geometric.data import Data
from torch_geometric.utils import from_networkx

edge_index = train.loc[train['label'] == 1]
x = torch.tensor(content.values, dtype=torch.float)
data = Data(x=x)
data

Data(x=[2708, 1433])

In [4]:
from torch.nn import Linear, CosineSimilarity
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
  def __init__(self, hidden_layer):
    super(Net, self).__init__()
    torch.manual_seed(12345)
    self.conv1 = GCNConv(data.num_features, hidden_layer)
    self.conv2 = GCNConv(hidden_layer, hidden_layer)
    self.conv3 = GCNConv(hidden_layer, 64)
    self.classifier = Linear(64, 64)
    self.similarity = torch.nn.CosineSimilarity(dim=1, eps=1e-6)

  def forward(self, x, train_edges, pred_edges):
    train_edges = train_edges.loc[train_edges['label'] == 1]
    train_edges = torch.tensor([train_edges['from'].values, train_edges['to'].values]).cuda()

    # Encoder
    h = self.conv1(x.cuda(), train_edges)
    h = h.tanh()
    h = self.conv2(h, train_edges)
    h = h.tanh()
    h = self.conv3(h, train_edges)
    h = h.tanh()  # Final GNN embedding space.
    nodes = self.classifier(h)
    nodes = torch.squeeze(nodes)
    
    # Decoder
    ids_from = torch.tensor(pred_edges['from'].values).cuda()
    ids_to = torch.tensor(pred_edges['to'].values).cuda()
    preds = self.similarity(torch.index_select(nodes, 0, ids_from), torch.index_select(nodes, 0, ids_to))
    
    return torch.abs(torch.sigmoid(preds))

In [8]:
from IPython.display import Javascript  # Restrict height of output cell.
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))

from sklearn.metrics import roc_auc_score, average_precision_score
from sklearn.model_selection import train_test_split

model = Net(128).cuda()
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(3000):
  optimizer.zero_grad()
  train_edges, test_edges = train_test_split(train, shuffle=True)
  out = model(data.x, train_edges, test_edges)
  label = torch.tensor(test_edges['label'].values, dtype=torch.float).cuda()
  loss = criterion(out, label)
  print(out)
  print(f'Epoch: {epoch:03d}, Loss: {loss:.6f}')
  print(f'roc_auc_score: {roc_auc_score(label.cpu().detach().numpy(), out.cpu().detach().numpy())}, average_precision_score: {average_precision_score(label.cpu().detach().numpy(), out.cpu().detach().numpy())}')
  loss.backward()
  optimizer.step()

<IPython.core.display.Javascript object>

[1;30;43m串流輸出內容已截斷至最後 5000 行。[0m
tensor([0.7181, 0.7233, 0.2705,  ..., 0.6509, 0.6838, 0.3821], device='cuda:0',
       grad_fn=<AbsBackward>)
Epoch: 1750, Loss: 0.425935
roc_auc_score: 0.9946278713577367, average_precision_score: 0.9951202444976399
tensor([0.6787, 0.7279, 0.7225,  ..., 0.7022, 0.7110, 0.3024], device='cuda:0',
       grad_fn=<AbsBackward>)
Epoch: 1751, Loss: 0.424060
roc_auc_score: 0.9964443634236198, average_precision_score: 0.9963528272327404
tensor([0.7249, 0.6083, 0.6826,  ..., 0.7257, 0.4366, 0.7272], device='cuda:0',
       grad_fn=<AbsBackward>)
Epoch: 1752, Loss: 0.426497
roc_auc_score: 0.9949585618375427, average_precision_score: 0.9954346988450045
tensor([0.3619, 0.3310, 0.3258,  ..., 0.3446, 0.3131, 0.3880], device='cuda:0',
       grad_fn=<AbsBackward>)
Epoch: 1753, Loss: 0.424743
roc_auc_score: 0.9949029274449661, average_precision_score: 0.9951625731150431
tensor([0.7254, 0.4938, 0.6847,  ..., 0.4156, 0.7095, 0.4193], device='cuda:0',
       grad_fn=<A

In [9]:
import csv

upload = model(data.x, edge_index, test)
upload = upload.tolist()

with open('/content/drive/MyDrive/MLG/hw2/hw2_data/dataset1/upload.csv', 'w') as f:
  writer = csv.writer(f)
  writer.writerow(['id', 'prob'])
  for pred, (_, edge) in zip(upload, test.iterrows()):
    writer.writerow([edge['id'], pred])