In [1]:
!pip install -q torch-scatter -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
!pip install -q torch-sparse -f https://pytorch-geometric.com/whl/torch-1.8.0+cu101.html
!pip install -q torch-geometric

[K     |████████████████████████████████| 2.6MB 7.7MB/s 
[K     |████████████████████████████████| 1.5MB 7.4MB/s 
[K     |████████████████████████████████| 194kB 7.9MB/s 
[K     |████████████████████████████████| 235kB 15.7MB/s 
[K     |████████████████████████████████| 2.2MB 15.6MB/s 
[K     |████████████████████████████████| 51kB 8.7MB/s 
[?25h  Building wheel for torch-geometric (setup.py) ... [?25l[?25hdone


In [2]:
import glob

ans_path = []
data_path = []
for filename in glob.glob('/content/drive/MyDrive/MLG/hw1/hw1_data/Synthetic/5000/*.txt'):
  if 'score' in filename:
    ans_path.append(filename)
  else:
    data_path.append(filename)

print(data_path)
print(ans_path)

['/content/drive/MyDrive/MLG/hw1/hw1_data/Synthetic/5000/29.txt', '/content/drive/MyDrive/MLG/hw1/hw1_data/Synthetic/5000/15.txt', '/content/drive/MyDrive/MLG/hw1/hw1_data/Synthetic/5000/14.txt', '/content/drive/MyDrive/MLG/hw1/hw1_data/Synthetic/5000/28.txt', '/content/drive/MyDrive/MLG/hw1/hw1_data/Synthetic/5000/16.txt', '/content/drive/MyDrive/MLG/hw1/hw1_data/Synthetic/5000/17.txt', '/content/drive/MyDrive/MLG/hw1/hw1_data/Synthetic/5000/13.txt', '/content/drive/MyDrive/MLG/hw1/hw1_data/Synthetic/5000/12.txt', '/content/drive/MyDrive/MLG/hw1/hw1_data/Synthetic/5000/10.txt', '/content/drive/MyDrive/MLG/hw1/hw1_data/Synthetic/5000/11.txt', '/content/drive/MyDrive/MLG/hw1/hw1_data/Synthetic/5000/9.txt', '/content/drive/MyDrive/MLG/hw1/hw1_data/Synthetic/5000/8.txt', '/content/drive/MyDrive/MLG/hw1/hw1_data/Synthetic/5000/5.txt', '/content/drive/MyDrive/MLG/hw1/hw1_data/Synthetic/5000/4.txt', '/content/drive/MyDrive/MLG/hw1/hw1_data/Synthetic/5000/6.txt', '/content/drive/MyDrive/MLG/h

In [3]:
import torch
import networkx as nx
from torch_geometric.data import Data, ClusterData, ClusterLoader
from torch_geometric.utils import from_networkx

ans_group = []
data_group = []
loader_group = []

for i in range(30):
  ans = {}
  with open(ans_path[i]) as f:
    line = f.readline()
    while line:
      key, value = line.split()
      key = eval(key)
      ans[key] = eval(value)
      line = f.readline()
  ans_group.append(ans)

  G = nx.read_edgelist(data_path[i], nodetype = int)
  edge_index = from_networkx(G).edge_index
  x = torch.tensor([[G.degree(node),1,1] for node in G.nodes], dtype=torch.float)
  y = torch.tensor([ans[node] for node in G.nodes], dtype=torch.float)
  data = Data(edge_index=edge_index, x=x, y=y)
  data_group.append(data)

In [22]:
import torch
from torch.nn import Linear, GRUCell, ReLU
from torch_geometric.nn import GCNConv
import torch.nn.functional as F
from torch.nn import Sequential

class DrBC(torch.nn.Module):
  def __init__(self, num_L):
    super(DrBC, self).__init__()
    self.num_L = num_L

    self.data_in = Sequential(Linear(data.num_features, 128), ReLU())
    self.Aggregation = GCNConv(128, 128)
    self.Combine = GRUCell(128, 128, bias=False)
    self.data_out = Sequential(Linear(128, 64), ReLU(), Linear(64,1))

  def forward(self, graph):
    # encoder
    h = self.data_in(graph.x.cuda())
    h = F.normalize(h,dim=-1,p=2)
    for _ in range(self.num_L):
      hn = self.Aggregation(h, graph.edge_index.cuda())
      h_new = self.Combine(h, hn)
      h_new = F.normalize(h,dim=-1,p=2)
      h = torch.maximum(h, h_new)
    h = F.normalize(h,dim=-1,p=2)
    z = h

    # decoder
    pred = self.data_out(z)
    pred = torch.squeeze(pred)

    # loss
    label = graph.y.cuda()
    edge_index = graph.edge_index.cuda()
    pair_ids_src, pair_ids_tgt = edge_index[0], edge_index[1]

    preds = torch.index_select(pred, 0, pair_ids_src) - torch.index_select(pred, 0, pair_ids_tgt)
    labels = torch.index_select(label, 0, pair_ids_src) - torch.index_select(label, 0, pair_ids_tgt)
    # preds, labels = pred, label
    preds, labels = torch.sigmoid(preds), torch.sigmoid(labels)
    loss = torch.nn.BCELoss()(preds, labels)
    return pred, loss

In [23]:
def top_n_accuracy(train, n, ans):
  num = int(len(train) * n/100)
  train = train.tolist()
  predict = {}
  for i in range(len(train)):
    predict[i] = train[i]

  predict = sorted(predict.items(), key = lambda x: x[1], reverse = True)[:num]
  ans = sorted(ans.items(), key = lambda x: x[1], reverse = True)[:num]
  ans = [item[0] for item in ans]
  hit = 0
  for i in range(num):
    if predict[i][0] in ans:
      hit += 1
  return hit/num

In [24]:
from IPython.display import Javascript  # Restrict height of output cell.
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))

model = DrBC(5).cuda()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

index = 0
for epoch in range(100000):
    optimizer.zero_grad()
    out, loss = model(data_group[index])
    # print(out)
    print(f'Epoch: {epoch:03d}, G_index: {index:02d}, Loss: {loss:.6f}, Accuracy: {top_n_accuracy(out, 1, ans_group[index])}')
    loss.backward()
    optimizer.step()
    index += 1 if index < 29 else -29

<IPython.core.display.Javascript object>

[1;30;43m串流輸出內容已截斷至最後 5000 行。[0m
Epoch: 95000, G_index: 20, Loss: 0.693126, Accuracy: 0.22
Epoch: 95001, G_index: 21, Loss: 0.693127, Accuracy: 0.36
Epoch: 95002, G_index: 22, Loss: 0.693119, Accuracy: 0.18
Epoch: 95003, G_index: 23, Loss: 0.693123, Accuracy: 0.28
Epoch: 95004, G_index: 24, Loss: 0.693122, Accuracy: 0.38
Epoch: 95005, G_index: 25, Loss: 0.693121, Accuracy: 0.32
Epoch: 95006, G_index: 26, Loss: 0.693134, Accuracy: 0.24
Epoch: 95007, G_index: 27, Loss: 0.693128, Accuracy: 0.32
Epoch: 95008, G_index: 28, Loss: 0.693113, Accuracy: 0.18
Epoch: 95009, G_index: 29, Loss: 0.693125, Accuracy: 0.14
Epoch: 95010, G_index: 00, Loss: 0.693125, Accuracy: 0.26
Epoch: 95011, G_index: 01, Loss: 0.693126, Accuracy: 0.18
Epoch: 95012, G_index: 02, Loss: 0.693106, Accuracy: 0.28
Epoch: 95013, G_index: 03, Loss: 0.693128, Accuracy: 0.34
Epoch: 95014, G_index: 04, Loss: 0.693119, Accuracy: 0.22
Epoch: 95015, G_index: 05, Loss: 0.693121, Accuracy: 0.18
Epoch: 95016, G_index: 06, Loss: 0.69

In [12]:
torch.save(model, '/content/drive/MyDrive/MLG/hw1/net.pkl')
torch.save(model.state_dict(), '/content/drive/MyDrive/MLG/hw1/net_params.pkl')

In [None]:
test_path = '/content/drive/MyDrive/MLG/hw1/hw1_data/youtube/com-youtube.txt'
testans_path = '/content/drive/MyDrive/MLG/hw1/hw1_data/youtube/com-youtube_score.txt'

test_ans = {}
with open(testans_path) as f:
  line = f.readline()
  while line:
    key, value = line.split(':')
    key = eval(key)
    test_ans[key] = eval(value)
    line = f.readline()

G = nx.read_edgelist(test_path, nodetype = int)
edge_index = from_networkx(G).edge_index
x = torch.tensor([[G.degree(node),1,1] for node in G.nodes], dtype=torch.float)
y = torch.tensor([test_ans[node] for node in G.nodes], dtype=torch.float)
test_data = Data(edge_index=edge_index, x=x, y=y)
test_data

In [None]:
out, loss = model(test_data)
print(f'Loss: {loss:.6f}, Accuracy: {top_n_accuracy(out, 5, test_ans)}')
print(out)