In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.datasets import Planetoid
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from torch_geometric.utils import degree
from sklearn.model_selection import train_test_split


device = torch.device('cuda:2' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=2)

In [2]:
torch.__version__

'1.12.0'

In [3]:
cora_dataset = Planetoid(root='.', name='Cora')
cora_data = cora_dataset[0].to(device)
cora_data.num_features

1433

In [4]:
cora_loader = DataLoader(cora_dataset, batch_size=16, shuffle=True)
for batch in cora_loader:
    print(batch)

DataBatch(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], batch=[2708], ptr=[2])


In [7]:
# cora_data.train_mask
cora_data.y.unique()

tensor([0, 1, 2, 3, 4, 5, 6], device='cuda:2')

## Dummy data

In [5]:
## making Dummy data

features = torch.rand(5,3, dtype=float) # 5 nodes, 3 features
edges = torch.randint_like(torch.zeros(2,4), 2)

## Making graph un directed
edges = torch.cat([edges, torch.stack([ edges[1], edges[0]], dim = 0)], dim = 1)

print('features:\n', features)
print('edges:\n', edges)

graph_data = Data(x=features, edge_index=edges)

graph_data.is_directed()

features:
 tensor([[0.2521, 0.9817, 0.0370],
        [0.0510, 0.4707, 0.3776],
        [0.1217, 0.2523, 0.0912],
        [0.0775, 0.5659, 0.9049],
        [0.2312, 0.4914, 0.9360]], dtype=torch.float64)
edges:
 tensor([[0., 1., 0., 0., 1., 0., 1., 1.],
        [1., 0., 1., 1., 0., 1., 0., 0.]])


False

## Model

In [6]:
class GCN(nn.Module):
    def __init__(self, in_features, num_class = 2):
        super(GCN, self).__init__()
        self.conv1 = GCNConv(in_features, 50)
        self.conv2 = GCNConv(50, num_class)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        self.conv1(x, edge_index)
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)


## Train

In [9]:
## temp training loop
epochs = 50
num_class = 7
num_features = cora_data.num_features
model = GCN(num_features,num_class ).to(device)
# model = GCN(3).to(device)
model
optimizer = torch.optim.Adam(model.parameters(), lr = 0.01, weight_decay=5e-4)

model.train()

def train_once(model, optimizer, data):
    optimizer.zero_grad()
    output = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    return loss.item()

def test_once(model, data):
    model.eval()
    pred = model(data).argmax(dim=1)
    correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
    acc = int(correct)/ int(data.test_mask.sum())
    return acc


epoch_loss_list = []
for epoch in range(epochs):
    optimizer.zero_grad()

    out = model(cora_data)
    # loss = F.nll_loss(out, cora_data.y)
    loss = F.nll_loss(out[cora_data.train_mask], cora_data.y[cora_data.train_mask])
    loss.backward()
    optimizer.step()

    print(loss.item())


1.9488041400909424
1.7172476053237915
1.4754453897476196
1.2062066793441772
0.957212507724762
0.7603483200073242
0.5763006210327148
0.4430246651172638
0.34446558356285095
0.23802506923675537
0.19557404518127441
0.14379356801509857
0.10606261342763901
0.08529900759458542
0.07111141085624695
0.05544662848114967
0.04522155970335007
0.028784608468413353
0.04151010140776634
0.02557826228439808
0.02306334301829338
0.03018876537680626
0.02635553665459156
0.016293328255414963
0.013776899315416813
0.012089678086340427
0.010503326542675495
0.012259312905371189
0.012226182036101818
0.007974456064403057
0.01450271811336279
0.008107977919280529
0.006945633329451084
0.005296339746564627
0.007954574190080166
0.010217642411589622
0.010253830812871456
0.005610763560980558
0.009604820981621742
0.010569410398602486
0.007973745465278625
0.007452824153006077
0.011830199509859085
0.008153163827955723
0.011647247709333897
0.011631709523499012
0.008580955676734447
0.011507805436849594
0.010221350006759167
0.0

## Trying Our Preprocessing 

In [11]:
def preprocess_x(x):

    vectors_along_one = torch.matmul(x.T, torch.ones(x.shape[0]).type(torch.LongTensor)) * torch.ones(x.shape[0]).unsqueeze_(dim=1)
    x_pp = x - vectors_along_one

    return x_pp

In [13]:
x = torch.randint(10, (2,3))
print(x)

vectors_along_one = torch.matmul(x.T, torch.ones(x.shape[0]).type(torch.LongTensor)) * torch.ones(x.shape[0]).unsqueeze_(dim=1)
vectors_along_one /= torch.norm(torch.ones(x.shape[0]))**2
print(vectors_along_one)

x_new = x - vectors_along_one
print(x_new)

torch.matmul(x_new.T, torch.ones(x_new.shape[0]))

tensor([[6, 1, 0],
        [1, 9, 2]])
tensor([[3.5000, 5.0000, 1.0000],
        [3.5000, 5.0000, 1.0000]])
tensor([[ 2.5000, -4.0000, -1.0000],
        [-2.5000,  4.0000,  1.0000]])


tensor([-4.7684e-07, -9.5367e-07, -2.3842e-07])

In [10]:
v1 = torch.tensor([1.,2.])
v2 = torch.tensor([1.,1.])
v3 = torch.tensor([1.,-1.])

alpha1 = torch.dot(v1,v2)/torch.norm(v2)**2
alpha2 = torch.dot(v1,v3)/torch.norm(v3)**2

print(alpha1, alpha2)

alpha1 * torch.tensor([1.,1.]) + alpha2 * torch.tensor([1.,-1.])

tensor(1.5000) tensor(-0.5000)


tensor([1.0000, 2.0000])

## Getting edges from graph Data

In [9]:
cora_dataset = Planetoid(root='.', name='Cora')
cora_data = cora_dataset[0]
cora_data

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708])

In [34]:
degree(cora_data.edge_index[0]) + 1

tensor([4., 4., 6.,  ..., 2., 5., 5.])

In [31]:
node_deg =torch.zeros(cora_data.num_nodes)
print('total nodes', node_deg.shape)


for ni,nj in cora_data.edge_index.T:
    # print(ni, nj)
    node_deg[nj] +=1

node_deg

total nodes torch.Size([2708])


tensor([3., 3., 5.,  ..., 1., 4., 4.])

In [41]:
def preprocess_x_with_deg(x, edge_index):
    vec_deg = degree(edge_index[0]) + 1
    vec_deg = torch.sqrt(vec_deg) # vec_deg[i] = sqrt(d_i + 1)
    vectors_along_deg = torch.matmul(x.T, vec_deg) * vec_deg.unsqueeze_(dim=1)
    vectors_along_deg /= torch.norm(vec_deg)**2
    x_pp = x - vectors_along_deg

    return x_pp

x_perp = preprocess_x_with_deg(cora_data.x, cora_data.edge_index)

torch.matmul(x_perp.T, torch.sqrt(degree(cora_data.edge_index[0]) + 1))


tensor([-7.1581e-06, -2.3645e-05, -3.1546e-05,  ..., -8.8847e-06,
         1.0053e-04, -5.3493e-07])

# Todo
- [GCN takes all data features, it just hides labels of test nodes] Make dataloader for batched operation

# Rough

In [1]:
import yaml
from utils import get_config, config_to_dict

config = get_config(config_path='temp.yaml')
# config = get_config()

config_dict = config_to_dict(config)


with open('temp.yaml', '+w') as f:
    yaml.dump(config_dict, f)

In [7]:
config.__dict__

{'exp_name': 'exp_cora_vec_deg_v1',
 'description': 'Experiment for GCN training on cora dataset, removeing vectors of degrees i.e. sqrt(d_i + 1)',
 'epochs': 100,
 'batch_size': 16,
 'optimizer': namespace(name='adam', lr=0.01, wt_decay=0.0005),
 'data': namespace(name='planetoid', num_class=7, num_features=1433),
 'result_file': 'result.csv',
 'seed': 1234}

In [11]:
import json
json.loads(config.__dict__, object_hook= lambda x: x.__dict__)

TypeError: the JSON object must be str, bytes or bytearray, not dict