In [1]:
import torch
from torch_geometric.datasets import Planetoid
from hivegraph.contrastive.grace import GRACE
from sklearn.linear_model import LogisticRegression
from util import * 
from torch_geometric.utils import to_dense_adj
import torch_geometric.transforms as T 
from augmentation import *
from query_strategies import *
import matplotlib.pyplot as plt




In [2]:
# Load the 10 pre-computed Cora splits. Forward slashes are used so the
# relative path resolves on Windows, Linux and macOS alike (a backslash is an
# ordinary filename character outside Windows).
data_splits = [torch.load(f"data_splits/cora_splits/split_{i}.pt") for i in range(10)]
# The first split is the working dataset; it is moved to the GPU up front.
dataset_o = data_splits[0].to('cuda')

  data_splits = [torch.load(f"data_splits\\cora_splits\\split_{i}.pt") for i in range(10)]


In [3]:
# Target device passed to `.to(...)` when the GRACE model is built below.
device = 'cuda'

In [4]:
# GRACE encoder configuration: 2 layers, 128 hidden units, and a 0.3 drop
# rate for both edges and features in each of the two views.
grace_kwargs = dict(
    num_features=dataset_o.num_features,
    hidden=128,
    num_layers=2,
    drop_edge_rate_1=0.3,
    drop_edge_rate_2=0.3,
    drop_feature_rate_1=0.3,
    drop_feature_rate_2=0.3,
)
model = GRACE(**grace_kwargs).to(device)

# Adam over all model parameters.
opt = torch.optim.Adam(params=model.parameters(), lr=0.01)

In [5]:
# Train GRACE for 200 full-batch steps; train_step only receives the node
# features and the edge index (no labels).
# Training mode is set explicitly so that re-running this cell after a later
# `model.eval()` call still trains in training mode.
model.train()
for epoch in range(200):
    opt.zero_grad()
    loss = model.train_step(dataset_o.x, dataset_o.edge_index)
    loss.backward()
    opt.step()
    # .item() logs a plain float instead of the tensor repr with its grad_fn.
    print(f"Epoch {epoch:03d} - loss: {loss.item():.4f}")

tensor(8.5795, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.5495, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.9849, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.0902, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.5890, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.5925, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.4774, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.5562, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.5149, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.4225, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.4340, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.4667, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.4871, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.3651, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.3875, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.3520, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.3644, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.3566, device='cuda:0',

In [6]:
# Node embeddings from the trained model, reused by every selection strategy
# below. Nothing is ever back-propagated through them, so the forward pass
# runs without building an autograd graph.
# NOTE(review): the model is not switched to eval() here; confirm whether GRACE
# contains dropout/normalisation layers that would make that necessary.
with torch.no_grad():
    out = model(dataset_o.x, dataset_o.edge_index)

In [7]:
DROPOUT= 0.3  # rate passed to DropEdge and MaskFeature
NUM_PASSES = 10  # perturbed passes averaged per selection step
BUDGET = 150  # number of nodes acquired by each active-learning loop
EPOCHS = 100  # optimisation steps used by train_predictor
SIGNIFICANCE_ITERATIONS = 10  # not referenced in the cells shown here

NOISE_PROB = 0.4  # second argument of the "col" / "row" NoiseFeature augmentations
NOISE_LEVEL = 0.5  # first argument of NoiseFeature and NoiseLatent

In [8]:
# Augmentations
# Individual perturbations, all parameterised from the config constants.
drop_edge = DropEdge(DROPOUT)
noise_feature_all = NoiseFeature(NOISE_LEVEL, 1)
noise_feature_col = NoiseFeature(NOISE_LEVEL, NOISE_PROB, "col")
noise_feature_row = NoiseFeature(NOISE_LEVEL, NOISE_PROB, "row")
noise_latent = NoiseLatent(NOISE_LEVEL)
mask_feature = MaskFeature(DROPOUT)


def _after_drop_edge(feature_aug):
    """Return a transform that applies `drop_edge` first, then `feature_aug`."""
    return T.Compose([drop_edge, feature_aug])


# Composite views: edge dropping followed by one feature-level perturbation.
drop_edge_noise_all = _after_drop_edge(noise_feature_all)
drop_edge_noise_col = _after_drop_edge(noise_feature_col)
drop_edge_noise_row = _after_drop_edge(noise_feature_row)
drop_edge_mask_feature = _after_drop_edge(mask_feature)

In [None]:
# AUGMENTED ENTROPY
# Active-learning loop. At each step a logistic regression is fitted on the
# clean embeddings of the labelled nodes; the pool node whose prediction
# entropy, averaged over NUM_PASSES augmented views of the graph, is highest
# is then moved from the pool into the training set.
dataset = dataset_o.clone()
acc_aug = []

# The test mask is never modified below, so convert the test set to numpy once.
x_test = out[dataset.test_mask].detach().cpu().numpy()
y_test = dataset.y[dataset.test_mask].detach().cpu().numpy()

for b in range(BUDGET):
    pool_indices = get_mask_indices(dataset.train_pool).cpu()

    # Accuracy is recorded for the labelled set *before* this step's node is added.
    predictor = LogisticRegression()
    predictor.fit(
        out[dataset.train_mask].detach().cpu().numpy(),
        dataset.y[dataset.train_mask].detach().cpu().numpy(),
    )
    acc = predictor.score(x_test, y_test)

    entropy_sum = torch.zeros(dataset.num_nodes)

    for _ in range(NUM_PASSES):
        # A fresh clone is augmented on every pass; `dataset` itself is not passed in.
        data_tmp = drop_edge_noise_all(dataset.clone())
        # Inference only: no autograd graph is needed for the perturbed pass.
        with torch.no_grad():
            out_c = model(data_tmp.x, data_tmp.edge_index)
        pred_log_probas = predictor.predict_log_proba(out_c.detach().cpu().numpy())
        entropy_sum += calculate_entropy(torch.tensor(pred_log_probas))
    entropy_sum /= NUM_PASSES

    # argmax runs over the pool entries only, then is mapped back through pool_indices.
    chosen_node_ix = torch.argmax(entropy_sum[pool_indices])
    chosen_node = pool_indices[chosen_node_ix]
    dataset.train_pool[chosen_node] = False
    dataset.train_mask[chosen_node] = True

    acc_aug.append(acc)
    print(f"Budget {b} - Accuracy: {acc}")

In [9]:
# Sanity check: display the shape of the embedding tensor.
out.shape

torch.Size([2708, 128])

In [10]:
# Embeddings detached from the autograd graph; passed as features to train_predictor.
out_n = out.detach()

In [14]:
import torch.nn as nn
import torch.nn.functional as F

class SimpleLogisticRegression(nn.Module):
    """Multinomial logistic regression: a single linear layer followed by log-softmax."""

    def __init__(self, input_dim, output_dim):
        """Create the linear layer mapping `input_dim` features to `output_dim` classes."""
        super().__init__()
        self.linear = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """Return log-probabilities, normalised along dimension 1 of the linear output."""
        logits = self.linear(x)
        return logits.log_softmax(dim=1)

def train_predictor(x, y, train_mask, test_mask):
    """Fit a SimpleLogisticRegression on `x[train_mask]` and score it on `x[test_mask]`.

    Args:
        x: 2-D feature tensor; its second dimension sets the classifier input size.
        y: class-label tensor indexed like `x`; labels are assumed to start at 0.
        train_mask: mask/index selecting the rows used for the loss.
        test_mask: mask selecting the rows used for the accuracy; its sum is the
            denominator, so it must select at least one row.

    Returns:
        (classifier, acc): the trained module (left in eval mode) and its
        accuracy on the test rows as a float.
    """
    # Derive the sizes from the data instead of hard-coding the embedding width.
    input_dim = x.shape[1]
    output_dim = int(y.max().item()) + 1

    # Place the classifier wherever the features live.
    classifier = SimpleLogisticRegression(input_dim, output_dim).to(x.device)
    criterion = nn.NLLLoss()
    optimizer = torch.optim.Adam(classifier.parameters(), lr=0.01)

    # Full-batch training for EPOCHS steps; only the training rows enter the loss.
    classifier.train()
    for _ in range(EPOCHS):
        optimizer.zero_grad()
        log_probs = classifier(x)
        loss = criterion(log_probs[train_mask], y[train_mask])
        loss.backward()
        optimizer.step()

    # Evaluate on the features that were passed in (not on a notebook global).
    classifier.eval()
    with torch.no_grad():
        pred = classifier(x).argmax(dim=1)
    correct = int((pred[test_mask] == y[test_mask]).sum().item())
    acc = correct / int(test_mask.sum())
    return classifier, acc


In [15]:
# Fit the torch classifier on the detached embeddings of the original split;
# the returned accuracy is discarded here.
predictor, _ = train_predictor(
    x=out_n,
    y=dataset_o.y,
    train_mask=dataset_o.train_mask,
    test_mask=dataset_o.test_mask,
)

In [None]:
# Display the object currently bound to `predictor`.
predictor
# FINISH  (NOTE(review): unexplained author marker - clarify or remove)

SimpleLogisticRegression(
  (linear): Linear(in_features=128, out_features=7, bias=True)
)

In [None]:
# LATENT NOISE
# Active-learning loop. Same acquisition rule as the augmented-entropy
# strategy, but the perturbation is applied directly to the embeddings
# instead of to the input graph.
dataset = dataset_o.clone()
acc_laten_noise = []
# Detached copy: the perturbed embeddings are only used for inference.
out_a = out.detach().clone()

# The test mask is never modified below, so convert the test set to numpy once.
x_test = out[dataset.test_mask].detach().cpu().numpy()
y_test = dataset.y[dataset.test_mask].detach().cpu().numpy()

for b in range(BUDGET):
    pool_indices = get_mask_indices(dataset.train_pool).cpu()

    # Accuracy is recorded for the labelled set *before* this step's node is added.
    predictor = LogisticRegression()
    predictor.fit(
        out[dataset.train_mask].detach().cpu().numpy(),
        dataset.y[dataset.train_mask].detach().cpu().numpy(),
    )
    acc = predictor.score(x_test, y_test)

    entropy_sum = torch.zeros(dataset.num_nodes)

    for _ in range(NUM_PASSES):
        # NOTE(review): this adds the result of noise_latent to the embeddings,
        # which assumes NoiseLatent returns only the noise term - confirm.
        out_c = out_a + noise_latent(out_a)
        pred_log_probas = predictor.predict_log_proba(out_c.detach().cpu().numpy())
        entropy_sum += calculate_entropy(torch.tensor(pred_log_probas))
    entropy_sum /= NUM_PASSES

    # argmax runs over the pool entries only, then is mapped back through pool_indices.
    chosen_node_ix = torch.argmax(entropy_sum[pool_indices])
    chosen_node = pool_indices[chosen_node_ix]
    dataset.train_pool[chosen_node] = False
    dataset.train_mask[chosen_node] = True

    acc_laten_noise.append(acc)
    print(f"Budget {b} - Accuracy: {acc}")

In [None]:
# ENTROPY
# Baseline active-learning loop: label the pool node with the highest
# prediction entropy on the clean (unperturbed) embeddings.
dataset = dataset_o.clone()
acc_entropy = []

# Neither the embeddings nor the test mask change below, so convert them once.
emb_np = out.detach().cpu().numpy()
x_test = out[dataset.test_mask].detach().cpu().numpy()
y_test = dataset.y[dataset.test_mask].detach().cpu().numpy()

for b in range(BUDGET):
    # Accuracy is recorded for the labelled set *before* this step's node is added.
    predictor = LogisticRegression()
    predictor.fit(
        out[dataset.train_mask].detach().cpu().numpy(),
        dataset.y[dataset.train_mask].detach().cpu().numpy(),
    )
    acc = predictor.score(x_test, y_test)

    pred_log_probas = predictor.predict_log_proba(emb_np)
    entropies = calculate_entropy(torch.tensor(pred_log_probas))

    # argmax runs over the pool entries only, then is mapped back through pool_indices.
    pool_indices = get_mask_indices(dataset.train_pool).cpu()
    chosen_node_ix = torch.argmax(entropies[pool_indices])
    chosen_node = pool_indices[chosen_node_ix]
    dataset.train_pool[chosen_node] = False
    dataset.train_mask[chosen_node] = True

    acc_entropy.append(acc)
    print(f"Budget {b} - Accuracy: {acc}")
    

In [None]:
# LATENT DISTANCE
# Active-learning loop: label the pool node whose row-mean of
# adjacency-masked embedding distances to the other pool nodes is smallest.
# BUDGET comes from the configuration cell; it is no longer redefined here.
dataset = dataset_o.clone()
acc_latent = []

emb = out.detach()

# The test mask is never modified below, so convert the test set to numpy once.
x_test = out[dataset.test_mask].detach().cpu().numpy()
y_test = dataset.y[dataset.test_mask].detach().cpu().numpy()

# edge_index never changes in this loop, so the dense adjacency is built once.
# max_num_nodes pins it to (num_nodes, num_nodes) even if the highest-numbered
# nodes have no edges, so the boolean pool mask always matches its size.
adj_full = to_dense_adj(dataset.edge_index, max_num_nodes=dataset.num_nodes)[0].to(emb.device)

for b in range(BUDGET):
    # The classifier is only used to track accuracy; selection below ignores it.
    predictor = LogisticRegression()
    predictor.fit(
        out[dataset.train_mask].detach().cpu().numpy(),
        dataset.y[dataset.train_mask].detach().cpu().numpy(),
    )
    acc = predictor.score(x_test, y_test)

    pool_indices = get_mask_indices(dataset.train_pool).cpu()
    pool_mask = dataset.train_pool

    # Pairwise distances between pool embeddings, zeroed where no edge exists.
    pool_emb = emb[pool_mask]
    dist_matrix = torch.cdist(pool_emb, pool_emb)
    dist_matrix = dist_matrix * adj_full[pool_mask][:, pool_mask]

    # NOTE(review): the mean divides by the pool size, not the neighbour count,
    # and a pool node with no pool neighbours scores 0 and therefore wins the
    # argmin - confirm this is the intended criterion.
    mean_masked_dist = dist_matrix.mean(dim=1)
    chosen_node = pool_indices[torch.argmin(mean_masked_dist)]

    dataset.train_pool[chosen_node] = False
    dataset.train_mask[chosen_node] = True

    acc_latent.append(acc)
    print(f"Budget {b} - Accuracy: {acc}")

In [None]:
def loss_fn(y, y_hat):
    """Mean squared error between `y` and `y_hat` (mean over all elements)."""
    squared_error = (y - y_hat) ** 2
    return squared_error.mean()

In [None]:
# Toy optimisation problem: `t` is a unit-L2-norm column vector that the SGD
# optimiser updates directly; `y` holds the regression targets.
unit_column = F.normalize(torch.tensor([[1], [2], [3]], dtype=torch.float32), p=2, dim=0)
t = unit_column.requires_grad_()
y = torch.tensor([7, 8, 9], dtype=torch.float32)
sgd = torch.optim.SGD(params=[t], lr=0.1)

In [None]:
# Switch whatever `model` is currently bound to into evaluation mode.
# NOTE(review): in the cells shown, `model` is the GRACE network; this state
# persists in the kernel and affects any cell re-run afterwards.
model.eval()

In [None]:
# Display the weight of `model.linear`.
# NOTE(review): `.linear` is the attribute defined by SimpleLogisticRegression,
# but the visible cells bind `model` to GRACE - confirm which object is meant.
model.linear.weight

In [None]:
# Scratch experiment: 30 SGD steps that update the input tensor `t` (the only
# parameter handed to `sgd`) to reduce loss_fn(y, model(t)).
# NOTE(review): `model(t)` is called with a single argument and
# `model.linear.weight[0].item()` needs a one-element weight row, whereas the
# visible cells bind `model` to GRACE and call it with (x, edge_index). This
# cell appears to rely on a `model` defined outside the cells shown - confirm.
for e in range(30):
    sgd.zero_grad()
    y_hat = model(t)
    l = loss_fn(y, y_hat)
    l.backward()
    sgd.step()
    print(f"Loss: {l}, t: {t} w:{model.linear.weight[0].item()}")
    

In [None]:

# Learning curves: one line per selection strategy, with the test accuracy
# recorded at each acquisition step. Axis labels and a title are set so the
# figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(acc_aug, label="Augmented Entropy")
ax.plot(acc_entropy, label="Entropy")
ax.plot(acc_latent, label="Latent Distance")
ax.plot(acc_laten_noise, label="Latent Noise")
ax.set_xlabel("Acquisition step")
ax.set_ylabel("Test accuracy")
ax.set_title("Active-learning strategies on GRACE embeddings")
ax.legend()
plt.show()