In [1]:
import torch
import scipy.io
from torch_geometric.data import Data, DataLoader
from pretrain_gnns.bio.model import GNN
from torch.cuda.amp import GradScaler, autocast
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score

In [2]:
# Select the compute device: the GPU when PyTorch can see one, otherwise the CPU.
# (To force CPU execution, assign torch.device('cpu') here instead.)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

cuda


In [3]:
# Function to load and process the data
def load_graph_data(filepath, edge_attr_dim=9):
    """Load an attributed graph from a MATLAB ``.mat`` file into a PyG ``Data`` object.

    The file is read with ``scipy.io.loadmat`` and must provide three entries:

    * ``'attrb'``   -- node-feature matrix stored as a SciPy sparse matrix
      (it is densified with ``.toarray()``),
    * ``'network'`` -- adjacency matrix; every non-zero entry becomes one edge,
    * ``'group'``   -- per-node label-indicator matrix; a node's label is the
      arg-max of its row.

    Args:
        filepath: Path of the ``.mat`` file to read.
        edge_attr_dim: Width of the constant all-ones edge-attribute matrix
            attached to the graph. Defaults to 9, the value that was
            previously hard-coded.

    Returns:
        A ``Data`` object with ``x`` (float32), ``edge_index`` (int64, shape
        ``[2, num_edges]``), ``edge_attr`` (all ones, shape
        ``[num_edges, edge_attr_dim]``) and ``y`` (int64, 1-D).
    """
    mat = scipy.io.loadmat(filepath)

    # .toarray() yields a plain ndarray; .todense() would return the legacy
    # np.matrix type instead.
    x = torch.tensor(mat['attrb'].toarray(), dtype=torch.float32)

    # nonzero() returns a (rows, cols) tuple of arrays. Converting each array
    # on its own and stacking avoids torch.tensor's slow "list of ndarrays"
    # path and the UserWarning it raises.
    rows, cols = mat['network'].nonzero()
    edge_index = torch.stack([
        torch.as_tensor(rows, dtype=torch.long),
        torch.as_tensor(cols, dtype=torch.long),
    ])

    # Constant placeholder edge features: one all-ones row per edge.
    # NOTE(review): the width presumably has to match what the downstream
    # model's edge encoder expects -- confirm before changing the default.
    edge_attr = torch.ones(edge_index.shape[1], edge_attr_dim)

    # reshape(-1) guarantees a flat label vector whether argmax returned a
    # 1-D array (dense input) or an (N, 1) matrix (sparse input), and also
    # for a single-node graph, where squeeze() would collapse to 0-D.
    y = torch.tensor(mat['group'].argmax(axis=1), dtype=torch.long).reshape(-1)

    return Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)

In [4]:
# Read both graphs from disk: 'acmv9.mat' is the graph the model is fine-tuned
# on, 'citationv1.mat' is the graph it is evaluated on.
train_data = load_graph_data(filepath='acmv9.mat')
test_data = load_graph_data(filepath='citationv1.mat')

  edge_index = torch.tensor(data['network'].nonzero(), dtype=torch.long)


In [5]:
# Wrap each graph in its own one-element dataset so it can be iterated as a
# batch of one; only the training loader shuffles.
# NOTE(review): the fine-tuning and evaluation cells of this notebook read
# train_data / test_data directly and never iterate these loaders.
train_loader = DataLoader([train_data], shuffle=True, batch_size=1)
test_loader = DataLoader([test_data], shuffle=False, batch_size=1)



In [19]:
# Build the GIN encoder shipped with pretrain-gnns (bio variant), restore its
# pre-trained weights and move it to the selected device.
#
# NOTE(review): num_node_features and num_classes are computed below but are
# not passed to GNN(...), and the fine-tuning cell feeds the encoder output
# straight into CrossEntropyLoss. Presumably the encoder returns emb_dim-wide
# node embeddings rather than num_classes logits, and its input layer may not
# accept a dense float feature matrix (the recorded run aborted on a 70.87 GiB
# CUDA allocation) -- verify against pretrain_gnns/bio/model.py; a feature
# projection and a classification head are likely needed.
num_node_features = train_data.x.size(1)   # width of the node-feature matrix
num_classes = int(train_data.y.max()) + 1  # highest label index + 1

pretrained_path = 'pretrain_gnns/bio/model_gin/supervised.pth'
model = GNN(num_layer=5, emb_dim=300, JK="last", drop_ratio=0.5, gnn_type='gin')
model.load_state_dict(torch.load(pretrained_path, map_location=device))
model = model.to(device)

In [12]:
# Set up optimizer, loss function and the AMP gradient scaler.
# The optimizer binds to the parameter objects of the *current* `model`, so
# this cell has to be re-run whenever the model cell is re-executed; otherwise
# it keeps updating the parameters of the previous model instance.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()
# Loss scaling is only meaningful for CUDA mixed precision. Disabling it
# explicitly on the CPU fallback turns scale/step/update into pass-throughs
# instead of relying on GradScaler to warn and disable itself.
scaler = GradScaler(enabled=(device.type == 'cuda'))

In [13]:
# Fine-tuning loop with mixed precision.
# Training is full-batch: every epoch runs one forward/backward pass over the
# whole training graph followed by a single optimizer step.
model.train()

# The graph does not change between epochs, so copy it to the device once
# instead of re-transferring every tensor on each iteration.
train_x = train_data.x.to(device)
train_edge_index = train_data.edge_index.to(device)
train_edge_attr = train_data.edge_attr.to(device)
train_y = train_data.y.to(device)

# torch.cuda.amp autocast only applies to CUDA; run in full precision on CPU.
use_amp = device.type == 'cuda'

for epoch in tqdm(range(25), desc="Fine-tuning Progress"):
    optimizer.zero_grad()
    with autocast(enabled=use_amp):
        output = model(train_x, train_edge_index, train_edge_attr)
        loss = criterion(output, train_y)
    # Scale the loss before backward so float16 gradients do not underflow;
    # scaler.step() unscales them again before the optimizer update.
    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()
    # tqdm.write prints above the progress bar instead of breaking it.
    tqdm.write(f'Epoch {epoch+1}, Loss: {loss.item():.4f}')

# Release the device copies of the training graph so their memory can be
# reused by later cells.
del train_x, train_edge_index, train_edge_attr, train_y

Fine-tuning Progress:   0%|          | 0/25 [00:04<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 70.87 GiB. GPU 0 has a total capacity of 4.00 GiB of which 2.51 GiB is free. Of the allocated memory 773.14 MiB is allocated by PyTorch, and 4.86 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Evaluation on the test graph: one full-graph forward pass without gradient
# tracking, then accuracy and micro-averaged F1 of the arg-max predictions.
model.eval()
with torch.no_grad():
    # Mixed precision is only used on CUDA; on the CPU fallback the forward
    # pass runs in full precision.
    with autocast(enabled=(device.type == 'cuda')):
        output = model(test_data.x.to(device), test_data.edge_index.to(device), test_data.edge_attr.to(device))
    predictions = torch.argmax(output, dim=1)

# Move labels and predictions to host memory once and hand scikit-learn plain
# NumPy arrays rather than tensors.
y_true = test_data.y.cpu().numpy()
y_pred = predictions.cpu().numpy()
accuracy = accuracy_score(y_true, y_pred)
micro_f1 = f1_score(y_true, y_pred, average='micro')
print(f'Accuracy: {accuracy:.4f}')
print(f'Micro F1 Score: {micro_f1:.4f}')