In [1]:
import torch
import torch.nn.functional as F
import scipy.io
from pretrain_gnns.bio.model import GNN
from sklearn.metrics import accuracy_score, f1_score
from torch_geometric.data import Data
from torch.utils.data import DataLoader
from torch.cuda.amp import GradScaler, autocast
from torch_geometric.data import Batch



In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [3]:
# # Adapt the BioDataset class to load .mat files
# class MatBioDataset(BioDataset):
#     def __init__(self, mat_file, transform=None):
#         self.data = scipy.io.loadmat(mat_file)
#         self.graphs = []
#         self.labels = []

#         x = torch.tensor(self.data['attrb'].todense(), dtype=torch.float32)
#         edge_index = torch.tensor(self.data['network'].nonzero(), dtype=torch.long)
#         y = torch.tensor(self.data['group'].argmax(axis=1).squeeze(), dtype=torch.long)
#         edge_attr = torch.ones(edge_index.shape[1], 9)
        
#         self.graphs.append(Data(x=x, edge_index=edge_index, edge_attr=edge_attr))
#         self.labels.append(y)

#     def __len__(self):
#         return len(self.graphs)

#     def __getitem__(self, idx):
#         return self.graphs[idx], self.labels[idx]

# Simple Dataset class
class SimpleDataset(torch.utils.data.Dataset):
    def __init__(self, mat_file):
        self.data = scipy.io.loadmat(mat_file)
        self.graphs = []
        self.labels = []

        x = torch.tensor(self.data['attrb'].todense(), dtype=torch.float32)
        edge_index = torch.tensor(self.data['network'].nonzero(), dtype=torch.long)
        y = torch.tensor(self.data['group'].argmax(axis=1).squeeze(), dtype=torch.long)
        edge_attr = torch.ones(edge_index.shape[1], 9)

        self.graphs.append(Data(x=x, edge_index=edge_index, edge_attr=edge_attr))
        self.labels.append(y)

    def __len__(self):
        return len(self.graphs)

    def __getitem__(self, idx):
        return self.graphs[idx], self.labels[idx]
    
def collate_fn(batch):
    return Batch.from_data_list(batch)

In [4]:
# Load datasets
train_dataset = SimpleDataset('acmv9.mat')
test_dataset = SimpleDataset('citationv1.mat')

  edge_index = torch.tensor(self.data['network'].nonzero(), dtype=torch.long)


In [5]:
# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True, collate_fn=collate_fn)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False, collate_fn=collate_fn)

In [6]:
# Load pre-trained model
model = GNN(num_layer=5, emb_dim=300, gnn_type='gin')
model.load_state_dict(torch.load('pretrain_gnns/bio/model_gin/supervised.pth'), strict=False)
model = model.to(device)

In [7]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()
scaler = GradScaler()

In [8]:
# Fine-tuning loop
model.train()
for epoch in range(25):
    for batch in train_loader:
        optimizer.zero_grad()
        with autocast():
            output = model(batch[0].x.to(device), batch[0].edge_index.to(device), batch[0].edge_attr.to(device))
            loss = criterion(output, batch[1].to(device))
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
    print(f'Epoch {epoch+1}, Loss: {loss.item():.4f}')

AttributeError: 'tupleBatch' object has no attribute 'stores_as'

In [None]:
# Evaluation
model.eval()
all_predictions = []
all_labels = []

with torch.no_grad():
    for batch in test_loader:
        with autocast():
            output = model(batch[0].x.to(device), batch[0].edge_index.to(device), batch[0].edge_attr.to(device))
        predictions = torch.argmax(output, dim=1)
        all_predictions.append(predictions.cpu())
        all_labels.append(batch[1].cpu())

all_predictions = torch.cat(all_predictions)
all_labels = torch.cat(all_labels)

accuracy = accuracy_score(all_labels, all_predictions)
micro_f1 = f1_score(all_labels, all_predictions, average='micro')

print(f'Accuracy: {accuracy:.4f}')
print(f'Micro F1 Score: {micro_f1:.4f}')

In [5]:
import torch
import torch.nn.functional as F
import scipy.io
from torch.utils.data import DataLoader
from pretrain_gnns.bio.loader import BioDataset
from pretrain_gnns.bio.dataloader import DataLoaderFinetune
from pretrain_gnns.bio.model import GNN
from sklearn.metrics import accuracy_score, f1_score
from torch.cuda.amp import GradScaler, autocast
from torch_geometric.data import Data

# Step 1: Load and preprocess the data
acm_data = scipy.io.loadmat('acmv9.mat')
citation_data = scipy.io.loadmat('citationv1.mat')

X_train = torch.tensor(acm_data['attrb'].todense(), dtype=torch.float32)
y_train = torch.tensor(acm_data['group'].argmax(axis=1).squeeze(), dtype=torch.long)
edge_index_train = torch.tensor(acm_data['network'].nonzero(), dtype=torch.long)
edge_attr_train = torch.tensor(acm_data['network'].todense(), dtype=torch.float32)  # Adjust if needed

X_test = torch.tensor(citation_data['attrb'].todense(), dtype=torch.float32)
y_test = torch.tensor(citation_data['group'].argmax(axis=1).squeeze(), dtype=torch.long)
edge_index_test = torch.tensor(citation_data['network'].nonzero(), dtype=torch.long)
edge_attr_test = torch.tensor(citation_data['network'].todense(), dtype=torch.float32)  # Adjust if needed

# Create PyTorch Geometric Data objects
train_data = [BioDataset(Data(x=X_train, edge_index=edge_index_train, edge_attr=edge_attr_train, y=y_train, data_type='supervised'), data_type='supervised')]
test_data = [BioDataset(Data(x=X_test, edge_index=edge_index_test, edge_attr=edge_attr_test, y=y_test, data_type='supervised'), data_type='supervised')]

# Step 2: Create DataLoader
train_loader = DataLoaderFinetune(train_data, batch_size=1, shuffle=True)
test_loader = DataLoaderFinetune(test_data, batch_size=1, shuffle=False)

# Step 3: Load pretrained model
model = GNN(num_layer=5, emb_dim=300, gnn_type='gin')  # Adjust num_layer and emb_dim as per pretrained model
checkpoint = torch.load('pretrain_gnns/bio/model_gin/supervised.pth')
model.load_state_dict(checkpoint, strict=False)

# Use CUDA if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Step 4: Set up optimizer, loss function, and scaler
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()
scaler = GradScaler()

# Step 5: Training loop
model.train()
for epoch in range(25):
    for batch in train_loader:
        optimizer.zero_grad()
        with autocast():
            # Move batch data to the device
            batch = batch.to(device)
            output = model(batch.x, batch.edge_index, batch.edge_attr)
            loss = criterion(output, batch.y)
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        print(f'Epoch {epoch+1}, Loss: {loss.item():.4f}')

# Step 6: Evaluation
model.eval()
all_preds = []
all_labels = []
with torch.no_grad():
    for batch in test_loader:
        batch = batch.to(device)
        output = model(batch.x, batch.edge_index, batch.edge_attr)
        predictions = torch.argmax(output, dim=1)
        all_preds.append(predictions.cpu().numpy())
        all_labels.append(batch.y.cpu().numpy())

# Step 7: Calculate metrics
accuracy = accuracy_score(np.concatenate(all_labels), np.concatenate(all_preds))
micro_f1 = f1_score(np.concatenate(all_labels), np.concatenate(all_preds), average='micro')

print(f'Accuracy: {accuracy:.4f}')
print(f'Micro F1 Score: {micro_f1:.4f}')


TypeError: expected str, bytes or os.PathLike object, not Data