<a href="https://colab.research.google.com/github/bodadineshreddy/indictrans2/blob/main/GNNA2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# %%capture
!pip uninstall torch-scatter torch-cluster torch-spline-conv torch-sparse -y
!pip install torch-scatter torch-cluster torch-spline-conv torch-sparse -f https://data.pyg.org/whl/torch-2.1.0+cu121.html
!pip install torch-geometric ogb
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121




Found existing installation: torch-geometric 2.6.1
Uninstalling torch-geometric-2.6.1:
  Successfully uninstalled torch-geometric-2.6.1
Found existing installation: torch-scatter 2.1.2+pt21cu121
Uninstalling torch-scatter-2.1.2+pt21cu121:
  Successfully uninstalled torch-scatter-2.1.2+pt21cu121
Found existing installation: torch-sparse 0.6.18+pt21cu121
Uninstalling torch-sparse-0.6.18+pt21cu121:
  Successfully uninstalled torch-sparse-0.6.18+pt21cu121
Found existing installation: torch-cluster 1.6.3+pt21cu121
Uninstalling torch-cluster-1.6.3+pt21cu121:
  Successfully uninstalled torch-cluster-1.6.3+pt21cu121
Found existing installation: torch-spline-conv 1.2.2+pt21cu121
Uninstalling torch-spline-conv-1.2.2+pt21cu121:
  Successfully uninstalled torch-spline-conv-1.2.2+pt21cu121
Looking in links: https://data.pyg.org/whl/torch-2.1.0+cu121.html
Collecting torch-scatter
  Using cached https://data.pyg.org/whl/torch-2.1.0%2Bcu121/torch_scatter-2.1.2%2Bpt21cu121-cp311-cp311-linux_x86_64.whl 

In [4]:
import torch
print(torch.__version__)

2.5.1+cu124


In [5]:
import ogb.utils.url  # Import the full module so you can access ogb.utils.url

# Override the decide_download function to skip user input
def decide_download(url):
    print(f"Auto-approving download for: {url}")
    return True

ogb.utils.url.decide_download = decide_download  # Apply the patch

In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.loader import DataLoader
from sklearn.metrics import roc_auc_score
import random
from collections import Counter
from ogb.graphproppred import PygGraphPropPredDataset

# Check CUDA availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load dataset
dataset = PygGraphPropPredDataset(name="ogbg-molhiv", root="dataset/")
split_idx = dataset.get_idx_split()
train_loader = DataLoader(dataset[split_idx["train"]], batch_size=32, shuffle=True)
valid_loader = DataLoader(dataset[split_idx["valid"]], batch_size=32, shuffle=False)
test_loader = DataLoader(dataset[split_idx["test"]], batch_size=32, shuffle=False)

# Generate Anonymous Walk Embeddings
def generate_anonymous_walk_embeddings(batch, walk_length=5, num_walks=100):
    """Generates anonymous walk embeddings for a batch of graphs."""
    all_anonymous_walks = set()
    batch_embeddings = []

    for graph in batch:  # 🔹 Directly iterate over batch (graph objects)
        edge_index = graph.edge_index
        num_nodes = graph.num_nodes
        anonymous_walk_counts = Counter()

        for _ in range(num_walks):
            start_node = random.randint(0, num_nodes - 1)
            walk = [start_node]
            for _ in range(walk_length - 1):
                neighbors = edge_index[1][edge_index[0] == walk[-1]]
                if len(neighbors) > 0:
                    walk.append(random.choice(neighbors.tolist()))

            anonymous_walk = tuple(walk.index(n) for n in walk)
            anonymous_walk_counts[anonymous_walk] += 1
            all_anonymous_walks.add(anonymous_walk)

        batch_embeddings.append(anonymous_walk_counts)

    # Convert to tensor
    all_anonymous_walks = list(all_anonymous_walks)
    embedding_matrix = torch.tensor(
        [[graph.get(walk, 0) for walk in all_anonymous_walks] for graph in batch_embeddings],
        dtype=torch.float32
    ).to(device)

    return embedding_matrix, all_anonymous_walks


# Dynamically Determine Input Dimension
sample_batch, _ = generate_anonymous_walk_embeddings([dataset[0]])
input_dim = sample_batch.shape[1]
print(f"Detected input dimension: {input_dim}")


# Define Model
class GraphClassifier(nn.Module):
    """Graph classification model."""
    def __init__(self, input_dim, hidden_dim=128, num_classes=1):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        return self.fc2(self.relu(self.fc1(x)))

# Training Function
def train(model, train_loader, optimizer, loss_fn, device):
    model.train()
    total_loss = 0

    for batch in train_loader:
        batch = batch.to(device)
        labels = batch.y.float().view(-1, 1).to(device)  # 🔹 Ensure labels are [batch_size, 1]

        # 🔹 Fix: Process the entire batch (not just one graph)
        embeddings, _ = generate_anonymous_walk_embeddings(batch, walk_length=5, num_walks=100)

        optimizer.zero_grad()
        outputs = model(embeddings)  # 🔹 Ensure outputs match batch size

        # 🔹 Debugging: Print shapes before loss computation
        print(f"🔹 Train - Outputs shape: {outputs.shape}, Labels shape: {labels.shape}")

        loss = loss_fn(outputs, labels)  # Shapes must match
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    return total_loss / len(train_loader)




# Evaluation Function
def evaluate(model, data_loader, device):
    model.eval()
    y_true, y_pred = [], []

    with torch.no_grad():
        for batch in data_loader:
            batch = batch.to(device)
            labels = batch.y.float().view(-1, 1).to(device)  # 🔹 Ensure labels are [batch_size, 1]

            embeddings, _ = generate_anonymous_walk_embeddings(batch, walk_length=5, num_walks=100)

            outputs = torch.sigmoid(model(embeddings))  # 🔹 Ensure correct shape

            # 🔹 Debugging: Print shapes
            print(f"🔹 Evaluation - Outputs shape: {outputs.shape}, Labels shape: {labels.shape}")

            y_true.extend(labels.cpu().numpy())
            y_pred.extend(outputs.cpu().numpy())

    return roc_auc_score(y_true, y_pred)



# Initialize Model & Train
model = GraphClassifier(input_dim=input_dim, num_classes=1).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.BCEWithLogitsLoss()

# Training Loop
num_epochs = 5
for epoch in range(num_epochs):
    train_loss = train(model, train_loader, optimizer, loss_fn, device)
    val_auc = evaluate(model, valid_loader, device)
    print(f"Epoch {epoch+1}/{num_epochs}: Loss={train_loss:.4f}, Val AUC={val_auc:.4f}")


Using device: cuda
Detected input dimension: 9


  self.data, self.slices = torch.load(self.processed_paths[0])


AttributeError: 'tuple' object has no attribute 'edge_index'