# A táblázatos comparison ötlet tesztelése

In [2]:
import networkx as nx
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import DataLoader, Dataset
from torch_geometric.nn.models import Node2Vec
from torch_geometric.utils import from_networkx

ModuleNotFoundError: No module named 'torch'

In [5]:
# Step 1: Create a Graph
# Seed torch's global RNG before any randomly initialised module is built, so
# repeated runs of the notebook are comparable (same value as the
# `random_state` used for the train/test split).
torch.manual_seed(42)

G = nx.karate_club_graph()

# Convert the networkx graph into a torch_geometric data object; only its
# `edge_index` is consumed by Node2Vec below.
data = from_networkx(G)

# Move model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# Step 2: Generate Node2Vec Embeddings
node2vec = Node2Vec(
    data.edge_index,
    embedding_dim=64,
    walk_length=30,
    context_size=10,
    walks_per_node=200,
    num_negative_samples=1,
    p=1,
    q=1,
    sparse=True,  # sparse embedding gradients; pair with a sparse-aware optimizer
).to(device)

ImportError: 'Node2Vec' requires either the 'pyg-lib' or 'torch-cluster' package

In [2]:
! pip install torch-cluster

Collecting torch-cluster
  Using cached torch_cluster-1.6.3.tar.gz (54 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'error'


  error: subprocess-exited-with-error
  
  × Getting requirements to build wheel did not run successfully.
  │ exit code: 1
  ╰─> [17 lines of output]
      Traceback (most recent call last):
        File "C:\Users\csaba\Documents\Coding\git_own\thesis_coding\.venv-thesis_coding-py3.10\lib\site-packages\pip\_vendor\pyproject_hooks\_in_process\_in_process.py", line 353, in <module>
          main()
        File "C:\Users\csaba\Documents\Coding\git_own\thesis_coding\.venv-thesis_coding-py3.10\lib\site-packages\pip\_vendor\pyproject_hooks\_in_process\_in_process.py", line 335, in main
          json_out['return_val'] = hook(**hook_input['kwargs'])
        File "C:\Users\csaba\Documents\Coding\git_own\thesis_coding\.venv-thesis_coding-py3.10\lib\site-packages\pip\_vendor\pyproject_hooks\_in_process\_in_process.py", line 118, in get_requires_for_build_wheel
          return hook(config_settings)
        File "C:\Users\csaba\AppData\Local\Temp\pip-build-env-lmclo0en\overlay\Lib\site-packages

In [None]:
# Train Node2Vec model
optimizer = torch.optim.SparseAdam(list(node2vec.parameters()), lr=0.01)

def train_node2vec(epochs=100, batch_size=128, log_every=10):
    """Optimise the Node2Vec embeddings on sampled random walks.

    ``Node2Vec.loss`` takes a batch of positive and negative random walks
    (not the edge index), so the batches are drawn from ``node2vec.loader``.
    Uses the module-level ``node2vec``, ``optimizer`` and ``device``.

    Args:
        epochs: Number of passes over the walk loader.
        batch_size: Number of start nodes per optimisation step.
        log_every: Print the loss averaged over this many epochs.
    """
    walk_loader = node2vec.loader(batch_size=batch_size, shuffle=True)
    node2vec.train()
    total_loss = 0
    for epoch in range(epochs):
        epoch_loss = 0.0
        for pos_rw, neg_rw in walk_loader:
            optimizer.zero_grad()
            loss = node2vec.loss(pos_rw.to(device), neg_rw.to(device))
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        # Mean over the batches of this epoch, so the logged value does not
        # scale with the number of batches.
        total_loss += epoch_loss / len(walk_loader)
        if (epoch + 1) % log_every == 0:
            print(f'Epoch: {epoch + 1}, Loss: {total_loss / log_every:.4f}')
            total_loss = 0

train_node2vec()

# Pull the learned embedding matrix off the device as a NumPy array
embedding_weights = node2vec.embedding.weight.data
embeddings = embedding_weights.cpu().numpy()

# Club membership of every node, in graph iteration order
club_names = np.array([G.nodes[node_id]['club'] for node_id in G])

# Map the club names to integer class ids
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform(club_names)

# Step 3: Split data into training and testing sets (30% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    embeddings,
    labels,
    test_size=0.3,
    random_state=42,
)

# Step 4: Create a PyTorch Dataset and DataLoader
class GraphDataset(Dataset):
    """Map-style dataset pairing feature rows with integer class labels.

    Args:
        X: Array-like of features; copied into a ``float32`` tensor.
        y: Array-like of labels; copied into a ``long`` tensor. Must have as
            many entries as ``X`` has rows.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length.
    """

    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.long)
        # Fail here rather than at some later index lookup (or, worse, by
        # silently ignoring surplus labels).
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must have the same length, got {len(self.X)} and {len(self.y)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]

# Wrap each split in a dataset, then batch it; only the training batches are shuffled
train_dataset, test_dataset = GraphDataset(X_train, y_train), GraphDataset(X_test, y_test)

train_loader, test_loader = (
    DataLoader(train_dataset, batch_size=16, shuffle=True),
    DataLoader(test_dataset, batch_size=16, shuffle=False),
)

# Step 5: Define a Simple Classification Model
class NodeClassifier(nn.Module):
    """Two-layer perceptron: linear -> ReLU -> linear, returning raw class scores.

    Args:
        input_dim: Size of each input feature vector.
        hidden_dim: Width of the hidden layer.
        output_dim: Number of scores produced per sample.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Run ``x`` through the hidden layer and return the unnormalised scores."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Take the input width from the features actually fed to the classifier rather
# than repeating the Node2Vec embedding size as a literal.
input_dim = X_train.shape[1]
hidden_dim = 32
# One output score per distinct class label
output_dim = len(np.unique(labels))

model = NodeClassifier(input_dim, hidden_dim, output_dim).to(device)

# Step 6: Train the Model
# Cross-entropy on the raw scores, optimised with Adam
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

epochs = 20
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for features, targets in train_loader:
        features = features.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        batch_loss = criterion(model(features), targets)
        batch_loss.backward()
        optimizer.step()

        total_loss += batch_loss.item()
    # Report the mean batch loss of this epoch
    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader):.4f}")

# Step 7: Evaluate the Model
model.eval()
correct, total = 0, 0
with torch.no_grad():  # no gradients needed while scoring the held-out batches
    for X_batch, y_batch in test_loader:
        X_batch = X_batch.to(device)
        y_batch = y_batch.to(device)
        # Predicted class = index of the highest score in each row
        predicted = model(X_batch).argmax(dim=1)
        correct += (predicted == y_batch).sum().item()
        total += y_batch.size(0)

accuracy = correct / total
print(f"Test Accuracy: {accuracy:.4f}")
