In [None]:
!pip install torch
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.4.0-py3-none-any.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch_geometric
Successfully installed torch_geometric-2.4.0


In [None]:
import torch
import numpy as np
from torch_geometric.datasets import Planetoid

# Load a dataset and print its statistics
def load_and_print_stats(dataset_name):
    """Load a Planetoid dataset, print a short summary and return its graph.

    Args:
        dataset_name: Name passed to ``Planetoid``; also used for the
            storage directory ``/tmp/<dataset_name>`` and the printed header.

    Returns:
        A ``(data, num_classes)`` tuple where ``data`` is the first item of the
        dataset and ``num_classes`` is ``dataset.num_classes``.
    """
    planetoid = Planetoid(root=f'/tmp/{dataset_name}', name=dataset_name)
    graph = planetoid[0]
    class_count = planetoid.num_classes

    # Assemble the whole report first, then emit it in a single print call.
    report = [
        f"{dataset_name} Dataset Statistics:",
        f"Number of Nodes: {graph.num_nodes}",
        f"Number of Edges: {graph.num_edges}",
        f"Number of Classes: {class_count}",
        f"Number of Node Features: {graph.num_node_features}",
    ]
    print("\n".join(report))
    return graph, class_count

# Split the nodes into training, validation and test sets
def split_data(data, dataset_name, train_ratio=0.7, val_ratio=0.1, seed=None):
    """Attach random, disjoint train/val/test boolean node masks to ``data``.

    The nodes are randomly permuted; the first ``int(num_nodes * train_ratio)``
    go to training, the next ``int(num_nodes * val_ratio)`` to validation and
    all remaining nodes to test. ``data.train_mask``, ``data.val_mask`` and
    ``data.test_mask`` are overwritten in place and the split sizes are printed.

    Args:
        data: Object exposing ``num_nodes``; receives the three mask attributes.
        dataset_name: Label used in the printed summary.
        train_ratio: Fraction of nodes used for training.
        val_ratio: Fraction of nodes used for validation.
        seed: Optional seed for a reproducible split. When ``None`` the global
            NumPy random state is used (the historical behaviour).

    Raises:
        ValueError: If a ratio is negative or the two ratios sum to more than 1.
    """
    # Small tolerance so that ratios such as 0.9 + 0.1 are not rejected by rounding.
    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1 + 1e-9:
        raise ValueError(
            f"train_ratio and val_ratio must be non-negative and sum to at most 1, "
            f"got {train_ratio} and {val_ratio}"
        )

    num_nodes = data.num_nodes
    if seed is None:
        permutation = np.random.permutation(num_nodes)
    else:
        permutation = np.random.default_rng(seed).permutation(num_nodes)
    # Index with a LongTensor regardless of the platform's default NumPy int size.
    indices = torch.as_tensor(permutation, dtype=torch.long)

    train_size = int(num_nodes * train_ratio)
    val_size = int(num_nodes * val_ratio)

    data.train_mask = torch.zeros(num_nodes, dtype=torch.bool)
    data.val_mask = torch.zeros(num_nodes, dtype=torch.bool)
    data.test_mask = torch.zeros(num_nodes, dtype=torch.bool)

    data.train_mask[indices[:train_size]] = True
    data.val_mask[indices[train_size:train_size + val_size]] = True
    data.test_mask[indices[train_size + val_size:]] = True

    # Print the number of nodes in each part
    print(f"Data Split for {dataset_name}:")
    print(f"Training Nodes: {data.train_mask.sum().item()}")
    print(f"Validation Nodes: {data.val_mask.sum().item()}")
    print(f"Test Nodes: {data.test_mask.sum().item()}\n")

# Load the Cora and CiteSeer datasets and print their statistics
cora_data, cora_num_classes = load_and_print_stats(dataset_name='Cora')
citeseer_data, citeseer_num_classes = load_and_print_stats(dataset_name='CiteSeer')

# Split both datasets; the dataset name is passed so each printed summary is labelled
split_data(cora_data, dataset_name='Cora', train_ratio=0.7, val_ratio=0.1)
split_data(citeseer_data, dataset_name='CiteSeer', train_ratio=0.7, val_ratio=0.1)





Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index
Processing...
Done!
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.x


Cora Dataset Statistics:
Number of Nodes: 2708
Number of Edges: 10556
Number of Classes: 7
Number of Node Features: 1433


Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.test.index
Processing...


CiteSeer Dataset Statistics:
Number of Nodes: 3327
Number of Edges: 9104
Number of Classes: 6
Number of Node Features: 3703
Data Split for Cora:
Training Nodes: 1895
Validation Nodes: 270
Test Nodes: 543

Data Split for CiteSeer:
Training Nodes: 2328
Validation Nodes: 332
Test Nodes: 667



Done!


## PPNP

In [None]:
import torch
import numpy as np
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv, APPNP

# Data loading and preprocessing
def load_and_print_stats(dataset_name):
    """Load a Planetoid dataset, print a short summary and return its graph.

    Args:
        dataset_name: Name passed to ``Planetoid``; also used for the
            storage directory ``/tmp/<dataset_name>`` and the printed header.

    Returns:
        A ``(data, num_classes)`` tuple where ``data`` is the first item of the
        dataset and ``num_classes`` is ``dataset.num_classes``.
    """
    planetoid = Planetoid(root=f'/tmp/{dataset_name}', name=dataset_name)
    graph = planetoid[0]
    class_count = planetoid.num_classes

    # Assemble the whole report first, then emit it in a single print call.
    report = [
        f"{dataset_name} Dataset Statistics:",
        f"Number of Nodes: {graph.num_nodes}",
        f"Number of Edges: {graph.num_edges}",
        f"Number of Classes: {class_count}",
        f"Number of Node Features: {graph.num_node_features}",
    ]
    print("\n".join(report))
    return graph, class_count

def split_data(data, dataset_name, train_ratio=0.7, val_ratio=0.1, seed=None):
    """Attach random, disjoint train/val/test boolean node masks to ``data``.

    The nodes are randomly permuted; the first ``int(num_nodes * train_ratio)``
    go to training, the next ``int(num_nodes * val_ratio)`` to validation and
    all remaining nodes to test. ``data.train_mask``, ``data.val_mask`` and
    ``data.test_mask`` are overwritten in place and the split sizes are printed.

    Args:
        data: Object exposing ``num_nodes``; receives the three mask attributes.
        dataset_name: Label used in the printed summary.
        train_ratio: Fraction of nodes used for training.
        val_ratio: Fraction of nodes used for validation.
        seed: Optional seed for a reproducible split. When ``None`` the global
            NumPy random state is used (the historical behaviour).

    Raises:
        ValueError: If a ratio is negative or the two ratios sum to more than 1.
    """
    # Small tolerance so that ratios such as 0.9 + 0.1 are not rejected by rounding.
    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1 + 1e-9:
        raise ValueError(
            f"train_ratio and val_ratio must be non-negative and sum to at most 1, "
            f"got {train_ratio} and {val_ratio}"
        )

    num_nodes = data.num_nodes
    if seed is None:
        permutation = np.random.permutation(num_nodes)
    else:
        permutation = np.random.default_rng(seed).permutation(num_nodes)
    # Index with a LongTensor regardless of the platform's default NumPy int size.
    indices = torch.as_tensor(permutation, dtype=torch.long)

    train_size = int(num_nodes * train_ratio)
    val_size = int(num_nodes * val_ratio)

    data.train_mask = torch.zeros(num_nodes, dtype=torch.bool)
    data.val_mask = torch.zeros(num_nodes, dtype=torch.bool)
    data.test_mask = torch.zeros(num_nodes, dtype=torch.bool)

    data.train_mask[indices[:train_size]] = True
    data.val_mask[indices[train_size:train_size + val_size]] = True
    data.test_mask[indices[train_size + val_size:]] = True

    print(f"Data Split for {dataset_name}:")
    print(f"Training Nodes: {data.train_mask.sum().item()}")
    print(f"Validation Nodes: {data.val_mask.sum().item()}")
    print(f"Test Nodes: {data.test_mask.sum().item()}\n")

# Model definition
class PPNP(torch.nn.Module):
    """Two GCNConv layers followed by APPNP propagation and a log-softmax.

    Args:
        num_features: Size of the input node feature vectors.
        num_classes: Number of output classes.
        hidden_dim: Output width of the first GCNConv layer.
        K: Number of propagation steps passed to ``APPNP``.
        alpha: Teleport probability passed to ``APPNP``.
        dropout: Dropout probability applied after the first layer while
            training (0.5 is the ``F.dropout`` default used previously).

    NOTE(review): the class is named PPNP but uses graph convolutions for the
    prediction step and the iterative ``APPNP`` operator for propagation —
    confirm this is the intended architecture.
    """

    def __init__(self, num_features, num_classes, hidden_dim=16, K=10, alpha=0.1, dropout=0.5):
        super().__init__()
        self.dropout = dropout
        self.conv1 = GCNConv(num_features, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, num_classes)
        self.propagation = APPNP(K=K, alpha=alpha)

    def forward(self, data):
        """Return per-node log-probabilities for ``data.x`` / ``data.edge_index``."""
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        # Dropout is only active in training mode.
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)
        x = self.propagation(x, edge_index)
        return F.log_softmax(x, dim=1)

# Training and evaluation functions
def train(model, data, optimizer):
    """Perform one full-batch optimisation step and return the loss as a float.

    The model is switched to training mode, evaluated on ``data``, and the
    negative log-likelihood over the nodes selected by ``data.train_mask`` is
    back-propagated before ``optimizer`` takes a step.
    """
    model.train()
    optimizer.zero_grad()

    train_nodes = data.train_mask
    log_probs = model(data)
    step_loss = F.nll_loss(log_probs[train_nodes], data.y[train_nodes])

    step_loss.backward()
    optimizer.step()
    return step_loss.item()

def evaluate(model, data, mask=None):
    """Return the classification accuracy of ``model`` on the nodes in ``mask``.

    Args:
        model: Module called as ``model(data)`` returning per-node class scores.
        data: Graph object providing ``y`` and, when ``mask`` is omitted,
            ``test_mask``.
        mask: Boolean node mask to score. Defaults to ``data.test_mask`` so
            existing two-argument calls keep measuring test accuracy.

    Returns:
        Fraction of masked nodes whose arg-max prediction equals the label.

    Raises:
        ZeroDivisionError: If ``mask`` selects no nodes.
    """
    if mask is None:
        mask = data.test_mask
    model.eval()
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        out = model(data)
    pred = out.argmax(dim=1)
    correct = pred[mask].eq(data.y[mask]).sum().item()
    return correct / int(mask.sum())

# Seed the RNGs so the node split (NumPy permutation in split_data) and the
# weight initialisation / dropout (PyTorch) are reproducible across runs.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Load data and split
cora_data, cora_num_classes = load_and_print_stats('Cora')
citeseer_data, citeseer_num_classes = load_and_print_stats('CiteSeer')
split_data(cora_data, 'Cora', 0.7, 0.1)
split_data(citeseer_data, 'CiteSeer', 0.7, 0.1)

# Initialize models and optimizers
cora_model = PPNP(num_features=cora_data.num_node_features, num_classes=cora_num_classes)
citeseer_model = PPNP(num_features=citeseer_data.num_node_features, num_classes=citeseer_num_classes)
cora_optimizer = torch.optim.Adam(cora_model.parameters(), lr=0.01, weight_decay=5e-4)
citeseer_optimizer = torch.optim.Adam(citeseer_model.parameters(), lr=0.01, weight_decay=5e-4)

# Training loops
# evaluate(model, data) scores the nodes in data.test_mask, so the value logged
# after every epoch is the test accuracy (it was previously mislabelled as
# training accuracy).

# Training loop for Cora dataset
for epoch in range(200):
    cora_loss = train(cora_model, cora_data, cora_optimizer)
    cora_test_acc = evaluate(cora_model, cora_data)  # accuracy on test_mask nodes
    print(f'Cora - Epoch: {epoch+1}, Loss: {cora_loss:.4f}, Test Acc: {cora_test_acc:.4f}')

# Training loop for CiteSeer dataset
for epoch in range(200):
    citeseer_loss = train(citeseer_model, citeseer_data, citeseer_optimizer)
    citeseer_test_acc = evaluate(citeseer_model, citeseer_data)  # accuracy on test_mask nodes
    print(f'CiteSeer - Epoch: {epoch+1}, Loss: {citeseer_loss:.4f}, Test Acc: {citeseer_test_acc:.4f}')


# # Evaluate on test set after training
# cora_test_acc = evaluate(cora_model, cora_data)
# print(f'\nCora - Test Accuracy: {cora_test_acc:.4f}')

# # Evaluate on test set after training
# citeseer_test_acc = evaluate(citeseer_model, citeseer_data)
# print(f'\nCiteSeer - Test Accuracy: {citeseer_test_acc:.4f}')


Cora Dataset Statistics:
Number of Nodes: 2708
Number of Edges: 10556
Number of Classes: 7
Number of Node Features: 1433
CiteSeer Dataset Statistics:
Number of Nodes: 3327
Number of Edges: 9104
Number of Classes: 6
Number of Node Features: 3703
Data Split for Cora:
Training Nodes: 1895
Validation Nodes: 270
Test Nodes: 543

Data Split for CiteSeer:
Training Nodes: 2328
Validation Nodes: 332
Test Nodes: 667

Cora - Epoch: 1, Loss: 1.9580, Training Acc: 0.2505
Cora - Epoch: 2, Loss: 1.8788, Training Acc: 0.3131
Cora - Epoch: 3, Loss: 1.8124, Training Acc: 0.2983
Cora - Epoch: 4, Loss: 1.7435, Training Acc: 0.3223
Cora - Epoch: 5, Loss: 1.6680, Training Acc: 0.3923
Cora - Epoch: 6, Loss: 1.5976, Training Acc: 0.4880
Cora - Epoch: 7, Loss: 1.5236, Training Acc: 0.5525
Cora - Epoch: 8, Loss: 1.4430, Training Acc: 0.5856
Cora - Epoch: 9, Loss: 1.3865, Training Acc: 0.6298
Cora - Epoch: 10, Loss: 1.3198, Training Acc: 0.6630
Cora - Epoch: 11, Loss: 1.2621, Training Acc: 0.7109
Cora - Epoch: 1

In [None]:
import torch
import numpy as np
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv, APPNP

# Data loading and preprocessing
def load_and_print_stats(dataset_name):
    """Load a Planetoid dataset, print a short summary and return its graph.

    Args:
        dataset_name: Name passed to ``Planetoid``; also used for the
            storage directory ``/tmp/<dataset_name>`` and the printed header.

    Returns:
        A ``(data, num_classes)`` tuple where ``data`` is the first item of the
        dataset and ``num_classes`` is ``dataset.num_classes``.
    """
    planetoid = Planetoid(root=f'/tmp/{dataset_name}', name=dataset_name)
    graph = planetoid[0]
    class_count = planetoid.num_classes

    # Assemble the whole report first, then emit it in a single print call.
    report = [
        f"{dataset_name} Dataset Statistics:",
        f"Number of Nodes: {graph.num_nodes}",
        f"Number of Edges: {graph.num_edges}",
        f"Number of Classes: {class_count}",
        f"Number of Node Features: {graph.num_node_features}",
    ]
    print("\n".join(report))
    return graph, class_count

def split_data(data, dataset_name, train_ratio=0.7, val_ratio=0.1, seed=None):
    """Attach random, disjoint train/val/test boolean node masks to ``data``.

    The nodes are randomly permuted; the first ``int(num_nodes * train_ratio)``
    go to training, the next ``int(num_nodes * val_ratio)`` to validation and
    all remaining nodes to test. ``data.train_mask``, ``data.val_mask`` and
    ``data.test_mask`` are overwritten in place and the split sizes are printed.

    Args:
        data: Object exposing ``num_nodes``; receives the three mask attributes.
        dataset_name: Label used in the printed summary.
        train_ratio: Fraction of nodes used for training.
        val_ratio: Fraction of nodes used for validation.
        seed: Optional seed for a reproducible split. When ``None`` the global
            NumPy random state is used (the historical behaviour).

    Raises:
        ValueError: If a ratio is negative or the two ratios sum to more than 1.
    """
    # Small tolerance so that ratios such as 0.9 + 0.1 are not rejected by rounding.
    if train_ratio < 0 or val_ratio < 0 or train_ratio + val_ratio > 1 + 1e-9:
        raise ValueError(
            f"train_ratio and val_ratio must be non-negative and sum to at most 1, "
            f"got {train_ratio} and {val_ratio}"
        )

    num_nodes = data.num_nodes
    if seed is None:
        permutation = np.random.permutation(num_nodes)
    else:
        permutation = np.random.default_rng(seed).permutation(num_nodes)
    # Index with a LongTensor regardless of the platform's default NumPy int size.
    indices = torch.as_tensor(permutation, dtype=torch.long)

    train_size = int(num_nodes * train_ratio)
    val_size = int(num_nodes * val_ratio)

    data.train_mask = torch.zeros(num_nodes, dtype=torch.bool)
    data.val_mask = torch.zeros(num_nodes, dtype=torch.bool)
    data.test_mask = torch.zeros(num_nodes, dtype=torch.bool)

    data.train_mask[indices[:train_size]] = True
    data.val_mask[indices[train_size:train_size + val_size]] = True
    data.test_mask[indices[train_size + val_size:]] = True

    print(f"Data Split for {dataset_name}:")
    print(f"Training Nodes: {data.train_mask.sum().item()}")
    print(f"Validation Nodes: {data.val_mask.sum().item()}")
    print(f"Test Nodes: {data.test_mask.sum().item()}\n")

# Model definition
class PPNP(torch.nn.Module):
    """Two GCNConv layers followed by APPNP propagation and a log-softmax.

    Args:
        num_features: Size of the input node feature vectors.
        num_classes: Number of output classes.
        hidden_dim: Output width of the first GCNConv layer.
        K: Number of propagation steps passed to ``APPNP``.
        alpha: Teleport probability passed to ``APPNP``.
        dropout: Dropout probability applied after the first layer while
            training (0.5 is the ``F.dropout`` default used previously).

    NOTE(review): the class is named PPNP but uses graph convolutions for the
    prediction step and the iterative ``APPNP`` operator for propagation —
    confirm this is the intended architecture.
    """

    def __init__(self, num_features, num_classes, hidden_dim=16, K=10, alpha=0.1, dropout=0.5):
        super().__init__()
        self.dropout = dropout
        self.conv1 = GCNConv(num_features, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, num_classes)
        self.propagation = APPNP(K=K, alpha=alpha)

    def forward(self, data):
        """Return per-node log-probabilities for ``data.x`` / ``data.edge_index``."""
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        # Dropout is only active in training mode.
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.conv2(x, edge_index)
        x = self.propagation(x, edge_index)
        return F.log_softmax(x, dim=1)

# Training and evaluation functions
def train(model, data, optimizer):
    """Perform one full-batch optimisation step and return the loss as a float.

    The model is switched to training mode, evaluated on ``data``, and the
    negative log-likelihood over the nodes selected by ``data.train_mask`` is
    back-propagated before ``optimizer`` takes a step.
    """
    model.train()
    optimizer.zero_grad()

    train_nodes = data.train_mask
    log_probs = model(data)
    step_loss = F.nll_loss(log_probs[train_nodes], data.y[train_nodes])

    step_loss.backward()
    optimizer.step()
    return step_loss.item()



def evaluate(model, data, mask=None):
    """Return the classification accuracy of ``model`` on the nodes in ``mask``.

    Args:
        model: Module called as ``model(data)`` returning per-node class scores.
        data: Graph object providing ``y`` and, when ``mask`` is omitted,
            ``test_mask``.
        mask: Boolean node mask to score. Defaults to ``data.test_mask``.

    Returns:
        Fraction of masked nodes whose arg-max prediction equals the label.

    Raises:
        ZeroDivisionError: If ``mask`` selects no nodes.
    """
    if mask is None:
        mask = data.test_mask
    model.eval()
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        out = model(data)
    pred = out.argmax(dim=1)
    correct = pred[mask].eq(data.y[mask]).sum().item()
    acc = correct / int(mask.sum())
    return acc

# Build everything this experiment needs inside the cell. Previously the loops
# below continued training the already-trained cora_model / citeseer_model left
# over from the preceding cell, so the reported "final" accuracy depended on
# which cells had been run before (and how often).
SEED = 42
np.random.seed(SEED)      # node permutation in split_data
torch.manual_seed(SEED)   # weight initialisation and dropout

cora_data, cora_num_classes = load_and_print_stats('Cora')
citeseer_data, citeseer_num_classes = load_and_print_stats('CiteSeer')
split_data(cora_data, 'Cora', 0.7, 0.1)
split_data(citeseer_data, 'CiteSeer', 0.7, 0.1)

cora_model = PPNP(num_features=cora_data.num_node_features, num_classes=cora_num_classes)
citeseer_model = PPNP(num_features=citeseer_data.num_node_features, num_classes=citeseer_num_classes)
cora_optimizer = torch.optim.Adam(cora_model.parameters(), lr=0.01, weight_decay=5e-4)
citeseer_optimizer = torch.optim.Adam(citeseer_model.parameters(), lr=0.01, weight_decay=5e-4)

# Training loop for Cora dataset
for epoch in range(200):
    cora_loss = train(cora_model, cora_data, cora_optimizer)

    # Optional: Evaluate on validation set during training
    # cora_val_acc = evaluate(cora_model, cora_data, cora_data.val_mask)
    # print(f'Cora - Epoch: {epoch+1}, Validation Acc: {cora_val_acc:.4f}')

# Evaluate on test set after training
cora_test_acc = evaluate(cora_model, cora_data, cora_data.test_mask)
print(f'\nCora - Final Test Accuracy: {cora_test_acc:.4f}')

# Repeat for CiteSeer dataset
for epoch in range(200):
    citeseer_loss = train(citeseer_model, citeseer_data, citeseer_optimizer)

# Evaluate on test set after training
citeseer_test_acc = evaluate(citeseer_model, citeseer_data, citeseer_data.test_mask)
print(f'\nCiteSeer - Final Test Accuracy: {citeseer_test_acc:.4f}')





Cora - Final Test Accuracy: 0.8674

CiteSeer - Final Test Accuracy: 0.7331


## ModifiedPPNP
## قسمت ز سوال 2

In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv, APPNP
import torch.optim as optim

class ModifiedPPNP(torch.nn.Module):
    """Linear -> GCNConv -> learnable matrix ``W`` -> APPNP propagation -> Linear.

    Args:
        num_features: Size of the input node feature vectors.
        num_classes: Number of output classes.
        hidden_dim: Width of the hidden representation (and of square ``W``).
        K: Number of propagation steps passed to ``APPNP``.
        alpha: Teleport probability passed to ``APPNP``.
    """

    def __init__(self, num_features, num_classes, hidden_dim=16, K=10, alpha=0.1):
        super().__init__()
        self.lin = torch.nn.Linear(num_features, hidden_dim)
        self.conv1 = GCNConv(hidden_dim, hidden_dim)
        # torch.empty allocates without initialising; values are set in reset_parameters().
        self.W = torch.nn.Parameter(torch.empty(hidden_dim, hidden_dim))
        self.propagation = APPNP(K=K, alpha=alpha)
        self.lin2 = torch.nn.Linear(hidden_dim, num_classes)
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise every learnable parameter of the model.

        The sub-modules use their own initialisers; ``W`` uses Xavier-uniform.
        """
        self.lin.reset_parameters()
        self.conv1.reset_parameters()
        self.lin2.reset_parameters()
        torch.nn.init.xavier_uniform_(self.W)

    def forward(self, data):
        """Return per-node log-probabilities for ``data.x`` / ``data.edge_index``."""
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.lin(x))
        x = F.relu(self.conv1(x, edge_index))
        x = torch.matmul(x, self.W)
        x = self.propagation(x, edge_index)
        x = self.lin2(x)
        return F.log_softmax(x, dim=1)

def train(model, data, optimizer):
    """Perform one full-batch optimisation step and return the loss as a float.

    The model is switched to training mode, evaluated on ``data``, and the
    negative log-likelihood over the nodes selected by ``data.train_mask`` is
    back-propagated before ``optimizer`` takes a step.
    """
    model.train()
    optimizer.zero_grad()

    train_nodes = data.train_mask
    log_probs = model(data)
    step_loss = F.nll_loss(log_probs[train_nodes], data.y[train_nodes])

    step_loss.backward()
    optimizer.step()
    return step_loss.item()

def evaluate(model, data, mask=None):
    """Return the classification accuracy of ``model`` on the nodes in ``mask``.

    Args:
        model: Module called as ``model(data)`` returning per-node class scores.
        data: Graph object providing ``y`` and, when ``mask`` is omitted,
            ``test_mask``.
        mask: Boolean node mask to score. Defaults to ``data.test_mask`` so
            existing two-argument calls keep measuring test accuracy.

    Returns:
        Fraction of masked nodes whose arg-max prediction equals the label.

    Raises:
        ZeroDivisionError: If ``mask`` selects no nodes.
    """
    if mask is None:
        mask = data.test_mask
    model.eval()
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        out = model(data)
    pred = out.argmax(dim=1)
    correct = pred[mask].eq(data.y[mask]).sum().item()
    return correct / int(mask.sum())

# Load datasets
# NOTE(review): no split_data() call is made in this cell, so the train/test
# masks used below are whatever the Planetoid objects provide, whereas the PPNP
# cells re-split the nodes 70/10/20. Confirm the two sets of accuracies are
# meant to be compared on different splits.
datasets = {
    'Cora': Planetoid(root='/tmp/Cora', name='Cora'),
    'CiteSeer': Planetoid(root='/tmp/CiteSeer', name='CiteSeer')
}

# Train a fresh ModifiedPPNP per dataset for 200 epochs.
for name, dataset in datasets.items():
    data = dataset[0]
    model = ModifiedPPNP(num_features=dataset.num_node_features, num_classes=dataset.num_classes)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

    for epoch in range(200):
        loss = train(model, data, optimizer)
        # Log the loss and the accuracy on data.test_mask every 10th epoch.
        if epoch % 10 == 0:
            acc = evaluate(model, data)
            print(f'{name} Epoch {epoch}: Loss: {loss:.4f}, Test Acc: {acc:.4f}')


Cora Epoch 0: Loss: 1.9542, Test Acc: 0.0910
Cora Epoch 10: Loss: 1.1919, Test Acc: 0.6070
Cora Epoch 20: Loss: 0.2109, Test Acc: 0.7320
Cora Epoch 30: Loss: 0.0148, Test Acc: 0.7470
Cora Epoch 40: Loss: 0.0026, Test Acc: 0.7520
Cora Epoch 50: Loss: 0.0017, Test Acc: 0.7570
Cora Epoch 60: Loss: 0.0026, Test Acc: 0.7650
Cora Epoch 70: Loss: 0.0041, Test Acc: 0.7790
Cora Epoch 80: Loss: 0.0050, Test Acc: 0.7750
Cora Epoch 90: Loss: 0.0048, Test Acc: 0.7800
Cora Epoch 100: Loss: 0.0044, Test Acc: 0.7800
Cora Epoch 110: Loss: 0.0041, Test Acc: 0.7850
Cora Epoch 120: Loss: 0.0039, Test Acc: 0.7850
Cora Epoch 130: Loss: 0.0038, Test Acc: 0.7850
Cora Epoch 140: Loss: 0.0036, Test Acc: 0.7840
Cora Epoch 150: Loss: 0.0034, Test Acc: 0.7880
Cora Epoch 160: Loss: 0.0033, Test Acc: 0.7870
Cora Epoch 170: Loss: 0.0032, Test Acc: 0.7860
Cora Epoch 180: Loss: 0.0031, Test Acc: 0.7890
Cora Epoch 190: Loss: 0.0030, Test Acc: 0.7870
CiteSeer Epoch 0: Loss: 1.8007, Test Acc: 0.1600
CiteSeer Epoch 10: Los

In [None]:
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GCNConv, APPNP
import torch.optim as optim

class ModifiedPPNP(torch.nn.Module):
    """Linear -> GCNConv -> learnable matrix ``W`` -> APPNP propagation -> Linear.

    Args:
        num_features: Size of the input node feature vectors.
        num_classes: Number of output classes.
        hidden_dim: Width of the hidden representation (and of square ``W``).
        K: Number of propagation steps passed to ``APPNP``.
        alpha: Teleport probability passed to ``APPNP``.
    """

    def __init__(self, num_features, num_classes, hidden_dim=16, K=10, alpha=0.1):
        super().__init__()
        self.lin = torch.nn.Linear(num_features, hidden_dim)
        self.conv1 = GCNConv(hidden_dim, hidden_dim)
        # torch.empty allocates without initialising; values are set in reset_parameters().
        self.W = torch.nn.Parameter(torch.empty(hidden_dim, hidden_dim))
        self.propagation = APPNP(K=K, alpha=alpha)
        self.lin2 = torch.nn.Linear(hidden_dim, num_classes)
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise every learnable parameter of the model.

        The sub-modules use their own initialisers; ``W`` uses Xavier-uniform.
        """
        self.lin.reset_parameters()
        self.conv1.reset_parameters()
        self.lin2.reset_parameters()
        torch.nn.init.xavier_uniform_(self.W)

    def forward(self, data):
        """Return per-node log-probabilities for ``data.x`` / ``data.edge_index``."""
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.lin(x))
        x = F.relu(self.conv1(x, edge_index))
        x = torch.matmul(x, self.W)
        x = self.propagation(x, edge_index)
        x = self.lin2(x)
        return F.log_softmax(x, dim=1)

def train(model, data, optimizer):
    """Perform one full-batch optimisation step and return the loss as a float.

    The model is switched to training mode, evaluated on ``data``, and the
    negative log-likelihood over the nodes selected by ``data.train_mask`` is
    back-propagated before ``optimizer`` takes a step.
    """
    model.train()
    optimizer.zero_grad()

    train_nodes = data.train_mask
    log_probs = model(data)
    step_loss = F.nll_loss(log_probs[train_nodes], data.y[train_nodes])

    step_loss.backward()
    optimizer.step()
    return step_loss.item()

def evaluate(model, data, mask=None):
    """Return the classification accuracy of ``model`` on the nodes in ``mask``.

    Args:
        model: Module called as ``model(data)`` returning per-node class scores.
        data: Graph object providing ``y`` and, when ``mask`` is omitted,
            ``test_mask``.
        mask: Boolean node mask to score. Defaults to ``data.test_mask`` so
            existing two-argument calls keep measuring test accuracy.

    Returns:
        Fraction of masked nodes whose arg-max prediction equals the label.

    Raises:
        ZeroDivisionError: If ``mask`` selects no nodes.
    """
    if mask is None:
        mask = data.test_mask
    model.eval()
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        out = model(data)
    pred = out.argmax(dim=1)
    correct = pred[mask].eq(data.y[mask]).sum().item()
    return correct / int(mask.sum())

# NOTE(review): no split_data() call is made in this cell, so the train/test
# masks used below are whatever the Planetoid objects provide, whereas the PPNP
# cells re-split the nodes 70/10/20. Confirm the two sets of accuracies are
# meant to be compared on different splits.
datasets = {'Cora': Planetoid(root='/tmp/Cora', name='Cora'), 'CiteSeer': Planetoid(root='/tmp/CiteSeer', name='CiteSeer')}

# Train a fresh ModifiedPPNP per dataset for 200 epochs, then report the
# accuracy on data.test_mask once at the end.
for name, dataset in datasets.items():
    data = dataset[0]
    model = ModifiedPPNP(num_features=dataset.num_node_features, num_classes=dataset.num_classes)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    for epoch in range(200):
        loss = train(model, data, optimizer)
    test_acc = evaluate(model, data)
    print(f'{name} - Final Test Accuracy: {test_acc:.4f}')


Cora - Final Test Accuracy: 0.7670
CiteSeer - Final Test Accuracy: 0.6180
