# MNIST KNN Graph

In [2]:
import argparse
import os.path as osp
import random
import time

import graphlearning as gl
import numpy as np
import pygsp
import rustworkx as rwx
import torch
import torch.nn.functional as F
import torch_geometric.transforms as T
import pandas as pd 
import os

from torch_geometric.data import Data
from torch_geometric.datasets import Planetoid
from torch_geometric.logging import log
from torch_geometric.nn import GCNConv
import sys
sys.path.append(os.path.abspath(os.path.join('..')))
from src import get_num_edges_from_adj_matrix, generate_train_and_test_mask, GCN, eval_model, spectral_sparsify


In [3]:
# Choose the compute device by preference: CUDA GPU, then Apple MPS, then CPU.
# Older torch builds have no `torch.backends.mps`, hence the getattr guard.
mps_backend = getattr(torch.backends, "mps", None)
if torch.cuda.is_available():
    device_name = "cuda"
elif mps_backend is not None and mps_backend.is_available():
    device_name = "mps"
else:
    device_name = "cpu"
device = torch.device(device_name)

## Loading the dataset

In [4]:
# Benchmark configuration: dataset name plus its feature / class counts.
ds_name = "mnist"
num_features, num_classes = 784, 10

# Report the configuration before the (potentially slow) load.
print(
    f"Benchmarking: {ds_name} dataset",
    f"    > {num_features} features",
    f"    > {num_classes} classes",
    sep="\n",
)

# Fetch the feature matrix and label vector through graphlearning.
source_data, labels = gl.datasets.load(ds_name)

# N is the number of samples (rows) currently held in source_data.
N = source_data.shape[0]

Benchmarking: mnist dataset
    > 784 features
    > 10 classes


## Subsampling the dataset

In [8]:
# Seed every RNG the notebook has imported so that Restart & Run All draws
# the same subsample (and, presumably, the same masks / weight init in the
# helpers from `src` -- verify which RNG those helpers actually use).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

subsample_size = 2000
if subsample_size > source_data.shape[0]:
    # Slicing would silently return fewer rows and N would no longer match
    # the data, so fail loudly instead.
    raise ValueError(
        f"subsample_size={subsample_size} exceeds the {source_data.shape[0]} available samples"
    )
N = subsample_size
print(f"    > Running on a subsample of {subsample_size} Nodes")

# Draw the subsample from a dedicated, seeded generator rather than the
# global NumPy state, so the selection does not depend on cell run order.
rng = np.random.default_rng(SEED)
indices = rng.permutation(source_data.shape[0])
chosen_indices = indices[:subsample_size]

# NOTE: this rebinds source_data / labels to the subsample; re-running the
# cell without re-loading the dataset re-samples from the reduced data.
source_data = source_data[chosen_indices, :]
labels = labels[chosen_indices]

# Sweep configuration: neighbourhood sizes k = 10, 20, ..., 100 and the
# number of training epochs used for every run.
K_range = list(range(10, 101, 10))
N_EPOCHS = 100

    > Running on a subsample of 2000 Nodes


## Full graph

In [9]:
# Result buffers for the full (unsparsified) kNN graph, one slot per k in K_range.
(
    full_graph_training_time,
    full_graph_training_time_per_epoch,
    full_graph_best_accuracies,
    full_graph_num_edges,
    full_graph_sparsification_time,
) = (np.zeros(len(K_range)) for _ in range(5))

# Accuracy buffer: one row per k, one column per epoch.
full_graph_accuracies = np.zeros((len(K_range), N_EPOCHS))

In [11]:
# Train and evaluate a GCN on the full kNN graph for every k in K_range,
# recording graph-construction time, edge count and training statistics
# into the full_graph_* arrays.

# Node features and labels do not depend on k, so build them once.
full_graph_x = torch.tensor(source_data, dtype=torch.float)
full_graph_y = torch.tensor(labels, dtype=torch.long)

for k_idx, k in enumerate(K_range):
    print(f"contructing the knn weightmatrix for k = {k}...")
    start_knn_weight_matrix_time = time.time()
    knn_weight_matrix = gl.weightmatrix.knn(source_data, k=k)
    knn_weight_matrix_time = time.time() - start_knn_weight_matrix_time
    # For the full graph the "sparsification" time is the kNN construction time.
    full_graph_sparsification_time[k_idx] = knn_weight_matrix_time
    print(f"done in {knn_weight_matrix_time:.4f}")

    # Densify once and reuse it for both the edge count and the edge index.
    knn_adjacency = knn_weight_matrix.toarray()
    full_graph_num_edges[k_idx] = get_num_edges_from_adj_matrix(knn_adjacency)
    print(f"the graph has {get_num_edges_from_adj_matrix(knn_adjacency)} edges")

    train_mask, test_mask = generate_train_and_test_mask(N)

    # Every non-zero weight becomes a directed edge (2 x num_edges index tensor).
    full_graph_edge_index = torch.tensor(knn_adjacency).nonzero().t().contiguous()
    full_graph_data = Data(x=full_graph_x, edge_index=full_graph_edge_index, y=full_graph_y).to(device)
    # NOTE(review): confirm that Data's num_nodes reflects the subsampled
    # dataset rather than the original one.
    full_graph_model = GCN(num_features, 16, num_classes).to(device)

    full_graph_optimizer = torch.optim.Adam([
        dict(params=full_graph_model.conv1.parameters(), weight_decay=5e-4),
        dict(params=full_graph_model.conv2.parameters(), weight_decay=0)
    ], lr=0.01)  # Only perform weight-decay on first convolution.

    best_val_acc, accs, median_time_per_epoch, overall_time = eval_model(
        full_graph_model,
        full_graph_data,
        train_mask,
        test_mask,
        full_graph_optimizer,
        N_EPOCHS,
    )

    # Persist the run's results; previously they were computed and discarded,
    # leaving these arrays at zero.
    full_graph_best_accuracies[k_idx] = float(best_val_acc)
    # Assumes eval_model returns one accuracy per epoch (N_EPOCHS values) -- TODO confirm.
    full_graph_accuracies[k_idx, :] = [float(acc) for acc in accs]
    full_graph_training_time_per_epoch[k_idx] = float(median_time_per_epoch)
    full_graph_training_time[k_idx] = float(overall_time)

contructing the knn weightmatrix for k = 10...
done in 0.8004
the graph has 14536 edges
Epoch: 001, Loss: 2.3620, Train: 0.2880, Test: 0.2940
Epoch: 010, Loss: 0.9971, Train: 0.7460, Test: 0.7040
Epoch: 020, Loss: 0.6323, Train: 0.8770, Test: 0.8410
Epoch: 030, Loss: 0.4624, Train: 0.8910, Test: 0.8760
Epoch: 040, Loss: 0.4207, Train: 0.8990, Test: 0.8830
Epoch: 050, Loss: 0.3971, Train: 0.9060, Test: 0.8860
Epoch: 060, Loss: 0.3504, Train: 0.9150, Test: 0.8930
Epoch: 070, Loss: 0.3195, Train: 0.9140, Test: 0.9000
Epoch: 080, Loss: 0.2953, Train: 0.9170, Test: 0.8970
Epoch: 090, Loss: 0.2755, Train: 0.9160, Test: 0.8970
Epoch: 100, Loss: 0.2932, Train: 0.9230, Test: 0.9050
contructing the knn weightmatrix for k = 20...
done in 0.8070
the graph has 28840 edges
Epoch: 001, Loss: 2.3233, Train: 0.3570, Test: 0.3650
Epoch: 010, Loss: 1.0967, Train: 0.7230, Test: 0.7110
Epoch: 020, Loss: 0.6789, Train: 0.8230, Test: 0.8180
Epoch: 030, Loss: 0.4722, Train: 0.8660, Test: 0.8550
Epoch: 040, Lo

## Spectral sparsified graph

In [12]:
# Result buffers for the spectrally sparsified graph, one slot per k in K_range.
n_k_values = len(K_range)

spectral_sparsification_time = np.zeros(n_k_values)
spectral_num_edges = np.zeros(n_k_values)
spectral_training_time = np.zeros(n_k_values)
spectral_graph_training_time_per_epoch = np.zeros(n_k_values)
spectral_best_accuracies = np.zeros(n_k_values)

# Accuracy buffer: one row per k, one column per epoch.
spectral_accuracies = np.zeros((n_k_values, N_EPOCHS))

In [None]:
# Train and evaluate a GCN on the spectrally sparsified kNN graph for every
# k in K_range, recording sparsification time, edge count and training
# statistics into the spectral_* arrays.
#
# The cell rebuilds the kNN graph and the train/test masks for each k, so
# it does not depend on k_idx / k / knn_weight_matrix / masks left behind
# by the last iteration of the full-graph loop.

# Node features and labels do not depend on k, so build them once.
spectral_x = torch.tensor(source_data, dtype=torch.float)
spectral_y = torch.tensor(labels, dtype=torch.long)

for k_idx, k in enumerate(K_range):
    knn_weight_matrix = gl.weightmatrix.knn(source_data, k=k)
    # Same mask construction as the full-graph loop.
    # NOTE(review): if generate_train_and_test_mask is random, the split
    # differs from the one used for the full graph at the same k.
    train_mask, test_mask = generate_train_and_test_mask(N)

    print(f"constructing the spectral sparisfied graph using effective resistance")
    start_spectral_sparsify = time.time()
    # Map each weight w to 1/w - 1. Absent edges (w == 0) give +inf, which
    # nan_to_num sends back to 0; the divide-by-zero warning is expected.
    with np.errstate(divide="ignore"):
        spectral_input = np.nan_to_num((1 / knn_weight_matrix.todense()) - 1, posinf=0, neginf=0)
    spectral_sparse_knn_weight_matrix = spectral_sparsify(spectral_input)
    spectral_sparsify_time = time.time() - start_spectral_sparsify
    print(f"done in {spectral_sparsify_time:.4f}s")
    spectral_sparsification_time[k_idx] = spectral_sparsify_time

    # Densify once and reuse it for both the edge count and the edge index.
    spectral_adjacency = spectral_sparse_knn_weight_matrix.toarray()
    # The imported helper is get_num_edges_from_adj_matrix; the previous
    # call to `get_num_edges` raised NameError.
    spectral_num_edges[k_idx] = get_num_edges_from_adj_matrix(spectral_adjacency)

    print(f'-------------------------- Running the model Spectral sparsified |E| = {spectral_num_edges[k_idx]}, with K = {k} --------------------------')
    # Running the model on the spectral sparsified graph
    spectral_edge_index = torch.tensor(spectral_adjacency).nonzero().t().contiguous()
    spectral_data = Data(x=spectral_x, edge_index=spectral_edge_index, y=spectral_y).to(device)
    # NOTE(review): confirm that Data's num_nodes reflects the subsampled
    # dataset rather than the original one.
    spectral_model = GCN(num_features, 16, num_classes).to(device)

    spectral_optimizer = torch.optim.Adam([
        dict(params=spectral_model.conv1.parameters(), weight_decay=5e-4),
        dict(params=spectral_model.conv2.parameters(), weight_decay=0)
    ], lr=0.01)  # Only perform weight-decay on first convolution.

    best_val_acc, accs, median_time_per_epoch, overall_time = eval_model(
        spectral_model,
        spectral_data,
        train_mask,
        test_mask,
        spectral_optimizer,
        N_EPOCHS,
    )

    # Persist the run's results so the spectral_* arrays are actually filled.
    spectral_best_accuracies[k_idx] = float(best_val_acc)
    # Assumes eval_model returns one accuracy per epoch (N_EPOCHS values) -- TODO confirm.
    spectral_accuracies[k_idx, :] = [float(acc) for acc in accs]
    spectral_graph_training_time_per_epoch[k_idx] = float(median_time_per_epoch)
    spectral_training_time[k_idx] = float(overall_time)