In [1]:
import copy
import matplotlib.pyplot as plt
import pickle
from helpers import *
from torch_geometric.datasets import Planetoid
# Seed PyTorch's RNG once, up front, so that weight initialisation (and any
# other torch-driven randomness) is repeatable across "Restart & Run All".
# The individual trials further down still differ from each other because
# the generator keeps advancing after this single seeding call.
SEED = 0
torch.manual_seed(SEED)

# Compact tensor printing: two decimals, no scientific notation, wide lines.
torch.set_printoptions(precision=2, sci_mode=False, linewidth=200)

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('Using device:', device)

# Binary cross-entropy on probabilities; every task below is one-vs-rest.
loss_fn = nn.BCELoss()

Using device: cpu


## Learners and Plot Helpers

In [2]:
# Planetoid citation datasets used in the experiments, in processing order.
# Each one is stored under data/<name>/.
datasets = [
    Planetoid(root='data/' + dataset_name + '/', name=dataset_name)
    for dataset_name in ('Cora', 'PubMed', 'CiteSeer')
]
# Architectures that are actually trained and plotted. Every key listed here
# must exist both in `labels` and in the dict returned by `init_learners`.
#
# Key scheme: 'mlp' is the 2-layer MLP (no graph convolutions);
# 'gcn<L>l<d1..dL>' is an L-layer GCN where digit d_k is the number of graph
# convolutions (GCs) placed at layer k.
model_types = ['mlp', 'gcn2l10']

# Further keys that can be appended to `model_types` to enable them:
#   2 layers: 'gcn2l01', 'gcn2l20', 'gcn2l02', 'gcn2l11'
#   3 layers: 'gcn3l100', 'gcn3l010', 'gcn3l001',
#             'gcn3l200', 'gcn3l020', 'gcn3l002',
#             'gcn3l110', 'gcn3l101', 'gcn3l011'

# Initialize all learning models.
def init_learners(channels_2l, channels_3l):
    """Build a fresh, untrained instance of every supported architecture.

    Reads the module-level ``n_features`` (set by the training loop for the
    dataset currently being processed), so it must be called after that
    global has been assigned.

    Args:
        channels_2l: Channel specification passed to every 2-layer model
            (the MLP and all ``gcn2l*`` variants).
        channels_3l: Channel specification passed to every 3-layer model
            (all ``gcn3l*`` variants).

    Returns:
        dict mapping model key (e.g. ``'mlp'``, ``'gcn2l10'``) to the newly
        constructed model. The digits after ``l`` in a GCN key are the number
        of graph convolutions per layer, and ``convolutions`` mirrors them
        one-to-one.
    """
    # Per-layer convolution counts, keyed by model name. The list for each key
    # spells out the digits of the key: 'gcn2l20' -> [2, 0], 'gcn2l02' -> [0, 2].
    # (These two were previously swapped, contradicting their plot labels and
    # the convention used by the 3-layer variants.)
    convs_2l = {
        'gcn2l10': [1, 0],
        'gcn2l01': [0, 1],
        'gcn2l20': [2, 0],
        'gcn2l02': [0, 2],
        'gcn2l11': [1, 1],
    }
    convs_3l = {
        'gcn3l100': [1, 0, 0],
        'gcn3l010': [0, 1, 0],
        'gcn3l001': [0, 0, 1],
        'gcn3l200': [2, 0, 0],
        'gcn3l020': [0, 2, 0],
        'gcn3l002': [0, 0, 2],
        'gcn3l110': [1, 1, 0],
        'gcn3l101': [1, 0, 1],
        'gcn3l011': [0, 1, 1],
    }

    # Construction order (MLP, then 2-layer GCNs, then 3-layer GCNs) is kept
    # identical to the original so RNG consumption during init is unchanged.
    learners = {}
    learners['mlp'] = MLP(n_layers=2, n_features=n_features, channels=channels_2l)
    for name, convolutions in convs_2l.items():
        learners[name] = GCN(n_layers=2, n_features=n_features, convolutions=convolutions, channels=channels_2l)
    for name, convolutions in convs_3l.items():
        learners[name] = GCN(n_layers=3, n_features=n_features, convolutions=convolutions, channels=channels_3l)
    return learners

# Shared plot styling: line width and marker size used for every curve.
line_wd, marker_sz = 2, 4

# Human-readable legend text for each model key.
labels = dict([
    ('mlp', 'No Graph'),
    # Two-layer GCN variants.
    ('gcn2l10', '2 layers 1 GC at layer 1'),
    ('gcn2l01', '2 layers 1 GC at layer 2'),
    ('gcn2l20', '2 layers 2 GCs at layer 1'),
    ('gcn2l02', '2 layers 2 GCs at layer 2'),
    ('gcn2l11', '2 layers 1 GC each at layers 1 and 2'),
    # Three-layer GCN variants.
    ('gcn3l100', '3 layers 1 GC at layer 1'),
    ('gcn3l010', '3 layers 1 GC at layer 2'),
    ('gcn3l001', '3 layers 1 GC at layer 3'),
    ('gcn3l200', '3 layers 2 GCs at layer 1'),
    ('gcn3l020', '3 layers 2 GCs at layer 2'),
    ('gcn3l002', '3 layers 2 GCs at layer 3'),
    ('gcn3l110', '3 layers 1 GC each at layers 1 and 2'),
    ('gcn3l101', '3 layers 1 GC each at layers 1 and 3'),
    ('gcn3l011', '3 layers 1 GC each at layers 2 and 3'),
])

def plot_for_models(x, ys, yerrs, x_label, y_label, labels, filename=None):
    """Plot one mean curve with an error band per model on log-log axes.

    Iterates over the module-level ``model_types`` list, so ``ys``, ``yerrs``
    and ``labels`` must each contain an entry for every key in that list.

    Args:
        x: Sequence of x-axis values shared by all curves.
        ys: Mapping model key -> sequence of mean values (same length as x).
        yerrs: Mapping model key -> sequence of spreads drawn as a shaded
            band of ``y - yerr`` to ``y + yerr`` around the mean.
        x_label: Text for the x axis.
        y_label: Text for the y axis.
        labels: Mapping model key -> legend text.
        filename: If given, the figure is written to ``figures/<filename>``
            (the directory is created when missing). ``None`` skips saving.
    """
    import os  # local import: only needed for the optional save step

    fig = plt.figure(figsize=(20,5), facecolor=[1,1,1])
    plt.xscale('log')
    plt.yscale('log')

    plt.xlabel(x_label, fontsize=18)
    plt.ylabel(y_label, fontsize=18)

    for model_type in model_types:
        y = np.asarray(ys[model_type])
        yerr = np.asarray(yerrs[model_type])
        plt.plot(x, y, linewidth=line_wd, linestyle= '-', marker='.', markersize=marker_sz, label=labels[model_type])
        plt.fill_between(x, y - yerr, y + yerr, alpha=0.2)
    plt.legend()

    # Save before showing: the file is then written while the figure is
    # guaranteed to be fully populated, regardless of what the backend does
    # to the figure on show().
    if filename is not None:
        out_path = "figures/" + filename
        # A missing output directory would otherwise raise and discard the
        # results of a long training run at the very first plot.
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        fig.savefig(out_path, dpi=400, bbox_inches='tight')
    plt.show()

## Train and test all models

In [None]:
# Training budget: maximum epochs per model and independent repetitions
# per (dataset, class, K) configuration.
epochs, n_trials = 1000, 10

# Result containers filled in by the sweep below. Each one ends up nested as
# [dataset name][class label][model type] -> list with one value per K.
# The *_std dicts hold the spread across trials for the matching mean.
train_acc, train_acc_std = {}, {}
train_loss, train_loss_std = {}, {}
test_acc, test_acc_std = {}, {}
test_loss, test_loss_std = {}, {}

# [dataset name][class label] -> array of K values used on the x axis.
Ks = {}

# Main experiment sweep.
#
# For every dataset and every class (treated as a one-vs-rest binary task):
#   1. shift the features so the positive-class mean sits at +K*1 and the
#      negative-class mean at -K*1, for several K on a geometric grid;
#   2. train each model in `model_types` for `n_trials` independent runs;
#   3. record mean/std of train/test accuracy and loss across the trials;
#   4. plot the four metrics against K.
_metric_stores = (
    train_acc, train_acc_std, train_loss, train_loss_std,
    test_acc, test_acc_std, test_loss, test_loss_std,
)

for dataset in datasets:
    data = dataset[0].to(device)
    # Keep a pristine copy of the features: data.x is overwritten for every K.
    data.x_orig = copy.deepcopy(data.x)
    # NOTE: n_features must stay a module-level name; init_learners reads it
    # as a global.
    n_points, n_features = data.x.shape
    # Plain Python int so it can be used directly in range() and print().
    n_classes = int(data.y.max().item()) + 1
    print('\n' + dataset.name + ': Classes =', n_classes, '| Points =', n_points, '| Features =', n_features)
    for store in _metric_stores:
        store[dataset.name] = {}
    Ks[dataset.name] = {}

    # Fix network architectures for the networks (depends only on n_features).
    channels_2l = [[n_features, 16], [16, 1]]
    channels_3l = [[n_features, 16, 16], [16, 16, 1]]

    for class_label in range(n_classes):
        print("\nWorking for", dataset.name, 'class', class_label)
        # One list per model type; a value is appended for every K below.
        for store in _metric_stores:
            store[dataset.name][class_label] = {model_type: [] for model_type in model_types}

        # Binary one-vs-rest target: 1 for the current class, 0 otherwise.
        idx_mask = data.y == class_label
        y = torch.zeros(n_points, device=device)
        y[idx_mask] = 1
        data.y_ = y

        # Empirical class means of the original (unshifted) features.
        u = data.x_orig[idx_mask].mean(dim=0)
        v = data.x_orig[~idx_mask].mean(dim=0)

        train_acc_list = {}
        train_loss_list = {}
        test_acc_list = {}
        test_loss_list = {}

        # Geometric grid of K spanning 0.01x to 10x a reference value derived
        # from the observed distance between the class means. The distance is
        # converted to a Python float first so NumPy never receives a
        # (possibly CUDA-resident) tensor.
        dist = torch.norm(u - v).item()
        K_ref = dist / (2 * np.sqrt(n_features))
        Ks[dataset.name][class_label] = np.geomspace(0.01 * K_ref, 10 * K_ref, 5)
        n_K = len(Ks[dataset.name][class_label])

        for (i, K) in enumerate(Ks[dataset.name][class_label]):
            K = float(K)  # avoid mixing NumPy scalars with torch tensors
            # Re-centre each class: remove its own mean, then place it at +/-K.
            u_ = K * torch.ones(n_features, device=device)
            v_ = -K * torch.ones(n_features, device=device)
            data.x[idx_mask] = data.x_orig[idx_mask] - u + u_
            data.x[~idx_mask] = data.x_orig[~idx_mask] - v + v_

            # Per-trial scratch buffers, reset for every K.
            for model_type in model_types:
                train_acc_list[model_type] = torch.zeros(n_trials)
                train_loss_list[model_type] = torch.zeros(n_trials)
                test_acc_list[model_type] = torch.zeros(n_trials)
                test_loss_list[model_type] = torch.zeros(n_trials)

            for t in range(n_trials):
                # Fraction of (K, trial) pairs already completed for this class.
                progress = round(100 * (i * n_trials + t) / (n_K * n_trials), 2)
                # Fresh, independently initialised models for every trial.
                learners = init_learners(channels_2l, channels_3l)
                for model_type in model_types:
                    # Built inside the model loop so the message names the
                    # model that is actually being trained.
                    logs = 'Progress (class ' + str(class_label) + ', ' + model_type + '): ' + str(progress) + '%.'
                    learner = learners[model_type].to(device)
                    train_model(learner, data, loss_fn, lr=0.01, epochs=epochs, logs=logs)
                    learner.eval()
                    # Evaluation only: no autograd graph is needed.
                    with torch.no_grad():
                        pred = learner(data)
                    train_acc_list[model_type][t] = accuracy(pred[data.train_mask], data.y_[data.train_mask]).item()
                    train_loss_list[model_type][t] = loss_fn(pred[data.train_mask], data.y_[data.train_mask].float()).item()
                    test_acc_list[model_type][t] = accuracy(pred[data.test_mask], data.y_[data.test_mask]).item()
                    test_loss_list[model_type][t] = loss_fn(pred[data.test_mask], data.y_[data.test_mask].float()).item()

            # Aggregate the trials for this K (population std, i.e. unbiased=False).
            for model_type in model_types:
                train_acc[dataset.name][class_label][model_type].append(train_acc_list[model_type].mean().item())
                train_acc_std[dataset.name][class_label][model_type].append(train_acc_list[model_type].std(unbiased=False).item())
                train_loss[dataset.name][class_label][model_type].append(train_loss_list[model_type].mean().item())
                train_loss_std[dataset.name][class_label][model_type].append(train_loss_list[model_type].std(unbiased=False).item())
                test_acc[dataset.name][class_label][model_type].append(test_acc_list[model_type].mean().item())
                test_acc_std[dataset.name][class_label][model_type].append(test_acc_list[model_type].std(unbiased=False).item())
                test_loss[dataset.name][class_label][model_type].append(test_loss_list[model_type].mean().item())
                test_loss_std[dataset.name][class_label][model_type].append(test_loss_list[model_type].std(unbiased=False).item())

        # Plot the metrics for this dataset and class label.
        # Each entry: (filename suffix, mean store, std store, y-axis label).
        # The last entry uses the test *loss* stores; it previously re-plotted
        # test accuracy under the "Test Loss" title.
        plot_specs = [
            ('_train_acc.pdf', train_acc, train_acc_std, 'Training Accuracy'),
            ('_test_acc.pdf', test_acc, test_acc_std, 'Test Accuracy'),
            ('_train_loss.pdf', train_loss, train_loss_std, 'Training Loss'),
            ('_test_loss.pdf', test_loss, test_loss_std, 'Test Loss'),
        ]
        for suffix, mean_store, std_store, y_label in plot_specs:
            filename = dataset.name + '_cls' + str(class_label) + suffix
            plot_for_models(
                Ks[dataset.name][class_label],
                mean_store[dataset.name][class_label],
                std_store[dataset.name][class_label],
                'Dist b/w means', y_label,
                labels, filename)


Cora: Classes = 7 | Points = 2708 | Features = 1433

Working for Cora class 0
Progress (class 0, gcn2l10): 20.0%. Loss: 0.002				