In [None]:
import wandb
# Authenticate with Weights & Biases; a run is initialised further down via wandb.init().
wandb.login()

In [None]:
from GraphColor.dataloader import ColorDataset, ColorMultiDataset, RandColoring, ColoringOneHot
from torch_geometric.nn.models import GAT
from torch_geometric.loader import DataLoader
import torch_geometric.transforms
import torch
import torch.multiprocessing as mp
from functools import partial
import torch.nn.functional as F
import os
#from torch.utils.tensorboard import SummaryWriter

from numpy.random import default_rng
import math

#%%
from torch_geometric.utils import to_networkx
from torch_geometric.nn import SAGEConv, GATv2Conv, GCNConv, global_mean_pool, BatchNorm, LayerNorm
import matplotlib.pyplot as plt
import networkx as nx
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Model hyperparameters; also merged into the wandb run config via **hypers.
hypers = dict(
    num_features=32,
    embedding_dim=64,
)
def min_size(data, n):
    """Return True when ``data.x`` has strictly more than ``n`` rows.

    Used (via ``functools.partial``) as a dataset pre-filter.
    """
    row_count = data.x.shape[0]
    return row_count > n
NUM_PROCESSES = 4

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Handed to the dataset as `pre_transform`.
# Alternative feature encoding that was tried: ColoringOneHot(hypers['num_features'], cat=False)
pre_transforms = torch_geometric.transforms.Compose([
    torch_geometric.transforms.ToUndirected(),
    RandColoring(hypers['num_features']),
])

# Handed to the dataset as `transform`; moves each sample onto `device`.
transforms = torch_geometric.transforms.Compose([
    torch_geometric.transforms.ToDevice(device),
])

# Curry min_size so that only graphs with more than 50 nodes are kept.
# Dataset length without the filter: 11929
filters = partial(min_size, n=50)
# length no filter 11929

import torch
from torch.nn import Dropout, Linear
import torch.nn.functional as F
# from torch.nn import Linear

class N_GCP(torch.nn.Module):
    """
    Two-layer GraphSAGE network that outputs one score vector per node.

    Based on the network used in "Graph Coloring with Physics-Inspired Graph
    Neural Networks", which uses a 2 conv layer network. Both layers here are
    ``SAGEConv`` layers with a ReLU and dropout (p=0.2) in between.

    Args:
        num_features: size of each input node feature vector.
        hidden_dim: output size of the first ``SAGEConv`` layer.
        num_classes: output size of the second ``SAGEConv`` layer.
    """
    def __init__(self, num_features, hidden_dim, num_classes):
        # The original called super(AmazonNet, self).__init__(), which raises
        # TypeError because an N_GCP instance is not an AmazonNet.
        super().__init__()
        self.conv1 = SAGEConv(num_features, hidden_dim)
        self.conv2 = SAGEConv(hidden_dim, num_classes)
        self.dropout = torch.nn.Dropout(p=0.2)

    def forward(self, x, edge_index, batch=None):
        """Return the output of the second conv layer for every node.

        ``batch`` is accepted for call-signature parity with ``AmazonNet`` but
        is not used: no pooling is performed in this network.
        """
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.dropout(x)
        x = self.conv2(x, edge_index)

        return x

class AmazonNet(torch.nn.Module):
    """
    Graph-level classifier: one ``SAGEConv`` layer, mean pooling over each
    graph, then a linear classification head.

    Args:
        num_features: size of each input node feature vector.
        hidden_dim: output size of the ``SAGEConv`` layer and input size of the head.
        num_classes: number of outputs of the linear head.
        n_heads: accepted but not used by this implementation.

    Note:
        ``self.dropout`` is created but is not applied in ``forward``.
    """
    def __init__(self, num_features, hidden_dim, num_classes, n_heads=3):
        super().__init__()
        # Sub-modules are registered in the same order as before so that
        # parameter ordering and initialisation order are unchanged.
        self.conv1 = SAGEConv(num_features, hidden_dim)
        self.algo_classifier = torch.nn.Linear(hidden_dim, num_classes)
        self.dropout = torch.nn.Dropout(p=0.2)

    def forward(self, x, edge_index, batch):
        """Return ``(logits, pre_pool, post_pool)``.

        ``pre_pool`` is the conv output before pooling, ``post_pool`` is the
        result of ``global_mean_pool`` over ``batch``, and ``logits`` is the
        linear head applied to ``post_pool``.
        """
        node_embeddings = self.conv1(x, edge_index)
        graph_embeddings = global_mean_pool(node_embeddings, batch)
        logits = self.algo_classifier(graph_embeddings)
        return logits, node_embeddings, graph_embeddings



graph_dataset = ColorMultiDataset(root='data/', pre_transform=pre_transforms, transform=transforms, pre_filter=filters)

# Sanity-check every entry and report the ones that fail validation.
for i, data in enumerate(graph_dataset):
    try:
        if not data.validate():
            print(f"Error in data entry No:{i} name:{data.name}")
    except ValueError:
        # The handler catches ValueError; the message previously said "IndexError".
        print(f"ValueError in data entry No:{i} name:{data.name}")

# 80/20 train/test split over a shuffled ordering of the dataset.
# The permutation used to be computed but never applied, so the split was
# purely positional; the RNG is seeded so the split is reproducible.
# NOTE(review): assumes ColorMultiDataset supports indexing with a list of
# integer indices (as PyG datasets do) - confirm.
rng = default_rng(0)
choice = rng.permutation(len(graph_dataset))
idx = math.floor(len(graph_dataset)*0.8)
shuffled_dataset = graph_dataset[choice.tolist()]
train_set = shuffled_dataset[0:idx]
# Was graph_dataset[idx:-1], which silently dropped the last graph.
test_set = shuffled_dataset[idx:]
assert len(train_set) + len(test_set) == len(graph_dataset)

loader_train = DataLoader(train_set, batch_size=1, shuffle=True, num_workers=0, pin_memory=False)
loader_test = DataLoader(test_set, batch_size=1, shuffle=True, num_workers=0, pin_memory=False)

print("...Creating Model...")

# Run configuration logged to wandb; `hypers` is merged in at the end.
config = {
    "learning_rate": 0.02,
    'feature_rep': "RandColoring:",
    "dataset": "reddit",
    "epochs": 30,
    "log_interval": 1,
    #'NUM_ACCUMULATION_STEPS': 8,
    'n_colors': 32,
    **hypers
}
wandb.init(
    # set the wandb project where this run will be logged
    project="Graphs-AAS",
    name="Recreation AmazonNet",
    # track hyperparameters and run metadata
    config=config
)
from itertools import chain
#model = GAT(config['num_features'], config['embedding_dim'], 3 , loader_train.dataset.num_classes, jk=None)
embed = torch.nn.Embedding(config['n_colors'], config['num_features'])
embed.to(device)
# The original line here was the unterminated call `model = N_GCP(`, which is a
# syntax error for the whole cell. AmazonNet is instantiated because the
# training and analysis cells unpack (logits, pre_pool, post_pool) from the
# model, and the wandb run is named "Recreation AmazonNet".
model = AmazonNet(config['num_features'], config['embedding_dim'], loader_train.dataset.num_classes)
model.to(device)
wandb.watch(model, log_freq=1)
# Optimise the model and the embedding table together. The embedding is only
# used when the (currently commented-out) embedding input path is enabled;
# parameters that receive no gradient are skipped by the optimizer.
params = list(chain(model.parameters(), embed.parameters()))
optimizer = torch.optim.AdamW(params, lr=config['learning_rate'])
criterion = torch.nn.CrossEntropyLoss()

In [None]:

print("...Start Training...")

# Accumulators filled during training (activations before/after pooling and graph names).
pre_pools, post_pools, first_convs, names = [], [], [], []

# Element-wise modulo that maps values into the range of available colour indices.
hash_tensor = torch.vmap(lambda value: value % config['n_colors'])
def test(loader):
    model.eval()
    correct = 0
    
    for data in loader:
        #data = data.to(device)
        """
        input = torch.squeeze(embed(hash_tensor(data.x.long())))
        output = model(input, data.edge_index) #, data.batch
        """
        output = model(data.x, data.edge_index, data.batch) 
        output = F.softmax(output, dim=1)
        #color = F.softmax(color, dim=1)
        
        # selects the index of the max value
        pred = output.max(dim=1)[1]
        #truth = y = torch.flatten(torch.index_select(torch.reshape(data.y, (-1, 3)), 1, idx))
        correct += pred.eq(data.y).sum().item()
        
    return {'acc': correct / len(loader.dataset)}
def train_single(epoch):
    """Train the global ``model`` for one pass over ``loader_train``.

    Args:
        epoch: current epoch number (not used inside the function).

    Returns:
        dict with key ``'train/cat_loss'``: the loss summed over all batches
        (weighted by ``data.num_graphs``) divided by ``len(train_set)``.

    Side effects:
        Appends the pre-pooling and post-pooling activations and the graph
        names of every batch to ``pre_pools``, ``post_pools`` and ``names``.
    """
    model.train()
    loss_all = 0
    for data in loader_train:
        out, pre_pool, post_pool = model(data.x, data.edge_index, data.batch)

        # `criterion` is CrossEntropyLoss, which expects raw logits and applies
        # log-softmax internally. The original applied F.softmax first, which
        # squashes the logits and distorts both the loss and its gradients.
        loss = criterion(out, data.y)

        # Detach before storing so the lists do not keep every batch's
        # autograd graph alive across the whole training run.
        pre_pools.append(pre_pool.detach())
        post_pools.append(post_pool.detach())
        names.append(data.name)

        loss_all += loss.item() * data.num_graphs

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return {'train/cat_loss': loss_all / len(train_set)}

# Epochs are numbered from 1; the upper bound is inclusive so that exactly
# config['epochs'] epochs run (range(1, epochs) ran one epoch too few).
for epoch in range(1, config['epochs'] + 1):
    loss_train = train_single(epoch)
    print(loss_train)
    test_acc = test(loader_test)
    # One wandb step per epoch: training loss and test accuracy together.
    wandb.log({**loss_train, **test_acc})

wandb.finish()

In [None]:
# Run the trained model over the test set and collect, per graph, the loss,
# the activations before and after pooling, and the label.
model.eval()
loss = torch.nn.CrossEntropyLoss()
data0 = graph_dataset[0]
data1 = graph_dataset[1]
loss_series = []
df_pre = []
df_post = []
labels = []
first_convs = []
# Inference only: no autograd graph is needed for any of the collected values.
with torch.no_grad():
    for data in test_set:
        #input = torch.squeeze(embed(hash_tensor(data.x.long())))
        out, pre, post = model(data.x, data.edge_index, data.batch)
        # CrossEntropyLoss expects raw logits; the original applied softmax
        # first, which made the recorded losses inconsistent with a proper
        # cross-entropy.
        loss_series.append(loss(out, data.y).cpu().numpy())
        df_pre.append(pre.cpu().numpy())
        df_post.append(post.cpu().numpy())
        labels.append(data.y.cpu().numpy()[0])
        #first_convs.append(first_conv.cpu().detach().numpy())
labels = pd.Series(labels, dtype="category")

In [None]:
# Histogram of the class labels collected from the test set.
sns.histplot(labels)

This histogram shows that our test set, ignoring class 7, is rather balanced.
As such, the test set should be representative of all datasets.

## Investigate whether the nodes in a graph have different features PRE pooling
As the graphs have different numbers of nodes, they can't be stacked and have to be inspected individually.
However, aggregate measures can be used.
For this, the mean and the standard deviation of the node activations are taken feature-wise for each graph.

These graph-level aggregates are then compared against those of all other graphs, grouped by solver.

This behaviour occurs with both encoding schemes


In [None]:
# Per-graph standard deviation and mean of every feature dimension
# (each entry of df_pre is reduced along axis 0).
first_convs_std = np.array([np.std(activations, axis=0) for activations in df_pre])
first_convs_mean = np.array([np.mean(activations, axis=0) for activations in df_pre])

In [None]:
# Per-label mean (left panel) and standard deviation (right panel) of the
# graph-wise mean activations, one bar group per feature dimension.
tmp = pd.DataFrame(first_convs_mean)
tmp['label'] = labels
fig, axs = plt.subplots(1, 2)
fig.suptitle('Mean of activations')

per_label = tmp.groupby(['label'])
for ax, stats in zip(axs.flat, (per_label.mean(), per_label.std())):
    stats.T.plot(kind='bar', ax=ax)
    # Hide all x-axis ticks and tick labels (major and minor, top and bottom).
    ax.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)

plt.savefig('invest_RandColoring_mean.png', format='png')

In [None]:
# Per-label mean (left panel) and standard deviation (right panel) of the
# graph-wise activation standard deviations, one bar group per feature dimension.
tmp = pd.DataFrame(first_convs_std)
tmp['label'] = labels
fig, axs = plt.subplots(1, 2)
fig.suptitle('STD of activations')

per_label = tmp.groupby(['label'])
for ax, stats in zip(axs.flat, (per_label.mean(), per_label.std())):
    stats.T.plot(kind='bar', ax=ax)
    # Hide all x-axis ticks and tick labels (major and minor, top and bottom).
    ax.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)

plt.savefig('invest_RandColoring_std.png', format='png')

Across all graph labels there is no differentiation between the individual labels (??? - still to be verified).
From CITE we know that each layer in a GNN needs to be expressive in order to lead to a useful network architecture.
It is therefore warranted to first investigate ONE good layer rather than stacking several of them.

In [None]:
# Display the node feature matrix `x` of the first graph in the training set.
train_set[0].x