In [1]:
%cd ..

/home/jbananafish/Desktop/Master/Thesis/code/gcnboost


In [2]:
from tqdm import tqdm
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, SAGEConv, RGCNConv, to_hetero
import torch_geometric.transforms as T

from src.data.artgraph import ArtGraph

In [3]:
torch.manual_seed(1)
torch.cuda.manual_seed(1)

In [4]:
#base_data = ArtGraph("./ekg", preprocess='node2vec', features=True, type='ekg')
base_data = ArtGraph("data", preprocess='node2vec', transform=T.ToUndirected(), features=True, type='ekg')

## Some graph-level information

In [5]:
print(f"Number of artist classes: {base_data.num_classes['artist']}")
print(f"Number of style classes: {base_data.num_classes['style']}")
print(f"Number of genre classes: {base_data.num_classes['genre']}")
print(f"Number of input features: {base_data.num_features}")

Number of artist classes: 300
Number of style classes: 83
Number of genre classes: 50
Number of input features: 128


## Some node-level information

In [6]:
data = base_data[0]

In [7]:
print(data)

HeteroData(
  [1martwork[0m={
    x=[61477, 128],
    y_artist=[61477],
    y_style=[61477],
    y_genre=[61477],
    train_mask=[61477],
    val_mask=[61477],
    test_mask=[61477]
  },
  [1martist[0m={ x=[300, 128] },
  [1mgallery[0m={ x=[1090, 128] },
  [1mcity[0m={ x=[665, 128] },
  [1mcountry[0m={ x=[64, 128] },
  [1mstyle[0m={ x=[83, 128] },
  [1mperiod[0m={ x=[53, 128] },
  [1mgenre[0m={ x=[50, 128] },
  [1mserie[0m={ x=[610, 128] },
  [1mauction[0m={ x=[5, 128] },
  [1mtag[0m={ x=[5146, 128] },
  [1mmedia[0m={ x=[160, 128] },
  [1msubject[0m={ x=[2161, 128] },
  [1mtraining_node[0m={ x=[108, 128] },
  [1mfield[0m={ x=[65, 128] },
  [1mmovement[0m={ x=[121, 128] },
  [1mpeople[0m={ x=[48, 128] },
  [1m(artist, influenced_rel, artist)[0m={ edge_index=[2, 62] },
  [1m(artist, subject_rel, subject)[0m={ edge_index=[2, 3648] },
  [1m(artist, training_rel, training_node)[0m={ edge_index=[2, 130] },
  [1m(artist, field_rel, field)[0m={ edge_in

In [8]:
data_homo = data.to_homogeneous()

In [9]:
data_homo

Data(x=[72206, 128], edge_index=[2, 947934], edge_type=[947934])

In [10]:
class GNN(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels, num_layers, dropout):
        super(GNN, self).__init__()
        self.dropout = dropout
        self.convs = torch.nn.ModuleList()
        for _ in range(num_layers):
            conv = SAGEConv((-1, -1), hidden_channels)
            self.convs.append(conv)
        self.conv_out = SAGEConv((-1, -1), out_channels)

    def forward(self, x, edge_index):
        for conv in self.convs:
            x = conv(x, edge_index).relu()
            x = F.dropout(x, self.dropout)
        x = self.conv_out(x, edge_index)
        return F.log_softmax(x, dim=1)

In [11]:
class GCN(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels, num_layers, dropout):
        super(GCN, self).__init__()
        self.dropout = dropout
        self.convs = torch.nn.ModuleList()
        for _ in range(num_layers):
            conv = GCNConv(128, hidden_channels)
            self.convs.append(conv)
        self.conv_out = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        for conv in self.convs:
            x = conv(x, edge_index).relu()
            x = F.dropout(x, self.dropout)
        x = self.conv_out(x, edge_index)
        return F.log_softmax(x, dim=1)

In [12]:
class MGNN(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels, metadata, n_layers, dropout):
        super(MGNN, self).__init__()
        self.gnn_artist = GNN(hidden_channels, out_channels['artist'], n_layers, dropout)
        self.gnn_artist = to_hetero(self.gnn_artist, metadata)

        self.gnn_style = GNN(hidden_channels, out_channels['style'], n_layers, dropout)
        self.gnn_style = to_hetero(self.gnn_style, metadata)

        self.gnn_genre = GNN(hidden_channels, out_channels['genre'], n_layers, dropout)
        self.gnn_genre = to_hetero(self.gnn_genre, metadata)

    def forward(self, x, edge_index):
        return [self.gnn_artist(x, edge_index), self.gnn_style(x, edge_index), self.gnn_genre(x, edge_index)]

In [13]:
class MGCN(torch.nn.Module):
    def __init__(self, hidden_channels, out_channels, n_layers, dropout):
        super(MGCN, self).__init__()
        self.gnn_artist = GCN(hidden_channels, out_channels['artist'], n_layers, dropout)

        self.gnn_style = GCN(hidden_channels, out_channels['style'], n_layers, dropout)

        self.gnn_genre = GCN(hidden_channels, out_channels['genre'], n_layers, dropout)

    def forward(self, x, edge_index):
        return [self.gnn_artist(x, edge_index), self.gnn_style(x, edge_index), self.gnn_genre(x, edge_index)]

In [14]:
class ArtGraphGCNBoost:

    map_labels = {
        0: 'artist',
        1: 'style',
        2: 'genre'
    }

    def __init__(self, model, data, optimizer):
        
        self.data = data
        self.data_homo = data.to_homogeneous()
        self.artworks = data['artwork']
        self.y = torch.stack([data['artwork'].y_artist, data['artwork'].y_style, data['artwork'].y_genre])
        self.train_mask = self.artworks.train_mask
        self.val_mask = self.artworks.val_mask
        self.test_mask = self.artworks.test_mask

        self.model = model
        self.optimizer = optimizer

    def get_classes(self, label= 'artist', split='train'):
        pass 

    def get_accuracy(self, predicted, labels):
        return predicted.argmax(dim=1).eq(labels).sum()/predicted.shape[0]

    def get_accuracies_homo(self, predicted, labels, mask):
        size = self.train_mask.shape[0]
        accuracies = [] 
        for id, _ in self.map_labels.items():
            accuracies.append(self.get_accuracy(predicted[id][:size][mask], labels[id][mask]))
        return accuracies

    def get_accuracies(self, predicted, labels, mask):
        accuracies = [] 
        for id, _ in self.map_labels.items():
            accuracies.append(self.get_accuracy(predicted[id]['artwork'][mask], labels[id][mask]))
        return accuracies

    def get_loss(self, predicted, labels):
        return F.nll_loss(predicted, labels.type(torch.LongTensor))

    def get_losses(self, predicted, labels, mask):
        losses = []
        for id, _ in self.map_labels.items():
            losses.append(self.get_loss(predicted[id]['artwork'][mask], labels[id][mask]))
        return losses

    def get_losses_homo(self, predicted, labels, mask):
        size = self.train_mask.shape[0]
        losses = []
        for id, _ in self.map_labels.items():
            losses.append(self.get_loss(predicted[id][:size][mask], labels[id][mask]))
        return losses


    def multi_task_training(self, epoch):
        self.model.train()

        self.optimizer.zero_grad()
        out = model(self.data.x_dict, self.data.edge_index_dict)
        train_losses = self.get_losses(out, self.y, self.train_mask)
        train_total_loss = sum(train_losses)
        train_total_loss.backward()
        optimizer.step()

        train_accuracies = self.get_accuracies(out, self.y, self.train_mask)

        return out, train_losses, train_accuracies

    def multi_task_training_homo(self, epoch):
        self.model.train()

        self.optimizer.zero_grad()
        out = model(self.data_homo.x, self.data_homo.edge_index)
        train_losses = self.get_losses_homo(out, self.y, self.train_mask)
        train_total_loss = sum(train_losses)
        train_total_loss.backward()
        optimizer.step()

        train_accuracies = self.get_accuracies_homo(out, self.y, self.train_mask)

        return out, train_losses, train_accuracies

    def test(self, out):
        val_losses = self.get_losses(out, self.y, self.val_mask)
        test_losses = self.get_losses(out, self.y, self.test_mask)

        val_accuracies = self.get_accuracies(out, self.y, self.val_mask)
        test_accuracies = self.get_accuracies(out, self.y, self.test_mask)
        
        return val_losses, val_accuracies, test_losses, test_accuracies

    def test_homo(self, out):
        val_losses = self.get_losses_homo(out, self.y, self.val_mask)
        test_losses = self.get_losses_homo(out, self.y, self.test_mask)

        val_accuracies = self.get_accuracies_homo(out, self.y, self.val_mask)
        test_accuracies = self.get_accuracies_homo(out, self.y, self.test_mask)
        
        return val_losses, val_accuracies, test_losses, test_accuracies

In [15]:
model = MGNN(hidden_channels=16, out_channels=base_data.num_classes, metadata=data.metadata(),
            n_layers=1, dropout=0.5)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
gcn = ArtGraphGCNBoost(model, data, optimizer)
epochs = 1
for epoch in range(0, epochs):
    out, train_losses, train_accuracies = gcn.multi_task_training(epoch)
    val_losses, val_accuracies, test_losses, test_accuracies = gcn.test(out)

    if epoch % 10 == 0:
        print(f'Epoch: {epoch+1}')
        print(f'*\tOn training')
        for i, train_loss_acc in enumerate(zip(train_losses, train_accuracies)):
            print(f'\t{gcn.map_labels[i]}\t {round(train_loss_acc[0].detach().item(), 4)} \t{round(train_loss_acc[1].item(), 2) * 100}%')
        print(f'*\tOn validation')
        for i, val_loss_acc in enumerate(zip(val_losses, val_accuracies)):
            print(f'\t{gcn.map_labels[i]}\t {round(val_loss_acc[0].detach().item(), 4)} \t{round(val_loss_acc[1].item(), 2) * 100}%')

        print(f'*\tOn test')
        for i, test_loss_acc in enumerate(zip(test_losses, test_accuracies)):
            print(f'\t{gcn.map_labels[i]}\t {round(test_loss_acc[0].detach().item(), 4)} \t{round(test_loss_acc[1].item(), 2) * 100}%')
        
#torch.save(out, "out.pt")

KeyboardInterrupt: 

In [16]:
model = MGCN(hidden_channels=16, out_channels=base_data.num_classes,
            n_layers=1, dropout=0)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
gcn = ArtGraphGCNBoost(model, data, optimizer)
epochs = 1000
for epoch in range(0, epochs):
    out, train_losses, train_accuracies = gcn.multi_task_training_homo(epoch)
    val_losses, val_accuracies, test_losses, test_accuracies = gcn.test_homo(out)

    if epoch % 10 == 0:
        print(f'Epoch: {epoch+1}')
        print(f'*\tOn training')
        for i, train_loss_acc in enumerate(zip(train_losses, train_accuracies)):
            print(f'\t{gcn.map_labels[i]}\t {round(train_loss_acc[0].detach().item(), 4)} \t{round(train_loss_acc[1].item(), 2) * 100}%')
        print(f'*\tOn validation')
        for i, val_loss_acc in enumerate(zip(val_losses, val_accuracies)):
            print(f'\t{gcn.map_labels[i]}\t {round(val_loss_acc[0].detach().item(), 4)} \t{round(val_loss_acc[1].item(), 2) * 100}%')

        print(f'*\tOn test')
        for i, test_loss_acc in enumerate(zip(test_losses, test_accuracies)):
            print(f'\t{gcn.map_labels[i]}\t {round(test_loss_acc[0].detach().item(), 4)} \t{round(test_loss_acc[1].item(), 2) * 100}%')
        
#torch.save(out, "out.pt")

Epoch: 1
*	On training
	artist	 5.704 	0.0%
	style	 4.4586 	1.0%
	genre	 3.8537 	8.0%
*	On validation
	artist	 5.7025 	1.0%
	style	 4.4411 	1.0%
	genre	 3.8763 	8.0%
*	On test
	artist	 5.7018 	0.0%
	style	 4.4389 	1.0%
	genre	 3.8725 	7.000000000000001%
Epoch: 11
*	On training
	artist	 5.654 	1.0%
	style	 4.3648 	1.0%
	genre	 3.7477 	15.0%
*	On validation
	artist	 5.6621 	2.0%
	style	 4.3634 	1.0%
	genre	 3.7951 	14.000000000000002%
*	On test
	artist	 5.6633 	1.0%
	style	 4.3639 	2.0%
	genre	 3.7968 	15.0%
Epoch: 21
*	On training
	artist	 5.6017 	3.0%
	style	 4.272 	3.0%
	genre	 3.6431 	22.0%
*	On validation
	artist	 5.6194 	3.0%
	style	 4.2853 	4.0%
	genre	 3.7138 	21.0%
*	On test
	artist	 5.6227 	2.0%
	style	 4.2886 	4.0%
	genre	 3.7209 	22.0%
Epoch: 31
*	On training
	artist	 5.544 	4.0%
	style	 4.1761 	6.0%
	genre	 3.5368 	32.0%
*	On validation
	artist	 5.5721 	5.0%
	style	 4.2034 	8.0%
	genre	 3.6304 	27.0%
*	On test
	artist	 5.5778 	4.0%
	style	 4.2098 	7.000000000000001%
	genre	 