# GCN

### Installs

In [None]:
# Mount Google Drive (Colab only); later cells read and write paths under /content/gdrive/.
from google.colab import drive
drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [None]:
# Install the Python dependencies for this Colab session (versions are not pinned).
!pip install sentence-transformers
!pip install rdflib
import os
import torch
# Export the installed torch version so the shell lines below can expand ${TORCH}
# when building the wheel index URL.
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

# torch-scatter / torch-sparse are looked up on the index page matching ${TORCH}.
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
# PyTorch Geometric itself is installed straight from the GitHub repository.
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

! pip install torchmetrics

### Imports

In [None]:
import torch.nn.functional as F
import os
import torch
import numpy as np
import random


from typing import List, Callable

from torch import Tensor, device
import pandas as pd

In [None]:
# Switch into the MAGNET project folder on Drive.
# NOTE(review): presumably needed so the `model_functions` package imported further down resolves — confirm.
os.chdir("/content/gdrive/My Drive/1_Studium/2_Master/Master Thesis/2_Projekt/Scratch/all_in/MAGNET")

In [None]:
from torch_geometric.nn import GATConv, GCNConv, RGATConv


In [None]:
import model_functions.helper_funcs as helper_funcs
import model_functions.model_definitions as model_definition
import model_functions.losses as losses_funcs
import math
import random
import numpy as np

In [None]:
class GCNClassic(torch.nn.Module):
    """Two independent single-layer GCN encoders, one per input graph.

    Each graph is passed through its own ``GCNConv`` layer (input and output width
    are both ``num_node_features``), followed by ReLU and dropout.
    """

    def __init__(self, num_node_features):
        """Create one ``GCNConv`` per graph.

        Args:
            num_node_features: Width of the node features; used as both the input
                and the output size of each convolution.
        """
        super().__init__()
        self.conv1_one = GCNConv(num_node_features, num_node_features)
        self.conv1_two = GCNConv(num_node_features, num_node_features)

    def _encode(self, conv, features, edge_index):
        """Apply ``conv``, then ReLU, then dropout (dropout follows ``self.training``)."""
        hidden = torch.relu(conv(features, edge_index))
        return F.dropout(hidden, training=self.training)

    def forward(self, data):
        """Encode both graphs stored in ``data``.

        Args:
            data: Object indexable by the keys ``'x_one'``, ``'edge_index_one'``,
                ``'x_two'`` and ``'edge_index_two'``.

        Returns:
            Tuple ``(x_one, x_two)`` holding the encoded node features of the first
            and the second graph.
        """
        # The first graph is fully processed before the second one, so the dropout
        # masks are drawn in the same order as before.
        encoded_one = self._encode(self.conv1_one, data['x_one'], data['edge_index_one'])
        encoded_two = self._encode(self.conv1_two, data['x_two'], data['edge_index_two'])
        return encoded_one, encoded_two

### Data

In [None]:
# Select the dataset: the file <use_case>/<a_or_b>.pt inside the shared data folder.
use_case = "anatomy_diff"
a_or_b = "case_a"

# The load path is relative, so the working directory is switched to the data folder first.
os.chdir("/content/gdrive/My Drive/1_Studium/2_Master/Master Thesis/2_Projekt/Scratch/all_in/data")
data = torch.load(f"{use_case}/{a_or_b}.pt")

In [None]:
#double_left_tensor, double_right_tensor = create_fake_data(data.train_set_right, data.train_set_left)
#torch.save(double_left_tensor, preprocessed_data_path+"/"+use_case+"/double_left_tensor.pt")
#torch.save(double_right_tensor, preprocessed_data_path+"/"+use_case+"/double_right_tensor.pt")
#double_left_tensor = torch.load(preprocessed_data_path+"/"+use_case+"/a/double_left_tensor.pt")
#double_right_tensor = torch.load(preprocessed_data_path+"/"+use_case+"/a/double_right_tensor.pt")

In [None]:
# Build a left/right pair of tensors through the project helper.
# NOTE(review): the arguments are passed as (right, left) while the results are named
# (left, right) — confirm against the signature of helper_funcs.create_fake_data.
# NOTE(review): neither result is referenced again in the cells visible here.
double_left_tensor, double_right_tensor = helper_funcs.create_fake_data(data.train_set_right, data.train_set_left)

### Functions for new loss

In [None]:
class CosineSimilarityLoss4(torch.nn.Module):
    """MSE loss between pairwise cosine similarities and 1/0 alignment targets.

    The first ``len(data.train_set_left)`` index pairs are treated as true
    alignments (target 1); every further pair is treated as an anti-alignment
    (target 0).
    """

    def __init__(self, model):
        """Store the embedding model and create the MSE criterion.

        Args:
            model: Callable mapping ``data`` to a pair ``(x_one, x_two)`` of node
                embedding matrices.
        """
        super().__init__()
        self.model = model
        self.loss_fct = torch.nn.MSELoss()

    def forward(self, data, lefts, rights):
        """Compute the loss for the given index pairs.

        Args:
            data: Input handed to the model; must expose ``train_set_left``.
            lefts: Index tensor selecting rows of the first embedding matrix.
            rights: Index tensor selecting rows of the second embedding matrix.

        Returns:
            Scalar loss tensor.

        Raises:
            ValueError: If fewer index pairs than true alignments are supplied.
        """
        x_one, x_two = self.model(data)

        left_embeddings = torch.index_select(x_one, 0, lefts)
        right_embeddings = torch.index_select(x_two, 0, rights)
        output = torch.cosine_similarity(left_embeddings, right_embeddings)

        num_alignments = len(data.train_set_left)
        num_anti_alignments = len(lefts) - num_alignments
        if num_anti_alignments < 0:
            raise ValueError(
                "expected at least %d index pairs (the true alignments), got %d"
                % (num_alignments, len(lefts))
            )

        # Build the targets from `output` so they share its device and dtype;
        # freshly created ones/zeros would live on the default device and break
        # as soon as the model runs elsewhere.
        target = torch.zeros_like(output)
        target[:num_alignments] = 1.0  # positives first, anti-alignments stay 0
        return self.loss_fct(output, target)

In [None]:
def create_anti_alignment(epoch, matches):
    """Assemble true alignments plus a mix of random and model-generated negatives.

    Reads the notebook-level ``data`` object. The share of negatives taken from the
    model's own wrong matches is ``floor(epoch / 1000 * 100) / 100``; the remaining
    share is filled with randomly drawn right-side indices.

    Args:
        epoch: Current epoch number; controls the mixing proportions.
        matches: Current model matches, forwarded to
            ``helper_funcs.filter_out_wrong_matches``.

    Returns:
        Tuple ``(lefts, rights)``. When wrong matches exist, both are int64 tensors
        ordered as: true alignment, random negatives, model-generated negatives.
        Otherwise the result of ``helper_funcs.create_fake_data`` is returned.

    Raises:
        IndexError: If random negatives are requested but every index below the
            largest training right index already belongs to the training set.
    """
    # Only the wrong matches are needed; the first two return values are unused.
    _, _, wrong_set_left, wrong_set_right = helper_funcs.filter_out_wrong_matches(matches, data)

    if len(wrong_set_left) == 0:
        # No model output to learn from yet: only the helper-generated version is used.
        lefts, rights = helper_funcs.create_fake_data(data.train_set_right, data.train_set_left)
        return lefts, rights

    train_lefts = data.train_set_left
    train_rights = data.train_set_right
    max_length = len(train_rights)

    # 1. Mixing proportions (floored to two decimals).
    prop_model = epoch / 1000
    proportion_of_model_faults = math.floor(prop_model * 100) / 100.0
    proportion_of_model_random = 1 - proportion_of_model_faults

    # 2. Sample model faults, capped by how many wrong matches are available.
    count_of_model_faults = math.floor(proportion_of_model_faults * max_length)
    amount_that_can_be_chosen = min(count_of_model_faults, len(wrong_set_right))
    chosen_indices = random.sample(range(len(wrong_set_right)), amount_that_can_be_chosen)
    picked_left = list(np.array(wrong_set_left)[chosen_indices])
    picked_right = list(np.array(wrong_set_right)[chosen_indices])

    # 3. Random anti-alignments, drawn from right indices NOT used in training.
    random_length = round(proportion_of_model_random * max_length)
    train_right_values = train_rights.tolist()
    m = max(train_right_values)  # largest right index seen in the training set
    # Plain set difference: the previous symmetric difference (^) put `m` itself
    # back into the pool although it is part of the training alignment.
    non_commoners = sorted(set(np.arange(0, m).tolist()) - set(train_right_values))
    wrong_right_indices = [random.choice(non_commoners) for _ in range(random_length)]

    # 4. Pack: true alignment, then random negatives, then model-generated faults.
    lefts = torch.cat((train_lefts, train_lefts[:len(wrong_right_indices)], torch.tensor(picked_left)), 0)
    rights = torch.cat((train_rights, torch.tensor(wrong_right_indices), torch.tensor(picked_right)), 0)

    # Cast so both results can be used as index tensors.
    return lefts.type(torch.int64), rights.type(torch.int64)


In [None]:
def create_anti_alignment_more_random(epoch, matches):
    """Like the 1000-epoch schedule, but model faults are phased in over 2000 epochs.

    Reads the notebook-level ``data`` object. The share of negatives taken from the
    model's own wrong matches is ``floor(epoch / 2000 * 100) / 100``; the remaining
    share is filled with randomly drawn right-side indices.

    Args:
        epoch: Current epoch number; controls the mixing proportions.
        matches: Current model matches, forwarded to
            ``helper_funcs.filter_out_wrong_matches``.

    Returns:
        Tuple ``(lefts, rights)``. When wrong matches exist, both are int64 tensors
        ordered as: true alignment, random negatives, model-generated negatives.
        Otherwise the result of ``helper_funcs.create_fake_data`` is returned.

    Raises:
        IndexError: If random negatives are requested but every index below the
            largest training right index already belongs to the training set.
    """
    # Only the wrong matches are needed; the first two return values are unused.
    _, _, wrong_set_left, wrong_set_right = helper_funcs.filter_out_wrong_matches(matches, data)

    if len(wrong_set_left) == 0:
        # No model output to learn from yet: only the helper-generated version is used.
        lefts, rights = helper_funcs.create_fake_data(data.train_set_right, data.train_set_left)
        return lefts, rights

    train_lefts = data.train_set_left
    train_rights = data.train_set_right
    max_length = len(train_rights)

    # 1. Mixing proportions (floored to two decimals).
    prop_model = epoch / 2000
    proportion_of_model_faults = math.floor(prop_model * 100) / 100.0
    proportion_of_model_random = 1 - proportion_of_model_faults

    # 2. Sample model faults, capped by how many wrong matches are available.
    count_of_model_faults = math.floor(proportion_of_model_faults * max_length)
    amount_that_can_be_chosen = min(count_of_model_faults, len(wrong_set_right))
    chosen_indices = random.sample(range(len(wrong_set_right)), amount_that_can_be_chosen)
    picked_left = list(np.array(wrong_set_left)[chosen_indices])
    picked_right = list(np.array(wrong_set_right)[chosen_indices])

    # 3. Random anti-alignments, drawn from right indices NOT used in training.
    random_length = round(proportion_of_model_random * max_length)
    train_right_values = train_rights.tolist()
    m = max(train_right_values)  # largest right index seen in the training set
    # Plain set difference: the previous symmetric difference (^) put `m` itself
    # back into the pool although it is part of the training alignment.
    non_commoners = sorted(set(np.arange(0, m).tolist()) - set(train_right_values))
    wrong_right_indices = [random.choice(non_commoners) for _ in range(random_length)]

    # 4. Pack: true alignment, then random negatives, then model-generated faults.
    lefts = torch.cat((train_lefts, train_lefts[:len(wrong_right_indices)], torch.tensor(picked_left)), 0)
    rights = torch.cat((train_rights, torch.tensor(wrong_right_indices), torch.tensor(picked_right)), 0)

    # Cast so both results can be used as index tensors.
    return lefts.type(torch.int64), rights.type(torch.int64)

In [None]:
def create_anti_alignment_model_faults_first(epoch, matches):
    """Assemble true alignments plus negatives, starting with model faults only.

    Reads the notebook-level ``data`` object. The share of random negatives is
    ``floor(epoch / 1000 * 100) / 100`` (capped at 1); the remaining share is taken
    from the model's own wrong matches.

    Args:
        epoch: Current epoch number; controls the mixing proportions.
        matches: Current model matches, forwarded to
            ``helper_funcs.filter_out_wrong_matches``.

    Returns:
        Tuple ``(lefts, rights)``. When wrong matches exist, both are int64 tensors
        ordered as: true alignment, random negatives, model-generated negatives.
        Otherwise ``(data.train_set_left, data.train_set_right)`` is returned
        unchanged, i.e. no negatives are added.

    Raises:
        IndexError: If random negatives are requested but every index below the
            largest training right index already belongs to the training set.
    """
    # Only the wrong matches are needed; the first two return values are unused.
    _, _, wrong_set_left, wrong_set_right = helper_funcs.filter_out_wrong_matches(matches, data)

    train_lefts = data.train_set_left
    train_rights = data.train_set_right

    if len(wrong_set_left) == 0:
        # No model output to learn from yet: no additional loss terms.
        return train_lefts, train_rights

    max_length = len(train_rights)

    # 1. Mixing proportions (floored to two decimals). The random share is capped
    #    at 1 so late epochs cannot yield a negative fault count (which makes
    #    random.sample raise) or more random rights than there are lefts to pair.
    prop_random = epoch / 1000
    proportion_of_model_random = min(math.floor(prop_random * 100) / 100.0, 1.0)
    proportion_of_model_faults = 1 - proportion_of_model_random

    # 2. Random anti-alignments, drawn from right indices NOT used in training.
    random_length = round(proportion_of_model_random * max_length)
    train_right_values = train_rights.tolist()
    m = max(train_right_values)  # largest right index seen in the training set
    # Plain set difference: the previous symmetric difference (^) put `m` itself
    # back into the pool although it is part of the training alignment.
    non_commoners = sorted(set(np.arange(0, m).tolist()) - set(train_right_values))
    wrong_right_indices = [random.choice(non_commoners) for _ in range(random_length)]

    # 3. Sample model faults, capped by how many wrong matches are available.
    count_of_model_faults = math.floor(proportion_of_model_faults * max_length)
    amount_that_can_be_chosen = min(count_of_model_faults, len(wrong_set_right))
    chosen_indices = random.sample(range(len(wrong_set_right)), amount_that_can_be_chosen)
    picked_left = list(np.array(wrong_set_left)[chosen_indices])
    picked_right = list(np.array(wrong_set_right)[chosen_indices])

    # 4. Pack: true alignment, then random negatives, then model-generated faults.
    lefts = torch.cat((train_lefts, train_lefts[:len(wrong_right_indices)], torch.tensor(picked_left)), 0)
    rights = torch.cat((train_rights, torch.tensor(wrong_right_indices), torch.tensor(picked_right)), 0)

    # Cast so both results can be used as index tensors.
    return lefts.type(torch.int64), rights.type(torch.int64)
    


In [None]:
def create_anti_alignment_model_faults_first_other_proportions(epoch, matches):
    """Model-faults-first schedule with random negatives phased in over 2000 epochs.

    Reads the notebook-level ``data`` object. The share of random negatives is
    ``floor(epoch / 2000 * 100) / 100`` (capped at 1); the remaining share is taken
    from the model's own wrong matches.

    Args:
        epoch: Current epoch number; controls the mixing proportions.
        matches: Current model matches, forwarded to
            ``helper_funcs.filter_out_wrong_matches``.

    Returns:
        Tuple ``(lefts, rights)``. When wrong matches exist, both are int64 tensors
        ordered as: true alignment, random negatives, model-generated negatives.
        Otherwise ``(data.train_set_left, data.train_set_right)`` is returned
        unchanged, i.e. no negatives are added.

    Raises:
        IndexError: If random negatives are requested but every index below the
            largest training right index already belongs to the training set.
    """
    # Only the wrong matches are needed; the first two return values are unused.
    _, _, wrong_set_left, wrong_set_right = helper_funcs.filter_out_wrong_matches(matches, data)

    train_lefts = data.train_set_left
    train_rights = data.train_set_right

    if len(wrong_set_left) == 0:
        # No model output to learn from yet: no additional loss terms.
        return train_lefts, train_rights

    max_length = len(train_rights)

    # 1. Mixing proportions (floored to two decimals). The random share is capped
    #    at 1 so late epochs cannot yield a negative fault count (which makes
    #    random.sample raise) or more random rights than there are lefts to pair.
    prop_random = epoch / 2000
    proportion_of_model_random = min(math.floor(prop_random * 100) / 100.0, 1.0)
    proportion_of_model_faults = 1 - proportion_of_model_random

    # 2. Random anti-alignments, drawn from right indices NOT used in training.
    random_length = round(proportion_of_model_random * max_length)
    train_right_values = train_rights.tolist()
    m = max(train_right_values)  # largest right index seen in the training set
    # Plain set difference: the previous symmetric difference (^) put `m` itself
    # back into the pool although it is part of the training alignment.
    non_commoners = sorted(set(np.arange(0, m).tolist()) - set(train_right_values))
    wrong_right_indices = [random.choice(non_commoners) for _ in range(random_length)]

    # 3. Sample model faults, capped by how many wrong matches are available.
    count_of_model_faults = math.floor(proportion_of_model_faults * max_length)
    amount_that_can_be_chosen = min(count_of_model_faults, len(wrong_set_right))
    chosen_indices = random.sample(range(len(wrong_set_right)), amount_that_can_be_chosen)
    picked_left = list(np.array(wrong_set_left)[chosen_indices])
    picked_right = list(np.array(wrong_set_right)[chosen_indices])

    # 4. Pack: true alignment, then random negatives, then model-generated faults.
    lefts = torch.cat((train_lefts, train_lefts[:len(wrong_right_indices)], torch.tensor(picked_left)), 0)
    rights = torch.cat((train_rights, torch.tensor(wrong_right_indices), torch.tensor(picked_right)), 0)

    # Cast so both results can be used as index tensors.
    return lefts.type(torch.int64), rights.type(torch.int64)
    


In [None]:
def create_anti_alignment_v43_2000(epoch, matches, data):
    """Model-faults-first schedule for epochs up to 1000, helper-generated pairs afterwards.

    For ``epoch <= 1000`` the share of random negatives is
    ``floor(epoch / 1000 * 100) / 100`` and the remaining share is taken from the
    model's own wrong matches. For later epochs the result of
    ``helper_funcs.create_fake_data`` is returned instead.

    Args:
        epoch: Current epoch number; selects the branch and the mixing proportions.
        matches: Current model matches, forwarded to
            ``helper_funcs.filter_out_wrong_matches``.
        data: Object exposing ``train_set_left`` and ``train_set_right``.

    Returns:
        Tuple ``(lefts, rights)``:
        * no wrong matches: ``(data.train_set_left, data.train_set_right)`` unchanged;
        * ``epoch <= 1000``: int64 tensors ordered as true alignment, random
          negatives, model-generated negatives;
        * otherwise: the result of ``helper_funcs.create_fake_data``.

    Raises:
        IndexError: If random negatives are requested but every index below the
            largest training right index already belongs to the training set.
    """
    # Only the wrong matches are needed; the first two return values are unused.
    _, _, wrong_set_left, wrong_set_right = helper_funcs.filter_out_wrong_matches(matches, data)

    train_lefts = data.train_set_left
    train_rights = data.train_set_right

    if len(wrong_set_left) == 0:
        # No model output to learn from yet: no negatives are added.
        return train_lefts, train_rights

    if epoch > 1000:
        # Past the mixing schedule: fall back to the helper-generated pairs.
        lefts, rights = helper_funcs.create_fake_data(train_rights, train_lefts)
        return lefts, rights

    max_length = len(train_rights)

    # 1. Mixing proportions (floored to two decimals); within [0, 1] on this branch.
    prop_random = epoch / 1000
    proportion_of_model_random = math.floor(prop_random * 100) / 100.0
    proportion_of_model_faults = 1 - proportion_of_model_random

    # 2. Random anti-alignments, drawn from right indices NOT used in training.
    random_length = round(proportion_of_model_random * max_length)
    train_right_values = train_rights.tolist()
    m = max(train_right_values)  # largest right index seen in the training set
    # Plain set difference: the previous symmetric difference (^) put `m` itself
    # back into the pool although it is part of the training alignment.
    non_commoners = sorted(set(np.arange(0, m).tolist()) - set(train_right_values))
    wrong_right_indices = [random.choice(non_commoners) for _ in range(random_length)]

    # 3. Sample model faults, capped by how many wrong matches are available.
    count_of_model_faults = math.floor(proportion_of_model_faults * max_length)
    amount_that_can_be_chosen = min(count_of_model_faults, len(wrong_set_right))
    chosen_indices = random.sample(range(len(wrong_set_right)), amount_that_can_be_chosen)
    picked_left = list(np.array(wrong_set_left)[chosen_indices])
    picked_right = list(np.array(wrong_set_right)[chosen_indices])

    # 4. Pack: true alignment, then random negatives, then model-generated faults.
    lefts = torch.cat((train_lefts, train_lefts[:len(wrong_right_indices)], torch.tensor(picked_left)), 0)
    rights = torch.cat((train_rights, torch.tensor(wrong_right_indices), torch.tensor(picked_right)), 0)

    # Cast so both results can be used as index tensors.
    return lefts.type(torch.int64), rights.type(torch.int64)

### Model Run

In [None]:
# NOTE(review): this rebinds `device`, which the imports cell also brings in via
# `from torch import Tensor, device`.
device = 'cpu' 
source_data = data.to(device) 
# 384 is the node feature width handed to both GCN layers.
# NOTE(review): presumably the sentence-embedding size of the node features — confirm.
model = GCNClassic(384).to(device)
# The loss class is taken from the losses_funcs module, not from the
# CosineSimilarityLoss4 class defined in this notebook.
myloss = losses_funcs.CosineSimilarityLoss4(model)


# SGD with momentum and weight decay over all model parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=0.1, weight_decay=0.0005, momentum=0.9)

def train():
    """Run one optimisation step and return the loss as a Python float.

    Uses the notebook-level names ``model``, ``optimizer``, ``myloss``,
    ``source_data``, ``epoch`` and ``matches``.
    """
    # Earlier loss variants, kept for reference (original notes: "for 0, 2", "for 3", "for 1"):
    #   loss = myloss(source_data, matches)                                            # for 0, 2
    #   double_left_tensor, double_right_tensor = create_fake_data(
    #       data.train_set_right, data.train_set_left)                                 # for 3
    #   loss = myloss(source_data, matches, double_left_tensor, double_right_tensor)   # for 1
    model.train()
    optimizer.zero_grad()

    # Positive pairs plus the anti-alignments scheduled for the current epoch.
    pair_lefts, pair_rights = losses_funcs.create_anti_alignment_v41(epoch, matches, source_data)

    step_loss = myloss(source_data, pair_lefts, pair_rights)
    step_loss.backward()
    optimizer.step()
    return float(step_loss)

In [None]:
# Per-epoch metric histories on the training split.
precisions = []
recalls = []
losses = []

# Per-epoch metric histories on the test split.
precision_tests = []
recall_tests = []

# Per-epoch wrong matches on the training split, plus the matches themselves.
wrong_sets_left_train = []
wrong_sets_right_train = []
# `matches` is read by train() as a notebook-level name: it starts empty and is
# replaced after every epoch by the latest training matches.
matches = []
all_matches = []

# range(1, 1000) runs epochs 1..999, i.e. 999 iterations.
for epoch in range(1, 1000):
    loss = train()
    # Evaluate on both splits; the training matches are what train() reads next epoch.
    prec, rec, matches = helper_funcs.test_harder('train', model, source_data)
    prec_test, rec_test, matches_test= helper_funcs.test_harder('test', model, source_data)
    print(f'TRAINING Epoch: {epoch:02d}, Loss: {loss:.4f}, Precision: {prec:.4f} , Recall: {rec:.4f} ')
    print(f'TEST Epoch: {epoch:02d}, Precision: {prec_test:.4f} , Recall: {rec_test:.4f} ')


    precisions.append(prec)
    recalls.append(rec)
    losses.append(loss)
    precision_tests.append(prec_test)
    recall_tests.append(rec_test)
    # Only the wrong left/right sets are stored; `x` and `y` are not used afterwards.
    # NOTE(review): this passes `data` while the evaluation above uses `source_data` — confirm they are interchangeable.
    x, y, wrong_sets_left, wrong_sets_right = helper_funcs.filter_out_wrong_matches(matches, data)
    wrong_sets_left_train.append(wrong_sets_left)
    wrong_sets_right_train.append(wrong_sets_right)
    all_matches.append(matches)

TRAINING Epoch: 01, Loss: 0.3607, Precision: 0.0000 , Recall: 0.0000 
TEST Epoch: 01, Precision: 0.0000 , Recall: 0.0000 
TRAINING Epoch: 02, Loss: 0.3511, Precision: 0.0000 , Recall: 0.0000 
TEST Epoch: 02, Precision: 0.0000 , Recall: 0.0000 
TRAINING Epoch: 03, Loss: 0.3365, Precision: 0.0000 , Recall: 0.0000 
TEST Epoch: 03, Precision: 0.0000 , Recall: 0.0000 
TRAINING Epoch: 04, Loss: 0.3152, Precision: 0.0000 , Recall: 0.0000 
TEST Epoch: 04, Precision: 0.0000 , Recall: 0.0000 
TRAINING Epoch: 05, Loss: 0.2999, Precision: 0.0000 , Recall: 0.0000 
TEST Epoch: 05, Precision: 0.0000 , Recall: 0.0000 
TRAINING Epoch: 06, Loss: 0.2860, Precision: 0.0000 , Recall: 0.0000 
TEST Epoch: 06, Precision: 0.0000 , Recall: 0.0000 
TRAINING Epoch: 07, Loss: 0.2748, Precision: 0.0000 , Recall: 0.0000 
TEST Epoch: 07, Precision: 0.0000 , Recall: 0.0000 
TRAINING Epoch: 08, Loss: 0.2708, Precision: 0.0000 , Recall: 0.0000 
TEST Epoch: 08, Precision: 0.0000 , Recall: 0.0000 
TRAINING Epoch: 09, Loss

### Save output

In [None]:
# Show the matches of the final epoch; -1 stays valid if the number of epochs changes.
all_matches[-1]

In [None]:
# Run identifiers that are embedded in the output file names written below.
version = 'V213'
# NOTE(review): holds the same value as `a_or_b` from the data-loading cell; keep the two in sync.
case = "case_a"

In [None]:
import pickle

In [None]:
# Pickle the per-epoch wrong left-side matches collected during training.
wrong_left_path = f"/content/gdrive/My Drive/1_Studium/2_Master/Master Thesis/2_Projekt/Scratch/all_in/MAGNET/results/trainings/GCN/wrong_sets_left_{version}_{case}"
with open(wrong_left_path, "wb") as out_file:
    pickle.dump(wrong_sets_left_train, out_file)

In [None]:
# Pickle the per-epoch wrong right-side matches collected during training.
wrong_right_path = f"/content/gdrive/My Drive/1_Studium/2_Master/Master Thesis/2_Projekt/Scratch/all_in/MAGNET/results/trainings/GCN/wrong_sets_right_{version}_{case}"
with open(wrong_right_path, "wb") as out_file:
    pickle.dump(wrong_sets_right_train, out_file)

In [None]:
# Pickle the matches of every epoch.
# NOTE(review): unlike the wrong_sets_* files there is no underscore between
# "matches" and the version; kept as-is so existing readers still find the file.
matches_path = f"/content/gdrive/My Drive/1_Studium/2_Master/Master Thesis/2_Projekt/Scratch/all_in/MAGNET/results/trainings/GCN/matches{version}_{case}"
with open(matches_path, "wb") as out_file:
    pickle.dump(all_matches, out_file)

In [None]:
#with open("/content/gdrive/My Drive/1_Studium/2_Master/Master Thesis/2_Projekt/Scratch/all_in/MAGNET/results/trainings/GCN/macheinfach.pt", "wb") as fp:   #Pickling
 #       pickle.dump(output, fp)

In [None]:
#torch.save(output, "/content/gdrive/My Drive/1_Studium/2_Master/Master Thesis/2_Projekt/Scratch/all_in/MAGNET/results/trainings/GCN/macheinfach#")

In [None]:
# Model label embedded in the results file name written below.
model_name = 'GCNConv'
# Disabled call to the project helper helper_funcs.save_training_verlauf.
#helper_funcs.save_training_verlauf(losses, precisions, recalls,
#                      precision_tests, recall_tests,
#                      use_case, a_or_b, model_name, version)

In [None]:
# Track label embedded in the results file name written below.
# NOTE(review): holds the same value as `use_case` from the data-loading cell; keep the two in sync.
track = "anatomy_diff"

In [None]:
# Collect the per-epoch metrics into one table, one row per epoch.
metric_columns = ['losses', 'precisions_train', 'recalls_train', 'precisions_test', 'recalls_test']
metric_rows = list(zip(losses, precisions, recalls, precision_tests, recall_tests))
df = pd.DataFrame(metric_rows, columns=metric_columns)

In [None]:
# Write the metrics table as CSV into the GCN training results folder.
path = f"/content/gdrive/My Drive/1_Studium/2_Master/Master Thesis/2_Projekt/Scratch/all_in/MAGNET/results/trainings/GCN/{track}_{case}_{model_name}_{version}"
df.to_csv(f"{path}.csv")