In [2]:
import copy
import time
import json
import numpy as np
import pandas as pd

from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix, classification_report, cohen_kappa_score

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

In [3]:
# Colab-only step: mount Google Drive so that it is reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Device on which the helper functions of this notebook allocate their tensors.
device = "cpu"
# Root folder that every data path in this notebook is built from.
# NOTE(review): Drive is mounted at /content/drive, yet this root is relative — confirm the working directory.
base_dir = ".."

# Load data

### neoplasm25

Load all the variables used in the learning algorithm:


*   **var_dict** is the dictionary of all the variables grounded from the MRF (14086 for the predicate Link, 2246 for the predicate Text, 2246 for the predicate Type) 
*   **nn_inputs** contains the inputs of the neural networks
*   **gidx_mat** is a matrix used to map a pair of Text variables to the corresponding Link variable (the predicate Link takes two Text arguments as input)
*   **varidx2fidx** is a dictionary containing, for each variable, the indexes of the grounding of the formulas that the variable makes true
*   **stat** is a dictionary containing, for each grounding of each formula, the number of times that each value of the variable makes that grounding true
*   **evidence_mask** is a masking array containing the truth value of the variables


In [5]:
# Training artefacts of neoplasm25, loaded in the order the names are listed.
# torch.load unpickles its input, so these files must come from a trusted source.
(var_dict, nn_inputs, gidx_mat,
 varidx2fidx, stat, evidence_mask) = (
    torch.load(base_dir + rel_path)
    for rel_path in (
        '/abstrct/data_notebook/neoplasm25/train/neoplasm25_variables.pt',
        '/abstrct/data_notebook/neoplasm25/train/neoplasm25_inputs.pt',
        '/abstrct/data_notebook/neoplasm25/train/neoplasm25_gidxmat.pt',
        '/abstrct/data_notebook/neoplasm25/train/neoplasm25_varidx2fidx.pt',
        '/abstrct/data_notebook/neoplasm25/train/neoplasm25_stat.pt',
        '/abstrct/data_notebook/neoplasm25/train/neoplasm25_evidence_mask.pt',
    )
)

Load the ground truth, validation and test set

In [6]:
# Training targets for the Type and Link networks (used by the loss in train()).
type_true, link_true = (
    torch.load(base_dir + rel_path)
    for rel_path in (
        '/abstrct/data_notebook/neoplasm25/train/type_true.pt',
        '/abstrct/data_notebook/neoplasm25/train/link_true.pt',
    )
)

In [7]:
# Validation split: network inputs, Link targets (val_true) and Type targets.
val_inputs, val_true, type_val_true = (
    torch.load(base_dir + rel_path)
    for rel_path in (
        '/abstrct/data_notebook/neoplasm25/val/neoplasm25_val_inputs.pt',
        '/abstrct/data_notebook/neoplasm25/val/neoplasm25_val_true.pt',
        '/abstrct/data_notebook/neoplasm25/val/neoplasm25_type_val_true.pt',
    )
)

In [8]:
# Test split: network inputs plus the Link and Type targets.
test_inputs, link_test_true, type_test_true = (
    torch.load(base_dir + rel_path)
    for rel_path in (
        '/abstrct/data_notebook/neoplasm25/test/neoplasm25_test_inputs.pt',
        '/abstrct/data_notebook/neoplasm25/test/neoplasm25_link_test_true.pt',
        '/abstrct/data_notebook/neoplasm25/test/neoplasm25_type_test_true.pt',
    )
)

Separate the two dictionaries

In [9]:
# The first 14086 entries of var_dict are copied under the same keys.
link_dict = {var_pos: var_dict[var_pos] for var_pos in range(14086)}

In [10]:
# The 2246 entries starting at position 16332 of var_dict are copied and re-keyed from 0.
type_dict = {
    var_pos - 16332: var_dict[var_pos]
    for var_pos in range(16332, 16332 + 2246)
}

### glaucoma25
Load the glaucoma25 set for testing

In [11]:
# glaucoma25 evaluation set: network inputs plus the Type and Link targets.
gla_inputs, type_gla_true, link_gla_true = (
    torch.load(base_dir + rel_path)
    for rel_path in (
        '/abstrct/data_notebook/glaucoma25/gla_inputs.pt',
        '/abstrct/data_notebook/glaucoma25/type_gla_true.pt',
        '/abstrct/data_notebook/glaucoma25/link_gla_true.pt',
    )
)

### mixed25
Load the mixed25 set for testing

In [12]:
# mixed25 evaluation set: network inputs plus the Type and Link targets.
# The paths are rooted at base_dir, like every other data path in this notebook
# (the previous spelling `based_dir` was undefined and raised NameError).
mix_inputs = torch.load(base_dir + '/abstrct/data_notebook/mixed25/mix_inputs.pt')
type_mix_true = torch.load(base_dir + '/abstrct/data_notebook/mixed25/type_mix_true.pt')
link_mix_true = torch.load(base_dir + '/abstrct/data_notebook/mixed25/link_mix_true.pt')

# Network

Define the two neural networks for the tasks

In [14]:
class TypeNetwork(nn.Module):
    """Feed-forward network mapping 25 input features to 2 output scores.

    Three hidden linear layers (10, 20 and 10 units) are each followed by a
    ReLU and by dropout with p=0.4; the final linear layer returns raw scores.
    """

    def __init__(self):
        super().__init__()
        # The attribute names are the state_dict keys, so they must stay fc1..fc4.
        self.fc1 = nn.Linear(25, 10)
        self.fc2 = nn.Linear(10, 20)
        self.fc3 = nn.Linear(20, 10)
        self.fc4 = nn.Linear(10, 2)

        self.dropout = nn.Dropout(0.4)
        self.activation = nn.ReLU()

    def forward(self, input):
        """Apply the three hidden stages, then the output layer, to `input`."""
        hidden = input
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.dropout(self.activation(layer(hidden)))

        return self.fc4(hidden)


class LinkNetwork(nn.Module):
    """Feed-forward network mapping 50 input features to 2 output scores.

    Three hidden linear layers (10, 20 and 10 units) are each followed by a
    ReLU and by dropout with p=0.4; the final linear layer returns raw scores.
    """

    def __init__(self):
        super().__init__()
        # The attribute names are the state_dict keys, so they must stay fc1..fc4.
        self.fc1 = nn.Linear(50, 10)
        self.fc2 = nn.Linear(10, 20)
        self.fc3 = nn.Linear(20, 10)
        self.fc4 = nn.Linear(10, 2)

        self.dropout = nn.Dropout(0.4)
        self.activation = nn.ReLU()

    def forward(self, input):
        """Apply the three hidden stages, then the output layer, to `input`."""
        hidden = input
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.dropout(self.activation(layer(hidden)))

        return self.fc4(hidden)

# Training

In [16]:
def var_pl(varidx, wt):
    '''
    Computes a length-2 score vector (one entry per value of the variable)
    for the variable stored at var_dict[varidx].

    varidx: key of the variable in the global var_dict.
    wt: sequence indexed by formula index; wt[0] and wt[1] are indexed as
        wt[fidx][row][col] (train() passes the outputs of the two networks).

    Returns:
      - tensor [0., 1.] when the variable belongs to the predicate 'Text';
      - the uniform vector [0.5, 0.5] when varidx2fidx has no entry for varidx;
      - otherwise the accumulated sums. They are returned as they are: no
        exponentiation or normalisation is applied in this function.

    Reads the globals var_dict, gidx_mat, varidx2fidx, stat, ftype and device.
    '''

    values = 2
    var = var_dict[varidx]
    # Each var_dict entry unpacks into a predicate name and three indices; the
    # first is used as row and the third as column when indexing wt below.
    name, wt_idx1, wt_idx2, wt_idx3 = var

    # if the predicate is a Feature Predicate return the tensor [0, 1]
    # (this tensor is created on the default device, not on `device`)
    if name == 'Text':
        return torch.tensor([0, 1], dtype=torch.float)

    # For Link variables the row of wt[1] is looked up through gidx_mat; the
    # stored index is shifted down by one before use.
    # NOTE(review): wt_idx stays unbound for any other predicate, so the
    # `fidx == 1` branches below presumably only ever see Link variables — confirm.
    if name == 'Link':        
        wt_idx = gidx_mat[wt_idx1][wt_idx2]-1
    
    gfs = varidx2fidx.get(varidx)
    if gfs is None: 
        # no list was saved, so the truth of all formulas is unaffected by the variable's value
        # uniform distribution applies
        p = 1.0 / values
        return p * torch.ones(values, device=device)
    sums = torch.zeros(values, device=device)
    # gfs maps a formula index to the groundings of that formula recorded for this variable.
    for fidx, groundings in gfs.items():
        for gidx in groundings:
            # n is the entry of stat for this grounding and the value validx of the variable.
            for validx, n in enumerate(stat[fidx][gidx][varidx]):
                if ftype[fidx] == 'hard': 
                    # penalize the prob mass of every value violating a hard constraint
                    # NOTE(review): only fidx 0 and 1 are handled here; with the ftype list
                    # defined in this notebook (hard formulas at indices 2 and 3) this
                    # branch never changes sums — confirm that this is intended.
                    if n == 0: 
                        if fidx == 0:
                            sums[validx] = sums[validx] - 1000 * wt[fidx][wt_idx1][wt_idx3]
                        if fidx == 1:
                            sums[validx] = sums[validx] - 1000 * wt[fidx][wt_idx][wt_idx3]
                else:
                    # Non-hard formulas add the count times the selected network output.
                    if fidx == 0:
                        sums[validx] = sums[validx] + n * wt[fidx][wt_idx1][wt_idx3]
                    if fidx == 1:
                        sums[validx] = sums[validx] + n * wt[fidx][wt_idx][wt_idx3]

    return sums

In [17]:
def compute_pls(wt):
    '''
    Evaluates var_pl for every Type and every Link variable, given the
    outputs wt of the neural networks.

    Returns a two-element list: entry 0 is a (2246, 2) tensor with one row per
    key of type_dict, entry 1 a (14086, 2) tensor with one row per key of link_dict.
    '''
    type_pls = torch.zeros((2246, 2), device=device)
    link_pls = torch.zeros((14086, 2), device=device)

    # type_dict is keyed from 0; the matching var_dict keys start at 16332.
    for local_idx in type_dict:
        type_pls[local_idx] = var_pl(local_idx + 16332, wt)
    for local_idx in link_dict:
        link_pls[local_idx] = var_pl(local_idx, wt)

    return [type_pls, link_pls]

In [18]:
def grad(w):
    '''
    Computes one scalar per neural formula from the counts in stat and the
    per-variable scores returned by compute_pls(w).

    w: the network outputs, passed unchanged to compute_pls.

    Returns a float64 tensor of length len(nnformulas). Entry fidx is the sum,
    over every non-Text variable of every grounding of formula fidx, of
        counts[evidx] - sum_i counts[i] * pls[...][i].

    Reads the globals stat, var_dict and nnformulas.
    '''
    pls = compute_pls(w)
    grad = torch.zeros(len(nnformulas), dtype=torch.float64)
    for fidx, groundval in stat.items():
        # Only formulas 0 and 1 contribute; the loop stops at the first larger index.
        # NOTE(review): this relies on stat yielding the keys 0 and 1 before any other — confirm.
        if fidx > 1:
            break
        for gidx, varval in groundval.items():
            for varidx, counts in varval.items():
                var = var_dict[varidx]
                # The last element of the var_dict entry selects which count is the starting value.
                name, _, _, evidx = var
                g = counts[evidx]
                if name == 'Text':
                    continue
                # pls[0] holds the Type rows (keyed from 0, hence the shift), pls[1] the Link rows.
                if name == 'Type':
                    plsidx = 0
                    varidx -= 16332
                if name == 'Link':
                    plsidx = 1
                # NOTE(review): if counts is a tensor, g is a view and this in-place
                # subtraction would also modify stat; assumes counts holds plain numbers — confirm.
                for i, val in enumerate(counts):
                    g -= val * pls[plsidx][varidx][i]
                grad[fidx] += g
    
    return grad

In [19]:
def forward():
    '''
    Computes the forward step of the neural networks.

    Returns a list whose entry fidx is the output of nnformulas[fidx]
    applied to nn_inputs[fidx].
    '''
    return [net(nn_inputs[fidx]) for fidx, net in enumerate(nnformulas)]

In [20]:
def train(optimizer, criterion, grad_mod):
    '''
    Computes one epoch of the full training step on the training data.

    optimizer: optimizer over the parameters of nnformulas.
    criterion: pair of losses; criterion[0] is applied to the Type outputs and
        type_true, criterion[1] to the Link outputs and link_true.
    grad_mod: when true, the gradients of the parameters of network fidx are
        multiplied by entry fidx of grad(preds) before the optimizer step.

    Returns the summed loss of the two networks (a tensor).
    '''
    for model in nnformulas:
        model.train()

    optimizer.zero_grad()
    preds = forward()
    loss1 = criterion[0](preds[0], type_true)
    loss2 = criterion[1](preds[1], link_true)
    loss = loss1 + loss2

    loss.backward()
    if grad_mod:
        # The scaling factors are plain numbers for this step: computing them
        # without autograd tracking avoids building a large throw-away graph
        # and keeps par.grad detached from the network outputs.
        with torch.no_grad():
            gradient = grad(preds)
            for fidx, net in enumerate(nnformulas):
                for par in net.parameters():
                    if par.grad is not None:
                        par.grad *= gradient[fidx]

    optimizer.step()
    return loss

In [21]:
def evaluate():
    '''
    Scores the current networks on the validation data.

    Returns (f1_link, f1_type): the F1 of the Link network restricted to
    label 1, and the macro F1 of the Type network.
    '''
    for model in nnformulas:
        model.eval()

    with torch.no_grad():
        outputs = [net(val_inputs[fidx]) for fidx, net in enumerate(nnformulas)]

        link_labels = outputs[1].argmax(dim=1)
        f1_link = f1_score(val_true, link_labels, average='macro', labels=[1])

        type_labels = outputs[0].argmax(dim=1)
        f1_type = f1_score(type_val_true, type_labels, average='macro')

    return f1_link, f1_type

In [22]:
def training_loop(epochs, optimizer, criterion, pretrain=None,
                   early_stopping=True, early_stopping_epochs=1000, verbose=False):
    '''
    Runs the training algorithm for up to `epochs` epochs, evaluating the
    model on the validation data after each training step.

    epochs: maximum number of epochs.
    optimizer, criterion: forwarded to train().
    pretrain: number of initial epochs trained without the gradient
        modification of train(); None or 0 enables it from the first epoch.
    early_stopping: stop once the validation Link F1 has not improved for
        early_stopping_epochs consecutive epochs.
    verbose: print one line per epoch and the early-stopping notice.

    Returns (best_val_f1, best_epoch, best_params): the best validation Link
    F1, the 0-based epoch where it was reached, and a deep copy of the
    state_dict of nnformulas at that epoch (the initial state if the F1 never
    rose above 0).
    '''
    start_train = time.time()
    best_val_f1 = 0
    best_epoch = 0
    epochs_no_improve = 0
    best_params = copy.deepcopy(nnformulas.state_dict())

    for epoch in range(epochs):
        start_epoch = time.time()

        # Exactly `pretrain` epochs (0 .. pretrain-1) run without the gradient
        # modification; the former `epoch > pretrain` test ran one extra.
        grad_mod = (not pretrain) or epoch >= pretrain

        loss_train = train(optimizer, criterion, grad_mod)
        val_f1_link, val_f1_type = evaluate()

        end_epoch = time.time()

        # Early stopping: model selection is driven by the Link F1 only.
        if val_f1_link > best_val_f1:
            epochs_no_improve = 0
            best_val_f1 = val_f1_link
            best_params = copy.deepcopy(nnformulas.state_dict())
            best_epoch = epoch
        else:
            epochs_no_improve += 1

        if early_stopping and epochs_no_improve == early_stopping_epochs:
            if verbose:
                print('Early stopping!' )
            break

        if verbose:
            print(f'Epoch: {epoch+1} '
                    f' Loss: Train = [{loss_train:.4f}] '
                    f' F1: Val_Link = [{val_f1_link:.4f}] Val_Type = [{val_f1_type:.4f}] '
                    f' Time one epoch (s): {end_epoch-start_epoch:.4f} ')

    end_train = time.time()
    print(f"Best epoch {best_epoch+1}, F1_macro: {best_val_f1:.4f}")
    print(f'Time for training: {end_train-start_train}')

    return best_val_f1, best_epoch, best_params

In [23]:
# Kind of each formula, by formula index: 0 and 1 are neural formulas, 2 and 3 are hard constraints.
ftype = ['nn', 'nn', 'hard', 'hard']

# Index 0 is the Type network and index 1 the Link network.
nnformulas = torch.nn.ModuleList([TypeNetwork(), LinkNetwork()])

optimizer = torch.optim.Adam(nnformulas.parameters(), lr=0.001)
# One loss per network; the Link loss weights class 1 nine times as much as class 0.
criterion = [
    nn.CrossEntropyLoss(),
    nn.CrossEntropyLoss(weight=torch.tensor([0.1, 0.9])),
]

In [None]:
# Train the single model: at most 1000 epochs, with the gradient modification of train()
# kept off during the initial phase set by `pretrain`, and early stopping after 100
# epochs without improvement of the validation Link F1.
best_valid_f1, best_epoch, best_params = training_loop(epochs=1000, optimizer=optimizer, criterion=criterion, 
                                                       pretrain=100, early_stopping=True, early_stopping_epochs=100, verbose=True)

# Train ensemble

In [None]:
n_models = 20
best_models = []

for i in range(n_models):
    # Each member starts from freshly initialised networks. The training helpers
    # read the global nnformulas, so that name is rebound on every iteration.
    nnformulas = torch.nn.ModuleList([TypeNetwork(), LinkNetwork()])

    optimizer = torch.optim.Adam(nnformulas.parameters(), lr=0.001)
    criterion = [
        nn.CrossEntropyLoss(),
        nn.CrossEntropyLoss(weight=torch.tensor([0.1, 0.9])),
    ]

    print(f'Model {i}')
    best_valid_f1, best_epoch, best_params = training_loop(
        epochs=1000,
        optimizer=optimizer,
        criterion=criterion,
        pretrain=None,
        early_stopping=True,
        early_stopping_epochs=100,
        verbose=False,
    )
    # Keep only the best state_dict of this member.
    best_models.append(best_params)

Model 0
Best epoch 1, F1_macro: 0.4402
Time for training: 240.01757836341858
Model 1
Best epoch 20, F1_macro: 0.4233
Time for training: 290.90355253219604
Model 2
Best epoch 4, F1_macro: 0.4563
Time for training: 248.33392453193665
Model 3
Best epoch 2, F1_macro: 0.4089
Time for training: 243.1462481021881
Model 4
Best epoch 4, F1_macro: 0.4468
Time for training: 245.45780754089355
Model 5
Best epoch 1, F1_macro: 0.4464
Time for training: 239.87665963172913
Model 6
Best epoch 1, F1_macro: 0.4474
Time for training: 238.34864115715027
Model 7
Best epoch 1, F1_macro: 0.4413
Time for training: 238.35264563560486
Model 8
Best epoch 4, F1_macro: 0.4336
Time for training: 245.46652698516846
Model 9
Best epoch 5, F1_macro: 0.4605
Time for training: 246.58912706375122
Model 10
Best epoch 1, F1_macro: 0.4382
Time for training: 236.7871265411377
Model 11
Best epoch 9, F1_macro: 0.4457
Time for training: 254.3463168144226
Model 12
Best epoch 2, F1_macro: 0.4466
Time for training: 238.9087495803833

# Evaluate

Evaluate the single model

In [None]:
# Rebuild the two networks and restore the parameters held in best_params.
nnformulas = torch.nn.ModuleList([TypeNetwork(), LinkNetwork()])
nnformulas.load_state_dict(best_params)

In [None]:
# Same report for both tasks: network 0 against the Type targets, network 1 against the Link targets.
for header, net_idx, truth in (('TYPE', 0, type_test_true),
                               ('LINK', 1, link_test_true)):
    print(header)
    nnformulas[net_idx].eval()
    with torch.no_grad():
        pred = nnformulas[net_idx](test_inputs[net_idx])
        # The predicted class is the index of the larger of the two scores.
        round_pred = pred.argmax(dim=1)
        print(classification_report(truth, round_pred))

Evaluate the ensemble with the average of the scores (AVG) and with majority voting (MAJ)

In [30]:
# Model used in the thesis 
# Rebinds best_models to the stored ensemble; each element is later passed to load_state_dict.
best_models = torch.load(base_dir + '/abstrct/data_notebook/trained_models/ens20_prebest_es100.pt')

In [31]:
preds_type, preds_link = [], []

# Collect the test-set scores of every ensemble member.
for par in best_models:
    eval_model = torch.nn.ModuleList([TypeNetwork(), LinkNetwork()])
    eval_model.load_state_dict(par)
    eval_model.eval()

    with torch.no_grad():
        preds_type.append(eval_model[0](test_inputs[0]))
        preds_link.append(eval_model[1](test_inputs[1]))

# Dimension 0 of the stacked tensors runs over the ensemble members.
stacked_type = torch.stack(preds_type)
stacked_link = torch.stack(preds_link)
# A sample is labelled 1 only when strictly more than half of the members vote 1.
vote_threshold = len(best_models) / 2

# AVG: average the scores over the members, then take the larger score.
print('TYPE_AVG')
pred_type_avg = stacked_type.mean(dim=0)
round_pred_type_avg = pred_type_avg.argmax(dim=1)
print(classification_report(type_test_true, round_pred_type_avg))

print('LINK_AVG')
pred_link_avg = stacked_link.mean(dim=0)
round_pred_link_avg = pred_link_avg.argmax(dim=1)
print(classification_report(link_test_true, round_pred_link_avg))

# MAJ: each member votes with its own argmax; the votes for class 1 are counted.
print('TYPE_MAJ')
pred_type_maj = stacked_type.argmax(dim=2).sum(dim=0)
round_pred_type_maj = (pred_type_maj > vote_threshold).int()
print(classification_report(type_test_true, round_pred_type_maj))

print('LINK_MAJ')
pred_link_maj = stacked_link.argmax(dim=2).sum(dim=0)
round_pred_link_maj = (pred_link_maj > vote_threshold).int()
print(classification_report(link_test_true, round_pred_link_maj))

TYPE_AVG
              precision    recall  f1-score   support

           0       0.77      0.69      0.73       248
           1       0.84      0.88      0.86       438

    accuracy                           0.81       686
   macro avg       0.80      0.79      0.79       686
weighted avg       0.81      0.81      0.81       686

LINK_AVG
              precision    recall  f1-score   support

           0       0.95      0.81      0.88      3956
           1       0.27      0.63      0.37       424

    accuracy                           0.80      4380
   macro avg       0.61      0.72      0.63      4380
weighted avg       0.89      0.80      0.83      4380

TYPE_MAJ
              precision    recall  f1-score   support

           0       0.76      0.73      0.75       248
           1       0.85      0.87      0.86       438

    accuracy                           0.82       686
   macro avg       0.81      0.80      0.80       686
weighted avg       0.82      0.82      0.82    

# Krippendorff evaluation

Measure the agreement between the models of the ensemble using Krippendorff's alpha

In [27]:
!pip install krippendorff

Collecting krippendorff
  Downloading krippendorff-0.5.1-py3-none-any.whl (17 kB)
Installing collected packages: krippendorff
Successfully installed krippendorff-0.5.1


In [28]:
import krippendorff

In [32]:
# One row per ensemble member and one column per test sample; each entry is the
# class index with the larger Type score.
rounded_pred_type = torch.stack(preds_type).argmax(dim=2)
# Agreement among the ensemble members on the Type predictions.
krippendorff.alpha(reliability_data=np.array(rounded_pred_type))

0.8623505031839513

In [33]:
# One row per ensemble member and one column per test sample; each entry is the
# class index with the larger Link score.
rounded_pred_link = torch.stack(preds_link).argmax(dim=2)
# Agreement among the ensemble members on the Link predictions.
krippendorff.alpha(reliability_data=np.array(rounded_pred_link))

0.7000472917662675