In [1]:
import copy
import time
import json
import numpy as np
import pandas as pd

from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix, classification_report, cohen_kappa_score, accuracy_score

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader

In [2]:
# Colab-only step: mount the user's Google Drive under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Device on which the training helpers allocate their tensors.
device = "cpu"
# Prefix prepended to every data path loaded by the following cells.
base_dir = ".."

# Load data

Load all the variables used in the learning algorithm:


*   **var_dict** is the dictionary of all the variables grounded from the MRF (2000 for the predicate Digit, 20000 for the predicate Number) 
*   **nn_inputs** contains the inputs of the neural networks
*   **varidx2fidx** is a dictionary containing, for each variable, the indexes of the grounding of the formulas that the variable makes true
*   **stat** is a dictionary containing, for each grounding of each formula, the number of times that each value of the variable makes that grounding true
*   **evidence_mask** is a masking array containing the truth value of the variables


In [4]:
# Precomputed structures for the training set.
# NOTE: torch.load unpickles the file contents, so only load files that come
# from a trusted source.
var_dict = torch.load(f'{base_dir}/mnist/data_notebook/train/mnist_variables.pt')
nn_inputs = torch.load(f'{base_dir}/mnist/data_notebook/train/mnist_inputs.pt')
varidx2fidx = torch.load(f'{base_dir}/mnist/data_notebook/train/mnist_varidx2fidx.pt')
stat = torch.load(f'{base_dir}/mnist/data_notebook/train/mnist_stat.pt')
evidence_mask = torch.load(f'{base_dir}/mnist/data_notebook/train/mnist_evidence_mask.pt')

In [87]:
# Each file stores an (inputs, labels) pair.
X_train, y_train = torch.load(f'{base_dir}/mnist/data_notebook/train/mnist_train.pt')
X_test, y_test = torch.load(f'{base_dir}/mnist/data_notebook/test/mnist_test.pt')

# Insert a singleton axis at position 1 (the single input channel expected by
# the first convolution of Net) and convert the inputs to float32.
X_train, X_test = (images.unsqueeze(dim=1).float() for images in (X_train, X_test))

Load only the first 2000 samples for training

In [89]:
# Truncate inputs and labels together so they stay aligned.
X_train = X_train[:2000]
y_train = y_train[:2000]

In [86]:
# One entry per neural formula: entry i is the input of network i.
# This rebinds nn_inputs, replacing the value loaded from mnist_inputs.pt.
nn_inputs, y_true = [X_train], y_train

# Held-out inputs and labels used by evaluate().
val_inputs, val_true = [X_test], y_test

In [67]:
# Variables 2000..21999 of var_dict, re-keyed so that they start at 0.
number_dict = {varidx - 2000: var_dict[varidx] for varidx in range(2000, 22000)}

# Network

In [47]:
class Net(nn.Module):
    '''
    Small convolutional classifier: two 5x5 convolutions (1 -> 10 -> 20
    channels), each followed by 2x2 max-pooling and ReLU, then two linear
    layers (320 -> 50 -> 10).

    The output of the last linear layer is returned as is (no softmax).
    '''

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        '''
        x: batch of single-channel images; 28x28 inputs yield the
           20 * 4 * 4 = 320 features that fc1 expects.
        Returns a (batch, 10) tensor of class scores.
        '''
        # First convolutional stage
        stage1 = F.max_pool2d(self.conv1(x), 2)
        stage1 = F.relu(stage1)

        # Second convolutional stage, with channel-wise dropout before pooling
        stage2 = self.conv2_drop(self.conv2(stage1))
        stage2 = F.relu(F.max_pool2d(stage2, 2))

        # Classifier head; dropout is only active in training mode
        features = stage2.view(-1, 320)
        hidden = F.relu(self.fc1(features))
        hidden = F.dropout(hidden, training=self.training)
        return self.fc2(hidden)

# Training

In [29]:
def var_pl(varidx, wt):
    '''
    Computes the per-value scores of the given variable under weights wt.

    varidx: key of the variable in var_dict.
    wt: one entry per formula; wt[fidx][i][j] must be indexable with the two
        indices stored in the variable's var_dict entry.

    Returns a tensor of length 2 (one entry per value) allocated on `device`:
      * [0, 1] for 'Digit' variables;
      * the uniform distribution when varidx2fidx has no entry for the variable;
      * otherwise the sum, over all groundings listed for the variable, of
        n * weight (or of -1000 * weight for the values with n == 0 when the
        formula type is 'hard').

    NOTE(review): in the last case the sums are returned as they are (no
    exponentiation or normalisation) - confirm this is intended.
    '''
    values = 2
    name, wt_idx1, wt_idx2, _ = var_dict[varidx]

    if name == 'Digit':
        # Allocated on `device` like every other tensor returned here.
        return torch.tensor([0, 1], dtype=torch.float, device=device)

    gfs = varidx2fidx.get(varidx)
    if gfs is None:
        # no list was saved, so the truth of all formulas is unaffected by the variable's value
        # uniform distribution applies
        p = 1.0 / values
        return p * torch.ones(values, device=device)

    sums = torch.zeros(values, device=device)
    for fidx, groundings in gfs.items():
        if not groundings:
            continue
        # Both lookups depend only on the formula, so do them once per formula
        # instead of once per value.
        is_hard = ftype[fidx] == 'hard'
        weight = wt[fidx][wt_idx1][wt_idx2]
        for gidx in groundings:
            for validx, n in enumerate(stat[fidx][gidx][varidx]):
                if is_hard:
                    # penalize the prob mass of every value violating a hard constraint
                    if n == 0:
                        sums[validx] = sums[validx] - 1000 * weight
                else:
                    sums[validx] = sums[validx] + n * weight

    return sums

In [30]:
def compute_pls(wt):
    '''
    Evaluates var_pl for every variable of number_dict.

    wt: outputs of the neural networks, passed unchanged to var_pl.

    Returns a one-element list holding a (20000, 2) tensor on `device`;
    row k is var_pl(k + 2000, wt) for every key k of number_dict and zero
    for the rows that have no key.
    '''
    number_pls = torch.zeros((20000, 2), device=device)

    # Keys of number_dict are offset by 2000 with respect to var_dict.
    for local_idx in number_dict:
        number_pls[local_idx] = var_pl(local_idx + 2000, wt)

    return [number_pls]

In [31]:
def grad(w):
    '''
    Computes one scalar factor per neural formula from the grounding
    statistics in `stat` and the values returned by compute_pls.

    For every variable of every grounding of a formula the contribution is

        counts[evidx] - sum_i counts[i] * pls[0][idx][i]

    where evidx is the last field of the variable's var_dict entry, and the
    contributions are accumulated per formula index.

    w: network outputs, forwarded unchanged to compute_pls.

    Returns a float64 tensor of length len(nnformulas) on `device`.
    '''
    pls = compute_pls(w)
    formula_grad = torch.zeros(len(nnformulas), dtype=torch.float64, device=device)
    plsidx = 0
    for fidx, groundval in stat.items():
        # Only formula indices 0 and 1 are processed; iteration stops at the
        # first larger index.
        if fidx > 1:
            break
        for gidx, varval in groundval.items():
            for varidx, counts in varval.items():
                name, _, _, evidx = var_dict[varidx]
                g = counts[evidx]
                if name == 'Text':
                    continue
                if name == 'Number':
                    plsidx = 0
                    # Rows of pls[0] are offset by 2000 with respect to var_dict.
                    varidx -= 2000
                # NOTE(review): variables that are neither 'Text' nor 'Number'
                # index pls[0] with their unshifted varidx - confirm that
                # `stat` never contains such variables.

                for i, val in enumerate(counts):
                    # Out-of-place on purpose: if counts is a tensor,
                    # counts[evidx] is a view and `g -= ...` would overwrite
                    # the statistics stored in `stat`.
                    g = g - val * pls[plsidx][varidx][i]
                formula_grad[fidx] += g
    return formula_grad

In [32]:
def forward():
    '''
    Runs the forward step of the neural networks.

    Returns a list whose entry fidx is the output of nnformulas[fidx]
    applied to nn_inputs[fidx].
    '''
    return [net(nn_inputs[fidx]) for fidx, net in enumerate(nnformulas)]

In [73]:
def train(optimizer, criterion, grad_mod):
    '''
    Runs one full-batch training step.

    optimizer: optimizer over the parameters of nnformulas.
    criterion: list of losses; criterion[0] is applied to the output of the
               first network and y_true.
    grad_mod: when true, the gradient of every parameter of network fidx is
              multiplied by grad(preds)[fidx] before the optimizer step.

    Returns the loss tensor of this step.
    '''
    for model in nnformulas:
        model.train()

    optimizer.zero_grad()
    preds = forward()
    loss = criterion[0](preds[0], y_true)

    loss.backward()
    if grad_mod:
        # The scaling factors are used as constants (nothing back-propagates
        # through them), so compute them without recording an autograd graph:
        # same values, and the parameter gradients stay plain tensors.
        with torch.no_grad():
            gradient = grad(preds)
            for fidx, model in enumerate(nnformulas):
                scale = gradient[fidx]
                for par in model.parameters():
                    # Parameters that did not take part in the loss have no gradient.
                    if par.grad is not None:
                        par.grad *= scale

    optimizer.step()
    return loss

In [34]:
def evaluate():
    '''
    Scores the first network on the validation data.

    All networks are switched to eval mode and run on their entry of
    val_inputs without gradient tracking; the arg-max over dim 1 of the first
    network's output is compared with val_true.

    Returns the value of accuracy_score(val_true, predictions).
    '''
    for net in nnformulas:
        net.eval()

    with torch.no_grad():
        outputs = [net(val_inputs[fidx]) for fidx, net in enumerate(nnformulas)]

        # Only the first network is scored.
        predicted = outputs[0].argmax(dim=1).to('cpu')
        return accuracy_score(val_true, predicted)

In [35]:
def training_loop(epochs, optimizer, criterion, pretrain=None,
                  early_stopping=True, early_stopping_epochs=1000, verbose=False):
    '''
    Runs the training algorithm for up to `epochs` epochs, evaluating the
    model after each training step.

    epochs: maximum number of epochs.
    optimizer, criterion: forwarded to train().
    pretrain: number of initial epochs trained without gradient modification;
              None or 0 enables the modification from the first epoch.
    early_stopping: stop when the validation accuracy has not improved for
                    early_stopping_epochs consecutive epochs.
    early_stopping_epochs: patience of the early stopping.
    verbose: print loss, validation accuracy and duration of every epoch.

    Returns (best_val_acc, best_epoch, best_params): the best value returned
    by evaluate(), the 0-based epoch at which it was reached and a deep copy
    of nnformulas.state_dict() taken at that epoch. The live model is left in
    its final state; the best parameters are not loaded back.
    '''
    start_train = time.time()
    best_val_acc = 0
    best_epoch = 0
    epochs_no_improve = 0
    best_params = copy.deepcopy(nnformulas.state_dict())

    for epoch in range(epochs):
        start_epoch = time.time()

        # Epochs 0 .. pretrain-1 are the pretraining epochs, so the gradient
        # modification starts at epoch == pretrain.
        grad_mod = (not pretrain) or epoch >= pretrain

        loss_train = train(optimizer, criterion, grad_mod)
        val_acc = evaluate()

        end_epoch = time.time()

        # Early stopping
        if val_acc > best_val_acc:
            epochs_no_improve = 0
            best_val_acc = val_acc
            best_params = copy.deepcopy(nnformulas.state_dict())
            best_epoch = epoch
        else:
            epochs_no_improve += 1

        if early_stopping and epochs_no_improve >= early_stopping_epochs:
            if verbose:
                print('Early stopping!' )
            break

        if verbose:
            print(f'Epoch: {epoch+1} '
                    f' Loss: Train = [{loss_train:.4f}] '
                    f' Acc: Val = [{val_acc:.4f}] '
                    f' Time one epoch (s): {end_epoch-start_epoch:.4f} ')

    end_train = time.time()
    # The tracked metric is the accuracy returned by evaluate().
    print(f"Best epoch {best_epoch+1}, Acc: Val = {best_val_acc:.4f}")
    print(f'Time for training: {end_train-start_train}')

    return best_val_acc, best_epoch, best_params

In [78]:
# Type of every formula; here there is a single neural ('nn') formula.
ftype = ['nn']

# One network per neural formula, registered in a ModuleList so that their
# parameters and state_dict can be handled together.
nnformulas = torch.nn.ModuleList([Net()])

optimizer = optim.Adam(nnformulas.parameters(), lr=0.001)
# One loss per neural formula.
criterion = [nn.CrossEntropyLoss()]

In [None]:
# Train without pretraining, stopping after 500 epochs without improvement.
best_valid_acc, best_epoch, best_params = training_loop(
    epochs=4000,
    optimizer=optimizer,
    criterion=criterion,
    pretrain=None,
    early_stopping=True,
    early_stopping_epochs=500,
    verbose=True,
)

Epoch: 1  Loss: Train = [20.1461]  Acc: Val = [0.1201]  Time one epoch (s): 6.1594 
Epoch: 2  Loss: Train = [13.3033]  Acc: Val = [0.1250]  Time one epoch (s): 6.5228 
Epoch: 3  Loss: Train = [9.0795]  Acc: Val = [0.1430]  Time one epoch (s): 6.6173 
Epoch: 4  Loss: Train = [6.0862]  Acc: Val = [0.1795]  Time one epoch (s): 6.8289 
Epoch: 5  Loss: Train = [4.4573]  Acc: Val = [0.2091]  Time one epoch (s): 6.8281 
Epoch: 6  Loss: Train = [3.6058]  Acc: Val = [0.2408]  Time one epoch (s): 6.8098 
Epoch: 7  Loss: Train = [2.9651]  Acc: Val = [0.2685]  Time one epoch (s): 6.7875 
Epoch: 8  Loss: Train = [2.6823]  Acc: Val = [0.2858]  Time one epoch (s): 6.7033 
Epoch: 9  Loss: Train = [2.5336]  Acc: Val = [0.2895]  Time one epoch (s): 6.8321 
Epoch: 10  Loss: Train = [2.3989]  Acc: Val = [0.2941]  Time one epoch (s): 6.7960 
Epoch: 11  Loss: Train = [2.3255]  Acc: Val = [0.2986]  Time one epoch (s): 6.8245 
Epoch: 12  Loss: Train = [2.2588]  Acc: Val = [0.3017]  Time one epoch (s): 6.5265 