# MC DropOut

Under regular classification MC Dropout works like this:

You take a **trained** model $f(\ , \theta^*)$ , and input some data to it (do inference).
But you add some noise (in the form of dropout) and do repeated inference.

#### Dropout

Dropout makes some parameters of the model zero so some neurons don't work. This is done randomly.
So let the model be defined by a specific set of $M$ parameters $\theta^t \in \mathbb{R}^M$.

Implementing dropout can be defined as randomly sampling a mask from a binary space of the same dimensions: $m \in \{ 0,1 \}^M$.

We can get a new set of parameters for the model by taking the Hadamard (element-wise) product of the parameters and the mask:
$\theta_d^t = \theta^t \odot m$

#### Implications

In doing so, we effectively make a new model.
If we sample multiple masks, we get different models. 

#### MC Dropout

Use the different models (obtained by sampling different dropout masks) to make predictions on the same input. Use these predictions to compute confidence intervals.


## MCD on Link Prediction

We can't do the _exact_ same thing in our task, so we'll make some adjustments to it.

# TODO: write out the rest

Get a matrix of shape 100 × `num_ent` (one row per dropout sample).
A softmax score of 0.8 at 95% confidence means that a particular entity gets a softmax score of at least 0.8 in at least 95% of the sampled predictions.

# Getting Started

- Load a model
- Get a way to access all training, validation and test triples

In [1]:
from run import Runner
from mytorch.utils.goodies import FancyDict
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from tqdm.auto import tqdm, trange
import torch
from typing import Union, Optional, Dict



In [2]:
# Settings handed to the project's Runner. The meaning of each key is defined
# by Runner (not visible in this notebook); values are kept exactly as before.
args = FancyDict({
    'name': 'testrun',
    'dataset': 'RLF/lf-cp',
    'model': 'compgcn',
    'score_func': 'conve',
    'opn': 'corr',
    'use_wandb': False,
    'batch_size': 128,
    'gamma': 40.0,
    'gpu': '-1',
    'max_epochs': 1,
    'l2': 0.0,
    'lr': 0.001,
    'lbl_smooth': 0.1,
    'num_workers': 10,
    'seed': 41504,
    'restore': False,
    'bias': False,
    'num_bases': -1,
    'init_dim': 100,
    'gcn_dim': 200,
    'embed_dim': None,
    'gcn_layer': 1,
    'dropout': 0.05,
    'hid_drop': 0.15,
    'hid_drop2': 0.15,
    'feat_drop': 0.15,
    'k_w': 10,
    'k_h': 20,
    'num_filt': 200,
    'ker_sz': 7,
    'log_dir': './log/',
    'config_dir': './config/',
    'trim': False,
    'trim_ratio': 0.00005,
    'use_fasttext': False,
})

# Build the runner from the settings, then load the saved model from disk
model = Runner(args)
model.load_model('./checkpoints/rlf-lf-cp_dropout')

2023-07-26 13:45:09,623 - [INFO] - {}
{}


In [3]:
# Seed torch's global RNG so that the randomly sampled dropout masks used below are reproducible
torch.manual_seed(42)

<torch._C.Generator at 0x7f3889e08f90>

In [4]:
# One DataFrame per split: the (sub, rel, obj) triples from model.data plus a
# constant 'split' column naming the split the rows came from.
dfs = {
    split_name: pd.DataFrame(
        model.data[split_name], columns=['sub', 'rel', 'obj']
    ).assign(split=split_name)
    for split_name in ('train', 'valid', 'test')
}

# Keep the individual frames available under their short names as well
df_tr, df_vl, df_ts = dfs['train'], dfs['valid'], dfs['test']

# Display the test split as the cell output
df_ts

Unnamed: 0,sub,rel,obj,split
0,8220,0,17574,test
1,15043,0,20547,test
2,3475,0,19623,test
3,8854,59,1729,test
4,26410,0,780,test
...,...,...,...,...
6695,6511,9,21561,test
6696,7902,10,4923,test
6697,3959,10,8307,test
6698,6227,0,11702,test


In [5]:
! free -h

               total       utilisé      libre     partagé tamp/cache   disponible
Mem:            62Gi       5,7Gi        34Gi       1,4Gi        22Gi        54Gi
Partition d'échange:      1,9Gi          0B       1,9Gi


In [6]:
def get_gold(
    model: "Runner",
    dfs: Dict[str, pd.DataFrame],
    sub: Union[str, int],
    rel: Optional[Union[str, int]] = None,
    return_str: bool = False,
    return_merged: bool = False
):
    """
    Collect the known (gold) objects and relations for a subject across all splits.

    Parameters
    ----------
    model:          Object exposing the ``ent2id`` / ``rel2id`` / ``id2ent`` / ``id2rel`` mappings.
    dfs:            Dict with keys 'train', 'valid' and 'test'; each value is a DataFrame
                    with (at least) the integer columns 'sub', 'rel' and 'obj'.
    sub:            Subject, either as an integer id or as a key of ``model.ent2id``.
    rel:            Optional relation (integer id or key of ``model.rel2id``).
                    When None, triples with any relation are returned.
    return_str:     If True, ids in the output are mapped back through
                    ``model.id2ent`` / ``model.id2rel``.
    return_merged:  If True, return ``(objs, rels)`` concatenated in train, valid, test
                    order instead of one pair of lists per split.

    Returns
    -------
    ``(objs, rels)`` when ``return_merged`` is True, otherwise
    ``(tr_o, tr_r, vl_o, vl_r, ts_o, ts_r)``. All elements are plain Python lists.

    Raises
    ------
    KeyError: if a non-integer ``sub`` / ``rel`` is not in the corresponding mapping,
              or if ``dfs`` lacks one of the three split keys.
    """

    def _to_id(item, vocab):
        # Accept numpy integers too: ids read back from a DataFrame are np.int64,
        # and must not be mistaken for names to look up in the vocabulary.
        if isinstance(item, (int, np.integer)):
            return int(item)
        return vocab[item]

    i_sub = _to_id(sub, model.ent2id)
    i_rel = None if rel is None else _to_id(rel, model.rel2id)

    objs_per_split, rels_per_split = [], []
    for split in ('train', 'valid', 'test'):
        frame = dfs[split]

        # Rows of this split matching the subject (and the relation, when given)
        hits = frame[frame['sub'] == i_sub]
        if i_rel is not None:
            hits = hits[hits['rel'] == i_rel]

        split_objs = hits['obj'].values.tolist()
        split_rels = hits['rel'].values.tolist()

        if return_str:
            # Map ids back to their names
            split_objs = [model.id2ent[x] for x in split_objs]
            split_rels = [model.id2rel[x] for x in split_rels]

        objs_per_split.append(split_objs)
        rels_per_split.append(split_rels)

    tr_o, vl_o, ts_o = objs_per_split
    tr_r, vl_r, ts_r = rels_per_split

    if return_merged:
        return tr_o + vl_o + ts_o, tr_r + vl_r + ts_r

    return tr_o, tr_r, vl_o, vl_r, ts_o, ts_r

In [7]:
def enable_dropout(model):
    """
    Put every dropout layer of ``model`` into training mode, in place.

    A sub-module counts as a dropout layer when its class name starts with
    'Dropout'. All other sub-modules keep whatever mode they are in.
    Returns None.
    """
    dropout_layers = [
        layer for layer in model.modules()
        if layer.__class__.__name__.startswith('Dropout')
    ]
    for layer in dropout_layers:
        layer.train()

# Actual Stuff

- For a given sub, rel
- Get 100 model predictions using dropout
- Make a 100 x `num_ent` matrix (one row per dropout sample)
- Mask out columns corresponding to triples already existing in the dataset
- Do a softmax for each row
- For a given confidence interval and for a given threshold, get the candidates

In [56]:
# Query to analyse: subject name, relation name and number of dropout samples.
# NOTE: the first `sub` assignment is immediately overwritten by the second one,
# so only 'abus' is actually used.
sub = 'table ii.1a'
sub = 'abus'
rel = 'anti'
n = 100

In [57]:
# Get predictions using different dropouts

# Let's get their IDs
id_sub, id_rel = model.ent2id[sub], model.rel2id[rel]
# Convert them to torch tensors of shape (n,) (repeat the same sub, rel n times)
t_sub = torch.tensor(id_sub).repeat(n)
t_rel = torch.tensor(id_rel).repeat(n)

# Get predictions based on that
with torch.no_grad():
    model.model.eval()   # put the whole network in inference mode first (this turns dropout OFF)
    # ... then switch only the Dropout* layers back to training mode so they sample masks again
    enable_dropout(model.model)
    # A single forward pass over the n identical queries.
    # NOTE(review): whether every row sees an independent dropout mask depends on
    # where the model applies dropout — TODO confirm.
    pred = model.model.forward(t_sub, t_rel)

print(pred.shape)

torch.Size([100, 27068])


In [58]:
# Alt: instead of that, really get predictions by running a for loop n times
# Each forward pass uses a batch of 2 identical queries, of which only row 0 is kept
# (presumably because the model does not handle a batch of size 1 — TODO confirm).
t_sub = torch.tensor(id_sub).repeat(2)
t_rel = torch.tensor(id_rel).repeat(2)

# One row per dropout sample, one column per entity
pred = torch.zeros(n, model.p.num_ent, dtype=torch.float32)
# Inference mode for everything, then re-enable only the Dropout* layers
model.model.eval()
enable_dropout(model.model)

with torch.no_grad():
    for i in trange(n):
        # A fresh forward pass per sample; keep the scores of the first query only
        pred[i] = model.model.forward(t_sub, t_rel)[0]
        
pred.shape

  0%|          | 0/100 [00:00<?, ?it/s]

torch.Size([100, 27068])

In [59]:
# Get the mask to put on the preds: the objects already known for (sub, rel)
# in any of the train / valid / test splits
objs, rels = get_gold(model, dfs, sub, rel, return_merged=True)
# Boolean vector over all entities, True at the ids of the known objects
mask = torch.zeros(model.p.num_ent, dtype=torch.bool)
mask[objs] = 1
# Sanity check: mask shape, the known objects, and how many entities are masked
mask.shape, objs, mask.sum()

(torch.Size([27068]), [], tensor(0))

In [60]:
# Put the mask on the pred (work on a copy so the raw predictions are kept)
pred_masked = pred.clone()
# Very large negative score (-(10**10)) for the masked entities, so that they
# end up with (near) zero probability after the softmax
pred_masked[:, mask] = -10**10

# Do softmax (per row, i.e. over the entities of each dropout sample) over the pred
pred_masked_soft = torch.softmax(pred_masked, dim=1)

In [61]:
# Now set a confidence and softmax score threshold

In [62]:
def top_k(preds, k=10):
    """
    Build a boolean mask with the same shape as ``preds`` (a 2-D tensor) that is
    True, in every row, at the ``k`` columns holding that row's highest values
    and False everywhere else.
    """
    # Column indices of every row, best score first, truncated to the k best
    ranked_cols = preds.argsort(dim=1, descending=True)
    best_cols = ranked_cols[:, :k]

    selected = torch.zeros_like(preds, dtype=torch.bool)
    # Pair each row index with its k best columns and flag them all at once
    row_ids = torch.arange(preds.shape[0], device=preds.device).unsqueeze(1)
    selected[row_ids, best_cols] = True

    return selected

In [63]:
def score_threshold(preds, threshold=0.1):
    """
    Element-wise selection by score: the result is True wherever the value in
    ``preds`` is strictly greater than ``threshold``.
    """
    above = preds > threshold
    return above

In [64]:
def aggregate(raw, selected, conf_threshold=0.5, id2ent=None, n_show=10):
    """
    Turn per-sample selections into a per-entity selection frequency and print
    the most frequently selected entities.

    Parameters
    ----------
    raw:            Score matrix of shape (num_samples, num_ent).
    selected:       Boolean matrix of the same shape; True where an entity was
                    selected in a given sample.
    conf_threshold: Currently unused; kept so existing calls keep working.
    id2ent:         Mapping from entity id to entity name used for printing.
                    Defaults to ``model.id2ent`` of the notebook-level ``model``.
    n_show:         Maximum number of candidates to print.

    Returns
    -------
    1-D tensor of length num_ent: the fraction of samples in which each entity
    was selected.
    """
    # Fraction of samples selecting each entity, and each entity's mean raw score
    per_entity_freq = selected.float().mean(dim=0)
    mean_score = raw.mean(dim=0)

    if id2ent is None:
        id2ent = model.id2ent

    # print the top candidates. The slice is applied to the sorted tensors
    # themselves, so fewer than n_show entities no longer raises an IndexError.
    scores, indices = per_entity_freq.sort(descending=True)
    shown = max(n_show, 0)
    print("Top scoring candidates by this method: \n\n(conf): (agg. score): node")
    for score, idx in zip(scores[:shown].tolist(), indices[:shown].tolist()):
        print(f"{score:.4f}: {mean_score[idx].item():.10f}: {id2ent[idx]}")

    return per_entity_freq

In [65]:
def avg(pred_masked, id2ent=None, n_show=10):
    """
    Average the scores over the samples and print the best-scoring entities.

    Parameters
    ----------
    pred_masked: Score matrix of shape (num_samples, num_ent).
    id2ent:      Mapping from entity id to entity name used for printing.
                 Defaults to ``model.id2ent`` of the notebook-level ``model``.
    n_show:      Maximum number of candidates to print.

    Returns
    -------
    1-D tensor of length num_ent holding the mean score of every entity.
    """
    per_entity_freq = pred_masked.mean(dim=0)

    if id2ent is None:
        id2ent = model.id2ent

    # print the top candidates. The slice is applied to the sorted tensors
    # themselves, so fewer than n_show entities no longer raises an IndexError.
    scores, indices = per_entity_freq.sort(descending=True)
    shown = max(n_show, 0)
    print("Top scoring candidates by this method: \n\n(conf): node")
    for score, idx in zip(scores[:shown].tolist(), indices[:shown].tolist()):
        print(f"{score:.10f}: {id2ent[idx]}")

    return per_entity_freq

In [66]:
# Selection frequency per entity, where "selected" means being among the top-k (default k=10) of a sample
op = aggregate(pred_masked_soft, top_k(pred_masked_soft))

Top scoring candidates by this method: 

(conf): (agg. score): node
1.0000: 0.0000432348: abuser i
0.1700: 0.0000369879: exploiter ii
0.1600: 0.0000369953: écologue n, fém
0.1500: 0.0000369815: honnêtement i.a
0.1200: 0.0000369843: retentir ii
0.1200: 0.0000369829: mollesse ii
0.1200: 0.0000369726: froid adj ii
0.1000: 0.0000369750: urgent
0.1000: 0.0000369745: orphelin n i.a
0.0900: 0.0000369738: potentiel adj


In [67]:
# Baseline for comparison: plain mean of the softmax scores over the samples
op = avg(pred_masked_soft)

Top scoring candidates by this method: 

(conf): node
0.0000432348: abuser i
0.0000369953: écologue n, fém
0.0000369879: exploiter ii
0.0000369843: retentir ii
0.0000369829: mollesse ii
0.0000369815: honnêtement i.a
0.0000369773: ter
0.0000369750: urgent
0.0000369745: orphelin n i.a
0.0000369738: potentiel adj


# Rough, ignore

In [17]:

# Scratch: pull one batch from an iterator.
# NOTE(review): `train_iter` is not defined anywhere in the visible notebook —
# this cell relies on state from an earlier session.
batch = next(train_iter)
batch

(tensor([[ 4201,    10,  1678],
         [ 5608,     6, 19065]]),
 tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]))

In [22]:
# Scratch: unpack the batch into subject / relation / object ids and the label matrix.
# NOTE(review): `split` is not defined anywhere in the visible notebook.
# The list of shapes below is computed but neither stored nor displayed.
[x.shape for x in batch]
sub, rel, obj, label    = model.read_batch(batch, split)
# NOTE(review): `[:10]` slices the first (batch) axis, not the columns; the top-10
# entities per row would need `[:, :10]` — confirm intent.
sub, rel, obj, label.argsort(dim=1, descending=True)[:10]

(tensor([4201, 5608]),
 tensor([10,  6]),
 tensor([ 1678, 19065]),
 tensor([[ 1678, 17701, 17713,  ...,  8848,  8847,  8846],
         [19065, 17701, 17713,  ...,  8848,  8847,  8846]]))

In [24]:
# Scratch: score all entities for the (sub, rel) pairs of the batch above.
# NOTE: this rebinds the notebook-level `pred`.
pred = model.model.forward(sub, rel)
pred.shape

torch.Size([2, 26558])

In [None]:
def predict(model, split='valid', mode='tail_batch', n=100, report_all=False):
    """
        Function to run model evaluation for a given mode, with the network in training mode

        The network is switched to training mode (``model.model.train(True)``), so
        dropout stays active and repeated calls can give different results. Any other
        layer whose behaviour depends on the mode is affected as well.

        Parameters
        ----------
        model:              Runner-like object exposing ``model``, ``data_iter``, ``read_batch`` and ``device``
        split: (string)     Name of the split to evaluate; used to pick the iterator
                            ``model.data_iter['<split>_<head|tail>']``
        mode: (string):     Can be 'head_batch' or 'tail_batch'
        n: (int)            Currently unused (presumably meant as the number of dropout samples — TODO confirm)
        report_all: (bool)  If True, also return the rank of every evaluated triple

        Returns
        -------
        results:            Running SUMS over all evaluated triples (not averages):
            results['count']:       Number of evaluated triples
            results['mr']:          Sum of the ranks
            results['mrr']:         Sum of the reciprocal ranks
            results['hits@k']:      Number of triples ranked within the top k, for k = 1..10
        all_ranks:          Only when ``report_all`` is True: 1-D float tensor (on CPU) with all ranks

    """
    model.model.train(True)

    results = {}
    all_ranks = []

    with torch.no_grad():
        data_iter = iter(model.data_iter['{}_{}'.format(split, mode.split('_')[0])])

        for batch in data_iter:
            sub, rel, obj, label = model.read_batch(batch, split)
            pred = model.model.forward(sub, rel)
            b_range = torch.arange(pred.size()[0], device=model.device)

            # Filtered setting: push every entity flagged in `label` far down,
            # then restore the score of the target object itself
            target_pred = pred[b_range, obj]
            # torch.where needs a boolean condition (uint8 conditions are deprecated)
            pred = torch.where(label.bool(), -torch.ones_like(pred) * 10000000, pred)
            pred[b_range, obj] = target_pred

            # Double argsort gives the 0-based position of every entity in the
            # descending ordering; +1 turns it into a rank
            ranks = 1 + torch.argsort(torch.argsort(pred, dim=1, descending=True), dim=1, descending=False)[b_range, obj]
            ranks = ranks.float()

            if report_all:
                all_ranks.append(ranks.cpu())

            results['count'] = torch.numel(ranks) + results.get('count', 0.0)
            results['mr'] = torch.sum(ranks).item() + results.get('mr', 0.0)
            results['mrr'] = torch.sum(1.0 / ranks).item() + results.get('mrr', 0.0)
            for k in range(1, 11):
                key = 'hits@{}'.format(k)
                results[key] = torch.numel(ranks[ranks <= k]) + results.get(key, 0.0)

    if report_all:
        ranks_tensor = torch.cat(all_ranks) if all_ranks else torch.empty(0)
        return results, ranks_tensor

    return results