In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply matplotlib's built-in 'bmh' style sheet to all subsequent figures.
mpl.style.use('bmh')

In [2]:
import numpy as np

import torch
import math
import random

from tqdm import tqdm_notebook
from boltons.iterutils import pairwise
from scipy.stats import kendalltau
from itertools import permutations

from sent_order.models.kt_regression import SentenceEncoder, Regressor, Corpus, Batch
from sent_order.perms import sample_uniform_perms

In [31]:
# Load the serialized sentence encoder onto the CPU.
# The map_location callable returns the already-deserialized CPU storage for
# every tensor, so the checkpoint loads no matter which device it was saved
# from (cuda:0, cuda:1, ...). A {'cuda:0': 'cpu'} dict would only remap
# tensors that were saved on cuda:0.
# NOTE(review): torch.load unpickles arbitrary Python objects -- only load
# checkpoint files from a trusted source.
sent_encoder = torch.load(
    '../../data/models/new/kt-reg/sent_encoder.68.bin',
    map_location=lambda storage, location: storage,
)

In [32]:
# Load the serialized regressor onto the CPU.
# The map_location callable returns the already-deserialized CPU storage for
# every tensor, so the checkpoint loads no matter which device it was saved
# from (cuda:0, cuda:1, ...). A {'cuda:0': 'cpu'} dict would only remap
# tensors that were saved on cuda:0.
# NOTE(review): torch.load unpickles arbitrary Python objects -- only load
# checkpoint files from a trusted source.
regressor = torch.load(
    '../../data/models/new/kt-reg/regressor.68.bin',
    map_location=lambda storage, location: storage,
)



In [33]:
# Build the evaluation corpus from the dev-set file.
# NOTE(review): the variable is called `train` but the path points at
# dev.json -- the name is kept because a later cell reads `train.grafs`.
# The second argument (10000) is presumably a cap on the number of
# paragraphs read (the progress bar below runs to 10000) -- TODO confirm
# against Corpus.__init__.
train = Corpus('../../data/dev.json/', 10000)

100%|██████████| 10000/10000 [00:01<00:00, 6653.02it/s]


In [34]:
# Keep only the paragraphs that consist of exactly three sentences.
grafs = list(filter(lambda graf: len(graf.sentences) == 3, train.grafs))

In [35]:
# Number of three-sentence paragraphs that will be evaluated.
len(grafs)

1604

In [36]:
def predict(graf):
    """Return the sentence ordering that the regressor scores lowest.

    The paragraph's sentences are encoded with the module-level
    ``sent_encoder``; every permutation of the encoded sentences is stacked
    into one batch and scored by the module-level ``regressor``. The
    permutation with the minimum score is returned.

    Args:
        graf: object exposing ``sentence_variables()``; its result is passed
            straight to ``sent_encoder``.

    Returns:
        tuple of int: the lowest-scoring permutation of
        ``range(len(sents))``, where ``sents`` is the encoder output.
    """
    sents = sent_encoder(graf.sentence_variables())

    # Enumerate all n! candidate orderings (only practical for short
    # paragraphs).
    perms = list(permutations(range(len(sents))))

    # One batch row per candidate: the encoded sentences rearranged by `perm`.
    x = torch.stack([
        sents[torch.LongTensor(perm)]
        for perm in perms
    ])

    scores = regressor(x)

    # Argmin along the candidate axis. The index tensor is flattened before
    # taking element 0 so this works both when the reduction yields a
    # 1-element tensor and when it yields a 0-dim tensor (PyTorch >= 0.4 with
    # 1-D scores), where a direct `.data[0]` raises IndexError.
    best = torch.min(scores, 0)[1].data.view(-1)[0]

    return perms[int(best)]

In [37]:
# Score every paragraph: Kendall's tau between the predicted ordering and
# the identity ordering 0..n-1 (presumably the paragraph's true sentence
# order -- verify against Corpus).
kts = []
for g in tqdm_notebook(grafs):
    pred = predict(g)
    gold = range(len(pred))
    kt = kendalltau(pred, gold).correlation
    kts.append(kt)




In [38]:
# Mean Kendall's tau over all evaluated paragraphs.
np.asarray(kts).mean()

0.81712385702410628

In [39]:
# Fraction of paragraphs whose tau equals 1, i.e. the predicted ordering
# matches the identity ordering exactly.
sum(1 for tau in kts if tau == 1) / len(kts)

0.7655860349127181