In [1]:
import pickle
import numpy as np
from pytorch_pretrained_bert import BertTokenizer
import torch
import json

In [2]:
processed_data = pickle.load(open("../fever_processed.pickle", "rb"))
testing_data = np.asarray(processed_data[-len(processed_data)//10:])
training_data = np.asarray(processed_data[:-len(processed_data)//10])

In [3]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [4]:
def getSamples(data):
    classes = [line["label"] for line in data]
    positive_samples = np.asarray(classes) =="SUPPORTS"
    positive_samples = np.asarray([i for i in range(len(positive_samples)) if positive_samples[i] == True])
    negative_samples = np.asarray(classes) =="REFUTES"
    negative_samples = np.asarray([i for i in range(len(negative_samples)) if negative_samples[i] == True])
    print(len(positive_samples), len(negative_samples))
    return positive_samples, negative_samples

In [5]:
tr_p, tr_n = getSamples(training_data)
te_p, te_n = getSamples(testing_data)

71321 26572
7956 2922


In [6]:
max_length = 64

In [7]:
def getBatch(bs = 64, validation = False):
    source = training_data
    positive_samples = tr_p
    negative_samples = tr_n
    
    if (validation):
        source = testing_data
        positive_samples = te_p
        negative_samples = te_n
    
    n_samples = bs // 2
    p_samples = bs - n_samples
    positives = np.random.randint(0, len(positive_samples), (p_samples,))
    negatives = np.random.randint(0, len(negative_samples), (n_samples,))
    positives = positive_samples[positives]
    negatives = negative_samples[negatives]
    
    all_indices = []
    all_indices.extend(positives)
    all_indices.extend(negatives)
    
    _t = []
    _s = []
    _a = []
    _c = []
    
    for index in all_indices:
        _dp = ["[CLS]"]
        _dp.extend(source[index]["processed"]["claim"])
        _dp.append("[SEP]")
        for evid in source[index]["processed"]["evidentiary"]:
            _dp.extend(evid)
        _dp.append("[SEP]")
        
        while (len(_dp) < max_length):
            _dp.append("[PAD]")
        _dp = _dp[:max_length]
        _dp = np.asarray(tokenizer.convert_tokens_to_ids(_dp))
        segments = np.ones((max_length,))
        segments[:len(source[index]["processed"]["claim"]) + 2] = 0
        _class = 1 if source[index]["label"] == "SUPPORTS" else 0
        att_mask = [1 if _dp[index] >0 else 0 for index in range(len(_dp))]
        _t.append(_dp)
        _s.append(segments)
        _a.append(att_mask)
        _c.append(_class)
    
    text = torch.LongTensor(_t).cuda()
    segments = torch.LongTensor(_s).cuda()
    att = torch.LongTensor(_a).cuda()
    classes = torch.LongTensor(_c).cuda()
    
    return text, segments, att, classes
    #np.random.shuffle(_data)
t, s, a, c = getBatch(bs = 5, validation = False)
print(t.size(), s.size(), a.size(), c.size())

torch.Size([5, 64]) torch.Size([5, 64]) torch.Size([5, 64]) torch.Size([5])


In [8]:
import torch
import torch.nn.functional as F
from pytorch_pretrained_bert import BertModel
from QA_Attentions import *

In [15]:
class FaVer(torch.nn.Module):
    def __init__(self, bert_model = "bert-base-uncased"):
        super(FaVer, self).__init__()
        self.bert_model = bert_model
        self.bert_width = 768
        if ("-large-" in self.bert_model):
            self.bert_width = 1024
        self.bert = BertModel.from_pretrained(bert_model)
        #self.wd = torch.nn.Parameter(torch.FloatTensor(np.random.uniform(0, 1, (3*self.bert_width,))))
        #self.innerAttQuery = torch.nn.Parameter(torch.FloatTensor(np.random.uniform(0, 1, (self.bert_width, 512))))
        #self.innerAttDoc = torch.nn.Parameter(torch.FloatTensor(np.random.uniform(0, 1, (self.bert_width*4, 512))))
        #self.out = torch.nn.Linear((self.bert_width*5),1)
        #self.dropout = torch.nn.Dropout(0.1)
    
    def forward(self, t, s, a):
        text = self.bert(t,
                        token_type_ids=s, 
                        attention_mask=a, 
                        output_all_encoded_layers=False)
        cl_ = s == 0
        print(cl_)
        ev_ = s == 1
        claims = text * cl_
        evidences = text * ev_
        evidences = evidences * a
        """
        if (train_fp16):
            queries = self.dropout(queries * qa.unsqueeze(-1).half().cuda())
            documents = self.dropout(documents * da.unsqueeze(-1).half().cuda())
        else:
            queries = self.dropout(queries * qa.unsqueeze(-1).float())
            documents = self.dropout(documents * da.unsqueeze(-1).float())
        
        bdaf, ad2q, aq2d = biDAF(documents, queries, self.wd)
        q = InnerAttention(queries, self.innerAttQuery)
        d = InnerAttention(bdaf, self.innerAttDoc)
        _f = torch.cat([q,d],dim=-1)
        out_ = self.out(_f)
        return out_
        """

In [16]:
network = FaVer().cuda()
network.forward(t, s, a)

TypeError: mul(): argument 'other' (position 1) must be Tensor, not tuple