In [1]:
import json
import re
from nltk.tokenize import word_tokenize
from transformers import BertTokenizer
import torch
from torch.utils.data import Dataset
from torch.nn.utils.rnn import pack_padded_sequence
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence
from torch.nn.utils.rnn import pad_sequence
from sklearn.metrics import f1_score
import nltk
nltk.download('punkt_tab')

from tqdm import tqdm

[nltk_data] Downloading package punkt_tab to /usr/share/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


In [2]:
with open("/kaggle/input/23342343/2020_acl_diplomacy-master/data/test.jsonl", "r", encoding="utf-8") as file:
    test_data = [json.loads(line) for line in file]

with open("/kaggle/input/23342343/2020_acl_diplomacy-master/data/train.jsonl", "r", encoding="utf-8") as file:
    train_data = [json.loads(line) for line in file]

with open("/kaggle/input/23342343/2020_acl_diplomacy-master/data/validation.jsonl", "r", encoding="utf-8") as file:
    val_data = [json.loads(line) for line in file]

In [3]:
def preprocess(sentence ):
    sentence=sentence.lower()

    sentence = re.sub(r"[^a-zA-Z0-9 ]", "", sentence)  # can use punctations with bert , not with glove
    sentence = re.sub(r"\s+", " ", sentence).strip() 

    return sentence
def prep_data(data ,  is_sender  , tokenizer):
    final_data=[]
    for data_points in data:
        for i, message in enumerate(data_points["messages"]):
            
            msg=preprocess(message) 
            msg=tokenizer(msg, truncation=True)
            if(len(msg)==0): continue

            if(is_sender):
              if(data_points['sender_labels'][i]=='NOANNOTATION'):
                continue
            else:
              if(data_points['receiver_labels'][i]=='NOANNOTATION'):
                continue

            final_data.append({"message":msg["input_ids"] ,
                               "attention_mask": msg["attention_mask"],
                               "label":(data_points["receiver_labels"][i],data_points["sender_labels"][i] )[is_sender] ,
                               "game_score_delta": int(data_points["game_score_delta"][i])})


    return final_data 

In [4]:
from transformers import BertTokenizerFast
tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")
val=prep_data(val_data, 1, tokenizer)
train=prep_data(train_data,1 , tokenizer)
test=prep_data(test_data ,1 , tokenizer)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [5]:
from torch.utils.data import Dataset
class Deception_dataset(Dataset):
    def __init__(self, data ):
        self.data = data
        # self.vocab= vocab
    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        data_p = self.data[idx]
        return  {
            "message": torch.tensor(data_p["message"], dtype=torch.long),
            "attention_mask":torch.tensor(data_p["attention_mask"] , dtype=torch.long),
            "label": torch.tensor(data_p["label"], dtype=torch.long),
            "game_score_delta": torch.tensor(data_p["game_score_delta"], dtype=torch.float)
        }


In [6]:
def collate_fn(batch):
    messages = [data_p["message"] for data_p in batch]
    am= [data_p["attention_mask"] for data_p in batch]
    labels = [data_p["label"] for data_p in batch]
    game_score_deltas = [data_p["game_score_delta"] for data_p in batch]
    lengths = [len(msg) for msg in messages] 
 

    msg_padded = pad_sequence(messages, batch_first=True, padding_value=0)
    am_padded = pad_sequence(am, batch_first=True, padding_value=0)
    
    
    return {
        "message": msg_padded.squeeze(0),
        "attention_mask":am_padded.squeeze(0),
        "lengths": torch.tensor(lengths, dtype=torch.long),
        "label": torch.stack(labels),
        "game_score_delta": torch.stack(game_score_deltas)
    }

In [7]:
import torch
import torch.nn as nn
%pip install git+https://github.com/geoopt/geoopt.git
import geoopt

class HyperbolicGRUCell(nn.Module):
    def __init__(self,inp_dim,hidden_size,manifold):
        super().__init__()
        self.inp_dim = inp_dim
        self.hid_dim = hidden_size
        self.manifold = manifold

        self.wz = nn.Parameter(nn.init.xavier_normal_(torch.empty(self.hid_dim,self.hid_dim+self.inp_dim,dtype=torch.float64)))
        self.wr = nn.Parameter(nn.init.xavier_normal_(torch.empty(self.hid_dim,self.hid_dim+self.inp_dim,dtype=torch.float64)))
        self.w = nn.Parameter(nn.init.xavier_normal_(torch.empty(self.hid_dim,self.hid_dim+self.inp_dim,dtype=torch.float64)))

    def forward(self,h,x):
        # h and x are of shape bs x hid_dim, bs x inp_dim
        # this is just one pass, not for a sequence
        # print("h shape: ",h.shape)
        # print("x shape: ",x.shape)
        h_x = torch.cat((h,x),dim=-1)
        z = torch.sigmoid(self.manifold.logmap0(self.manifold.projx(self.manifold.mobius_matvec(self.wz,h_x))))
        r = torch.sigmoid(self.manifold.logmap0(self.manifold.projx(self.manifold.mobius_matvec(self.wr,h_x))))
        h_x_to_tilde = self.manifold.projx(self.manifold.expmap0(torch.cat((self.manifold.logmap0(self.manifold.mobius_pointwise_mul(r,h)),self.manifold.logmap0(x)),dim=-1)))
        h_tilde = self.manifold.expmap0(torch.tanh(self.manifold.logmap0(self.manifold.projx(self.manifold.mobius_matvec(self.w,h_x_to_tilde)))))
        h = self.manifold.projx(self.manifold.mobius_add(self.manifold.mobius_pointwise_mul(1-z,h),self.manifold.mobius_pointwise_mul(z,h_tilde)))
        return h



class HyperbolicGRULayer(nn.Module):
    def __init__(self,inp_dim,hidden_size,manifold,dirs):
        super().__init__()
        self.inp_dim = inp_dim
        self.hid_dim = hidden_size
        self.manifold = manifold
        self.dirs = dirs
        self.gru_cell = HyperbolicGRUCell(self.inp_dim,self.hid_dim,self.manifold)
        self.h_init = geoopt.tensor.ManifoldParameter(self.manifold.projx(self.manifold.expmap0(torch.zeros(self.hid_dim,dtype=torch.float64))),manifold=self.manifold)

    def forward(self,seq,lengths):
        # seq is bs x max_seq_len x inp_dim
        # h_init is bs x inp_dim
        # lengths is bs
        max_len = seq.shape[1]
        bs = seq.shape[0]
        hid = self.h_init.expand(bs,-1)
        outs_left = []

        # left to right
        for i in range(max_len):
            inp = seq[:,i,:]
            out = self.gru_cell(hid,inp) # bs x hid_dim
            # but not all outputs will be valid. since some sequences may be padded. so i need to ignore those somehow
            # mask = torch.tensor([1 if len>i else 0 for len in lengths]).unsqueeze(-1).to(seq.device) # error if i dont unsqueeze. inefficient
            mask = (lengths>i).float().unsqueeze(-1).to(seq.device)
            hid = self.manifold.projx(self.manifold.mobius_add(self.manifold.mobius_pointwise_mul(mask,out),self.manifold.mobius_pointwise_mul(1-mask,hid)))
            outs_left.append(hid)

        if(self.dirs==2):
            hid = self.h_init.expand(bs, -1)
            seq = torch.flip(seq,dims=[1])
            outs_right = []
            # right to left
            for i in range(max_len):
                inp = seq[:,i,:]
                out = self.gru_cell(hid,inp) # bs x hid_dim
                # but not all outputs will be valid. since some sequences may be padded. so i need to ignore those somehow
                # mask = torch.tensor([1 if len>i else 0 for len in lengths]).unsqueeze(-1).to(seq.device) # error if i dont unsqueeze. inefficient
                mask = (lengths>i).float().unsqueeze(-1).to(seq.device)
                hid = self.manifold.projx(self.manifold.mobius_add(self.manifold.mobius_pointwise_mul(mask,out),self.manifold.mobius_pointwise_mul(1-mask,hid)))
                outs_right.append(hid)

        outs = torch.stack(outs_left,dim=1)
        if(self.dirs==2):
            outs_right.reverse()
            outs_right = torch.stack(outs_right,dim=1)
            outs = self.manifold.projx(self.manifold.expmap0(torch.cat((self.manifold.logmap0(outs),self.manifold.logmap0(outs_right)),dim=-1)))

        return outs, outs[:,-1,:]

class HyperbolicGRU(nn.Module):
    def __init__(self,inp_dim,hidden_size,manifold,num_layers,dirs):
        super().__init__()
        self.inp_dim = inp_dim
        self.hid_dim = hidden_size
        self.manifold = manifold
        self.num_layers = num_layers
        self.dirs = dirs
        self.layers = nn.ModuleList()
        for i in range(self.num_layers):
            layer_inp_dim = self.inp_dim if i == 0 else (self.hid_dim if self.dirs==1 else 2*self.hid_dim)
            self.layers.append(HyperbolicGRULayer(layer_inp_dim, self.hid_dim, self.manifold, self.dirs))

    def forward(self,seq,lengths):
        inp = seq

        for idx,layer in enumerate(self.layers):
            # print("input to layer ",idx," : inp:",inp.shape," , hid: ",hid.shape)
            inp,hid = layer(inp,lengths)

        return hid



Collecting git+https://github.com/geoopt/geoopt.git
  Cloning https://github.com/geoopt/geoopt.git to /tmp/pip-req-build-br2jm0vj
  Running command git clone --filter=blob:none --quiet https://github.com/geoopt/geoopt.git /tmp/pip-req-build-br2jm0vj
  Resolved https://github.com/geoopt/geoopt.git to commit eaadc68fcae361778edf078b503ed79e4497c071
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: geoopt
  Building wheel for geoopt (pyproject.toml) ... [?25l[?25hdone
  Created wheel for geoopt: filename=geoopt-0.5.1-py3-none-any.whl size=90072 sha256=5fad467dd5a10b59693da6161771b392b4912e7cdaeee1cdd4bc3548373ae08e
  Stored in directory: /tmp/pip-ephem-wheel-cache-xu9fcpzy/wheels/09/2e/93/3d8584e16e5ec698ebc7f6b77c083cabd05967833af32f69af
Successfully built geoopt
Installing collected packages: geoopt
Successfully i

In [8]:
class HyLinear(nn.Module):
    def __init__(self,input_dim,output_dim,act,manifold,bias=None):
        super(HyLinear, self).__init__()
        self.inp_dim = input_dim
        self.out_dim = output_dim
        self.manifold = manifold
        self.activation = act
        self.weight_matrix = nn.Parameter(torch.randn((self.out_dim,self.inp_dim),dtype = torch.float64))
        if(bias is not None):
            self.bias = geoopt.tensor.ManifoldParameter(self.manifold.projx(self.manifold.expmap0(torch.zeros(self.out_dim,dtype=torch.float64))),manifold=self.manifold)
        else:
            self.bias=None

    def reset_parameters(self):
        torch.nn.init.xavier_uniform_(self.weight_matrix, gain=math.sqrt(2))
        if(self.bias is not None):
            torch.nn.init.constant_(self.bias, 0)


    def forward(self,x):
        # x = x.double()
        op = self.manifold.mobius_matvec(self.weight_matrix,x)
        op = self.manifold.projx(op)
        if(self.bias is not None):
            op = self.manifold.mobius_add(op,self.bias)
            op = self.manifold.projx(op)
        if(self.activation is not None):
            op = self.manifold.projx(self.manifold.expmap0(self.activation(self.manifold.logmap0(op))))
        return op

In [9]:
import torch
import torch.nn as nn

class Model_Bert(nn.Module):
    def __init__(self, embed_model, embed_size, hidden_size,  num_classes,  manifold, num_layers, dirs):
        super(Model_Bert, self).__init__()

        self.manifold = manifold
        self.num_layers = num_layers
        self.dirs = dirs
        
        self.embedding_model = embed_model
        self.embedding_size = embed_size
        self.hidden_size = hidden_size
        
        for param in self.embedding_model.parameters():
                param.requires_grad = False # hyperparameter

        
        self.hy_gru = HyperbolicGRU(embedding_dim,hidden_size,self.manifold,self.num_layers,self.dirs)
        self.fc1 = HyLinear(hidden_size * 2, hidden_size,torch.nn.ReLU(),self.manifold,True) if self.dirs==2 else HyLinear(hidden_size, hidden_size,torch.nn.ReLU(),self.manifold,True)
        self.fc2 = nn.Linear(hidden_size,num_classes,dtype=torch.float64)
    
    def forward(self,  input_id, am , lengths):
        embeddings = self.manifold.expmap0(self.embedding_model(input_id, attention_mask=am).last_hidden_state)

        # print(embeddings.shape)
        # packed = pack_padded_sequence(embeddings, lengths, batch_first=True, enforce_sorted=False)
       
        last_hidden = self.hy_gru(embeddings,lengths) # bs x hid_dim
        logits = self.fc2(self.manifold.logmap0(self.fc1(last_hidden)))
        return logits

In [10]:

from transformers import BertModel
from torch.utils.data import DataLoader
from sklearn.metrics import f1_score
from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

manifold = geoopt.manifolds.PoincareBall(c=1.0)

embedding_dim = 768
hidden_size = 128
num_classes = 2

model_embed = BertModel.from_pretrained('bert-base-uncased').to(device)

model = Model_Bert(model_embed, embedding_dim, hidden_size, num_classes,manifold, num_layers = 1, dirs=2).to(device)


class_weights = torch.tensor([1.0 / 0.10, 1.0 / 0.90], dtype=torch.float64)
class_weights = class_weights.to(device)


optimizer = geoopt.optim.RiemannianAdam(model.parameters(),lr=1e-3)
loss = nn.CrossEntropyLoss(weight=class_weights)

train_dataset = Deception_dataset(train) 
val_dataset = Deception_dataset(val)      
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True, collate_fn=collate_fn)
val_dataloader = DataLoader(val_dataset, batch_size=64, shuffle=False, collate_fn=collate_fn)

from tqdm import tqdm

for epoch in (range(15)):

    model.train()
    train_loss = 0
    train_preds, train_labels = [], []
    for batch in tqdm(train_dataloader):
        messages = batch["message"].to(device)    
        lengths = batch["lengths"]                
        labels = batch["label"].to(device) 
        am=batch["attention_mask"].to(device)
        
        optimizer.zero_grad()
        logits = model(messages, am ,lengths)
        loss_ = loss(logits, labels)
        loss_.backward()
        optimizer.step()
        
        train_loss += loss_.item()
        preds = torch.argmax(logits, dim=1).cpu().numpy()  
        train_preds.extend(preds)
        train_labels.extend(labels.cpu().numpy())     
    
    train_loss /= len(train_dataloader)
    train_f1 = f1_score(train_labels, train_preds, average='macro')
   
    model.eval()
    val_loss = 0
    val_preds, val_labels = [], []
    with torch.no_grad():
        for batch in val_dataloader:
            messages = batch["message"].to(device)    
            lengths = batch["lengths"]                
            labels = batch["label"].to(device)    
            am=batch["attention_mask"].to(device)
            
            logits = model(messages,am , lengths)
            loss_ = loss(logits, labels)
            
            val_loss += loss_.item()
            preds = torch.argmax(logits, dim=1).cpu().numpy()  
            val_preds.extend(preds)
            val_labels.extend(labels.cpu().numpy())          
    
    val_loss /= len(val_dataloader)
    val_f1 = f1_score(val_labels, val_preds, average='macro')
    
    torch.save(model.state_dict(),f"epoch_{epoch+1}.pth")
    print(f"Epoch {epoch+1}:  Train Loss: {train_loss:.4f}, Train F1: {train_f1:.4f} Val Loss: {val_loss:.4f}, Val F1: {val_f1:.4f}")

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

100%|██████████| 206/206 [24:13<00:00,  7.06s/it]


Epoch 1:  Train Loss: 0.5821, Train F1: 0.4927 Val Loss: 0.5556, Val F1: 0.4899


100%|██████████| 206/206 [24:11<00:00,  7.05s/it]


Epoch 2:  Train Loss: 0.5717, Train F1: 0.4901 Val Loss: 0.5289, Val F1: 0.4899


100%|██████████| 206/206 [23:49<00:00,  6.94s/it]


Epoch 3:  Train Loss: 0.5632, Train F1: 0.5023 Val Loss: 0.5021, Val F1: 0.5138


100%|██████████| 206/206 [23:47<00:00,  6.93s/it]


Epoch 4:  Train Loss: 0.5467, Train F1: 0.5371 Val Loss: 0.5588, Val F1: 0.4721


100%|██████████| 206/206 [23:38<00:00,  6.89s/it]


Epoch 5:  Train Loss: 0.5409, Train F1: 0.5481 Val Loss: 0.5003, Val F1: 0.5530


100%|██████████| 206/206 [25:07<00:00,  7.32s/it]


Epoch 6:  Train Loss: 0.5222, Train F1: 0.5722 Val Loss: 0.5174, Val F1: 0.5139


100%|██████████| 206/206 [23:40<00:00,  6.90s/it]


Epoch 7:  Train Loss: 0.5092, Train F1: 0.5756 Val Loss: 0.5244, Val F1: 0.5181


100%|██████████| 206/206 [23:39<00:00,  6.89s/it]


Epoch 8:  Train Loss: 0.4946, Train F1: 0.5851 Val Loss: 0.5327, Val F1: 0.5301


100%|██████████| 206/206 [23:24<00:00,  6.82s/it]


Epoch 9:  Train Loss: 0.4683, Train F1: 0.5895 Val Loss: 0.5253, Val F1: 0.5287


100%|██████████| 206/206 [23:45<00:00,  6.92s/it]


Epoch 10:  Train Loss: 0.4417, Train F1: 0.6051 Val Loss: 0.5612, Val F1: 0.5119


100%|██████████| 206/206 [23:32<00:00,  6.85s/it]


Epoch 11:  Train Loss: 0.4338, Train F1: 0.6094 Val Loss: 0.5760, Val F1: 0.4936


100%|██████████| 206/206 [23:41<00:00,  6.90s/it]


Epoch 12:  Train Loss: 0.4009, Train F1: 0.6248 Val Loss: 0.5327, Val F1: 0.5199


100%|██████████| 206/206 [23:16<00:00,  6.78s/it]


Epoch 13:  Train Loss: 0.4057, Train F1: 0.6285 Val Loss: 0.5808, Val F1: 0.5151


100%|██████████| 206/206 [23:21<00:00,  6.80s/it]


Epoch 14:  Train Loss: 0.3846, Train F1: 0.6293 Val Loss: 0.5503, Val F1: 0.5206


100%|██████████| 206/206 [23:34<00:00,  6.87s/it]


Epoch 15:  Train Loss: 0.3477, Train F1: 0.6520 Val Loss: 0.6203, Val F1: 0.5108


In [11]:
test_dataset = Deception_dataset(test)    
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False, collate_fn=collate_fn)


model.eval()
test_preds, test_labels = [], []
with torch.no_grad():
    for batch in test_dataloader:
        messages = batch["message"].to(device)  
        lengths = batch["lengths"]           
        labels = batch["label"].to(device)    
        am=batch["attention_mask"].to(device)
        
        logits = model(messages,am, lengths)
  
        preds = torch.argmax(logits, dim=1).cpu().numpy()  
        test_preds.extend(preds)
        test_labels.extend(labels.cpu().numpy())       


test_f1 = f1_score(test_labels, test_preds, average='macro')

print(test_f1)

0.5249639285244299
