In [4]:
from relation_modeling_utils import load_data

train_df = load_data("data/atomic2020_data-feb2021/train.tsv", multi_label=True)
val_df = load_data("data/atomic2020_data-feb2021/dev.tsv", multi_label=True)
test_df = load_data("data/atomic2020_data-feb2021/test.tsv", multi_label=True)

In [5]:
len(train_df), len(val_df), len(test_df)

(36940, 2962, 6569)

In [13]:
train_df.head()

Unnamed: 0,text,label
0,PersonX abandons ___ altogether,"[0, 0, 1]"
1,PersonX abandons the ___ altogether,"[0, 1, 1]"
2,PersonX abolishes ___ altogether,"[0, 1, 1]"
3,PersonX abolishes ___ in the states,"[0, 1, 1]"
4,PersonX abolishes the ___ altogether,"[0, 1, 1]"


In [6]:
def explode_labels(df):
    df['label_0'] = df.label.apply(lambda l: l[0])
    df['label_1'] = df.label.apply(lambda l: l[1])
    df['label_2'] = df.label.apply(lambda l: l[2])
    return df

In [7]:
train_df, val_df, test_df = explode_labels(train_df), explode_labels(val_df), explode_labels(test_df)

In [8]:
train_df.label_0.value_counts(), train_df.label_1.value_counts(), train_df.label_2.value_counts()

(0    22457
 1    14483
 Name: label_0, dtype: int64,
 0    18538
 1    18402
 Name: label_1, dtype: int64,
 1    21006
 0    15934
 Name: label_2, dtype: int64)

In [9]:
val_df.label_0.value_counts(), val_df.label_1.value_counts(), val_df.label_2.value_counts()

(0    2630
 1     332
 Name: label_0, dtype: int64,
 1    2263
 0     699
 Name: label_1, dtype: int64,
 1    2228
 0     734
 Name: label_2, dtype: int64)

In [10]:
test_df.label_0.value_counts(), test_df.label_1.value_counts(), test_df.label_2.value_counts()

(0    4668
 1    1901
 Name: label_0, dtype: int64,
 1    4419
 0    2150
 Name: label_1, dtype: int64,
 0    3996
 1    2573
 Name: label_2, dtype: int64)

In [11]:
import spacy
from tqdm import tqdm

nlp = spacy.load("en_core_web_sm", exclude=["ner"])

def create_vocab(data):
    vocab = set()
    text = " ".join(data.text.to_list())
    doc = nlp(text)
    for token in tqdm(doc, total=len(doc)):
        vocab.add(token.text.lower())
    
    return vocab

train_vocab, val_vocab, test_vocab = create_vocab(train_df), create_vocab(val_df), create_vocab(test_df)

100%|██████████| 140049/140049 [00:00<00:00, 894739.45it/s] 
100%|██████████| 14524/14524 [00:00<00:00, 1082910.93it/s]
100%|██████████| 27270/27270 [00:00<00:00, 1091368.28it/s]


In [14]:
len(train_vocab.intersection(val_vocab)) / len(train_vocab), len(train_vocab.intersection(val_vocab)) / len(val_vocab)

(0.15235929505400797, 0.8725581395348837)

In [13]:
len(train_vocab.intersection(test_vocab)) / len(train_vocab), len(train_vocab.intersection(test_vocab)) / len(test_vocab)

(0.27109559002680095, 0.8000958772770853)

In [8]:
import pandas as pd
atomic_df = pd.read_csv("data/atomic/v4_atomic_all_agg.csv")

In [9]:
atomic_df.head()

Unnamed: 0,event,oEffect,oReact,oWant,xAttr,xEffect,xIntent,xNeed,xReact,xWant,prefix,split
0,PersonX 'd better go,"[""none"", ""none""]","[""none"", ""none""]","[""none"", ""none"", ""none""]","[""avoidant"", ""weak"", ""hurried"", ""late"", ""Tardy...","[""She ran to the bathroom"", ""She finally made ...","[""to go somewhere else more important."", ""none""]","[""none"", ""none"", ""none""]","[""the person feels happy since he arrived at h...","[""to escape from him"", ""to resign his job"", ""t...","[""better"", ""go""]",dev
1,PersonX abandons ___ altogether,"[""none"", ""none""]","[""dejected""]","[""none"", ""none"", ""to find a new job for him"", ...","[""impatient"", ""decisive"", ""undependable"", ""fic...","[""gets a reputation as a quitter"", ""hangs head...","[""put a stop""]","[""Plows the field."", ""Gets exhausted from it.""...","[""authoritative""]","[""Sell his land."", ""Was just city."", ""to start...","[""abandons"", ""altogether""]",trn
2,PersonX abandons the ___ altogether,"[""none"", ""none"", ""none""]","[""defeat""]","[""none"", ""to do something else as well"", ""they...","[""flaky"", ""irresponsible"", ""desperate"", ""convi...","[""eats all the cakes"", ""abandons his diets too...","[""to appear not interested""]","[""none"", ""to get frustrated"", ""to determine it...","[""pressurized""]","[""to go out"", ""to find other place"", ""find som...","[""abandons"", ""altogether""]",trn
3,PersonX abolishes ___ altogether,"[""none"", ""none"", ""none""]","[""none""]","[""to be free"", ""to do things of their own wish...","[""ruthless"", ""destructive"", ""strict"", ""determi...","[""loss money"", ""change house"", ""get loan"", ""pe...","[""give a punishment in person""]","[""to have a plan"", ""to have a reason"", ""to kno...","[""he was sad""]","[""human to be free"", ""not to feel pain"", ""to m...","[""abolishes"", ""altogether""]",trn
4,PersonX abolishes ___ in the states,"[""none""]","[""none""]","[""to celebrate"", ""to write about the new law"",...","[""bold"", ""authoritative"", ""determined"", ""heroi...","[""none""]","[""this is unhappiness for people""]","[""to find a problem"", ""to find out to stop tha...","[""sad""]","[""to enforce the ruling"", ""memorialize the law...","[""abolishes"", ""states""]",trn


In [10]:
train_texts = set(train_df.text.to_list())
ood_test = [event for event in atomic_df.event if event not in train_texts]

In [12]:
len(atomic_df)

24312

In [11]:
len(ood_test)

4617

In [13]:
ood_df = pd.DataFrame({'text': ood_test})
ood_vocab = create_vocab(ood_df)

100%|██████████| 24675/24675 [00:00<00:00, 1083291.83it/s]


In [14]:
len(train_vocab.intersection(ood_vocab)) / len(train_vocab), len(train_vocab.intersection(ood_vocab)) / len(ood_vocab)

(0.17363761877690245, 0.8737229260318757)

In [19]:
import nltk
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/mismayil/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


True

In [17]:
import spacy
from spacy_wordnet.wordnet_annotator import WordnetAnnotator 

nlp = spacy.load('en_core_web_sm')
nlp.add_pipe("spacy_wordnet", after='tagger', config={'lang': nlp.lang})

<spacy_wordnet.wordnet_annotator.WordnetAnnotator at 0x7ff0c52bcdc0>

In [45]:
token = nlp('offered')[0]
token._.wordnet.synsets()

[Synset('offer.v.01'),
 Synset('offer.v.02'),
 Synset('volunteer.v.02'),
 Synset('offer.v.04'),
 Synset('offer.v.05'),
 Synset('offer.v.06'),
 Synset('offer.v.07'),
 Synset('offer.v.08'),
 Synset('offer.v.09'),
 Synset('put_up.v.02'),
 Synset('extend.v.04'),
 Synset('propose.v.05'),
 Synset('offer.v.13')]

In [35]:
train_vocab.intersection(test_vocab)

{'walks',
 'now',
 'pig',
 'lemonade',
 'loses',
 'offered',
 'enjoys',
 'airplane',
 'taking',
 'healthier',
 'seriously',
 'gardener',
 'rent',
 'sorry',
 'jar',
 'balcony',
 'time',
 'rays',
 'trainer',
 'beach',
 'juice',
 'wrench',
 'fast',
 'interviews',
 'chair',
 'learn',
 'kind',
 'hide',
 'appreciation',
 'infomercial',
 'sick',
 'shoe',
 'tools',
 'leg',
 'climatic',
 'took',
 'dirt',
 'joy',
 'cat',
 'posts',
 'button',
 'stings',
 'arm',
 'mower',
 'carried',
 'contains',
 'behavior',
 'sight',
 'dogs',
 'things',
 'asleep',
 'like',
 'age',
 'news',
 'new',
 'copies',
 'believe',
 'pace',
 'matters',
 'gets',
 'snacks',
 'worries',
 'kingdom',
 'go',
 'coat',
 'disposal',
 'consoles',
 'ladder',
 'decanter',
 'publishes',
 'rolex',
 'dealer',
 'tacos',
 'presses',
 'condition',
 'pluto',
 'afternoon',
 'comfortable',
 'cracker',
 'theater',
 'actor',
 'bus',
 'applies',
 'derives',
 'cheese',
 'see',
 'father',
 'firefighter',
 'herself',
 'station',
 'energy',
 't',
 'sp

In [12]:
import pandas as pd
transomcs_df = pd.read_csv("data/TransOMCS_full.txt", sep="\t", header=None, names=["head", "relation", "tail", "score"])

In [13]:
transomcs_df.head()

Unnamed: 0,head,relation,tail,score
0,student,AtLocation,school,1.0
1,building,AtLocation,city,1.0
2,sugar,AtLocation,coffee,1.0
3,government,AtLocation,city,1.0
4,school,AtLocation,city,1.0


In [14]:
transomcs_df[transomcs_df.isna().any(axis=1)]

Unnamed: 0,head,relation,tail,score
38215,,ReceivesAction,fasten,0.99
73145,,InstanceOf,warranty,0.99
108082,work,ReceivesAction,,0.99
114095,,ReceivesAction,read,0.99
124334,,ReceivesAction,lure,0.99
...,...,...,...,...
18379073,,ReceivesAction,dereference,0.00
18379482,ping,ReceivesAction,,0.00
18406398,,InstanceOf,betrothal,0.00
18438389,uptight,InstanceOf,,0.00


In [15]:
transomcs_df = transomcs_df.dropna()

In [16]:
len(transomcs_df)

18480653

In [22]:
transomcs_df.duplicated(subset=['head']).any()

True

In [23]:
from kogito.core.relation import CONCEPTNET_TO_ATOMIC_MAP, PHYSICAL_RELATIONS, EVENT_RELATIONS, SOCIAL_RELATIONS
from collections import defaultdict

def relation_to_class(relation):
    if relation in PHYSICAL_RELATIONS:
        return 0
    
    if relation in EVENT_RELATIONS:
        return 1
    
    if relation in SOCIAL_RELATIONS:
        return 2
    
    return None

test_ood_samples = []
unrecognized_rels = set()
head_label_map = defaultdict(set)

for row in transomcs_df.itertuples():
    heads = row.head.split()
    if not any([head in train_vocab for head in heads]):
        label = [0, 0, 0]
        rel_class = relation_to_class(row.relation)
        if rel_class is None:
            atomic_relations = CONCEPTNET_TO_ATOMIC_MAP.get(row.relation)
            if atomic_relations:
                if not isinstance(atomic_relations, list):
                    atomic_relations = [atomic_relations]
                
                for rel in atomic_relations:
                    rel_class = relation_to_class(rel)
                    head_label_map[row.head].add(rel_class)
            else:
                unrecognized_rels.add(row.relation)
        else:
            head_label_map[row.head].add(rel_class)

for head, labels in head_label_map.items():
    final_label = [1 if label in labels else 0 for label in range(3)]
    test_ood_samples.append((head, final_label))

In [24]:
len(test_ood_samples)

72407

In [25]:
unrecognized_rels

{'CreatedBy', 'InstanceOf'}

In [26]:
test_ood_df = pd.DataFrame(test_ood_samples, columns=['text', 'label'])

In [29]:
test_ood_df.head()

Unnamed: 0,text,label
0,curator,"[1, 1, 1]"
1,foyer,"[1, 1, 1]"
2,yolk,"[1, 1, 1]"
3,fade,"[1, 1, 1]"
4,pave,"[1, 1, 1]"


In [30]:
test_ood_df = explode_labels(test_ood_df)

In [31]:
test_ood_df.label_0.value_counts(), test_ood_df.label_1.value_counts(), test_ood_df.label_2.value_counts()

(1    72266
 0      141
 Name: label_0, dtype: int64,
 1    44593
 0    27814
 Name: label_1, dtype: int64,
 0    56184
 1    16223
 Name: label_2, dtype: int64)

In [32]:
from kogito.core.processors.relation import SWEMRelationClassifier
import numpy as np
import torch
from torch.nn.utils.rnn import pad_sequence
import spacy
from relation_modeling_utils import HeadDataset
from torch.utils.data import DataLoader

nlp = spacy.load("en_core_web_sm")
vocab = np.load(
    "./data/vocab_glove_100d.npy", allow_pickle=True
).item()

swem_classifier = SWEMRelationClassifier(pooling="avg")
swem_classifier.load_state_dict(
    torch.load(
        "./models/swem_multi_label_finetune_state_dict.pth"
    )
)

<All keys matched successfully>

In [33]:
swem_test_data = HeadDataset(test_ood_df, vocab=vocab)
swem_test_dataloader = DataLoader(swem_test_data, batch_size=len(swem_test_data), shuffle=True)

In [36]:
with torch.no_grad():
    swem_X, swem_y = next(iter(swem_test_dataloader))
    swem_preds = swem_classifier.forward(swem_X)

In [35]:
import torchmetrics

def report_metrics(preds, y):
    test_accuracy = torchmetrics.Accuracy()
    test_precision = torchmetrics.Precision(num_classes=3, average="weighted")
    test_recall = torchmetrics.Recall(num_classes=3, average="weighted")
    test_f1 = torchmetrics.F1Score(num_classes=3, average="weighted")
    print(f'Test accuracy={test_accuracy(preds, y).item():.3f}, precision={test_precision(preds, y).item():.3f}, recall={test_recall(preds, y).item():.3f}, f1={test_f1(preds, y).item():.3f}')

In [37]:
report_metrics(swem_preds, swem_y)

Test accuracy=0.544, precision=0.712, recall=0.461, f1=0.552


In [38]:
from torch import nn
import torch.nn.functional as F
from transformers import DistilBertModel, DistilBertTokenizer
import pytorch_lightning as pl
from torch.utils.data import Dataset

class DistilBertHeadDataset(Dataset):
    def __init__(self, df):
        self.tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
        self.labels = np.asarray(df['label'].to_list())
        self.texts = [self.tokenizer(text, padding='max_length', max_length=32, truncation=True,
                                     return_tensors="pt") for text in df['text']]

    def classes(self):
        return self.labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.texts[idx], self.labels[idx]


class DistilBERTClassifier(pl.LightningModule):
    def __init__(self, num_classes=3, dropout=0.5, learning_rate=1e-4, freeze_emb=False):
        super().__init__()
        self.distilbert = DistilBertModel.from_pretrained('distilbert-base-uncased')
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(768, num_classes)

        if freeze_emb:
            for parameter in self.distilbert.parameters():
                parameter.requires_grad = False
            self.classifier = nn.Sequential(self.linear)
        else:
            self.classifier = nn.Sequential(self.dropout, self.linear)
    
    def forward(self, input_ids, mask):
        outputs = self.distilbert(input_ids=input_ids, attention_mask=mask, return_dict=False)
        outputs = self.classifier(outputs[0][:, 0, :])
        return outputs

    def predict_step(self, batch, batch_idx):
        X, y = batch
        mask = X['attention_mask']
        input_ids = X['input_ids'].squeeze(1)
        outputs = self.forward(input_ids, mask)
        probs = F.sigmoid(outputs)
        return probs

In [39]:
distilbert_classifier = DistilBERTClassifier.load_from_checkpoint('./models/distilbert_model.ckpt')

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_transform.bias', 'vocab_layer_norm.bias', 'vocab_layer_norm.weight', 'vocab_projector.bias', 'vocab_transform.weight', 'vocab_projector.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [40]:
dbert_test_data = DistilBertHeadDataset(test_ood_df)
dbert_test_dataloader = DataLoader(dbert_test_data, batch_size=128)

In [41]:
trainer = pl.Trainer()
d_preds = trainer.predict(distilbert_classifier, dbert_test_dataloader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
  rank_zero_warn(


Predicting:   0%|          | 1/566 [00:04<43:47,  4.65s/it]



Predicting:   2%|▏         | 12/566 [00:54<41:34,  4.50s/it]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [None]:
report_metrics(d_preds, y)