In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports edited modules
# before each cell executes, so library changes are picked up live.
%load_ext autoreload
%autoreload 2

### Sample usage

In [4]:
from kogito.models.bart.comet import COMETBART
from kogito.inference import CommonsenseInference
from kogito.core.model import KnowledgeModel
from kogito.core.knowledge import KnowledgeGraph

# Load the pretrained COMET-BART checkpoint identified by its hub name.
model: KnowledgeModel = COMETBART.from_pretrained(
    "mismayil/comet-bart-ai2"
)

# Build the inference pipeline and run it end-to-end on a single sentence.
csi = CommonsenseInference()
text = "PersonX becomes a great basketball player"
kgraph: KnowledgeGraph = csi.infer(
    text,
    model=model,
)

# Write the generated knowledge graph to disk.
kgraph.to_jsonl(
    "results/kgraph.json"
)

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /mismayil/comet-bart-ai2/resolve/main/config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /mismayil/comet-bart-ai2/resolve/main/pytorch_model.bin HTTP/1.1" 302 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /mismayil/comet-bart-ai2/resolve/main/tokenizer_config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /mismayil/comet-bart-ai2/resolve/main/config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "

Extracting heads...
Matching relations...
Generating commonsense graph...


100%|██████████| 1/1 [00:16<00:00, 16.84s/it]


### Saving models

In [2]:
# Persist the knowledge model held in `model` under the local path ./comet-bart.
model.save_pretrained("./comet-bart")

### Customizing processors

In [5]:
# Display the processors registered on the pipeline, grouped into 'head' and 'relation'.
csi.processors

{'head': ['sentence_extractor',
  'noun_phrase_extractor',
  'verb_phrase_extractor'],
 'relation': ['simple_relation_matcher', 'graph_relation_matcher']}

In [6]:
# Drop the noun-phrase head extractor from the pipeline, addressing it by its registered name.
csi.remove_processor("noun_phrase_extractor")

In [7]:
# Display the processor registry again to confirm the removal took effect.
csi.processors

{'head': ['sentence_extractor', 'verb_phrase_extractor'],
 'relation': ['simple_relation_matcher', 'graph_relation_matcher']}

In [8]:
from typing import Optional, List
from spacy.tokens import Doc
import spacy

from kogito.core.processors.head import KnowledgeHeadExtractor, KnowledgeHead

class AdjectiveHeadExtractor(KnowledgeHeadExtractor):
    """Custom head extractor that emits one knowledge head per adjective."""

    def extract(self, text: str, doc: Optional[Doc] = None) -> List[KnowledgeHead]:
        """Return a ``KnowledgeHead`` for every token tagged as an adjective.

        Args:
            text: Raw input text. It is parsed with ``self.lang`` only when
                no ``doc`` is supplied.
            doc: Optional, already-parsed spaCy document to reuse instead of
                parsing ``text`` again.

        Returns:
            Heads in document order; each carries the adjective's surface
            text and the originating token as its ``entity``.
        """
        # Compare against None explicitly: an empty spaCy Doc has length 0 and
        # is therefore falsy, so `if not doc` would discard a caller-supplied
        # document and re-parse the text.
        # NOTE(review): `self.lang` is presumably the pipeline passed to the
        # constructor (e.g. spacy.load(...)) — confirm against the base class.
        if doc is None:
            doc = self.lang(text)

        return [
            KnowledgeHead(text=token.text, entity=token)
            for token in doc
            if token.pos_ == "ADJ"
        ]

# Instantiate the custom extractor under the name "adj_extractor", backed by
# the small English spaCy pipeline, and register it with the inference object.
adj_extractor = AdjectiveHeadExtractor(
    "adj_extractor",
    spacy.load("en_core_web_sm"),
)
csi.add_processor(adj_extractor)

In [9]:
# Display the processor registry again; the newly added extractor should now be listed.
csi.processors

{'head': ['sentence_extractor', 'verb_phrase_extractor', 'adj_extractor'],
 'relation': ['simple_relation_matcher', 'graph_relation_matcher']}

In [10]:
from kogito.inference import CommonsenseInference
from kogito.core.knowledge import KnowledgeGraph

# Rebind `csi` to a brand-new inference object for the examples that follow.
# NOTE(review): presumably this restores the default processor set after the
# customizations made to the previous `csi` — confirm.
csi = CommonsenseInference()


### Dry-run

In [11]:
text = "Gabby always brought cookies to school."

# Dry run: no model is passed; the recorded output shows only the head
# extraction and relation matching stages.
kgraph: KnowledgeGraph = csi.infer(
    text,
    dry_run=True,
)
kgraph.to_jsonl(
    "results/kgraph_dry_run.json"
)

Extracting heads...
Matching relations...


In [12]:
# Second dry run, this time on a short two-sentence input.
text = "I wanted to feed him. he didnt listen to me"

kgraph: KnowledgeGraph = csi.infer(
    text,
    dry_run=True,
)
kgraph.to_jsonl(
    "results/kgraph_dry_run_2.json"
)

Extracting heads...
Matching relations...


### Relation subset

In [13]:

from kogito.core.relation import OBJECT_USE, CAUSES
text = "Gabby always brought cookies to school."

# Dry run with an explicit `relations` list (OBJECT_USE and CAUSES).
kgraph: KnowledgeGraph = csi.infer(
    text,
    dry_run=True,
    relations=[OBJECT_USE, CAUSES],
)
kgraph.to_jsonl(
    "results/kgraph_rel_subset.json"
)

Extracting heads...
Matching relations...


### No Head extraction

In [14]:
text = "Gabby always brought cookies to school."

# Head extraction is switched off; the recorded output shows only the
# relation matching stage running.
kgraph: KnowledgeGraph = csi.infer(
    text,
    dry_run=True,
    extract_heads=False,
)
kgraph.to_jsonl(
    "results/kgraph_no_head_extract.json"
)

Matching relations...


### No Relation matching and no subset of relations

In [15]:
text = "Gabby always brought cookies to school."

# With relation matching disabled and no explicit `relations` list, infer()
# raises ValueError. That error is the point of this example, so catch and
# display it instead of letting it abort a "Restart & Run All".
try:
    kgraph: KnowledgeGraph = csi.infer(text, dry_run=True, match_relations=False)
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

Extracting heads...


ValueError: No relation found to match

### No Relation matching with subset of relations

In [16]:
from kogito.core.relation import DESIRES, CAUSES
text = "Gabby always brought cookies to school."

# Relation matching is switched off, and the relations to use are given
# explicitly instead (CAUSES and DESIRES).
kgraph: KnowledgeGraph = csi.infer(
    text,
    dry_run=True,
    match_relations=False,
    relations=[CAUSES, DESIRES],
)
kgraph.to_jsonl(
    "results/kgraph_no_match_subset.json"
)

Extracting heads...


### No head extraction and no relation matching, with a subset of relations (fully manual specification)

In [17]:
from kogito.core.relation import DESIRES, CAUSES
text = "Gabby always brought cookies to school."

# Both automatic stages are switched off: no head extraction, no relation
# matching. Only the explicitly listed relations are supplied.
kgraph: KnowledgeGraph = csi.infer(
    text,
    dry_run=True,
    extract_heads=False,
    match_relations=False,
    relations=[CAUSES, DESIRES],
)
kgraph.to_jsonl(
    "results/kgraph_manual.json"
)

### Custom heads

In [18]:
from kogito.inference import CommonsenseInference
from kogito.core.knowledge import KnowledgeGraph

csi = CommonsenseInference()

# Heads are supplied directly through `heads`, so no input text is passed to
# infer(); the recorded output shows only the relation matching stage.
kgraph: KnowledgeGraph = csi.infer(
    heads=["post office", "to get out of the room"],
    dry_run=True,
)
kgraph.to_jsonl("results/kgraph_manual_heads.json")

Matching relations...


### Model-based relation matching (SWEM)

In [1]:
from kogito.inference import CommonsenseInference
from kogito.core.knowledge import KnowledgeGraph
from kogito.core.processors.relation import SWEMRelationMatcher
from kogito.models.bart.comet import COMETBART
from kogito.core.model import KnowledgeModel
import time

# Swap the default "simple_relation_matcher" for the SWEM-based matcher.
csi = CommonsenseInference()
csi.remove_processor("simple_relation_matcher")
swem_matcher = SWEMRelationMatcher("swem_relation_matcher")
csi.add_processor(swem_matcher)

# Time the dry-run inference only. perf_counter() is a monotonic,
# high-resolution clock, so the measured interval cannot be skewed by
# wall-clock adjustments the way time.time() can.
start = time.perf_counter()
kgraph: KnowledgeGraph = csi.infer(
    heads=["banana", "love another", "Student gets a card"],
    dry_run=True,
)
end = time.perf_counter()
print(f"Took {end-start} seconds")
kgraph.to_jsonl("results/kgraph_modelbased_relations_swem.json")

  from .autonotebook import tqdm as notebook_tqdm
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /mismayil/kogito-rc-swem/resolve/main/config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /mismayil/kogito-rc-swem/resolve/main/pytorch_model.bin HTTP/1.1" 302 0
INFO:torch.distributed.nn.jit.instantiator:Created a temporary directory at /var/folders/rs/c9bqjyq95q59ngc5v1t0gz_00000gn/T/tmpwz6_d8ub
INFO:torch.distributed.nn.jit.instantiator:Writing /var/folders/rs/c9bqjyq95q59ngc5v1t0gz_00000gn/T/tmpwz6_d8ub/_remote_module_non_sriptable.py


Matching relations...


  rank_zero_warn(
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 59.87it/s]
Took 0.688650369644165 seconds




### Model-based relation matching (DistilBERT)

In [3]:
from kogito.inference import CommonsenseInference
from kogito.core.knowledge import KnowledgeGraph
from kogito.core.processors.relation import DistilBERTRelationMatcher
from kogito.core.model import KnowledgeModel
import time

# Swap the default "simple_relation_matcher" for the DistilBERT-based matcher.
csi = CommonsenseInference()
csi.remove_processor("simple_relation_matcher")
dbert_matcher = DistilBERTRelationMatcher("dbert_relation_matcher")
csi.add_processor(dbert_matcher)

# Time the dry-run inference only. perf_counter() is a monotonic,
# high-resolution clock, so the measured interval cannot be skewed by
# wall-clock adjustments the way time.time() can.
start = time.perf_counter()
kgraph: KnowledgeGraph = csi.infer(
    heads=["banana", "love another", "Student gets a card"],
    dry_run=True,
)
end = time.perf_counter()
print(f"Took {end-start} seconds")
kgraph.to_jsonl("results/kgraph_modelbased_relations_dbert.json")

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /mismayil/kogito-rc-distilbert/resolve/main/config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /mismayil/kogito-rc-distilbert/resolve/main/pytorch_model.bin HTTP/1.1" 302 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /distilbert-base-uncased/resolve/main/config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /distilbert-base-uncased/resolve/main/pytorch_model.bin HTTP/1.1" 302 0
Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.

Matching relations...


DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /distilbert-base-uncased/resolve/main/vocab.txt HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /distilbert-base-uncased/resolve/main/added_tokens.json HTTP/1.1" 404 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /distilbert-base-uncased/resolve/main/special_tokens_map.json HTTP/1.1" 404 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /distilbert-base-uncased/resolve/main/tokenizer_config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /distilbert-base-uncased/resolve/main/config.json HTTP/1.1" 200 0
GP

Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  6.56it/s]
Took 3.07722806930542 seconds


### Model-based relation matching (BERT)

In [4]:
from kogito.inference import CommonsenseInference
from kogito.core.knowledge import KnowledgeGraph
from kogito.core.processors.relation import BERTRelationMatcher
from kogito.core.model import KnowledgeModel
import time

# Swap the default "simple_relation_matcher" for the BERT-based matcher.
csi = CommonsenseInference()
csi.remove_processor("simple_relation_matcher")
# Register under a BERT-specific name; the previous "dbert_relation_matcher"
# was a copy-paste leftover from the DistilBERT example.
bert_matcher = BERTRelationMatcher("bert_relation_matcher")
csi.add_processor(bert_matcher)

# Time the dry-run inference only. perf_counter() is a monotonic,
# high-resolution clock, so the measured interval cannot be skewed by
# wall-clock adjustments the way time.time() can.
start = time.perf_counter()
kgraph: KnowledgeGraph = csi.infer(
    heads=["banana", "love another", "Student gets a card"],
    dry_run=True,
)
end = time.perf_counter()
print(f"Took {end-start} seconds")
kgraph.to_jsonl("results/kgraph_modelbased_relations_bert.json")

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /mismayil/kogito-rc-bert/resolve/main/config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /mismayil/kogito-rc-bert/resolve/main/pytorch_model.bin HTTP/1.1" 302 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /bert-base-uncased/resolve/main/config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /bert-base-uncased/resolve/main/pytorch_model.bin HTTP/1.1" 302 0
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transfor

Matching relations...


DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /bert-base-uncased/resolve/main/vocab.txt HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /bert-base-uncased/resolve/main/added_tokens.json HTTP/1.1" 404 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /bert-base-uncased/resolve/main/special_tokens_map.json HTTP/1.1" 404 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /bert-base-uncased/resolve/main/tokenizer_config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /bert-base-uncased/resolve/main/config.json HTTP/1.1" 200 0
GPU available: False, used: Fals

Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  3.01it/s]
Took 4.072222948074341 seconds


### GPT-3 Based Commonsense Inference

In [1]:
from kogito.inference import CommonsenseInference
from kogito.core.knowledge import KnowledgeGraph
from kogito.models.gpt3.zeroshot import GPT3Zeroshot
import time, os

csi = CommonsenseInference()
csi.remove_processor("simple_relation_matcher")

# Read the API key from the OPENAI_API_KEY environment variable so that no
# credential is ever committed with the notebook. Falls back to the empty
# string (the previous hardcoded value) when the variable is not set.
model = GPT3Zeroshot(
    api_key=os.environ.get("OPENAI_API_KEY", ""),
    model_name="text-davinci-002",
)

# Example graph loaded from a headerless, tab-separated file and passed to
# infer() as `sample_graph`.
sample_graph = KnowledgeGraph.from_csv("sample_graph.tsv", sep="\t", header=None)
heads = ["PersonX accuses PersonY of cheating", "PersonX aces PersonX's exam"]

# perf_counter() is monotonic and high-resolution, which makes it the right
# clock for measuring an elapsed interval.
start = time.perf_counter()
kgraph = csi.infer(model=model, heads=heads, sample_graph=sample_graph, model_args={"debug": True})
end = time.perf_counter()

print(f"Took {end-start} seconds")
kgraph.to_jsonl("results/kgraph_gpt3.json")

  from .autonotebook import tqdm as notebook_tqdm


Matching relations...
Generating commonsense graph...
Took 2.3603579998016357 seconds


### GPT-3 Based Commonsense Inference with custom relation

In [2]:
from kogito.inference import CommonsenseInference
from kogito.core.knowledge import KnowledgeGraph
from kogito.models.gpt3.zeroshot import GPT3Zeroshot
from kogito.core.relation import KnowledgeRelation, register_relation
import time, os

# Start from a new inference object and drop the "simple_relation_matcher"
# processor before running the custom-relation example.
csi = CommonsenseInference()
csi.remove_processor("simple_relation_matcher")

def x_want2_verbalizer(head, **kwargs):
    """Render a head as a "Situation ... / Wants ..." prompt fragment.

    Args:
        head: The head text to place after the "Situation" label.
        **kwargs: Optional ``index`` used to number the situation; any other
            keyword is ignored.

    Returns:
        A two-line string: the (optionally numbered) situation line followed
        by the "Wants: As a result, PersonX wants" lead-in. When ``index`` is
        missing or None the number is left blank, yielding "Situation : ...".
    """
    position = kwargs.get("index")

    if position is None:
        label = ""
    else:
        label = f"{position}"

    return f"Situation {label}: {head}\nWants: As a result, PersonX wants"

# Declare the custom "xWant2" relation, wiring in the verbalizer defined
# above together with its prompt, then hand it to register_relation().
X_WANT2 = KnowledgeRelation(
    "xWant2",
    verbalizer=x_want2_verbalizer,
    prompt="How does this situation affect each character's wants?",
)
register_relation(X_WANT2)

# Read the API key from the OPENAI_API_KEY environment variable so that no
# credential is ever committed with the notebook. Falls back to the empty
# string (the previous hardcoded value) when the variable is not set.
model = GPT3Zeroshot(
    api_key=os.environ.get("OPENAI_API_KEY", ""),
    model_name="text-davinci-002",
)

# Example graph loaded from a headerless, tab-separated file and passed to
# infer() as `sample_graph`.
sample_graph = KnowledgeGraph.from_csv("sample_graph2.tsv", sep="\t", header=None)

heads = ["PersonX makes a huge mistake", "PersonX sees PersonY's point"]

# perf_counter() is monotonic and high-resolution, which makes it the right
# clock for measuring an elapsed interval.
start = time.perf_counter()
kgraph = csi.infer(model=model,
                   heads=heads,
                   sample_graph=sample_graph,
                   model_args={"debug": True})
end = time.perf_counter()

print(f"Took {end-start} seconds")
kgraph.to_jsonl("results/kgraph_gpt3_custom_relation.json")

Matching relations...
Generating commonsense graph...
Took 2.438878297805786 seconds
