In [1]:
# Enable IPython's autoreload extension in mode 2 so that imported modules are
# reloaded before each cell runs (handy while editing the library locally).
%load_ext autoreload
%autoreload 2

### Quickstart

In [3]:
from kogito.models.bart.comet import COMETBART
from kogito.inference import CommonsenseInference

# Load the pre-trained COMET-BART model from the HuggingFace hub.
# `model` is reused later by the "Saving models" cell.
model = COMETBART.from_pretrained("mismayil/comet-bart-ai2")

# Initialize the inference module with a spaCy language pipeline.
# `csi` is reused (and later re-created) by the cells that follow.
csi = CommonsenseInference(language="en_core_web_sm")

# Run inference; per the logged output this extracts heads, matches relations
# and then generates the commonsense graph with the model.
text = "PersonX becomes a great basketball player"
kgraph = csi.infer(text, model)

# Save the output knowledge graph via `to_jsonl`
# (note: the file name uses a .json extension).
kgraph.to_jsonl("kgraph.json")

Extracting heads...
Matching relations...
Generating commonsense graph...


100%|██████████| 1/1 [00:16<00:00, 16.97s/it]


### Knowledge

In [4]:
from kogito.core.head import KnowledgeHead
from kogito.core.knowledge import Knowledge
from kogito.core.relation import X_NEED

# A Knowledge item is built from a head, a relation and a list of tails.
head = KnowledgeHead("PersonX buys lunch")
knowledge = Knowledge(head=head, relation=X_NEED, tails=["bring a wallet"])

### Knowledge Graph

In [5]:
from kogito.core.knowledge import Knowledge, KnowledgeGraph
from kogito.core.head import KnowledgeHead
from kogito.core.relation import X_NEED, CAUSES

# Two independent Knowledge items with different heads and relations.
knowledge1 = Knowledge(head=KnowledgeHead("PersonX buys lunch"), relation=X_NEED, tails=["bring a wallet"])
knowledge2 = Knowledge(head=KnowledgeHead("Throwing a party"), relation=CAUSES, tails=["have fun"])

# A KnowledgeGraph is constructed from a list of Knowledge items.
# NOTE: this rebinds `kgraph`, replacing the graph produced in the Quickstart cell.
kgraph = KnowledgeGraph([knowledge1, knowledge2])

In [6]:
# Iterating over a KnowledgeGraph yields its Knowledge items one by one.
# NOTE: the loop variable rebinds the notebook-level `knowledge` defined earlier.
for knowledge in kgraph:
    print(knowledge)

Knowledge(head="PersonX buys lunch", relation="xNeed", tails=['bring a wallet'])
Knowledge(head="Throwing a party", relation="Causes", tails=['have fun'])


In [8]:
# From a delimited text file (here tab-separated, hence sep="\t");
# header=None presumably means the file has no header row — confirm.
kgraph1 = KnowledgeGraph.from_csv("sample_graph.tsv", sep="\t", header=None)

# From jsonl (list of json objects); the *_attr arguments presumably name the
# JSON keys that hold the head, relation and tails — verify against the file.
kgraph2 = KnowledgeGraph.from_jsonl("sample_graph.jsonl", head_attr="source", relation_attr="rel", tails_attr="targets")

In [9]:
# Each operator has an equivalent method, shown in the trailing comment.
# NOTE: `kgraph3` is reassigned by every statement, so only the last result
# (the difference) survives this cell and is what the next cell saves.

# Union
kgraph3 = kgraph1 + kgraph2 # kgraph1.union(kgraph2)

# Intersection
kgraph3 = kgraph1 & kgraph2 # kgraph1.intersection(kgraph2)

# Difference
kgraph3 = kgraph1 - kgraph2 # kgraph1.difference(kgraph2)

In [10]:
# Write `kgraph3` (the difference computed last in the previous cell) to disk.
kgraph3.to_jsonl("sample_graph3.jsonl")

### Saving models

In [None]:
# `model` is the COMETBART instance loaded in the Quickstart cell;
# presumably this writes it under the local ./comet-bart path — confirm.
model.save_pretrained("./comet-bart")

### Customizing processors

In [None]:
# Show the processors currently registered on the inference module
# (not executed in the saved run, so no output is recorded here).
csi.processors

In [None]:
# Remove a processor by its registered name.
csi.remove_processor("noun_phrase_extractor")

In [None]:
# Show the processors again to see the effect of the removal above
# (not executed in the saved run).
csi.processors

### Custom Head Extractor

In [11]:
from typing import Optional, List
from spacy.tokens import Doc
import spacy

from kogito.core.processors.head import KnowledgeHeadExtractor, KnowledgeHead

class AdjectiveHeadExtractor(KnowledgeHeadExtractor):
    """Head extractor that turns every adjective in the text into a knowledge head."""

    def extract(self, text: str, doc: Optional[Doc] = None) -> List[KnowledgeHead]:
        """Return one ``KnowledgeHead`` per token tagged ``ADJ``.

        Args:
            text: Raw input text; parsed with ``self.lang`` only when ``doc``
                is not supplied.
            doc: Optional pre-parsed spaCy ``Doc`` to scan instead of
                re-parsing ``text``.

        Returns:
            Heads in document order, each built from the token's text with the
            token itself passed as ``entity``.
        """
        # Compare against None explicitly: a spaCy Doc with zero tokens is
        # falsy, so `if not doc` would re-parse a Doc the caller did supply.
        if doc is None:
            doc = self.lang(text)

        return [
            KnowledgeHead(text=token.text, entity=token)
            for token in doc
            if token.pos_ == "ADJ"
        ]

# Instantiate the custom extractor with a processor name and a loaded spaCy
# pipeline (presumably exposed to `extract` as `self.lang` — confirm), then
# register it on the inference module.
adj_extractor = AdjectiveHeadExtractor("adj_extractor", spacy.load("en_core_web_sm"))
csi.add_processor(adj_extractor)

In [12]:
# The custom "adj_extractor" now appears in the 'head' processor list.
csi.processors

{'head': ['sentence_extractor',
  'noun_phrase_extractor',
  'verb_phrase_extractor',
  'adj_extractor'],
 'relation': ['simple_relation_matcher', 'graph_relation_matcher']}

In [None]:
from kogito.inference import CommonsenseInference
from kogito.core.knowledge import KnowledgeGraph

# NOTE(review): this cell has no execution count in the saved run. Running it
# rebinds `csi` to a fresh instance, which presumably no longer carries the
# custom "adj_extractor" — yet the recorded `csi.processors` output in the
# Custom Relation Matcher section still lists it. Confirm the intended order.
csi = CommonsenseInference()


### Custom Relation Matcher

In [13]:
from typing import List, Tuple

from kogito.core.processors.head import KnowledgeHead
from kogito.core.processors.relation import KnowledgeRelationMatcher
from kogito.core.relation import KnowledgeRelation, X_NEED, CAUSES

class ConstantRelationMatcher(KnowledgeRelationMatcher):
    """Relation matcher that pairs every head with the same two relations."""

    def match(
        self, heads: List[KnowledgeHead], relations: List[KnowledgeRelation] = None, **kwargs
    ) -> List[Tuple[KnowledgeHead, KnowledgeRelation]]:
        """Pair each head with ``X_NEED`` and then ``CAUSES``.

        Args:
            heads: Knowledge heads to match.
            relations: Accepted for interface compatibility; not consulted.
            **kwargs: Accepted for interface compatibility; not consulted.

        Returns:
            ``(head, relation)`` tuples, two per head, in head order with
            ``X_NEED`` preceding ``CAUSES`` for each head.
        """
        fixed_relations = (X_NEED, CAUSES)
        return [(head, relation) for head in heads for relation in fixed_relations]

# Instantiate the custom matcher with a processor name and a spaCy pipeline,
# then register it. NOTE: `spacy` is not imported in this cell; it relies on
# the `import spacy` from the Custom Head Extractor cell having been run.
const_rel_matcher = ConstantRelationMatcher("const_rel_matcher", spacy.load("en_core_web_sm"))
csi.add_processor(const_rel_matcher)

In [14]:
# The custom "const_rel_matcher" now appears in the 'relation' processor list.
csi.processors

{'head': ['sentence_extractor',
  'noun_phrase_extractor',
  'verb_phrase_extractor',
  'adj_extractor'],
 'relation': ['simple_relation_matcher',
  'graph_relation_matcher',
  'const_rel_matcher']}

### Dry-run

In [15]:
# Dry run: no model is passed, and the log shows only head extraction and
# relation matching (no graph generation step).
text = "Gabby always brought cookies to school."
kgraph: KnowledgeGraph = csi.infer(text, dry_run=True)
# assumes the results/ directory already exists — TODO confirm whether
# to_jsonl creates missing directories.
kgraph.to_jsonl("results/kgraph_dry_run.json")

Extracting heads...
Matching relations...


In [16]:
# Same dry run on a two-sentence, informally written input.
text = "I wanted to feed him. he didnt listen to me"
kgraph: KnowledgeGraph = csi.infer(text, dry_run=True)
kgraph.to_jsonl("results/kgraph_dry_run_2.json")

Extracting heads...
Matching relations...


### Relation subset

In [17]:

from kogito.core.relation import OBJECT_USE, CAUSES
# Dry run with an explicit `relations` list (OBJECT_USE and CAUSES) passed
# alongside the default relation matching.
text = "Gabby always brought cookies to school."
kgraph: KnowledgeGraph = csi.infer(text, dry_run=True, relations=[OBJECT_USE, CAUSES])
kgraph.to_jsonl("results/kgraph_rel_subset.json")

Extracting heads...
Matching relations...


### No Head extraction

In [18]:
# With extract_heads=False the log shows only the relation-matching step;
# presumably the input text itself is used as the head — confirm.
text = "Gabby always brought cookies to school."
kgraph: KnowledgeGraph = csi.infer(text, dry_run=True, extract_heads=False)
kgraph.to_jsonl("results/kgraph_no_head_extract.json")

Matching relations...


### No Relation matching and no subset of relations

In [19]:
text = "Gabby always brought cookies to school."
# With relation matching disabled and no explicit `relations` list, `infer`
# has nothing to pair the heads with and raises ValueError. The error is the
# point of this demonstration, so catch and display it instead of letting it
# abort a "Restart & Run All"; `kgraph` keeps its previous value in that case.
try:
    kgraph: KnowledgeGraph = csi.infer(text, dry_run=True, match_relations=False)
except ValueError as err:
    print(f"ValueError: {err}")

Extracting heads...


ValueError: No relation found to match

### No Relation matching with subset of relations

In [20]:
from kogito.core.relation import DESIRES, CAUSES
# Relation matching is disabled, but an explicit `relations` list is given,
# so this call succeeds where the previous cell raised ValueError.
text = "Gabby always brought cookies to school."
kgraph: KnowledgeGraph = csi.infer(text, dry_run=True, match_relations=False, relations=[CAUSES, DESIRES])
kgraph.to_jsonl("results/kgraph_no_match_subset.json")

Extracting heads...


### No Head extraction and no Relation matching, with a subset of relations (i.e., fully manual specification)

In [21]:
from kogito.core.relation import DESIRES, CAUSES
# Both head extraction and relation matching are disabled; the relations are
# supplied explicitly (no log output is recorded for this cell).
text = "Gabby always brought cookies to school."
kgraph: KnowledgeGraph = csi.infer(text, dry_run=True, extract_heads=False, match_relations=False, relations=[CAUSES, DESIRES])
kgraph.to_jsonl("results/kgraph_manual.json")

### Custom heads

In [22]:
from kogito.inference import CommonsenseInference
from kogito.core.knowledge import KnowledgeGraph

# Start from a fresh inference module (drops the custom processors added above).
csi = CommonsenseInference()
# Heads are supplied directly via `heads`, so no input text is needed; the
# log accordingly shows only the relation-matching step.
kgraph: KnowledgeGraph = csi.infer(heads=["post office", "to get out of the room"], dry_run=True)
kgraph.to_jsonl("results/kgraph_manual_heads.json")

Matching relations...


### Model based Relation matching (SWEM)

In [23]:
from kogito.inference import CommonsenseInference
from kogito.core.knowledge import KnowledgeGraph
from kogito.core.processors.relation import SWEMRelationMatcher
from kogito.models.bart.comet import COMETBART
from kogito.core.model import KnowledgeModel
import time

# Fresh inference module with the "simple_relation_matcher" removed and the
# SWEM model-based matcher registered in addition to the remaining processors.
csi = CommonsenseInference()
csi.remove_processor("simple_relation_matcher")
swem_matcher = SWEMRelationMatcher("swem_relation_matcher")
csi.add_processor(swem_matcher)
# Time only the inference call (matcher construction above is excluded).
start = time.time()
# Left disabled: this is a dry run, so no generation model is loaded.
# model: KnowledgeModel = COMETBART.from_pretrained("mismayil/comet-bart-ai2")
kgraph: KnowledgeGraph = csi.infer(heads=["banana", "love another", "Student gets a card"], dry_run=True)
end = time.time()
print(f"Took {end-start} seconds")
kgraph.to_jsonl("results/kgraph_modelbased_relations_swem.json")

Matching relations...


  rank_zero_warn(
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00, 173.23it/s]
Took 0.6309409141540527 seconds




### Model based Relation matching (DistilBERT)

In [24]:
from kogito.inference import CommonsenseInference
from kogito.core.knowledge import KnowledgeGraph
from kogito.core.processors.relation import DistilBERTRelationMatcher
from kogito.core.model import KnowledgeModel
import time

# Fresh inference module with the "simple_relation_matcher" removed and the
# DistilBERT model-based matcher registered in addition to the remaining processors.
csi = CommonsenseInference()
csi.remove_processor("simple_relation_matcher")
dbert_matcher = DistilBERTRelationMatcher("dbert_relation_matcher")
csi.add_processor(dbert_matcher)
# Time only the inference call (matcher construction above is excluded).
start = time.time()
# Left disabled: this is a dry run, so no generation model is loaded.
# NOTE: COMETBART is not imported in this cell.
# model: KnowledgeModel = COMETBART.from_pretrained("mismayil/comet-bart-ai2")
kgraph: KnowledgeGraph = csi.infer(heads=["banana", "love another", "Student gets a card"], dry_run=True)
end = time.time()
print(f"Took {end-start} seconds")
kgraph.to_jsonl("results/kgraph_modelbased_relations_dbert.json")

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_projector.bias', 'vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.weight', 'vocab_transform.weight', 'vocab_layer_norm.bias']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Matching relations...


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  6.80it/s]
Took 2.5445749759674072 seconds


### Model based Relation matching (BERT)

In [25]:
from kogito.inference import CommonsenseInference
from kogito.core.knowledge import KnowledgeGraph
from kogito.core.processors.relation import BERTRelationMatcher
from kogito.core.model import KnowledgeModel
import time

# Fresh inference module with the "simple_relation_matcher" removed and the
# BERT model-based matcher registered in addition to the remaining processors.
csi = CommonsenseInference()
csi.remove_processor("simple_relation_matcher")
# Register under a BERT-specific name; the previous "dbert_relation_matcher"
# was a copy-paste leftover from the DistilBERT cell and mislabels this
# processor in `csi.processors`.
bert_matcher = BERTRelationMatcher("bert_relation_matcher")
csi.add_processor(bert_matcher)
# Time only the inference call (matcher construction above is excluded).
start = time.time()
# Left disabled: this is a dry run, so no generation model is loaded.
# model: KnowledgeModel = COMETBART.from_pretrained("mismayil/comet-bart-ai2")
kgraph: KnowledgeGraph = csi.infer(heads=["banana", "love another", "Student gets a card"], dry_run=True)
end = time.time()
print(f"Took {end-start} seconds")
kgraph.to_jsonl("results/kgraph_modelbased_relations_bert.json")

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Matching relations...


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████| 1/1 [00:00<00:00,  3.77it/s]
Took 2.6959590911865234 seconds


### GPT-3 Based Commonsense Inference

In [26]:
from kogito.inference import CommonsenseInference
from kogito.core.knowledge import KnowledgeGraph
from kogito.models.gpt3.zeroshot import GPT3Zeroshot
import time, os

csi = CommonsenseInference()
csi.remove_processor("simple_relation_matcher")

# Never hard-code credentials in a notebook: read the key from the
# OPENAI_API_KEY environment variable. Falls back to the previous empty
# string when the variable is unset.
model = GPT3Zeroshot(api_key=os.getenv("OPENAI_API_KEY", ""), model_name="text-davinci-002")
# Sample graph handed to `infer` together with the heads (presumably used as
# example knowledge for the zero-shot prompt — confirm).
sample_graph = KnowledgeGraph.from_csv("sample_graph.tsv", sep="\t", header=None)
heads = ["PersonX accuses PersonY of cheating", "PersonX aces PersonX's exam"]

# Time only the inference call.
start = time.time()
kgraph = csi.infer(model=model, heads=heads, sample_graph=sample_graph, model_args={"debug": True})
end = time.time()

print(f"Took {end-start} seconds")
kgraph.to_jsonl("results/kgraph_gpt3.json")

Matching relations...
Generating commonsense graph...
Took 3.8873648643493652 seconds


### GPT-3 Based Commonsense Inference with custom relation

In [35]:
from kogito.inference import CommonsenseInference
from kogito.core.knowledge import KnowledgeGraph
from kogito.models.gpt3.zeroshot import GPT3Zeroshot
from kogito.core.relation import KnowledgeRelation, register_relation
import time, os

# Fresh inference module without the "simple_relation_matcher" processor.
csi = CommonsenseInference()
csi.remove_processor("simple_relation_matcher")

def x_wishes_verbalizer(head, **kwargs):
    """Render the prompt fragment for the custom ``xWishes`` relation.

    Args:
        head: The situation text to verbalize.
        **kwargs: May carry ``index``, the sample number supplied by the model
            so that samples can be enumerated in the prompt.

    Returns:
        A two-line string: ``"Situation <index>: <head>"`` followed by the
        ``"Wishes: As a result, PersonX wishes"`` lead-in. When ``index`` is
        absent or None, the number is left blank.
    """
    sample_no = kwargs.get("index")
    if sample_no is None:
        label = ""
    else:
        label = f"{sample_no}"
    return f"Situation {label}: {head}\nWishes: As a result, PersonX wishes"

# Define a custom relation named "xWishes" with its verbalizer function and a
# prompt question, then register it via `register_relation`.
X_WISHES = KnowledgeRelation("xWishes",
                             verbalizer=x_wishes_verbalizer,
                             prompt="How does this situation affect each character's wishes?")
register_relation(X_WISHES)

# Never hard-code credentials in a notebook: read the key from the
# OPENAI_API_KEY environment variable. Falls back to the previous empty
# string when the variable is unset.
model = GPT3Zeroshot(api_key=os.getenv("OPENAI_API_KEY", ""), model_name="text-davinci-002")

# Sample graph handed to `infer` together with the heads (presumably contains
# examples of the custom relation — verify against sample_graph2.tsv).
sample_graph = KnowledgeGraph.from_csv("sample_graph2.tsv", sep="\t", header=None)

heads = ["PersonX makes a huge mistake", "PersonX sees PersonY's point"]

# Time only the inference call; extra generation settings are forwarded
# through `model_args`.
start = time.time()
kgraph = csi.infer(model=model,
                   heads=heads,
                   sample_graph=sample_graph,
                   model_args={"debug": True, "top_p": 0.5, "stop": "\n"})
end = time.time()

print(f"Took {end-start} seconds")
kgraph.to_jsonl("results/kgraph_gpt3_custom_relation.json")

Matching relations...
Generating commonsense graph...
Took 3.2485480308532715 seconds


### Commonsense Knowledge Linking

In [1]:
from kogito.linkers.deberta import DebertaLinker
from kogito.core.knowledge import KnowledgeGraph

# Build the linker from a local checkpoint path.
# NOTE(review): the path is relative to the notebook's working directory and
# points outside this folder — confirm it exists before running.
linker = DebertaLinker("../ComFact_DeBERTa/deberta-large-nlu-fact_full/checkpoint-236560")
# Context given as a list of pre-tokenized, lower-cased sentences.
context = [
      "joey was pretending to drive his wife to work .",
      "the truth was that he was taking her on a trip .",
      "when they passed the road for her workplace , she asked what was up .",
      "that 's when he announced the trip detour plans .",
      "his wife was so thrilled and they really enjoyed their trip together ."
    ]
# Candidate knowledge to link, read from a pipe-delimited file.
input_graph = KnowledgeGraph.from_csv("sample_linking_graph.csv", sep="|", header=None)
# `link` returns a list of floats (printed below); presumably one relevance
# probability per knowledge item in `input_graph` — confirm.
relevance_probs = linker.link(input_graph, context)
print(relevance_probs)

# `filter` returns a graph; in the recorded output only one Knowledge item remains.
filtered_graph = linker.filter(input_graph, context)

print(filtered_graph)

  from .autonotebook import tqdm as notebook_tqdm


[0.0002484701981302351, 6.804546865168959e-05, 0.9954416751861572]
Knowledge(head="drive", relation="HasSubEvent", tails=[' get into car'])


  attention_scores = torch.bmm(query_layer, key_layer.transpose(-1, -2)) / torch.tensor(
  score += c2p_att / torch.tensor(scale, dtype=c2p_att.dtype)
  score += p2c_att / torch.tensor(scale, dtype=p2c_att.dtype)
