In [6]:
%load_ext autoreload
%autoreload 2

### Sample usage

In [None]:
from kogito.models.bart.comet_bart import COMETBART
from kogito.inference import CommonsenseInference
from kogito.models.base import KnowledgeModel
from kogito.core.knowledge import KnowledgeGraph

import os

# Location of the pretrained COMET-BART checkpoint. Set the COMET_BART_PATH
# environment variable to point at your own copy; the fallback is the path this
# notebook was originally authored with, so existing setups keep working.
comet_bart_path = os.environ.get(
    "COMET_BART_PATH",
    "/Users/mismayil/Desktop/EPFL/nlplab/comet-atomic-2020/comet-atomic_2020_BART",
)
model: KnowledgeModel = COMETBART.from_pretrained(comet_bart_path)

# Inference object used by the following cells as well.
csi = CommonsenseInference()
text = "Gabby always brought cookies to school. But at lunch, everyone wanted them. And she had a hard time saying no. Gabby began to hate the other students. And at lunch, she ate far away from everyone."

# Run inference on the text with the loaded model; model_args is forwarded
# as-is (3 generations, batch size 128).
kgraph: KnowledgeGraph = csi.infer(text, model, model_args={"num_generate": 3, "batch_size": 128})

# Persist the resulting graph to disk.
kgraph.to_jsonl("kgraph2.json")

### Customizing processors

In [None]:
# Display the processors currently held by the inference object.
csi.processors

In [None]:
# Remove the processor registered under the name "noun_phrase_extractor".
# NOTE(review): presumably one of the processors set up by default — confirm
# against the processor listing shown in the previous cell.
csi.remove_processor("noun_phrase_extractor")

In [None]:
# Display the processors again to check the effect of the removal.
csi.processors

In [None]:
from kogito.core.head import KnowledgeHeadExtractor, KnowledgeHead, KnowledgeHeadType
from typing import Optional, List
from spacy.tokens import Doc

import spacy

class NounHeadExtractor(KnowledgeHeadExtractor):
    """Custom head extractor that emits one knowledge head per noun token."""

    def extract(self, text: str, doc: Optional[Doc] = None) -> List[KnowledgeHead]:
        """Return a knowledge head for every token whose POS tag is ``NOUN``.

        Args:
            text: Raw input text. It is parsed with ``self.lang`` only when no
                ``doc`` is supplied.
            doc: Optional, already parsed spaCy document to read tokens from.

        Returns:
            One ``KnowledgeHead`` per noun token, in document order. Each head
            carries the token text and the token itself as ``entity``.
        """
        # Compare against None explicitly: a Doc with zero tokens is falsy, so
        # `if not doc` would discard a caller-supplied (empty) parse and run
        # the pipeline again.
        if doc is None:
            doc = self.lang(text)

        # Heads are tagged NOUN_PHRASE.
        # NOTE(review): presumably because there is no dedicated single-noun
        # head type — confirm against KnowledgeHeadType.
        return [
            KnowledgeHead(text=token.text, type=KnowledgeHeadType.NOUN_PHRASE, entity=token)
            for token in doc
            if token.pos_ == "NOUN"
        ]

# Instantiate the custom extractor under the name "noun_extractor", handing it
# the "en_core_web_sm" spaCy pipeline (presumably what the extractor exposes as
# `self.lang` — confirm in KnowledgeHeadExtractor), then add it to `csi`.
noun_extractor = NounHeadExtractor("noun_extractor", spacy.load("en_core_web_sm"))
csi.add_processor(noun_extractor)

In [None]:
# Display the processors once more to check that "noun_extractor" was added.
csi.processors

In [1]:
from kogito.inference import CommonsenseInference
from kogito.core.knowledge import KnowledgeGraph
from kogito.models.bart.comet_bart import COMETBART
from kogito.models.base import KnowledgeModel

# Fresh inference object constructed with no arguments; the dry-run cells
# below reuse it.
csi = CommonsenseInference()


### Dry-run

In [2]:
# Dry run: no model is passed to infer(). Per the cell output, only head
# extraction and relation matching are reported.
text = "Gabby always brought cookies to school."
kgraph: KnowledgeGraph = csi.infer(text, dry_run=True)
# Write the resulting graph to disk.
kgraph.to_jsonl("kgraph_dry_run.json")

Extracting heads...
Matching relations...


In [5]:
# Same dry run on a two-sentence, informally written input.
text = "I wanted to feed him. he didnt listen to me"
kgraph: KnowledgeGraph = csi.infer(text, dry_run=True)
# Write the resulting graph to disk.
kgraph.to_jsonl("kgraph_dry_run_2.json")

Extracting heads...
Matching relations...


### Relation subset

In [4]:

from kogito.core.relation import OBJECT_USE, CAUSES
# Dry run with an explicit list of relations (OBJECT_USE and CAUSES).
# NOTE(review): presumably this limits matching to the listed relations —
# confirm against CommonsenseInference.infer.
text = "Gabby always brought cookies to school."
kgraph: KnowledgeGraph = csi.infer(text, dry_run=True, relations=[OBJECT_USE, CAUSES])
# Write the resulting graph to disk.
kgraph.to_jsonl("kgraph_rel_subset.json")

Extracting heads...
Matching relations...


### No Head extraction

In [5]:
# extract_heads=False: the cell output shows only "Matching relations...",
# i.e. the head-extraction step is not reported.
text = "Gabby always brought cookies to school."
kgraph: KnowledgeGraph = csi.infer(text, dry_run=True, extract_heads=False)
# Write the resulting graph to disk.
kgraph.to_jsonl("kgraph_no_head_extract.json")

Matching relations...


### No Relation matching and no subset of relations

In [6]:
text = "Gabby always brought cookies to school."
# With match_relations=False and no explicit `relations`, infer() raises
# ValueError ("No relation found to match"). The error is the point of this
# cell, so catch and display it rather than leaving an uncaught exception that
# aborts "Restart Kernel -> Run All".
try:
    kgraph: KnowledgeGraph = csi.infer(text, dry_run=True, match_relations=False)
except ValueError as err:
    print(f"ValueError: {err}")

Extracting heads...


ValueError: No relation found to match

### No Relation matching with subset of relations

In [7]:
from kogito.core.relation import DESIRES, CAUSES
# Relation matching is switched off but relations are given explicitly
# (CAUSES and DESIRES); per the cell output only head extraction is reported.
text = "Gabby always brought cookies to school."
kgraph: KnowledgeGraph = csi.infer(text, dry_run=True, match_relations=False, relations=[CAUSES, DESIRES])
# Write the resulting graph to disk.
kgraph.to_jsonl("kgraph_no_match_subset.json")

Extracting heads...


### No head extraction and no relation matching, with a subset of relations (fully manual specification)

In [8]:
from kogito.core.relation import DESIRES, CAUSES
# Both head extraction and relation matching are switched off; only the
# explicitly listed relations (CAUSES and DESIRES) are supplied.
# NOTE(review): presumably the whole text then serves as the single head —
# confirm against CommonsenseInference.infer.
text = "Gabby always brought cookies to school."
kgraph: KnowledgeGraph = csi.infer(text, dry_run=True, extract_heads=False, match_relations=False, relations=[CAUSES, DESIRES])
# Write the resulting graph to disk.
kgraph.to_jsonl("kgraph_manual.json")

### Custom heads

In [9]:
from kogito.inference import CommonsenseInference
from kogito.core.knowledge import KnowledgeGraph

csi = CommonsenseInference()
# Heads are supplied directly through `heads=`, so no input text is passed and
# (per the cell output) only relation matching is reported.
kgraph: KnowledgeGraph = csi.infer(heads=["post office", "to get out of the room"], dry_run=True)
# Write the resulting graph to disk.
kgraph.to_jsonl("kgraph_manual_heads.json")

Matching relations...


### Model based Relation matching (SWEM)

In [7]:
from kogito.inference import CommonsenseInference
from kogito.core.knowledge import KnowledgeGraph
from kogito.core.processors.relation import SWEMRelationMatcher
from kogito.models.bart.comet_bart import COMETBART
from kogito.models.base import KnowledgeModel
import time

# Replace the "simple_relation_matcher" processor with the SWEM-based matcher.
csi = CommonsenseInference()
csi.remove_processor("simple_relation_matcher")
swem_matcher = SWEMRelationMatcher("swem_relation_matcher")
csi.add_processor(swem_matcher)

# perf_counter() is monotonic and high-resolution, so the measured interval
# cannot be skewed by system clock adjustments the way time.time() can.
start = time.perf_counter()
kgraph: KnowledgeGraph = csi.infer(heads=["banana", "love another", "Student gets a card"], dry_run=True)
end = time.perf_counter()
print(f"Took {end-start} seconds")

# Write the resulting graph to disk.
kgraph.to_jsonl("kgraph_modelbased_relations_swem.json")

Matching relations...
Took 0.5735430717468262 seconds


### Model based Relation matching (DistilBERT)

In [6]:
from kogito.inference import CommonsenseInference
from kogito.core.knowledge import KnowledgeGraph
from kogito.core.processors.relation import DistilBertRelationMatcher
from kogito.models.base import KnowledgeModel
import time

# Replace the "simple_relation_matcher" processor with the DistilBERT-based
# matcher.
csi = CommonsenseInference()
csi.remove_processor("simple_relation_matcher")
dbert_matcher = DistilBertRelationMatcher("dbert_relation_matcher")
csi.add_processor(dbert_matcher)

# perf_counter() is monotonic and high-resolution, so the measured interval
# cannot be skewed by system clock adjustments the way time.time() can.
start = time.perf_counter()
kgraph: KnowledgeGraph = csi.infer(heads=["banana", "love another", "Student gets a card"], dry_run=True)
end = time.perf_counter()
print(f"Took {end-start} seconds")

# Write the resulting graph to disk.
kgraph.to_jsonl("kgraph_modelbased_relations_dbert.json")

DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443


Matching relations...


DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /distilbert-base-uncased/resolve/main/vocab.txt HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /distilbert-base-uncased/resolve/main/added_tokens.json HTTP/1.1" 404 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /distilbert-base-uncased/resolve/main/special_tokens_map.json HTTP/1.1" 404 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /distilbert-base-uncased/resolve/main/tokenizer_config.json HTTP/1.1" 200 0
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /distilbert-base-uncased/resolve/main/config.json HTTP/1.1" 200 0
IN

Predicting: 0it [00:00, ?it/s]

Took 4.495680809020996 seconds


### GPT-3 Based Commonsense Inference

In [2]:
from kogito.inference import CommonsenseInference
from kogito.core.knowledge import KnowledgeGraph
from kogito.models.gpt3 import GPT3Zeroshot
import time, os

csi = CommonsenseInference()
csi.remove_processor("simple_relation_matcher")

# Take the API key from the environment so it never has to be typed into (and
# committed with) the notebook. Falls back to the empty string used before.
model = GPT3Zeroshot(api_key=os.environ.get("OPENAI_API_KEY", ""), model_name="text-davinci-002")

# Tab-separated sample graph without a header row, passed to infer() below.
sample_graph = KnowledgeGraph.from_csv("sample_graph.tsv", sep="\t", header=None)
heads = ["PersonX accuses PersonY of cheating", "PersonX aces PersonX's exam"]

# perf_counter() is monotonic and high-resolution, so the measured interval
# cannot be skewed by system clock adjustments the way time.time() can.
start = time.perf_counter()
kgraph = csi.infer(model=model, heads=heads, sample_graph=sample_graph, model_args={"debug": True})
end = time.perf_counter()

print(f"Took {end-start} seconds")
# Write the resulting graph to disk.
kgraph.to_jsonl("kgraph_gpt3.json")

Matching relations...
Generating commonsense graph...
Took 1.9707791805267334 seconds


### GPT-3 Based Commonsense Inference with custom relation

In [4]:
from kogito.inference import CommonsenseInference
from kogito.core.knowledge import KnowledgeGraph
from kogito.models.gpt3 import GPT3Zeroshot
from kogito.core.relation import KnowledgeRelation, register_relation
import time, os

# New inference object with the "simple_relation_matcher" processor removed.
csi = CommonsenseInference()
csi.remove_processor("simple_relation_matcher")

def x_want2_verbalizer(head, **kwargs):
    """Render the prompt fragment for the custom ``xWant2`` relation.

    Args:
        head: The situation to verbalize.
        **kwargs: Only ``index`` is read. When present and not ``None`` it is
            placed after the word "Situation"; otherwise that slot is empty.

    Returns:
        A two-line string: the (optionally numbered) situation, followed by
        the lead-in "Wants: As a result, PersonX wants".
    """
    position = kwargs.get("index")
    if position is None:
        label = ""
    else:
        label = f"{position}"
    return f"Situation {label}: {head}\nWants: As a result, PersonX wants"

# Define a custom relation named "xWant2" from the x_want2_verbalizer function
# and a natural-language prompt, then register it.
# NOTE(review): presumably registration is what makes the relation available
# to inference — confirm in kogito.core.relation.
X_WANT2 = KnowledgeRelation("xWant2",
                            verbalizer=x_want2_verbalizer,
                            prompt="How does this situation affect each character's wants?")
register_relation(X_WANT2)

# Take the API key from the environment so it never has to be typed into (and
# committed with) the notebook. Falls back to the empty string used before.
model = GPT3Zeroshot(api_key=os.environ.get("OPENAI_API_KEY", ""), model_name="text-davinci-002")

# Tab-separated sample graph without a header row, passed to infer() below.
sample_graph = KnowledgeGraph.from_csv("sample_graph2.tsv", sep="\t", header=None)

heads = ["PersonX makes a huge mistake", "PersonX sees PersonY's point"]

# perf_counter() is monotonic and high-resolution, so the measured interval
# cannot be skewed by system clock adjustments the way time.time() can.
start = time.perf_counter()
kgraph = csi.infer(model=model,
                   heads=heads,
                   sample_graph=sample_graph,
                   model_args={"debug": True})
end = time.perf_counter()

print(f"Took {end-start} seconds")
# Write the resulting graph to disk.
kgraph.to_jsonl("kgraph_gpt3_custom_relation.json")

Matching relations...
Generating commonsense graph...
Took 1.8346340656280518 seconds
