# 🚞 Zero-shot RE Training

In [33]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [34]:
# If you're running this in a Colab notebook, uncomment and run this cell to install the necessary dependencies
# !git clone https://github.com/jackboyla/GLiREL.git
# !cd GLiREL && pip install -e .
# !python -m spacy download en_core_web_sm

In [35]:
from glirel import GLiREL

# Checkpoint directory, relative to the working directory (presumably written
# by a training run -- replace with the path to your own checkpoint).
save_path = 'logs/zero_rel/zero_rel-2024-05-30__19-30-11/model_1500'
# Load the model from that directory and keep it on the CPU.
model = GLiREL.from_pretrained(save_path, device='cpu')
# model

config.json not found in /home/jackboylan/GLiREL/logs/zero_rel/zero_rel-2024-05-30__19-30-11/model_1500


# Inference

To run inference, the model needs the tokenized text (`tokens`), the entity spans (`ner`), and the candidate zero-shot relation `labels`.

### Eval data

In [36]:
import json

# One JSON object per line. Read as UTF-8 explicitly so the result does not
# depend on the platform's default encoding, and skip blank lines (e.g. a
# trailing newline at the end of the file) instead of failing in json.loads.
with open('./data/few_rel_eval.jsonl', 'r', encoding='utf-8') as f:
    data = [json.loads(line) for line in f if line.strip()]

i = 0  # index of the evaluation example to inspect

tokens = data[i]['tokenized_text']
ner = data[i]['ner']
# De-duplicate the gold relation labels while keeping first-seen order.
# dict.fromkeys is order-preserving, so the list is identical on every kernel
# run; list(set(...)) would order strings differently between runs.
labels = list(dict.fromkeys(r['relation_text'] for r in data[i]['relations']))
print(tokens)
print()
print(ner)
print(labels)

['The', 'race', 'took', 'place', 'between', 'Godstow', 'and', 'Binsey', 'along', 'the', 'Upper', 'River', 'Thames', '.']

[[7, 8, 'Q4914513', 'Binsey'], [11, 13, 'Q19686', 'River Thames']]
['located in or next to body of water']


In [37]:
# Extra candidate labels the model has to tell apart from the gold relation(s).
distractor_labels = ['country of origin', 'licensed to broadcast to', 'father', 'followed by']
# Keep only the labels that are not already distractors before appending them,
# so re-running this cell leaves `labels` unchanged instead of prepending the
# distractors a second time.
labels = distractor_labels + [label for label in labels if label not in distractor_labels]
print(labels)

['country of origin', 'licensed to broadcast to', 'father', 'followed by', 'located in or next to body of water']


In [38]:
# Score the candidate labels for the entities of the selected example.
relations = model.predict_relations(tokens, labels, threshold=0.01, ner=ner)

print('Number of relations:', len(relations))  # entity pairs (both directions) * number of labels, minus any predictions that do not clear the threshold

# Show the predictions from highest to lowest score.
sorted_data_desc = sorted(relations, key=lambda x: x['score'], reverse=True)
print("\nDescending Order by Score:")
for item in sorted_data_desc:
    print(item)

Number of relations: 10

Descending Order by Score:
{'head_pos': [11, 13], 'tail_pos': [7, 8], 'head_text': ['River', 'Thames'], 'tail_text': ['Binsey'], 'label': 'country of origin', 'score': 0.14839360117912292}
{'head_pos': [7, 8], 'tail_pos': [11, 13], 'head_text': ['Binsey'], 'tail_text': ['River', 'Thames'], 'label': 'country of origin', 'score': 0.14803923666477203}
{'head_pos': [7, 8], 'tail_pos': [11, 13], 'head_text': ['Binsey'], 'tail_text': ['River', 'Thames'], 'label': 'followed by', 'score': 0.1224965825676918}
{'head_pos': [11, 13], 'tail_pos': [7, 8], 'head_text': ['River', 'Thames'], 'tail_text': ['Binsey'], 'label': 'followed by', 'score': 0.12224692106246948}
{'head_pos': [7, 8], 'tail_pos': [11, 13], 'head_text': ['Binsey'], 'tail_text': ['River', 'Thames'], 'label': 'father', 'score': 0.1204436868429184}
{'head_pos': [11, 13], 'tail_pos': [7, 8], 'head_text': ['River', 'Thames'], 'tail_text': ['Binsey'], 'label': 'father', 'score': 0.12020771950483322}
{'head_pos':

### Real-world example

In [39]:
# Real-world example
import spacy
# spaCy pipeline; predict_and_show uses it to tokenize the text and detect entities.
nlp = spacy.load('en_core_web_sm')

# Raw (untokenized) example sentence.
text = "Apple Inc. was founded by Steve Jobs, Steve Wozniak, and Ronald Wayne in April 1976. The company is headquartered in Cupertino, California."

# Alternative example sentence (disabled):
# text = "Jack Dorsey's father, Tim Dorsey, is a licensed pilot. Jack met his wife Sarah Paulson in New York in 2003. They have one son, Edward."

# Candidate relation labels handed to the model for this example.
labels = [
    'co-founder', 
    'country of origin', 
    'licensed to broadcast to', 
    'no relation', 'parent', 
    'followed by', 
    'located in or next to body of water', 
    'spouse', 
    'child', 
    'founder', 
    'headquartered in', 
    'acquired by', 
    'subsidiary of'
    ]



def predict_and_show(text, labels, threshold=0.01):
    """Detect entities in `text`, score relations between them, and print the ranking.

    Relies on two notebook-level globals: `nlp` (the spaCy pipeline used for
    tokenization and entity detection) and `model` (the loaded GLiREL model).

    Args:
        text: Raw input string.
        labels: Candidate relation labels passed to the model.
        threshold: Score threshold forwarded to `model.predict_relations`.
            Defaults to 0.01, the value that was previously hard-coded.

    Returns:
        None. All results are printed, so ending a cell with a call to this
        function does not echo the full prediction list.
    """
    doc = nlp(text)

    tokens = [token.text for token in doc]

    # One entry per detected entity: [start_token, end_token, entity_type, entity_text]
    ner = [[ent.start, ent.end, ent.label_, ent.text] for ent in doc.ents]
    print(f"Entities detected: {ner}")

    relations = model.predict_relations(tokens, labels, threshold=threshold, ner=ner)

    print('Number of relations:', len(relations))

    # Highest-scoring predictions first.
    ranked = sorted(relations, key=lambda x: x['score'], reverse=True)
    print("\nDescending Order by Score:")
    for item in ranked:
        print(f"{item['head_text']} --> {item['label']} --> {item['tail_text']} | score: {item['score']}")

# Run entity detection and relation scoring on the example sentence and label set defined above.
predict_and_show(text, labels)

Entities detected: [[0, 2, 'ORG', 'Apple Inc.'], [5, 7, 'PERSON', 'Steve Jobs'], [8, 10, 'PERSON', 'Steve Wozniak'], [12, 14, 'PERSON', 'Ronald Wayne'], [15, 17, 'DATE', 'April 1976'], [23, 24, 'GPE', 'Cupertino'], [25, 26, 'GPE', 'California']]
Number of relations: 546

Descending Order by Score:
['Apple', 'Inc.'] --> co-founder --> ['California'] | score: 0.14972315728664398
['California'] --> co-founder --> ['Apple', 'Inc.'] | score: 0.1495904177427292
['Steve', 'Wozniak'] --> co-founder --> ['California'] | score: 0.1480676233768463
['California'] --> co-founder --> ['Steve', 'Wozniak'] | score: 0.14771325886249542
['Cupertino'] --> co-founder --> ['California'] | score: 0.14632150530815125
['April', '1976'] --> co-founder --> ['California'] | score: 0.14591266214847565
['California'] --> co-founder --> ['Cupertino'] | score: 0.14553959667682648
['Steve', 'Jobs'] --> co-founder --> ['California'] | score: 0.1454467624425888
['Ronald', 'Wayne'] --> co-founder --> ['California'] | sc

In [40]:
# Second example with its own label set. This rebinds the notebook-level
# `text` and `labels` variables.
text = "Jack knows Gill. They live in the same house in London. They are not related."
labels = ['family relation', 'knows', 'lives with', 'loves', 'licensed to broadcast to', 'father', 'followed by', 'no relation', 'lives in',]
predict_and_show(text, labels)

Entities detected: [[0, 1, 'PERSON', 'Jack'], [2, 3, 'PERSON', 'Gill'], [11, 12, 'GPE', 'London']]


Number of relations: 54

Descending Order by Score:
['Gill'] --> family relation --> ['Jack'] | score: 0.13734351098537445
['Jack'] --> family relation --> ['Gill'] | score: 0.13579922914505005
['London'] --> family relation --> ['Jack'] | score: 0.1350933462381363
['Jack'] --> family relation --> ['London'] | score: 0.1340429186820984
['Gill'] --> family relation --> ['London'] | score: 0.12584228813648224
['London'] --> family relation --> ['Gill'] | score: 0.12538231909275055
['Gill'] --> lives with --> ['Jack'] | score: 0.11385937035083771
['Jack'] --> lives with --> ['Gill'] | score: 0.11284440010786057
['London'] --> lives with --> ['Jack'] | score: 0.11166220903396606
['Jack'] --> lives with --> ['London'] | score: 0.11117828637361526
['Gill'] --> father --> ['Jack'] | score: 0.1069408506155014
['Jack'] --> father --> ['Gill'] | score: 0.10569822788238525
['Gill'] --> followed by --> ['Jack'] | score: 0.10547350347042084
['London'] --> father --> ['Jack'] | score: 0.104903802275

In [41]:
import spacy
from spacy.tokens import Span
from spacy import displacy

def _entity_label(text):
    """Return an entity's text as one string; token lists are joined with spaces."""
    if isinstance(text, (list, tuple)):
        return " ".join(text)
    return text


def visualize_relation(text, relations):
    """Draw relations between entity spans as displaCy dependency arcs.

    The text is tokenized with a blank English pipeline. For every relation,
    the root token of the head span is attached to the root token of the tail
    span with the arc label "rel"; all other tokens keep the placeholder
    label "dep".

    Args:
        text: Sentence to render.
        relations: Iterable of dicts with the keys 'head_pos' and 'tail_pos'
            (token [start, end] positions passed to `Span`) and 'head_text'
            and 'tail_text' (used as span labels; either a string or a list
            of tokens). Other keys such as 'label' and 'score' are ignored.

    Returns:
        None. The figure is rendered inline via displaCy (`jupyter=True`).
    """
    doc = spacy.blank("en")(text)

    # Manually set dependency relations to visualize relations
    for token in doc:
        token.dep_ = "dep"  # default to 'dep'

    entity_spans = []
    claimed_tokens = set()  # token indices already covered by a registered entity
    for rel in relations:
        # Positions are token indices into `doc`. Labels must be strings, so
        # token lists are joined first.
        head = Span(doc, rel['head_pos'][0], rel['head_pos'][1], label=_entity_label(rel['head_text']))
        tail = Span(doc, rel['tail_pos'][0], rel['tail_pos'][1], label=_entity_label(rel['tail_text']))

        # doc.ents rejects overlapping spans, and the same entity commonly
        # appears in several relations (e.g. both directions of one pair), so
        # register each stretch of tokens only once.
        for span in (head, tail):
            covered = range(span.start, span.end)
            if claimed_tokens.isdisjoint(covered):
                claimed_tokens.update(covered)
                entity_spans.append(span)

        # Mock dependencies
        head_root = head.root
        tail_root = tail.root

        head_root.dep_ = "rel"  # Relation type can be customized
        head_root.head = tail_root  # Point head to tail

    if entity_spans:
        doc.ents = list(doc.ents) + entity_spans

    options = {"fine_grained": True}
    displacy.render(doc, style="dep", options=options, jupyter=True)

# Example data
# The relation label is written into the sentence itself, between the two entities.
text = "Binsey located in or next to body of water River Thames"
# 'head_pos' / 'tail_pos' are token positions into `text`
# (assuming whitespace-like tokenization, [9, 11] covers "River Thames", i.e. the end index is exclusive).
relations = [
    {'head_pos': [0, 1], 'tail_pos': [9, 11], 'head_text': 'Binsey', 'tail_text': 'River Thames', 'label': 'located in or next to body of water', 'score': 0.9235768914222717},
    # Reverse direction of the same pair (disabled):
    # {'head_pos': [9, 11], 'tail_pos': [0, 1], 'head_text': 'River Thames', 'tail_text': 'Binsey', 'label': 'located in or next to body of water', 'score': 0.12615662813186646}
]

visualize_relation(text, relations)
