In [1]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import math
import torch
import wikipedia
from newspaper import Article, ArticleException
from GoogleNews import GoogleNews
import IPython
from pyvis.network import Network

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
torch.cuda.is_available()

True

In [3]:
# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("Babelscape/rebel-large")
model = AutoModelForSeq2SeqLM.from_pretrained("Babelscape/rebel-large").to("cuda")

In [4]:
def extract_relations_from_model_output(text):
    relations = []
    relation, subject, relation, object_ = '', '', '', ''
    text = text.strip()
    current = 'x'
    text_replaced = text.replace("<s>", "").replace("<pad>", "").replace("</s>", "")
    for token in text_replaced.split():
        if token == "<triplet>":
            current = 't'
            if relation != '':
                relations.append({
                    'head': subject.strip(),
                    'type': relation.strip(),
                    'tail': object_.strip()
                })
                relation = ''
            subject = ''
        elif token == "<subj>":
            current = 's'
            if relation != '':
                relations.append({
                    'head': subject.strip(),
                    'type': relation.strip(),
                    'tail': object_.strip()
                })
            object_ = ''
        elif token == "<obj>":
            current = 'o'
            relation = ''
        else:
            if current == 't':
                subject += ' ' + token
            elif current == 's':
                object_ += ' ' + token
            elif current == 'o':
                relation += ' ' + token
    if subject != '' and relation != '' and object_ != '':
        relations.append({
            'head': subject.strip(),
            'type': relation.strip(),
            'tail': object_.strip()
        })
    return relations

In [None]:
class KB():
    def __init__(self):
        self.relations = []

    def are_relations_equal(self, r1, r2):
        return all(r1[attr] == r2[attr] for attr in ["head", "type", "tail"])

    def exists_relation(self, r1):
        return any(self.are_relations_equal(r1, r2) for r2 in self.relations)

    def add_relation(self, r):
        if not self.exists_relation(r):
            self.relations.append(r)

    def print(self):
        print("Relations:")
        for r in self.relations:
            print(f"  {r}")

In [None]:

def from_small_text_to_kb(text, verbose=False):
    kb = KB()

    # Tokenizer text
    model_inputs = tokenizer(text, max_length=512, padding=True, truncation=True,
                            return_tensors='pt')
    if verbose:
        print(f"Num tokens: {len(model_inputs['input_ids'][0])}")

    # Generate
    gen_kwargs = {
        "max_length": 216,
        "length_penalty": 0,
        "num_beams": 3,
        "num_return_sequences": 3
    }
    generated_tokens = model.generate(
        **model_inputs.to(model.device),
        **gen_kwargs,
    )
    decoded_preds = tokenizer.batch_decode(generated_tokens, skip_special_tokens=False)

    # create kb
    for sentence_pred in decoded_preds:
        relations = extract_relations_from_model_output(sentence_pred)
        for r in relations:
            kb.add_relation(r)

    return kb

In [None]:

text = "Napoleon Bonaparte (born Napoleone di Buonaparte; 15 August 1769 – 5 " \
"May 1821), and later known by his regnal name Napoleon I, was a French military " \
"and political leader who rose to prominence during the French Revolution and led " \
"several successful campaigns during the Revolutionary Wars. He was the de facto " \
"leader of the French Republic as First Consul from 1799 to 1804. As Napoleon I, " \
"he was Emperor of the French from 1804 until 1814 and again in 1815. Napoleon's " \
"political and cultural legacy has endured, and he has been one of the most " \
"celebrated and controversial leaders in world history."

kb = from_small_text_to_kb(text, verbose=True)
kb.print()
# Num tokens: 133
# Relations:
#   {'head': 'Napoleon Bonaparte', 'type': 'date of birth', 'tail': '15 August 1769'}
#   {'head': 'Napoleon Bonaparte', 'type': 'date of death', 'tail': '5 May 1821'}
#   {'head': 'Napoleon Bonaparte', 'type': 'participant in', 'tail': 'French Revolution'}
#   {'head': 'Napoleon Bonaparte', 'type': 'conflict', 'tail': 'Revolutionary Wars'}
#   {'head': 'Revolutionary Wars', 'type': 'part of', 'tail': 'French Revolution'}
#   {'head': 'French Revolution', 'type': 'participant', 'tail': 'Napoleon Bonaparte'}
#   {'head': 'Revolutionary Wars', 'type': 'participant', 'tail': 'Napoleon Bonaparte'}

In [5]:
class KB():
    def __init__(self):
        self.relations = []

    def are_relations_equal(self, r1, r2):
        return all(r1[attr] == r2[attr] for attr in ["head", "type", "tail"])

    def exists_relation(self, r1):
        return any(self.are_relations_equal(r1, r2) for r2 in self.relations)

    # def add_relation(self, r):
    #     if not self.exists_relation(r):
    #         self.relations.append(r)

    def print(self):
        print("Relations:")
        for r in self.relations:
            print(f"  {r}")

    def merge_relations(self, r1):
        r2 = [r for r in self.relations
              if self.are_relations_equal(r1, r)][0]
        spans_to_add = [span for span in r1["meta"]["spans"]
                        if span not in r2["meta"]["spans"]]
        r2["meta"]["spans"] += spans_to_add

    def add_relation(self, r):
        if not self.exists_relation(r):
            self.relations.append(r)
        else:
            self.merge_relations(r)

In [6]:
def from_text_to_kb(text, span_length=128, verbose=False):
    # tokenize whole text
    inputs = tokenizer([text], return_tensors="pt")

    # compute span boundaries
    num_tokens = len(inputs["input_ids"][0])
    if verbose:
        print(f"Input has {num_tokens} tokens")
    num_spans = math.ceil(num_tokens / span_length)
    if verbose:
        print(f"Input has {num_spans} spans")
    overlap = math.ceil((num_spans * span_length - num_tokens) / 
                        max(num_spans - 1, 1))
    spans_boundaries = []
    start = 0
    for i in range(num_spans):
        spans_boundaries.append([start + span_length * i,
                                 start + span_length * (i + 1)])
        start -= overlap
    if verbose:
        print(f"Span boundaries are {spans_boundaries}")

    # transform input with spans
    tensor_ids = [inputs["input_ids"][0][boundary[0]:boundary[1]]
                  for boundary in spans_boundaries]
    tensor_masks = [inputs["attention_mask"][0][boundary[0]:boundary[1]]
                    for boundary in spans_boundaries]
    inputs = {
        "input_ids": torch.stack(tensor_ids),
        "attention_mask": torch.stack(tensor_masks)
    }

    # generate relations
    num_return_sequences = 3
    gen_kwargs = {
        "max_length": 256,
        "length_penalty": 0,
        "num_beams": 3,
        "num_return_sequences": num_return_sequences
    }
    generated_tokens = model.generate(
        **inputs,
        **gen_kwargs,
    )

    # decode relations
    decoded_preds = tokenizer.batch_decode(generated_tokens,
                                           skip_special_tokens=False)

    # create kb
    kb = KB()
    i = 0
    for sentence_pred in decoded_preds:
        current_span_index = i // num_return_sequences
        relations = extract_relations_from_model_output(sentence_pred)
        for relation in relations:
            relation["meta"] = {
                "spans": [spans_boundaries[current_span_index]]
            }
            kb.add_relation(relation)
        i += 1

    return kb

In [7]:
def from_text_to_kb(text, span_length=128, verbose=False):
    # tokenize whole text
    inputs = tokenizer([text], return_tensors="pt")

    # compute span boundaries
    num_tokens = len(inputs["input_ids"][0])
    if verbose:
        print(f"Input has {num_tokens} tokens")
    num_spans = math.ceil(num_tokens / span_length)
    if verbose:
        print(f"Input has {num_spans} spans")
    overlap = math.ceil((num_spans * span_length - num_tokens) / 
                        max(num_spans - 1, 1))
    spans_boundaries = []
    start = 0
    for i in range(num_spans):
        spans_boundaries.append([start + span_length * i,
                                 start + span_length * (i + 1)])
        start -= overlap
    if verbose:
        print(f"Span boundaries are {spans_boundaries}")

    # transform input with spans
    tensor_ids = [inputs["input_ids"][0][boundary[0]:boundary[1]]
                  for boundary in spans_boundaries]
    tensor_masks = [inputs["attention_mask"][0][boundary[0]:boundary[1]]
                    for boundary in spans_boundaries]
    inputs = {
        "input_ids": torch.stack(tensor_ids),
        "attention_mask": torch.stack(tensor_masks)
    }

    # generate relations
    num_return_sequences = 3
    gen_kwargs = {
        "max_length": 256,
        "length_penalty": 0,
        "num_beams": 3,
        "num_return_sequences": num_return_sequences,
    }
    generated_tokens = model.generate(
        **inputs,
        **gen_kwargs,
    )

    # decode relations
    decoded_preds = tokenizer.batch_decode(generated_tokens,
                                           skip_special_tokens=False)

    # create kb
    kb = KB()
    i = 0
    for sentence_pred in decoded_preds:
        current_span_index = i // num_return_sequences
        relations = extract_relations_from_model_output(sentence_pred)
        for relation in relations:
            relation["meta"] = {
                "spans": [spans_boundaries[current_span_index]]
            }
            kb.add_relation(relation)
        i += 1

    return kb

In [None]:
text = """
Napoleon Bonaparte (born Napoleone di Buonaparte; 15 August 1769 – 5 May 1821), and later known by his regnal name Napoleon I, was a French military and political leader who rose to prominence during the French Revolution and led several successful campaigns during the Revolutionary Wars. He was the de facto leader of the French Republic as First Consul from 1799 to 1804. As Napoleon I, he was Emperor of the French from 1804 until 1814 and again in 1815. Napoleon's political and cultural legacy has endured, and he has been one of the most celebrated and controversial leaders in world history. Napoleon was born on the island of Corsica not long after its annexation by the Kingdom of France.[5] He supported the French Revolution in 1789 while serving in the French army, and tried to spread its ideals to his native Corsica. He rose rapidly in the Army after he saved the governing French Directory by firing on royalist insurgents. In 1796, he began a military campaign against the Austrians and their Italian allies, scoring decisive victories and becoming a national hero. Two years later, he led a military expedition to Egypt that served as a springboard to political power. He engineered a coup in November 1799 and became First Consul of the Republic. Differences with the British meant that the French faced the War of the Third Coalition by 1805. Napoleon shattered this coalition with victories in the Ulm Campaign, and at the Battle of Austerlitz, which led to the dissolving of the Holy Roman Empire. In 1806, the Fourth Coalition took up arms against him because Prussia became worried about growing French influence on the continent. Napoleon knocked out Prussia at the battles of Jena and Auerstedt, marched the Grande Armée into Eastern Europe, annihilating the Russians in June 1807 at Friedland, and forcing the defeated nations of the Fourth Coalition to accept the Treaties of Tilsit. Two years later, the Austrians challenged the French again during the War of the Fifth Coalition, but Napoleon solidified his grip over Europe after triumphing at the Battle of Wagram. Hoping to extend the Continental System, his embargo against Britain, Napoleon invaded the Iberian Peninsula and declared his brother Joseph King of Spain in 1808. The Spanish and the Portuguese revolted in the Peninsular War, culminating in defeat for Napoleon's marshals. Napoleon launched an invasion of Russia in the summer of 1812. The resulting campaign witnessed the catastrophic retreat of Napoleon's Grande Armée. In 1813, Prussia and Austria joined Russian forces in a Sixth Coalition against France. A chaotic military campaign resulted in a large coalition army defeating Napoleon at the Battle of Leipzig in October 1813. The coalition invaded France and captured Paris, forcing Napoleon to abdicate in April 1814. He was exiled to the island of Elba, between Corsica and Italy. In France, the Bourbons were restored to power. However, Napoleon escaped Elba in February 1815 and took control of France.[6][7] The Allies responded by forming a Seventh Coalition, which defeated Napoleon at the Battle of Waterloo in June 1815. The British exiled him to the remote island of Saint Helena in the Atlantic, where he died in 1821 at the age of 51. Napoleon had an extensive impact on the modern world, bringing liberal reforms to the many countries he conquered, especially the Low Countries, Switzerland, and parts of modern Italy and Germany. He implemented liberal policies in France and Western Europe.
"""

kb = from_text_to_kb(text, verbose=True)
kb.print()
# Input has 726 tokens
# Input has 6 spans
# Span boundaries are [[0, 128], [119, 247], [238, 366], [357, 485], [476, 604], [595, 723]]
# Relations:
#   {'head': 'Napoleon Bonaparte', 'type': 'date of birth',
#    'tail': '15 August 1769', 'meta': {'spans': [[0, 128]]}}
#   ...
#   {'head': 'Napoleon', 'type': 'place of birth',
#    'tail': 'Corsica', 'meta': {'spans': [[119, 247]]}}
#   ...
#   {'head': 'Fourth Coalition', 'type': 'start time',
#    'tail': '1806', 'meta': {'spans': [[238, 366]]}}
#   ...

In [8]:
import requests
import json
def generate_context(query:str, num_urls:int):
    
    response= requests.post("https://qagen.paperbot.ai/extract_all_passages/", json={
                                                                                    "query": query,
                                                                                    "num_urls": int(num_urls),
                                                                                    } )
    
    if response.ok:
        # d= eval(response.content)
        paragrahs= json.loads(response.content.decode(
                                                        'utf-8'
                                                    ))['paragraphs']
        return ''.join(paragrahs)
        
    else:
        print("Couldn't get the response from the 'extract-all-passages'   🥲")
        return 

In [30]:
response= requests.post("https://qagen.paperbot.ai/extract_all_passages/", json={
                                                                                    "query": "best cat ear headphones",
                                                                                    "num_urls": int(1),
                                                                                    } )
json.loads(response.content.decode(
                                                        'utf-8'
                                                    ))    

{'paragraphs': ['Here’s a rundown of the 10 best cat ear headphones that will make you feel all kawaii',
  'Headphones nowadays come in different shapes and sizes. Some may look more like your average headphones but some are much more unique — perhaps with flashing LEDs and cat ears.',
  'Kitty headphones or cat ear headphones have gained popularity through the years and you may want one for your own reasons. However, if you tried to buy one right off the bat, you may have learned the hard way that choosing one is not easy. Apart from their unique design, you also have to consider what’s inside to make sure that you’ll enjoy using them.',
  'Lucky for you, once you know what to actually look for, it will be easier. More than that, once you find out which cat ear headphones are the best, all you really have to do is choose. So, that’s what we’re going to do here!',
  'Cat ear headphones trace their roots to the Japanese concept of Nekomimi. This is a type of moe anthropomorphism in Japa

In [9]:
text= generate_context("best cat ear headphones", 1)
text

'Here’s a rundown of the 10 best cat ear headphones that will make you feel all kawaiiHeadphones nowadays come in different shapes and sizes. Some may look more like your average headphones but some are much more unique — perhaps with flashing LEDs and cat ears.Kitty headphones or cat ear headphones have gained popularity through the years and you may want one for your own reasons. However, if you tried to buy one right off the bat, you may have learned the hard way that choosing one is not easy. Apart from their unique design, you also have to consider what’s inside to make sure that you’ll enjoy using them.Lucky for you, once you know what to actually look for, it will be easier. More than that, once you find out which cat ear headphones are the best, all you really have to do is choose. So, that’s what we’re going to do here!Cat ear headphones trace their roots to the Japanese concept of Nekomimi. This is a type of moe anthropomorphism in Japanese anime or manga where human characte

In [None]:
kb = from_text_to_kb(text, verbose=True)
kb.print()

In [31]:
text

'Here’s a rundown of the 10 best cat ear headphones that will make you feel all kawaiiHeadphones nowadays come in different shapes and sizes. Some may look more like your average headphones but some are much more unique — perhaps with flashing LEDs and cat ears.Kitty headphones or cat ear headphones have gained popularity through the years and you may want one for your own reasons. However, if you tried to buy one right off the bat, you may have learned the hard way that choosing one is not easy. Apart from their unique design, you also have to consider what’s inside to make sure that you’ll enjoy using them.Lucky for you, once you know what to actually look for, it will be easier. More than that, once you find out which cat ear headphones are the best, all you really have to do is choose. So, that’s what we’re going to do here!Cat ear headphones trace their roots to the Japanese concept of Nekomimi. This is a type of moe anthropomorphism in Japanese anime or manga where human characte

In [32]:
with open("x.txt", "w") as fh:
    fh.write(text)

In [10]:
span_length= 128

In [11]:
model_inputs = tokenizer(text, max_length=512, padding=True, truncation=True,
                            return_tensors='pt')
model_inputs

{'input_ids': tensor([[    0, 11773,    17,    27,    29,    10, 29873,     9,     5,   158,
           275,  4758,  5567, 15684,    14,    40,   146,    47,   619,    70,
           449,  1584, 36729, 28873, 15797, 25708,   283,    11,   430, 16499,
             8, 10070,     4,   993,   189,   356,    55,   101,   110,   674,
         15684,    53,   103,    32,   203,    55,  2216,    93,  2532,    19,
         22643, 37274,     8,  4758, 12137,     4,   530, 18308, 15684,    50,
          4758,  5567, 15684,    33,  3491,  7347,   149,     5,   107,     8,
            47,   189,   236,    65,    13,   110,   308,  2188,     4,   635,
             6,   114,    47,  1381,     7,   907,    65,   235,   160,     5,
          6325,     6,    47,   189,    33,  2435,     5,   543,   169,    14,
          8348,    65,    16,    45,  1365,     4, 10501,    31,    49,  2216,
          1521,     6,    47,    67,    33,     7,  1701,    99,    17,    27,
            29,  1025,     7,   146,  

In [12]:
type(model_inputs)

transformers.tokenization_utils_base.BatchEncoding

In [13]:
inputs = tokenizer([text], return_tensors="pt")
inputs

Token indices sequence length is longer than the specified maximum sequence length for this model (8545 > 1024). Running this sequence through the model will result in indexing errors


{'input_ids': tensor([[    0, 11773,    17,  ...,  1437, 50140,     2]]), 'attention_mask': tensor([[1, 1, 1,  ..., 1, 1, 1]])}

In [14]:
num_tokens = len(inputs["input_ids"][0])
num_tokens

8545

In [15]:
num_spans = math.ceil(num_tokens / span_length)
num_spans

67

In [16]:
overlap = math.ceil((num_spans * span_length - num_tokens) / 
                        max(num_spans - 1, 1))
spans_boundaries = []
start = 0
for i in range(num_spans):
    spans_boundaries.append([start + span_length * i,
                             start + span_length * (i + 1)])
    start -= overlap

In [33]:
spans_boundaries

[[0, 128],
 [127, 255],
 [254, 382],
 [381, 509],
 [508, 636],
 [635, 763],
 [762, 890],
 [889, 1017],
 [1016, 1144],
 [1143, 1271],
 [1270, 1398],
 [1397, 1525],
 [1524, 1652],
 [1651, 1779],
 [1778, 1906],
 [1905, 2033],
 [2032, 2160],
 [2159, 2287],
 [2286, 2414],
 [2413, 2541],
 [2540, 2668],
 [2667, 2795],
 [2794, 2922],
 [2921, 3049],
 [3048, 3176],
 [3175, 3303],
 [3302, 3430],
 [3429, 3557],
 [3556, 3684],
 [3683, 3811],
 [3810, 3938],
 [3937, 4065],
 [4064, 4192],
 [4191, 4319],
 [4318, 4446],
 [4445, 4573],
 [4572, 4700],
 [4699, 4827],
 [4826, 4954],
 [4953, 5081],
 [5080, 5208],
 [5207, 5335],
 [5334, 5462],
 [5461, 5589],
 [5588, 5716],
 [5715, 5843],
 [5842, 5970],
 [5969, 6097],
 [6096, 6224],
 [6223, 6351],
 [6350, 6478],
 [6477, 6605],
 [6604, 6732],
 [6731, 6859],
 [6858, 6986],
 [6985, 7113],
 [7112, 7240],
 [7239, 7367],
 [7366, 7494],
 [7493, 7621],
 [7620, 7748],
 [7747, 7875],
 [7874, 8002],
 [8001, 8129],
 [8128, 8256],
 [8255, 8383],
 [8382, 8510]]

In [18]:
tensor_ids = [inputs["input_ids"][0][boundary[0]:boundary[1]]
                  for boundary in spans_boundaries]
tensor_masks = [inputs["attention_mask"][0][boundary[0]:boundary[1]]
                    for boundary in spans_boundaries]


In [19]:
inputs_ = {
        "input_ids": torch.stack(tensor_ids),
        "attention_mask": torch.stack(tensor_masks)
    }
inputs_

{'input_ids': tensor([[    0, 11773,    17,  ...,    14,    47,    17],
         [   17,    27,   890,  ...,   554,    11, 35211],
         [35211,    30,    10,  ...,     8,   278,     5],
         ...,
         [31514,    23,     5,  ...,    42,     4,    38],
         [   38,    40,    28,  ...,   100,  1701,  2185],
         [ 2185,  5394,     7,  ...,     4,  1899, 38702]]),
 'attention_mask': tensor([[1, 1, 1,  ..., 1, 1, 1],
         [1, 1, 1,  ..., 1, 1, 1],
         [1, 1, 1,  ..., 1, 1, 1],
         ...,
         [1, 1, 1,  ..., 1, 1, 1],
         [1, 1, 1,  ..., 1, 1, 1],
         [1, 1, 1,  ..., 1, 1, 1]])}

In [20]:
# generate relations
num_return_sequences = 3
gen_kwargs = {
    "max_length": 256,
    "length_penalty": 0,
    "num_beams": 3,
    "num_return_sequences": num_return_sequences
}

In [21]:
import transformers

In [22]:
inputs_= transformers.tokenization_utils_base.BatchEncoding(inputs_)
inputs_

{'input_ids': tensor([[    0, 11773,    17,  ...,    14,    47,    17],
        [   17,    27,   890,  ...,   554,    11, 35211],
        [35211,    30,    10,  ...,     8,   278,     5],
        ...,
        [31514,    23,     5,  ...,    42,     4,    38],
        [   38,    40,    28,  ...,   100,  1701,  2185],
        [ 2185,  5394,     7,  ...,     4,  1899, 38702]]), 'attention_mask': tensor([[1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1],
        ...,
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1]])}

In [23]:
generated_tokens = model.generate(
        **inputs_.to(model.device),
        **gen_kwargs,
    )

In [None]:
generated_tokens = model.generate(
        **inputs_.to("cuda"),
        **gen_kwargs,
    )


In [24]:
decoded_preds = tokenizer.batch_decode(generated_tokens,
                                           skip_special_tokens=False)

# create kb
kb = KB()
i = 0
for sentence_pred in decoded_preds:
    current_span_index = i // num_return_sequences
    relations = extract_relations_from_model_output(sentence_pred)
    for relation in relations:
        relation["meta"] = {
            "spans": [spans_boundaries[current_span_index]]
        }
        kb.add_relation(relation)
    i += 1

In [26]:
kb.relations

[{'head': 'Kitty headphones',
  'type': 'subclass of',
  'tail': 'Headphones',
  'meta': {'spans': [[0, 128]]}},
 {'head': '10 best cat ear headphones',
  'type': 'is a list of',
  'tail': 'Headphones',
  'meta': {'spans': [[0, 128]]}},
 {'head': '10 best cat ear headphones',
  'type': 'subclass of',
  'tail': 'Headphones',
  'meta': {'spans': [[0, 128]]}},
 {'head': 'Nekomimi',
  'type': 'instance of',
  'tail': 'moe',
  'meta': {'spans': [[127, 255]]}},
 {'head': 'Nekomimi',
  'type': 'subclass of',
  'tail': 'moe',
  'meta': {'spans': [[127, 255]]}},
 {'head': 'Nekomimi',
  'type': 'instance of',
  'tail': 'anthropomorphism',
  'meta': {'spans': [[127, 255]]}},
 {'head': 'Catwoman',
  'type': 'publisher',
  'tail': 'DC Comics',
  'meta': {'spans': [[254, 382]]}},
 {'head': 'Catwoman',
  'type': 'creator',
  'tail': 'Kenji Miyazawa',
  'meta': {'spans': [[254, 382]]}},
 {'head': 'Kenji Miyazawa',
  'type': 'occupation',
  'tail': 'novelist',
  'meta': {'spans': [[254, 382]]}},
 {'hea

In [28]:
json.dump(kb.relations, open("relation.json", 'w'))

In [34]:
import json

In [35]:
d= json.load(open("relation.json", "r"))

[{'subject': 'cat ear',
  'verb': 'subclass of',
  'object': 'headphones',
  'meta': {'spans': [[0, 128]]}},
 {'subject': 'cat ear headphones',
  'verb': 'subclass of',
  'object': 'headphones',
  'meta': {'spans': [[0, 128]]}},
 {'subject': '10 best cat ear headphones',
  'verb': 'is a list of',
  'object': 'cat ear headphones',
  'meta': {'spans': [[0, 128]]}},
 {'subject': 'headphones',
  'verb': 'subclass of',
  'object': 'Headphones',
  'meta': {'spans': [[0, 128]]}},
 {'subject': 'headphones',
  'verb': 'different from',
  'object': 'Headphones',
  'meta': {'spans': [[0, 128]]}},
 {'subject': 'Headphones',
  'verb': 'different from',
  'object': 'headphones',
  'meta': {'spans': [[0, 128]]}},
 {'subject': 'Kitty headphones',
  'verb': 'subclass of',
  'object': 'headphones',
  'meta': {'spans': [[0, 128]]}},
 {'subject': 'Kitty headphones',
  'verb': 'instance of',
  'object': 'headphones',
  'meta': {'spans': [[0, 128]]}},
 {'subject': 'Kitty headphone',
  'verb': 'subclass of',