In [None]:
import json
def process_jsonl_sentences(jsonl_file_path):
    """Print the 'text' field of every record in a JSON Lines file.

    Args:
        jsonl_file_path: Path to a file holding one JSON object per line.

    Each non-blank line is parsed with json.loads and its 'text' value is
    printed; records without a 'text' key print an empty line.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    # Read as UTF-8 explicitly (like the other readers in this notebook) so the
    # result does not depend on the platform's default encoding.
    with open(jsonl_file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Blank lines (e.g. a trailing newline at end of file) are not
            # records; json.loads would raise on them.
            if not line.strip():
                continue
            json_obj = json.loads(line)
            text_to_infer = json_obj.get('text', '')
            print(text_to_infer)
# Print the 'text' field of every record in output.jsonl (path is relative to the working directory).
process_jsonl_sentences('output.jsonl')

In [None]:
import re
from chained_classification.run_pipeline import run_pipeline as NERandREL
from class_recognition.class_recognition_pipeline import PipelineRunner as DOLCEAligner

def infer_years_for_dates(text):
    """Append an inferred year to every "<day> <Month>" date that lacks one.

    A date is a 1-2 digit day (optionally with st/nd/rd/th), whitespace, an
    English month name and, optionally, whitespace plus a 4-digit year; matching
    is case-insensitive. For a date without a year, the year is taken from the
    last preceding date that has one, otherwise from the first following date
    that has one. Dates stay untouched when no date in the text carries a year.

    Args:
        text: Input text.

    Returns:
        The text with " <year>" inserted right after each year-less date for
        which a year could be inferred; the original text otherwise.
    """
    # Pattern to match dates, capturing the date part and the optional year
    date_pattern = re.compile(r'(\d{1,2}(?:st|nd|rd|th)?\s(?:January|February|March|April|May|June|July|August|September|October|November|December))(?:\s(\d{4}))?', re.I)

    matches = list(date_pattern.finditer(text))

    # If no dates found, return the original text
    if not matches:
        return text

    years = [match.group(2) for match in matches]

    # Rebuild the text left to right from slices of the ORIGINAL string.
    # Inserting into `text` in place would shift every later match offset and
    # put subsequent years in the wrong position.
    pieces = []
    cursor = 0
    for i, match in enumerate(matches):
        if years[i] is not None:
            continue

        # Prefer the nearest preceding explicit year, then the nearest following one
        prev_years = [year for year in years[:i] if year is not None]
        next_years = [year for year in years[i + 1:] if year is not None]
        closest_year = prev_years[-1] if prev_years else (next_years[0] if next_years else None)
        if closest_year is None:
            continue

        pieces.append(text[cursor:match.end()])
        pieces.append(f" {closest_year}")
        cursor = match.end()

    pieces.append(text[cursor:])
    return "".join(pieces)

def prepare_text_for_DOLCE_aligner(e, text):
    """Build the classifier prompt: the entity's text followed by its sentence context.

    Args:
        e: Object exposing a `.text` attribute (the entity).
        text: Sentence (or trimmed sentence) in which the entity occurs.

    Returns:
        "<entity text> in the context of this sentence '<text>'"
    """
    template = "{} in the context of this sentence '{}'"
    return template.format(e.text, text)

def serialize_doc_with_relations(doc):
    """Turn a doc into a JSON-ready dict extended with relation records.

    Starts from doc.to_json() and adds two keys:
      - 'relations': one dict per item of doc._.rel (empty list when the doc
        has no `rel` attribute on `doc._`),
      - 'classes': an empty dict for the caller to fill in.

    Each relation dict holds the entity indices ('dep', 'dest'), the relation
    type ('rel') and the text of both entities looked up in doc.ents.
    """
    payload = doc.to_json()
    payload['relations'] = []
    payload['classes'] = {}

    # Nothing more to do when no relation data is attached to the doc
    if not hasattr(doc._, 'rel'):
        return payload

    # Every relation is kept; no filtering by entity label or class
    payload['relations'] = [
        {
            "dep_text": doc.ents[link.dep].text,
            "dep": link.dep,
            "rel": link.relation,
            "dest_text": doc.ents[link.dest].text,
            "dest": link.dest,
        }
        for link in doc._.rel
    ]
    return payload

def trim_context(entity, context, percentage):
    """Cut a context down to a window of words around the first mention of an entity.

    Whitespace in both strings is collapsed first. The entity is located with a
    case-insensitive literal search; if it is not found, the whitespace-normalised
    context is returned unchanged.

    Args:
        entity: Text to centre the window on.
        context: Text to trim.
        percentage: Share of the context's words (0-100) used as the word budget.

    Returns:
        The selected words joined by single spaces. The window extends half the
        budget to each side of the entity and is stretched to the full budget
        when it is clipped by the start or end of the context.
    """
    entity_tokens = entity.split()
    context_tokens = context.split()
    needle = " ".join(entity_tokens)
    haystack = " ".join(context_tokens)

    hit = re.search(re.escape(needle), haystack, re.IGNORECASE)
    if hit is None:
        return haystack

    n_tokens = len(context_tokens)
    budget = round(n_tokens * (percentage / 100))
    margin = budget // 2

    # Number of words that precede the match (position in words, not characters)
    lead = len(haystack[:hit.start()].split())

    lo = max(0, lead - margin)
    hi = min(n_tokens, lead + len(entity_tokens) + margin)

    # A window clipped by either edge is widened on the opposite side
    if hi - lo < budget:
        if lo == 0:
            hi = min(budget, n_tokens)
        elif hi == n_tokens:
            lo = max(0, n_tokens - budget)

    return " ".join(context_tokens[lo:hi])


# Manual check of trim_context on a single sentence.
entity2 = "lead vocalist"
context2 = "Anita Auglend is the lead vocalist of the gothic-doom metal band."
percentage2 = 60  # share of the sentence's words used as the window budget; adjust as needed
trimmed_context2 = trim_context(entity2, context2, percentage2)
print(trimmed_context2)

In [13]:
import json

def process_text_to_json_v2(text_to_infer):
    """Run the NER/relation pipeline on one text and return a JSON-serialisable dict.

    Steps: complete year-less dates (infer_years_for_dates), run NERandREL,
    serialise the doc with serialize_doc_with_relations, rebuild the "ents"
    list in this notebook's format and classify every CLASS entity with
    DOLCEAligner. These helpers are defined in other cells of the notebook.

    Args:
        text_to_infer: Raw input text.

    Returns:
        The dict produced by serialize_doc_with_relations, where
          - "text" is the year-completed text,
          - "ents" holds one {"start_char", "end_char", "label", "text"} dict
            per entity, in doc.ents order (CLASS entities also get "subClassOf"),
          - "classes" maps "<entity text with underscores>_<entity index>" to
            {"labels": [...], "class": <entity text>}.
    """
    text_to_infer = infer_years_for_dates(text_to_infer)
    doc = NERandREL(text=text_to_infer, config_path="./chained_classification/fewshot.cfg", examples_path="./chained_classification/examples.jsonl")

    # Use serialize_doc_with_relations to get initial structure including relations
    output_json = serialize_doc_with_relations(doc)

    # Update the text in the output_json
    output_json["text"] = text_to_infer

    # Start from an empty entity list. serialize_doc_with_relations builds on
    # doc.to_json(), which (for a spaCy Doc) can already carry an "ents" list in
    # a different shape; appending to it would leave every entity twice, in two
    # formats. Rebuilding it in doc.ents order also keeps the "dep"/"dest"
    # indices of the relations aligned with positions in "ents", and avoids a
    # KeyError when to_json() emitted no "ents" key at all.
    output_json["ents"] = []

    # The aligner is built at most once per call, on the first CLASS entity;
    # its constructor arguments never change between entities.
    # NOTE(review): assumes a DOLCEAligner instance can be reused across run() calls — confirm.
    runner = None

    # Process entities and match the new format
    for index, entity in enumerate(doc.ents):
        ent_dict = {
            "start_char": entity.start_char,
            "end_char": entity.end_char,
            "label": entity.label_,
            "text": entity.text
        }

        # For CLASS entities, use DOLCEAligner to determine subclassOf information
        if entity.label_ == "CLASS":
            print(entity.text)
            # trim_context centres on the first occurrence of the entity text
            trimmed_context = trim_context(entity.text, text_to_infer, 50)
            print(trimmed_context)
            text = prepare_text_for_DOLCE_aligner(entity, trimmed_context)
            if runner is None:
                runner = DOLCEAligner(config_path="./class_recognition/fewshot.cfg", examples_path="./class_recognition/examples.jsonl")
            dolce_doc = runner.run(text)
            # Keep only the categories with a strictly positive score
            filtered_categories = {label: score for label, score in dolce_doc.cats.items() if score > 0.0}
            labels_with_positive_scores = list(filtered_categories.keys())

            # Add the subclassOf information to the ent_dict
            ent_dict["subClassOf"] = labels_with_positive_scores if labels_with_positive_scores else ["Unknown"]

            # Store class information separately; the index suffix keeps keys
            # unique when the same class text occurs more than once
            formatted_text = entity.text.replace(" ", "_")
            key = f"{formatted_text}_{index}"
            output_json['classes'][key] = {"labels": labels_with_positive_scores, "class": entity.text}

        # Append entity information to the ents list in output_json
        output_json["ents"].append(ent_dict)

    # Note: Relations are already included in output_json from the serialize_doc_with_relations call

    return output_json



# text_to_infer = "Fut\u016bh al-Buld\u0101n is an Arabic book by Persian historian Ahmad Ibn Yahya al-Baladhuri. The work by which he is best known is the Kitab Futuh al-Buldan (\"Book of the Conquests of the Lands\"), edited by M. J. de Goeje as Liber expugnationis regionum (Leiden, 1870; Cairo, 1901)."
# processed_json = process_text_to_json_v2(text_to_infer)
# with open("results/doc_data_1.json", "w", encoding="utf-8") as f:
#     json.dump(processed_json, f, ensure_ascii=False, indent=2)

In [None]:
import json

def find_substring_position(main_string, substring):
    """Locate the first occurrence of `substring` inside `main_string`.

    Returns:
        (start, end) character offsets with `end` exclusive, or (None, None)
        when the substring does not occur.
    """
    offset = main_string.find(substring)
    if offset == -1:
        # Substring not found
        return None, None
    return offset, offset + len(substring)

def read_and_check_json_file(file_path):
    """Compare stored entity offsets against offsets found by searching the text.

    Loads one JSON document with a 'text' string and an 'ents' list. For every
    entity whose 'text' occurs in the document text, prints two lines:
    "<found start> <stored start_char>" and "<found end> <stored end_char>".
    Entities whose text does not occur are skipped silently.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        payload = json.load(handle)

    for record in payload['ents']:
        needle = record['text']
        haystack = payload['text']
        found_start, found_end = find_substring_position(haystack, needle)
        if found_start is None:
            continue
        stored_span = (record['start_char'], record['end_char'])
        print(found_start, stored_span[0])
        print(found_end, stored_span[1])


# Path of the file to check, relative to the working directory.
# NOTE(review): the extension is .jsonl, but read_and_check_json_file parses the whole
# file with json.load, which only succeeds if it holds a single JSON document — confirm.
json_file_path = 'examples_jsonl/adalgis.jsonl'

# Print found-vs-stored character offsets for every entity in that file
read_and_check_json_file(json_file_path)

In [None]:
# Spot check: locate "American music" in a sample sentence; the (start, end) tuple is the cell's result.
text = "Aaron Copland was an American composer, composition teacher, writer, and later in his career a conductor of his own and other American music."
find_substring_position(text, "American music")

In [20]:
import json

def parse_json_and_return_data(input_file):
    """Process every sentence of a JSON file and save one result file per sentence.

    The input must be a JSON list of objects, each with a "sentence" key. Each
    sentence is passed to process_text_to_json_v2 and the result is written to
    results/doc_data_<index>.json (UTF-8, indented), where <index> is the
    item's position in the list. A progress line is printed per item.

    Args:
        input_file: Path to the input JSON file.

    Returns:
        None. Despite the name, results are only written to disk.
    """
    import os  # local import so this cell does not depend on another cell's imports

    # Read everything first so the input file is closed before the
    # long-running per-sentence processing starts
    with open(input_file, 'r', encoding='utf-8') as file:
        data = json.load(file)  # Load the entire JSON file

    # Make sure the output directory exists; otherwise the first write fails
    # only after the first sentence has already been processed
    os.makedirs("results", exist_ok=True)

    for index, item in enumerate(data):
        # Extract the sentence text
        sentence_text = item["sentence"]

        # Process the text
        processed_json = process_text_to_json_v2(sentence_text)

        # Construct the output filename
        output_filename = f"results/doc_data_{index}.json"

        # Save the processed JSON to a file
        with open(output_filename, "w", encoding='utf-8') as out_file:
            json.dump(processed_json, out_file, ensure_ascii=False, indent=2)

        print(f"Processed and saved: {output_filename}")

# Batch run: process every sentence in the input file and write results/doc_data_<index>.json.
# Adjust the input file path to your actual JSON file path (relative to the working directory).
input_file_path = 'eva_prompt_task_2.json'
parse_json_and_return_data(input_file_path)




Italian book
'La casta. Così i politici italiani sono diventati intoccabili' is an Italian book, written by Sergio Rizzo and Gian Antonio Stella, two journalists from the
Text: Italian book in the context of this sentence ''La casta. Così i politici
italiani sono diventati intoccabili' is an Italian book, written by Sergio Rizzo
and Gian Antonio Stella, two journalists from the'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E376EE80>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 1.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 1.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
journalists
an Italian book, written by Sergio Rizzo and Gian Antonio Stella, two journalists from the Italian national newspaper Corriere della Sera, detailing the amount of
Text: journalists in the context of this sentence 'an Italian 



geometry
In geometry, a deltoid, also known as a tricuspoid
Text: geometry in the context of this sentence 'In geometry, a deltoid, also
known as a tricuspoid'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E376CAE0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 1.0})
tricuspoid
also known as a tricuspoid or Steiner curve, is
Text: tricuspoid in the context of this sentence 'also known as a tricuspoid or
Steiner curve, is'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E376DEE0>, {'ABSTRACT': 1.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.



pre-Julian Roman calendar
Year 412 BC was a year of the pre-Julian Roman calendar. At the time, it was known as the Year of the Consulship of Ambustus and Pacilus (or, less frequently, year 342
Text: pre-Julian Roman calendar in the context of this sentence 'Year 412 BC was
a year of the pre-Julian Roman calendar. At the time, it was known as the Year
of the Consulship of Ambustus and Pacilus (or, less frequently, year 342'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E2178F40>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 1.0, 'TOPIC': 0.0})
Year of the Consulship of Ambustus and Pacilus
was a year of the pre-Julian Roman calendar. At the time, it was known as the Year of the Consulship of Ambustus and Pacilus (or, less frequentl



Arabic book
'Futūh al-Buldān' is an Arabic book by Persian historian Ahmad Ibn Yahya al-Baladhuri. The work by which he
Text: Arabic book in the context of this sentence ''Futūh al-Buldān' is an
Arabic book by Persian historian Ahmad Ibn Yahya al-Baladhuri. The work by which
he'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E213BD80>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 1.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 1.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Persian historian
'Futūh al-Buldān' is an Arabic book by Persian historian Ahmad Ibn Yahya al-Baladhuri. The work by which he
Text: Persian historian in the context of this sentence ''Futūh al-Buldān' is an
Arabic book by Persian historian Ahmad Ibn Yahya al-Baladhuri. The work by which
he'
Categories: defaultdict(<function reduce_shards



percussionist
Glen Velez (born 1949) is a percussionist, vocalist, and composer, specializing in frame drums from around the world. He is largely responsible
Text: percussionist in the context of this sentence 'Glen Velez (born 1949) is a
percussionist, vocalist, and composer, specializing in frame drums from around
the world. He is largely responsible'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E376D800>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 1.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 1.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
vocalist
Glen Velez (born 1949) is a percussionist, vocalist, and composer, specializing in frame drums from around the world. He is largely responsible
Text: vocalist in the context of this sentence 'Glen Velez (born 1949) is a
percussionist, vocalist, and compo



handheld game console
The 'Pokémon Mini' is a handheld game console designed and manufactured by Nintendo and themed around the Pokémon media franchise. It is
Text: handheld game console in the context of this sentence 'The 'Pokémon Mini'
is a handheld game console designed and manufactured by Nintendo and themed
around the Pokémon media franchise. It is'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E21798A0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 1.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Pokémon media franchise
game console designed and manufactured by Nintendo and themed around the Pokémon media franchise. It is the smallest game system with interchangeable cartridges ever produced
Text: Pokémon media franchise in the context of this sentence 



German
Prussia, and was a German mathematician.
Text: German in the context of this sentence 'Prussia, and was a German
mathematician.'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5DFAFA840>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
1.0, 'PERSON': 1.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
mathematician
Prussia, and was a German mathematician.
Text: mathematician in the context of this sentence 'Prussia, and was a German
mathematician.'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3F2C5E0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 1.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJ



leaf chicory
Radicchio is a leaf chicory (Cichorium intybus, Asteraceae), sometimes known as Italian chicory, and is a perennial. It is grown as a
Text: leaf chicory in the context of this sentence 'Radicchio is a leaf chicory
(Cichorium intybus, Asteraceae), sometimes known as Italian chicory, and is a
perennial. It is grown as a'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E32AB7E0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
1.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Italian chicory
Radicchio is a leaf chicory (Cichorium intybus, Asteraceae), sometimes known as Italian chicory, and is a perennial. It is grown as a leaf vegetable
Text: Italian chicory in the context of this sentence 'Radicchio is a leaf
chicory (Cichorium intybus, Asteraceae), some



Italian literary prize
Rapallo Carige Prize is an Italian literary prize, established in 1985 by the
Text: Italian literary prize in the context of this sentence 'Rapallo Carige
Prize is an Italian literary prize, established in 1985 by the'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E217A840>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Processed and saved: results/doc_data_8.json




marine animals
Holothuria is a genus of marine animals of the family Holothuriidae. Members of
Text: marine animals in the context of this sentence 'Holothuria is a genus of
marine animals of the family Holothuriidae. Members of'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5DFAFB740>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
1.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Processed and saved: results/doc_data_9.json




opera house
Massimo Vittorio Emanuele is an opera house and opera company located on
Text: opera house in the context of this sentence 'Massimo Vittorio Emanuele is
an opera house and opera company located on'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E31160C0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 1.0, 'ORGANISATION': 1.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
opera company
is an opera house and opera company located on the Piazza Verdi
Text: opera company in the context of this sentence 'is an opera house and opera
company located on the Piazza Verdi'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E31159E0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'O



ancient site
Marlik is an ancient site near Roudbar in Gilan, north of Iran. The site of
Text: ancient site in the context of this sentence 'Marlik is an ancient site
near Roudbar in Gilan, north of Iran. The site of'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E376DB20>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 1.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
royal cemetery
north of Iran. The site of a royal cemetery, and artifacts found at this site date
Text: royal cemetery in the context of this sentence 'north of Iran. The site of
a royal cemetery, and artifacts found at this site date'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3114860>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY



monotheism
n monotheism and henotheism, God is conceived as the Supreme Being. The concept of God as described by theologians commonly includes the
Text: monotheism in the context of this sentence 'n monotheism and henotheism,
God is conceived as the Supreme Being. The concept of God as described by
theologians commonly includes the'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3114AE0>, {'ABSTRACT': 1.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
henotheism
n monotheism and henotheism, God is conceived as the Supreme Being. The concept of God as described by theologians commonly includes the
Text: henotheism in the context of this sentence 'n monotheism and henotheism,
God is conceived as the Supreme Being. The concept of Go



painting
The Choice of Hercules is a painting by the Italian Baroque painter Annibale Carracci. Dating from
Text: painting in the context of this sentence 'The Choice of Hercules is a
painting by the Italian Baroque painter Annibale Carracci. Dating from'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E32AADE0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 1.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Italian Baroque painter
Choice of Hercules is a painting by the Italian Baroque painter Annibale Carracci. Dating from 1596, it is housed
Text: Italian Baroque painter in the context of this sentence 'Choice of
Hercules is a painting by the Italian Baroque painter Annibale Carracci. Dating
from 1596, it is housed'
Categories: defaultdict(<function reduce_shards



Korean
Hanja is the Korean name for Chinese characters (hanzi). More specifically, it refers to
Text: Korean in the context of this sentence 'Hanja is the Korean name for
Chinese characters (hanzi). More specifically, it refers to'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E32A8900>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
1.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Chinese characters
Hanja is the Korean name for Chinese characters (hanzi). More specifically, it refers to those
Text: Chinese characters in the context of this sentence 'Hanja is the Korean
name for Chinese characters (hanzi). More specifically, it refers to those'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E376F920>, {'ABSTRACT': 0.0, 'ACT



natural number
is the natural number following 97
Text: natural number in the context of this sentence 'is the natural number
following 97'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E21FD120>, {'ABSTRACT': 1.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Processed and saved: results/doc_data_15.json




scientific study
Environmental chemistry is the scientific study of the chemical and biochemical phenomena that occur in natural places.
Text: scientific study in the context of this sentence 'Environmental chemistry
is the scientific study of the chemical and biochemical phenomena that occur in
natural places.'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5DF801300>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 1.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
chemical and biochemical phenomena
Environmental chemistry is the scientific study of the chemical and biochemical phenomena that occur in natural places. It should not
Text: chemical and biochemical phenomena in the context of this sentence
'Environmental chemistry is the scientific study of the chemica



anime and manga series
Yasutora Sado is a fictional character in the anime and manga series Bleach created by Tite Kubo. He is an extremely tall teenager who studies in the same class as Ichigo Kurosaki.
Text: anime and manga series in the context of this sentence 'Yasutora Sado is a
fictional character in the anime and manga series Bleach created by Tite Kubo.
He is an extremely tall teenager who studies in the same class as Ichigo
Kurosaki.'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E8AD40>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 1.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 1.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
teenager
in the anime and manga series Bleach created by Tite Kubo. He is an extremely tall teenager who studies in the same class as Ichigo Kurosaki. Because Ichigo first



philosophy of mathematics
In the philosophy of mathematics, intuitionism, or neointuitionism (opposed to preintuitionism), is an approach where mathematics is considered to be purely the result of the constructive mental activity of humans rather than the discovery of fundamental principles claimed to exist in an objective reality. That is,
Text: philosophy of mathematics in the context of this sentence 'In the
philosophy of mathematics, intuitionism, or neointuitionism (opposed to
preintuitionism), is an approach where mathematics is considered to be purely
the result of the constructive mental activity of humans rather than the
discovery of fundamental principles claimed to exist in an objective reality.
That is,'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E3A020>, {'ABSTRACT': 1.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSIC



pharaoh
was a pharaoh of the
Text: pharaoh in the context of this sentence 'was a pharaoh of the'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E89F80>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 1.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Processed and saved: results/doc_data_19.json




town
Skalbmierz is a town in south eastern Poland, in Świętokrzyskie Voivodeship, in Kazimierza County. It has 1,326 inhabitants (2004). Skalbmierz has a long and
Text: town in the context of this sentence 'Skalbmierz is a town in south
eastern Poland, in Świętokrzyskie Voivodeship, in Kazimierza County. It has
1,326 inhabitants (2004). Skalbmierz has a long and'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E3B2E0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 1.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
history
County. It has 1,326 inhabitants (2004). Skalbmierz has a long and rich history, the town belongs to the province of Lesser Poland. Skalbmierz received town
Text: history in the context of this sentence 'County. It has 1,326 inhabitants
(2004). 



Air Canada aircraft
The Gimli Glider is the nickname of an Air Canada aircraft that was involved in an unusual aviation incident. On July 23, 1983, Air Canada Flight 143, a Boeing 767–233 jet, ran out of fuel at an altitude of 41,000 feet
Text: Air Canada aircraft in the context of this sentence 'The Gimli Glider is
the nickname of an Air Canada aircraft that was involved in an unusual aviation
incident. On July 23, 1983, Air Canada Flight 143, a Boeing 767–233 jet, ran out
of fuel at an altitude of 41,000 feet'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3D2E160>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 1.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 1.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
aviation incident
The Gimli Glider is the nickname of an Air Canada aircraft that was involved in an 



motorsport race track
The TT Circuit Assen is a motorsport race track built in 1955 and located in Assen, Netherlands. Host of the Dutch TT race, it is popularly referred to as "The
Text: motorsport race track in the context of this sentence 'The TT Circuit
Assen is a motorsport race track built in 1955 and located in Assen,
Netherlands. Host of the Dutch TT race, it is popularly referred to as "The'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3D2FE20>, {'ABSTRACT': 0.0, 'ACTIVITY': 1.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 1.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Dutch TT race
a motorsport race track built in 1955 and located in Assen, Netherlands. Host of the Dutch TT race, it is popularly referred to as "The Cathedral" of motorcycling by the fans of the
Text: Dutch TT race



comedy-drama film
Nothing in Common is a 1986 comedy-drama film, directed by Garry Marshall. It stars Tom Hanks and Jackie Gleason in what would
Text: comedy-drama film in the context of this sentence 'Nothing in Common is a
1986 comedy-drama film, directed by Garry Marshall. It stars Tom Hanks and
Jackie Gleason in what would'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E8A700>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 1.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 1.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Gleason's final film role
Tom Hanks and Jackie Gleason in what would prove to be Gleason's final film role - he was suffering from colon cancer, liver cancer, and thrombosed
Text: Gleason's final film role in the context of this sentence 'Tom Hanks and
Jackie Gleason in what would prove t



Processed and saved: results/doc_data_24.json




municipality
Rourell is a municipality in the comarca
Text: municipality in the context of this sentence 'Rourell is a municipality in
the comarca'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E8B1A0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 1.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
comarca
municipality in the comarca of Alt Camp,
Text: comarca in the context of this sentence 'municipality in the comarca of
Alt Camp,'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E401A7A0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 1.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJ



studio album
second and final studio album by Tone Lōc.
Text: studio album in the context of this sentence 'second and final studio
album by Tone Lōc.'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3D2DA80>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 1.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Processed and saved: results/doc_data_26.json




water
Hard water is water that has high mineral content (in contrast with "soft water"). Hard water
Text: water in the context of this sentence 'Hard water is water that has high
mineral content (in contrast with "soft water"). Hard water'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E217A160>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 1.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
mineral content
Hard water is water that has high mineral content (in contrast with "soft water"). Hard water is
Text: mineral content in the context of this sentence 'Hard water is water that
has high mineral content (in contrast with "soft water"). Hard water is'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E217AAC0>, {'ABSTRACT': 0.0



concurrent programming language
occam is a concurrent programming language that builds on the communicating sequential processes (CSP) process algebra,
Text: concurrent programming language in the context of this sentence 'occam is
a concurrent programming language that builds on the communicating sequential
processes (CSP) process algebra,'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E2179BC0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 1.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
communicating sequential processes (CSP) process algebra
a concurrent programming language that builds on the communicating sequential processes (CSP) process algebra, and shares many of its features. It is
Text: communicating sequential processes (CSP) process algebra in th



oldest extant chronicle
Kojiki is the oldest extant chronicle in Japan, dating from the early 8th century
Text: oldest extant chronicle in the context of this sentence 'Kojiki is the
oldest extant chronicle in Japan, dating from the early 8th century'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E21FED40>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 1.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 1.0, 'TOPIC': 0.0})
Processed and saved: results/doc_data_29.json




educational approach
Cooperative learning is an educational approach which aims to organize classroom activities into academic and social learning experiences. There is
Text: educational approach in the context of this sentence 'Cooperative learning
is an educational approach which aims to organize classroom activities into
academic and social learning experiences. There is'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E217AB60>, {'ABSTRACT': 0.0, 'ACTIVITY': 1.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
classroom activities
Cooperative learning is an educational approach which aims to organize classroom activities into academic and social learning experiences. There is much more
Text: classroom activities in the context of this sentence 'Co



color photography process
The Autochrome Lumière is an early color photography process. Patented in 1903 by the Lumière brothers in France and first marketed
Text: color photography process in the context of this sentence 'The Autochrome
Lumière is an early color photography process. Patented in 1903 by the Lumière
brothers in France and first marketed'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E186B420>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 1.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Patented
The Autochrome Lumière is an early color photography process. Patented in 1903 by the Lumière brothers in France and first marketed
Text: Patented in the context of this sentence 'The Autochrome Lumière is an
early color photography process. Patented in 



Oceanica
Oceania, also known as Oceanica, is a region centered on the islands of the tropical Pacific Ocean. Opinions of what constitutes Oceania range from its
Text: Oceanica in the context of this sentence 'Oceania, also known as Oceanica,
is a region centered on the islands of the tropical Pacific Ocean. Opinions of
what constitutes Oceania range from its'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E217B2E0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 1.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
region
Oceania, also known as Oceanica, is a region centered on the islands of the tropical Pacific Ocean. Opinions of what constitutes Oceania range from its
Text: region in the context of this sentence 'Oceania, also known as Oceanica,
is a region centere



province
Gelderland is a province of the Netherlands, located
Text: province in the context of this sentence 'Gelderland is a province of the
Netherlands, located'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E2179D00>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 1.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
central eastern part
Netherlands, located in the central eastern part of the country.
Text: central eastern part in the context of this sentence 'Netherlands, located
in the central eastern part of the country.'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E21FD260>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 1.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 



professor
A professor is a scholarly teacher in post-secondary education; the precise meaning of the term varies by country. Literally, professor derives
Text: professor in the context of this sentence 'A professor is a scholarly
teacher in post-secondary education; the precise meaning of the term varies by
country. Literally, professor derives'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E376F600>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 1.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
teacher
A professor is a scholarly teacher in post-secondary education; the precise meaning of the term varies by country. Literally, professor derives
Text: teacher in the context of this sentence 'A professor is a scholarly
teacher in post-secondary education; the prec



cartoon character
Mickey Mouse is an anthropomorphic cartoon character and the official mascot of The Walt Disney Company.
Text: cartoon character in the context of this sentence 'Mickey Mouse is an
anthropomorphic cartoon character and the official mascot of The Walt Disney
Company.'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3D2D620>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 1.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
official mascot
Mouse is an anthropomorphic cartoon character and the official mascot of The Walt Disney Company. He was created
Text: official mascot in the context of this sentence 'Mouse is an
anthropomorphic cartoon character and the official mascot of The Walt Disney
Company. He was created'
Categories: defaultdict(<function r



Augustinian church
The Basilica of Santa Maria del Popolo is an Augustinian church in Rome, Italy. It stands on the north side of Piazza del Popolo, one
Text: Augustinian church in the context of this sentence 'The Basilica of Santa
Maria del Popolo is an Augustinian church in Rome, Italy. It stands on the north
side of Piazza del Popolo, one'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3D2CB80>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 1.0, 'ORGANISATION': 1.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Processed and saved: results/doc_data_36.json




town
Munkebo is a town in central Denmark, located in Kerteminde municipality, Region of Southern Denmark on the
Text: town in the context of this sentence 'Munkebo is a town in central
Denmark, located in Kerteminde municipality, Region of Southern Denmark on the'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E4019DA0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 1.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
municipal council
island of Funen. It was also the site of the municipal council of the abolished Munkebo municipality.
Text: municipal council in the context of this sentence 'island of Funen. It was
also the site of the municipal council of the abolished Munkebo municipality.'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> 



perennial plant
A perennial plant or simply perennial (from Latin per, meaning "through", and annus, meaning "year") is a plant that lives for more than two years. The term is often used to differentiate
Text: perennial plant in the context of this sentence 'A perennial plant or
simply perennial (from Latin per, meaning "through", and annus, meaning "year")
is a plant that lives for more than two years. The term is often used to
differentiate'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E8AC00>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
1.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
perennial
A perennial plant or simply perennial (from Latin per, meaning "through", and annus, meaning "year") is a plant that lives for more than two years. The term is o



mathematical field
In the mathematical field of numerical analysis, interpolation is a method of constructing new
Text: mathematical field in the context of this sentence 'In the mathematical
field of numerical analysis, interpolation is a method of constructing new'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E88540>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 1.0})
numerical analysis
In the mathematical field of numerical analysis, interpolation is a method of constructing new
Text: numerical analysis in the context of this sentence 'In the mathematical
field of numerical analysis, interpolation is a method of constructing new'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x00000



celtiberian sculptures
of Guisando are a set of celtiberian sculptures located on the hill of Guisando
Text: celtiberian sculptures in the context of this sentence 'of Guisando are a
set of celtiberian sculptures located on the hill of Guisando'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3D2EAC0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 1.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Processed and saved: results/doc_data_40.json




Wali
Harun of Barcelona was the last Wali of Barcelona, ruling from 800 to 801. Harun succeeded as Wali when his predecessor, Sa'dun al Ruayni was captured
Text: Wali in the context of this sentence 'Harun of Barcelona was the last Wali
of Barcelona, ruling from 800 to 801. Harun succeeded as Wali when his
predecessor, Sa'dun al Ruayni was captured'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E4018900>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 1.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 1.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Muslim population
by besieging Frank Forces while trying reach Córdoba to raise help. Harun was chosen as Wali by both the Goths and Muslim population in the City.
Text: Muslim population in the context of this sentence 'by besieging Frank
Forces while trying reach C



Bhimasena
Bhima, also called Bhimasena and Vrikodara (Vṛkōdara, lit. 'one with
Text: Bhimasena in the context of this sentence 'Bhima, also called Bhimasena
and Vrikodara (Vṛkōdara, lit. 'one with'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E8B4C0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 1.0, 'PERSONIFICATION': 1.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Vrikodara
Bhima, also called Bhimasena and Vrikodara (Vṛkōdara, lit. 'one with a
Text: Vrikodara in the context of this sentence 'Bhima, also called Bhimasena
and Vrikodara (Vṛkōdara, lit. 'one with a'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E401B1A0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANIS



intrusive igneous rock
Diorite is an intrusive igneous rock composed principally of plagioclase feldspar (typically andesine), biotite, hornblende, and/or pyroxene. It is distinguished from gabbro
Text: intrusive igneous rock in the context of this sentence 'Diorite is an
intrusive igneous rock composed principally of plagioclase feldspar (typically
andesine), biotite, hornblende, and/or pyroxene. It is distinguished from
gabbro'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E401B420>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
1.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 1.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
plagioclase feldspar
Diorite is an intrusive igneous rock composed principally of plagioclase feldspar (typically andesine), biotite, hornblende, and/or pyroxene. It is distinguished fr



globular cluster
Messier 92 (also known as M92 or NGC 6341) is a globular cluster of stars in the northern constellation of Hercules. It was discovered by Johann Elert Bode in 1777, then published in the Jahrbuch
Text: globular cluster in the context of this sentence 'Messier 92 (also known
as M92 or NGC 6341) is a globular cluster of stars in the northern constellation
of Hercules. It was discovered by Johann Elert Bode in 1777, then published in
the Jahrbuch'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E40184A0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
1.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
stars
Messier 92 (also known as M92 or NGC 6341) is a globular cluster of stars in the northern constellation of Hercules. It was discovered by Johann El



fiction genre
Science fiction is a fiction genre dealing with imaginative content such as futuristic settings, futuristic science
Text: fiction genre in the context of this sentence 'Science fiction is a
fiction genre dealing with imaginative content such as futuristic settings,
futuristic science'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E891C0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 1.0})
futuristic settings
fiction genre dealing with imaginative content such as futuristic settings, futuristic science and technology, space travel, time travel,
Text: futuristic settings in the context of this sentence 'fiction genre dealing
with imaginative content such as futuristic settings, futuristic science and
tec



professor of physics
Richard Threlkeld Cox was a professor of physics at Johns Hopkins University, known
Text: professor of physics in the context of this sentence 'Richard Threlkeld
Cox was a professor of physics at Johns Hopkins University, known'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E401B9C0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 1.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 1.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
foundations of probability
University, known for Cox's theorem relating to the foundations of probability.
Text: foundations of probability in the context of this sentence 'University,
known for Cox's theorem relating to the foundations of probability.'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E4019260>, {'ABSTRACT': 0.0, 



oasis town
Oum Dreiga is an oasis town in the Río de
Text: oasis town in the context of this sentence 'Oum Dreiga is an oasis town in
the Río de'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E8BEC0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 1.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Processed and saved: results/doc_data_47.json




province
In Ancient Rome, a province (Latin, provincia, pl. provinciae) was a territorial unit of the empire's territorial possessions
Text: province in the context of this sentence 'In Ancient Rome, a province
(Latin, provincia, pl. provinciae) was a territorial unit of the empire's
territorial possessions'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3D2C040>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 1.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
territorial unit
Rome, a province (Latin, provincia, pl. provinciae) was a territorial unit of the empire's territorial possessions outside of Italy. The
Text: territorial unit in the context of this sentence 'Rome, a province (Latin,
provincia, pl. provinciae) was a territorial unit of the empire's territor



Peace of Lodi
The Treaty of Lodi, also known as the Peace of Lodi was a peace agreement between Milan, Naples, and Florence signed on April 9, 1454 at Lodi in Lombardy, on the banks of the Adda. It put an end to the long struggles between
Text: Peace of Lodi in the context of this sentence 'The Treaty of Lodi, also
known as the Peace of Lodi was a peace agreement between Milan, Naples, and
Florence signed on April 9, 1454 at Lodi in Lombardy, on the banks of the Adda.
It put an end to the long struggles between'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3D2C040>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 1.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
peace agreement
The Treaty of Lodi, also known as the Peace of Lodi was a peace agreement between Mil



community
utopia is a community possessing highly desirable
Text: community in the context of this sentence 'utopia is a community
possessing highly desirable'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E401A020>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 1.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Processed and saved: results/doc_data_50.json




bradyarrhythmia
Bradycardia, also known as bradyarrhythmia, is a slow heart rate, namely, a resting heart rate of under 60 beats per minute (BPM) in adults. It is a type
Text: bradyarrhythmia in the context of this sentence 'Bradycardia, also known
as bradyarrhythmia, is a slow heart rate, namely, a resting heart rate of under
60 beats per minute (BPM) in adults. It is a type'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3D2C040>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 1.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
heart rate
Bradycardia, also known as bradyarrhythmia, is a slow heart rate, namely, a resting heart rate of under 60 beats per minute (BPM) in adults. It is a type
Text: heart rate in the context of this sentence 'Bradycardia, also known 



breed
The name of the breed means "an estate guard dog," which is
Text: breed in the context of this sentence 'The name of the breed means "an
estate guard dog," which is'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E21FEDE0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
1.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
estate guard dog
name of the breed means "an estate guard dog," which is the original use for
Text: estate guard dog in the context of this sentence 'name of the breed means
"an estate guard dog," which is the original use for'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3D2F7E0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANIS



Swedish film
Kopps is a 2003 Swedish film directed by Josef Fares. The name itself
Text: Swedish film in the context of this sentence 'Kopps is a 2003 Swedish film
directed by Josef Fares. The name itself'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E89F80>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 1.0,
'INFORMATION_ENTITY': 1.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
English word "Cops"
is a pun on pronouncing the English word "Cops" with a Swedish accent.
Text: English word "Cops" in the context of this sentence 'is a pun on
pronouncing the English word "Cops" with a Swedish accent.'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E896C0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 1.0, 'LOCATION': 0.0, '



musical family
in Marseilles, to a musical family. He was a French
Text: musical family in the context of this sentence 'in Marseilles, to a
musical family. He was a French'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5BC506840>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 1.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
French
musical family. He was a French virtuoso violinist.
Text: French in the context of this sentence 'musical family. He was a French
virtuoso violinist.'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3D2EFC0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 1.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0



system of government
Democracy is "a system of government in which all the people of a state or polity ... are
Text: system of government in the context of this sentence 'Democracy is "a
system of government in which all the people of a state or polity ... are'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E89760>, {'ABSTRACT': 1.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
polity
in which all the people of a state or polity ... are involved in making decisions about its affairs,
Text: polity in the context of this sentence 'in which all the people of a state
or polity ... are involved in making decisions about its affairs,'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3D2DC60>, {'AB



meteorology
In meteorology, virga is an observable precipitation shaft that falls from a cloud but evaporates or sublimes before reaching the ground. At high altitudes the precipitation
Text: meteorology in the context of this sentence 'In meteorology, virga is an
observable precipitation shaft that falls from a cloud but evaporates or
sublimes before reaching the ground. At high altitudes the precipitation'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E8B9C0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 1.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
precipitation shaft
In meteorology, virga is an observable precipitation shaft that falls from a cloud but evaporates or sublimes before reaching the ground. At high altitudes the precipitation
Text: precipi



Soviet space dog
Laika was a Soviet space dog who became one of the first animals in space, and the first animal to orbit the Earth. Laika, a stray dog
Text: Soviet space dog in the context of this sentence 'Laika was a Soviet space
dog who became one of the first animals in space, and the first animal to orbit
the Earth. Laika, a stray dog'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3D2DA80>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
1.0, 'PERSON': 1.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
space
Laika was a Soviet space dog who became one of the first animals in space, and the first animal to orbit the Earth. Laika, a stray dog
Text: space in the context of this sentence 'Laika was a Soviet space dog who
became one of the first animals in space, and the first 



cartoon fictional character
Popeye the Sailor Man is a cartoon fictional character, created by Elzie Crisler Segar, who has appeared in comic strips and theatrical and television animated
Text: cartoon fictional character in the context of this sentence 'Popeye the
Sailor Man is a cartoon fictional character, created by Elzie Crisler Segar, who
has appeared in comic strips and theatrical and television animated'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5B929C400>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 1.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
comic strips
cartoon fictional character, created by Elzie Crisler Segar, who has appeared in comic strips and theatrical and television animated cartoons. He first appeared in the daily
Text: comic stri



GAA stadium
Pearse Stadium is the GAA stadium, in County Galway, Ireland. The stadium opened on June 16, 1957, as 16,000 people
Text: GAA stadium in the context of this sentence 'Pearse Stadium is the GAA
stadium, in County Galway, Ireland. The stadium opened on June 16, 1957, as
16,000 people'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E8AAC0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 1.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 1.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
hurling
as 16,000 people came to watch Galway beat Tipperary in hurling, and Kerry in football, and to watch Bishop Michael Browne
Text: hurling in the context of this sentence 'as 16,000 people came to watch
Galway beat Tipperary in hurling, and Kerry in football, and to watch Bishop
Michael Browne'
Categories: defaultdi



village
Porcuna is a village and municipality in the province of Jaén in Andalusia, Spain, 42 km from Jaén and 50 km from Córdoba. The primary occupation
Text: village in the context of this sentence 'Porcuna is a village and
municipality in the province of Jaén in Andalusia, Spain, 42 km from Jaén and 50
km from Córdoba. The primary occupation'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3D2EAC0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 1.0, 'ORGANISATION': 0.0, 'ORGANISM':
1.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
municipality
Porcuna is a village and municipality in the province of Jaén in Andalusia, Spain, 42 km from Jaén and 50 km from Córdoba. The primary occupation
Text: municipality in the context of this sentence 'Porcuna is a village and
municipality in the province of Jaé



international crises
The Berlin Blockade was one of the first major international crises of the Cold War. During the multinational occupation of post-World War
Text: international crises in the context of this sentence 'The Berlin Blockade
was one of the first major international crises of the Cold War. During the
multinational occupation of post-World War'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E21FF1A0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 1.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 1.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
multinational occupation
the first major international crises of the Cold War. During the multinational occupation of post-World War II Germany, the Soviet Union blocked the Western
Text: multinational occupation in the context of this sentence 'the first maj



album
in 1998, is an album by the New York
Text: album in the context of this sentence 'in 1998, is an album by the New
York'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E89F80>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 1.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
band
by the New York City band Soul Coughing.
Text: band in the context of this sentence 'by the New York City band Soul
Coughing.'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3D2C180>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 1.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 



Italian painter
Baffini (1326 – 1379) was an Italian painter of the mid-14th century.
Text: Italian painter in the context of this sentence 'Baffini (1326 – 1379) was
an Italian painter of the mid-14th century.'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E89620>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 1.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 1.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Processed and saved: results/doc_data_63.json




spinning wheel
A gyroscope is a spinning wheel or disc in which the
Text: spinning wheel in the context of this sentence 'A gyroscope is a spinning
wheel or disc in which the'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E8A980>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 1.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
disc
is a spinning wheel or disc in which the axis of
Text: disc in the context of this sentence 'is a spinning wheel or disc in which
the axis of'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E376F100>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 1.0, 'ROL



artist
Pakito, is a French electronic dance music artist.
Text: artist in the context of this sentence 'Pakito, is a French electronic
dance music artist.'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E401B880>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 1.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 1.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Processed and saved: results/doc_data_65.json




mathematics
In mathematics, a lattice is a partially ordered set in which every two elements have a unique supremum (also called a least upper bound or join) and a unique infimum (also called a greatest
Text: mathematics in the context of this sentence 'In mathematics, a lattice is
a partially ordered set in which every two elements have a unique supremum (also
called a least upper bound or join) and a unique infimum (also called a
greatest'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E217B600>, {'ABSTRACT': 1.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 1.0})
lattice
In mathematics, a lattice is a partially ordered set in which every two elements have a unique supremum (also called a least upper bound or join) and a unique infimu



plateau region
The Adamawa Plateau is a plateau region in west-central Africa stretching from south-eastern
Text: plateau region in the context of this sentence 'The Adamawa Plateau is a
plateau region in west-central Africa stretching from south-eastern'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E2178B80>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 1.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
North Provinces
Nigeria through north-central Cameroon (Adamawa and North Provinces) to the Central African Republic.
Text: North Provinces in the context of this sentence 'Nigeria through
north-central Cameroon (Adamawa and North Provinces) to the Central African
Republic.'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E



sea stack
Strombolicchio is a sea stack of volcanic origin 2 km to the
Text: sea stack in the context of this sentence 'Strombolicchio is a sea stack
of volcanic origin 2 km to the'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E3B560>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 1.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
volcanic origin
Strombolicchio is a sea stack of volcanic origin 2 km to the northeast of
Text: volcanic origin in the context of this sentence 'Strombolicchio is a sea
stack of volcanic origin 2 km to the northeast of'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E2278F40>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 1.0, 'ORGANISATION': 0.0, 'ORGAN



black-gloved wallaby
The western brush (Macropus irma), also known as the black-gloved wallaby, is a wallaby found in the southwest coastal region of Western Australia. The wallaby's main threat is predation by the introduced red fox
Text: black-gloved wallaby in the context of this sentence 'The western brush
(Macropus irma), also known as the black-gloved wallaby, is a wallaby found in
the southwest coastal region of Western Australia. The wallaby's main threat is
predation by the introduced red fox'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E21FDB20>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
1.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
wallaby
The western brush (Macropus irma), also known as the black-gloved wallaby, is a wallaby found in the so



motion
Rolling is a type of motion that combines rotation (commonly, of an axially symmetric object) and translation of that object with respect to a
Text: motion in the context of this sentence 'Rolling is a type of motion that
combines rotation (commonly, of an axially symmetric object) and translation of
that object with respect to a'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E21FD120>, {'ABSTRACT': 0.0, 'ACTIVITY': 1.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 1.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
rotation
Rolling is a type of motion that combines rotation (commonly, of an axially symmetric object) and translation of that object with respect to a
Text: rotation in the context of this sentence 'Rolling is a type of motion that
combines rotation (commonly, of an axially sym



Patristic scholar
Cotelier or Cotelerius was a Patristic scholar and Catholic theologian.
Text: Patristic scholar in the context of this sentence 'Cotelier or Cotelerius
was a Patristic scholar and Catholic theologian.'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E401A980>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 1.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 1.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Catholic theologian
Cotelier or Cotelerius was a Patristic scholar and Catholic theologian.
Text: Catholic theologian in the context of this sentence 'Cotelier or
Cotelerius was a Patristic scholar and Catholic theologian.'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E4019B20>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'L



photography
Color photography is photography that uses media capable of reproducing colors. By contrast, black-and-white (monochrome) photography records
Text: photography in the context of this sentence 'Color photography is
photography that uses media capable of reproducing colors. By contrast,
black-and-white (monochrome) photography records'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E21FFD80>, {'ABSTRACT': 0.0, 'ACTIVITY': 1.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
media
Color photography is photography that uses media capable of reproducing colors. By contrast, black-and-white (monochrome) photography records
Text: media in the context of this sentence 'Color photography is photography
that uses media capable of reproducing colors



year
Year 183 BC was a year of the pre-Julian Roman calendar. At the time it was known as
Text: year in the context of this sentence 'Year 183 BC was a year of the
pre-Julian Roman calendar. At the time it was known as'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3D2F2E0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 1.0, 'TOPIC': 0.0})
pre-Julian Roman calendar
Year 183 BC was a year of the pre-Julian Roman calendar. At the time it was known as the Year
Text: pre-Julian Roman calendar in the context of this sentence 'Year 183 BC was
a year of the pre-Julian Roman calendar. At the time it was known as the Year'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E4019EE0>, {'ABSTRACT': 0.0, 'ACT



quantitative capacity
Alkalinity is the quantitative capacity of an aqueous
Text: quantitative capacity in the context of this sentence 'Alkalinity is the
quantitative capacity of an aqueous'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E21FEB60>, {'ABSTRACT': 1.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
aqueous solution
capacity of an aqueous solution to neutralize an
Text: aqueous solution in the context of this sentence 'capacity of an aqueous
solution to neutralize an'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E21FFEC0>, {'ABSTRACT': 1.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICA



Italian film actor
Tiberio Murgia was an Italian film actor. He appeared in more
Text: Italian film actor in the context of this sentence 'Tiberio Murgia was an
Italian film actor. He appeared in more'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E186BE20>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 1.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 1.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Processed and saved: results/doc_data_75.json




desert vegetation
is a type of desert vegetation, and an ecoregion characterized
Text: desert vegetation in the context of this sentence 'is a type of desert
vegetation, and an ecoregion characterized'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E4019B20>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
1.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
ecoregion
desert vegetation, and an ecoregion characterized by this vegetation
Text: ecoregion in the context of this sentence 'desert vegetation, and an
ecoregion characterized by this vegetation'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E21FEF20>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANIS



Prince of Savoy
Savoy was a Prince of Savoy and a 17th-century
Text: Prince of Savoy in the context of this sentence 'Savoy was a Prince of
Savoy and a 17th-century'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E401A700>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 1.0, 'PERSONIFICATION': 1.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
17th-century cardinal
of Savoy and a 17th-century cardinal.
Text: 17th-century cardinal in the context of this sentence 'of Savoy and a
17th-century cardinal.'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E186B060>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 1.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'R



field of logic
Deontic logic is the field of logic that is concerned with
Text: field of logic in the context of this sentence 'Deontic logic is the field
of logic that is concerned with'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E4019B20>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 1.0})
obligation
that is concerned with obligation, permission, and related concepts.
Text: obligation in the context of this sentence 'that is concerned with
obligation, permission, and related concepts.'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E88D60>, {'ABSTRACT': 1.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PE



family
and Kurdish origin, after the 1938 Dersim Rebellion, Süreya and his family were displaced to Bilecik, a city in the Marmara Region of
Text: family in the context of this sentence 'and Kurdish origin, after the 1938
Dersim Rebellion, Süreya and his family were displaced to Bilecik, a city in the
Marmara Region of'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3D2DD00>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 1.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 1.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
pen name
Region of Turkey. This had a significant effect on his poems Cemâl Süreya, pen name of Cemâlettin Süreyya Seber, poet and writer.
Text: pen name in the context of this sentence 'Region of Turkey. This had a
significant effect on his poems Cemâl Süreya, pen name of Cemâlettin Süreyya
Sebe



science fiction anime
Flash is a humorous science fiction anime based on the light
Text: science fiction anime in the context of this sentence 'Flash is a humorous
science fiction anime based on the light'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E186B100>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 1.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 1.0})
light novel series
anime based on the light novel series by Haruka Takachiho.
Text: light novel series in the context of this sentence 'anime based on the
light novel series by Haruka Takachiho.'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E4019800>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 1.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGAN



green electoral coalition
Europe Ecology is a green electoral coalition of political parties in France created for the 2009 European elections composed of
Text: green electoral coalition in the context of this sentence 'Europe Ecology
is a green electoral coalition of political parties in France created for the
2009 European elections composed of'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E401A700>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 1.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
political parties
Europe Ecology is a green electoral coalition of political parties in France created for the 2009 European elections composed of
Text: political parties in the context of this sentence 'Europe Ecology is a
green electoral coalition of political partie



Arabic al-jebr
Algebra (from Arabic al-jebr meaning "reunion of broken parts") is the
Text: Arabic al-jebr in the context of this sentence 'Algebra (from Arabic
al-jebr meaning "reunion of broken parts") is the'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3D2E980>, {'ABSTRACT': 1.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
mathematical symbols
parts") is the study of mathematical symbols and the rules for manipulating
Text: mathematical symbols in the context of this sentence 'parts") is the study
of mathematical symbols and the rules for manipulating'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E186AE80>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 1.0, 



public linear park
Mauerpark is a public linear park in Berlin's Prenzlauer Berg district. The name translates to "Wall Park", referring to its status as a former part
Text: public linear park in the context of this sentence 'Mauerpark is a public
linear park in Berlin's Prenzlauer Berg district. The name translates to "Wall
Park", referring to its status as a former part'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E88360>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 1.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 1.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Wall Park
linear park in Berlin's Prenzlauer Berg district. The name translates to "Wall Park", referring to its status as a former part of the Berlin Wall and
Text: Wall Park in the context of this sentence 'linear park in Berlin's
Prenzlauer



street
Via Garibaldi is street of the historical centre of Genoa, northern Italy, well known for its ancient palaces. The street dates back to the year 1550. Originally named as Strada Maggiore, then Strada Nuova, only in 1882 its name was dedicated to Giuseppe Garibaldi.
Text: street in the context of this sentence 'Via Garibaldi is street of the
historical centre of Genoa, northern Italy, well known for its ancient palaces.
The street dates back to the year 1550. Originally named as Strada Maggiore,
then Strada Nuova, only in 1882 its name was dedicated to Giuseppe Garibaldi.'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3D2EE80>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 1.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
ancient palaces
Via Garibaldi is 



matrimony
Marriage, also called matrimony or wedlock, is a socially recognized union between spouses that establishes rights
Text: matrimony in the context of this sentence 'Marriage, also called matrimony
or wedlock, is a socially recognized union between spouses that establishes
rights'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E89BC0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 1.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
wedlock
Marriage, also called matrimony or wedlock, is a socially recognized union between spouses that establishes rights
Text: wedlock in the context of this sentence 'Marriage, also called matrimony
or wedlock, is a socially recognized union between spouses that establishes
rights'
Categories: defaultdict(<function reduce_sh



constellation
Argo Navis was a large constellation in the southern sky that has since been divided into three constellations. It represented the Argo,
Text: constellation in the context of this sentence 'Argo Navis was a large
constellation in the southern sky that has since been divided into three
constellations. It represented the Argo,'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3D2F560>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
1.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Greek mythology
the Argo, the ship used by Jason and the Argonauts in Greek mythology. The abbreviation was "Arg" and the genitive was "Argus Navis".
Text: Greek mythology in the context of this sentence 'the Argo, the ship used
by Jason and the Argonauts in Greek mythology. T



computers
AmigaOne is a series of computers intended to run AmigaOS 4 developed by Hyperion Entertainment. Earlier models were produced by Eyetech, and were based on the Teron series
Text: computers in the context of this sentence 'AmigaOne is a series of
computers intended to run AmigaOS 4 developed by Hyperion Entertainment. Earlier
models were produced by Eyetech, and were based on the Teron series'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E21FEB60>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 1.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
PowerPC POP mainboards
Earlier models were produced by Eyetech, and were based on the Teron series of PowerPC POP mainboards. In September 2009, Hyperion Entertainment secured exclusive licence for AmigaOne name a



graphic novel
The 'Jew of New York' is a graphic novel by Ben Katchor, inspired by Mordecai Manuel Noah's attempt to establish a Jewish homeland in
Text: graphic novel in the context of this sentence 'The 'Jew of New York' is a
graphic novel by Ben Katchor, inspired by Mordecai Manuel Noah's attempt to
establish a Jewish homeland in'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E21FE200>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 1.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Jewish homeland
by Ben Katchor, inspired by Mordecai Manuel Noah's attempt to establish a Jewish homeland in Grand Island, New York in the 1820s. It was originally serialized
Text: Jewish homeland in the context of this sentence 'by Ben Katchor, inspired
by Mordecai Manuel Noah's at



film
Round Midnight is an American-French musical drama film directed by Bertrand Tavernier and written by Tavernier and David Rayfiel.
Text: film in the context of this sentence 'Round Midnight is an American-French
musical drama film directed by Bertrand Tavernier and written by Tavernier and
David Rayfiel.'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E38B80>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 1.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Tavernier
is an American-French musical drama film directed by Bertrand Tavernier and written by Tavernier and David Rayfiel. It stars
Text: Tavernier in the context of this sentence 'is an American-French musical
drama film directed by Bertrand Tavernier and written by Tavernier and David
Rayfiel. It star



railway station
Hannover Hauptbahnhof is the main railway station for the city of Hanover in Lower Saxony, Germany. The station has six platforms with twelve platform tracks and
Text: railway station in the context of this sentence 'Hannover Hauptbahnhof is
the main railway station for the city of Hanover in Lower Saxony, Germany. The
station has six platforms with twelve platform tracks and'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E376C0E0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 1.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
city
Hannover Hauptbahnhof is the main railway station for the city of Hanover in Lower Saxony, Germany. The station has six platforms with twelve platform tracks and
Text: city in the context of this sentence 'Hannover Hau



federal court
States Court of Appeals for the Tenth Circuit (in case citations, 10th Cir.) is a federal court with appellate jurisdiction over the district courts in the following districts: District of Colorado, District
Text: federal court in the context of this sentence 'States Court of Appeals for
the Tenth Circuit (in case citations, 10th Cir.) is a federal court with
appellate jurisdiction over the district courts in the following districts:
District of Colorado, District'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E39800>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
appellate jurisdiction
Appeals for the Tenth Circuit (in case citations, 10th Cir.) is a federal court with appellate jurisdiction over



Latin tag
Natura naturans is a Latin tag coined during the Middle Ages, meaning "Nature naturing", or more loosely, "nature doing what nature does". The Latin, naturans, is
Text: Latin tag in the context of this sentence 'Natura naturans is a Latin tag
coined during the Middle Ages, meaning "Nature naturing", or more loosely,
"nature doing what nature does". The Latin, naturans, is'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E880E0>, {'ABSTRACT': 1.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Nature naturing
Natura naturans is a Latin tag coined during the Middle Ages, meaning "Nature naturing", or more loosely, "nature doing what nature does". The Latin, naturans, is the present
Text: Nature naturing in the context of th



town
Laudio/Llodio is a town and municipality located in the province of Álava, in the Basque Country,
Text: town in the context of this sentence 'Laudio/Llodio is a town and
municipality located in the province of Álava, in the Basque Country,'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5DF94E200>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 1.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
municipality
Laudio/Llodio is a town and municipality located in the province of Álava, in the Basque Country,
Text: municipality in the context of this sentence 'Laudio/Llodio is a town and
municipality located in the province of Álava, in the Basque Country,'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E8B600>, {'ABSTRACT': 0



Air battle for England
The Battle of Britain (German: Luftschlacht um England, literally "Air battle for England") is the name given to the air campaign waged by the
Text: Air battle for England in the context of this sentence 'The Battle of
Britain (German: Luftschlacht um England, literally "Air battle for England") is
the name given to the air campaign waged by the'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5BC507100>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 1.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Processed and saved: results/doc_data_94.json




side scrolling shooter
Otomedius is a side scrolling shooter by Konami which featured
Text: side scrolling shooter in the context of this sentence 'Otomedius is a
side scrolling shooter by Konami which featured'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E376F4C0>, {'ABSTRACT': 0.0, 'ACTIVITY': 1.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
personification
by Konami which featured personification of space fighters from
Text: personification in the context of this sentence 'by Konami which featured
personification of space fighters from'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E3B600>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0



political event
Hopetoun Blunder was a political event immediately prior to the
Text: political event in the context of this sentence 'Hopetoun Blunder was a
political event immediately prior to the'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E21FEE80>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 1.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
Federation
immediately prior to the Federation of the British colonies
Text: Federation in the context of this sentence 'immediately prior to the
Federation of the British colonies'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E21FCFE0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 1.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON':



album
Music from Macbeth is a 1972 album by progressive-rock band Third Ear Band. It consists of
Text: album in the context of this sentence 'Music from Macbeth is a 1972 album
by progressive-rock band Third Ear Band. It consists of'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E21FEDE0>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 1.0, 'LOCATION': 0.0, 'ORGANISATION': 0.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 1.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
progressive-rock band
Music from Macbeth is a 1972 album by progressive-rock band Third Ear Band. It consists of the soundtrack
Text: progressive-rock band in the context of this sentence 'Music from Macbeth
is a 1972 album by progressive-rock band Third Ear Band. It consists of the
soundtrack'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E21



political party
Party is a social-democratic political party in France, and the
Text: political party in the context of this sentence 'Party is a
social-democratic political party in France, and the'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5E3E88400>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 1.0, 'ORGANISM':
0.0, 'PERSON': 0.0, 'PERSONIFICATION': 0.0, 'PHYSICAL_OBJECT': 0.0, 'ROLE': 0.0,
'SITUATION': 0.0, 'SOCIAL_OBJECT': 0.0, 'TIME_INTERVAL': 0.0, 'TOPIC': 0.0})
French centre-left
France, and the largest party of the French centre-left.
Text: French centre-left in the context of this sentence 'France, and the
largest party of the French centre-left.'
Categories: defaultdict(<function reduce_shards_to_doc.<locals>.<lambda> at
0x000001F5DF94E200>, {'ABSTRACT': 0.0, 'ACTIVITY': 0.0, 'EVENT': 0.0,
'INFORMATION_ENTITY': 0.0, 'LOCATION': 0.0, 'ORGANISATION': 1.0, 'ORGANISM':
0.0,