In [None]:
import sys
import os
from pathlib import Path
# Make the directory one level above the current working directory importable.
# os.path.abspath resolves '../' against the working directory at the time this
# cell runs, so adjust the path if the notebook is moved or launched elsewhere.
# Presumably this is what lets the `config` and `dataManager` imports in the
# next cell resolve -- confirm against the project layout.
sys.path.append(os.path.abspath('../'))

In [None]:
from config.config_managers import DashboardConfigManager
from dataManager import DataManager



In [None]:
# Absolute path to the dashboard YAML configuration file.
# NOTE(review): this is a user-specific absolute path; it has to be changed
# before the notebook can run on another machine.
CONFIG_PATH = Path("/Users/ay227/Desktop/Final-Year/Thesis-Experiments/Online-Dashboard-Phase/dashboard-config.yaml")
# Build the configuration manager from that file.
config_manager = DashboardConfigManager(CONFIG_PATH)
# Keep a handle on the manager's development configuration.
dev_config = config_manager.development_config    

In [None]:

from dash import Dash, dcc, html, Output, Input, State
# Create the Dash application object used to obtain the Flask server below.
app = Dash(__name__, suppress_callback_exceptions=True)

app_config = config_manager.app_config
server = app.server  # Flask server instance for caching
# Module-level placeholder; the visible cells never read this name (they use
# data_manager.variants_data instead).
variants_data = None

# The data manager receives both the configuration manager and the server.
data_manager = DataManager(config_manager, server)

In [None]:
data_manager.load_data()

In [None]:
# Analysis table of the 'ANERCorp_CamelLab_arabertv02' variant
# (presumably one row per token -- confirm against DataManager).
analysis_data = data_manager.variants_data['ANERCorp_CamelLab_arabertv02'].analysis_data
# Rows whose 'Error Type' is anything other than 'No Errors'.
errors = analysis_data[analysis_data['Error Type'] != 'No Errors']

In [None]:
import pandas as pd
from collections import defaultdict, Counter
from seqeval.scheme import auto_detect, Entities
from seqeval.metrics.sequence_labeling import get_entities

class EntityAnnotator:
    """Project entity types back onto the token rows of an analysis table.

    ``y_true`` and ``y_pred`` are lists holding one list of tag strings per
    sentence. The values of the table's ``'Sentence Ids'`` column are used
    directly as positional indices into those lists, so sentence ids must be
    valid list positions (0 .. number of sentences - 1).
    """

    # Label values marking rows that receive no annotation and keep their
    # original value in the produced columns.
    SPECIAL_LABELS = ('[CLS]', '[SEP]', 'IGNORED')

    def __init__(self, y_true, y_pred):
        """Store the label sequences and auto-detect their tagging scheme.

        Args:
            y_true: Gold tags, one list of tag strings per sentence.
            y_pred: Predicted tags with the same layout as ``y_true``.
        """
        self.y_true = y_true
        self.y_pred = y_pred
        self.scheme = auto_detect(self.y_true, False)

    def extract_entities(self):
        """Build the strict (``Entities``) and non-strict (``get_entities``) views.

        The results are stored on the instance as ``entities_strict_true``,
        ``entities_strict_pred``, ``entities_true`` and ``entities_pred``.
        """
        self.entities_strict_true = Entities(self.y_true, self.scheme, False)
        self.entities_strict_pred = Entities(self.y_pred, self.scheme, False)
        self.entities_true = get_entities(self.y_true)
        self.entities_pred = get_entities(self.y_pred)

    def process_strict_entities(self, y_true, entities_true, sen_id):
        """Return one entity type (or ``'O'``) per token of sentence ``sen_id``.

        Args:
            y_true: Per-sentence tag lists; only the length of
                ``y_true[sen_id]`` is used.
            entities_true: Per-sentence lists of objects whose ``to_tuple()``
                returns ``(sentence_id, type, start, end)`` with an exclusive
                ``end``.
            sen_id: Position of the sentence in both sequences.

        Returns:
            A list as long as the sentence in which every position covered by
            an entity holds that entity's type and every other position ``'O'``.
        """
        results = ['O'] * len(y_true[sen_id])
        # One pass over the entities is enough: each entity fully describes the
        # positions it covers, so no per-token scan is needed.
        for entity in entities_true[sen_id]:
            _, tag, start, end = entity.to_tuple()
            if end > start:
                for position in range(start, end):
                    results[position] = tag
            elif end == start:
                # Zero-length span: mark the single position it points at.
                results[start] = tag
        return results

    def process_non_strict_entities(self, y_true, sen_id):
        """Return one entity type (or ``'O'``) per token using ``get_entities``.

        ``get_entities`` yields ``(type, start, end)`` tuples; every position
        from ``start`` to ``end`` inclusive is labelled with the type.
        """
        results = ['O'] * len(y_true[sen_id])
        for tag, start, end in get_entities(y_true[sen_id]):
            # range(start, end + 1) also covers the single-token case start == end.
            for position in range(start, end + 1):
                results[position] = tag
        return results

    def process_sentences(self, analysis_data, y_data, entities, label_column, strict=False):
        """Build a column of entity types aligned with ``analysis_data``.

        Args:
            analysis_data: Table with a ``'Sentence Ids'`` column and
                ``label_column``.
            y_data: Per-sentence tag lists indexed by sentence id.
            entities: Per-sentence strict entity lists; only read when
                ``strict`` is true.
            label_column: Column whose special-marker rows are kept as-is.
            strict: Use the strict entity view instead of ``get_entities``.

        Returns:
            A Series indexed like the grouped rows of ``analysis_data``.

        Raises:
            ValueError: If a sentence has a different number of annotatable
                rows than labels in ``y_data``.
        """
        special_labels = list(self.SPECIAL_LABELS)
        annotated = []
        for sentence_id, sentence_df in analysis_data.groupby('Sentence Ids'):
            if strict:
                results = self.process_strict_entities(y_data, entities, sentence_id)
            else:
                results = self.process_non_strict_entities(y_data, sentence_id)
            labels = sentence_df[label_column]
            # True for the rows that receive an annotation, i.e. everything
            # except the special marker rows.
            is_token = ~labels.isin(special_labels)
            token_rows = int(is_token.sum())
            if token_rows != len(results):
                raise ValueError(
                    f"Sentence {sentence_id!r}: {token_rows} annotatable rows "
                    f"but {len(results)} labels"
                )
            new_series = labels.copy()
            new_series.loc[is_token] = results
            annotated.append(new_series)
        if not annotated:
            # pd.concat([]) raises; an empty table simply has nothing to annotate.
            return pd.Series(index=analysis_data.index, dtype=object)
        return pd.concat(annotated)

    def annotate_entity_info(self, analysis_data):
        """Add the four entity columns to ``analysis_data`` and return it.

        The table is modified in place: ``'Strict True Entities'``,
        ``'Strict Pred Entities'``, ``'True Entities'`` and ``'Pred Entities'``
        are assigned on the object that was passed in.
        """
        self.extract_entities()  # Ensure entities are extracted before processing
        analysis_data['Strict True Entities'] = self.process_sentences(analysis_data, self.y_true, self.entities_strict_true.entities, 'True Labels', True)
        analysis_data['Strict Pred Entities'] = self.process_sentences(analysis_data, self.y_pred, self.entities_strict_pred.entities, 'Pred Labels', True)
        analysis_data['True Entities'] = self.process_sentences(analysis_data, self.y_true, self.entities_true, 'True Labels')
        analysis_data['Pred Entities'] = self.process_sentences(analysis_data, self.y_pred, self.entities_pred, 'Pred Labels')

        return analysis_data


In [None]:
import json
from collections import defaultdict, Counter
from seqeval.scheme import auto_detect
from seqeval.metrics.sequence_labeling import get_entities
from seqeval.scheme import Entities

# Evaluation output written by the fine-tuning run.
# NOTE(review): user-specific absolute path; adjust before running elsewhere.
file_name = '/Users/ay227/Library/CloudStorage/GoogleDrive-ahmed.younes.sam@gmail.com/My Drive/Final Year Experiments/Thesis-Experiments/Experiments/BaseLineExperiment/ANERCorp_CamelLab_arabertv02/fine_tuning/evaluation_metrics.json'
# Read as UTF-8 explicitly so the result does not depend on the platform's
# default text encoding.
with open(file_name, 'r', encoding='utf-8') as file:
    entity_outputs = json.load(file)  # Use json.load() to read file, not json.loads()

def extract_entities(y_data, scheme=None):
    """Build seqeval ``Entities`` for ``y_data``.

    Args:
        y_data: Per-sentence lists of tag strings.
        scheme: Tagging scheme class to parse with. When omitted it is
            detected from ``y_data`` with ``auto_detect`` so the function can
            also be called with the labels alone.

    Returns:
        The ``Entities`` object built with ``suffix=False``.
    """
    if scheme is None:
        scheme = auto_detect(y_data, False)
    return Entities(y_data, scheme, False)
# Drop the rows whose 'Labels' value is -100 and work on an independent copy.
core_data = analysis_data[analysis_data['Labels']!= -100].copy()


# y_true = entity_outputs['entity_outputs']['y_true']
# One list of gold labels per sentence, in the order groupby yields the
# 'Sentence Ids' groups.
y_true = core_data.groupby('Sentence Ids')['True Labels'].apply(list).tolist()
# y_pred = entity_outputs['entity_outputs']['y_pred']
# Same layout for the predicted labels.
y_pred = core_data.groupby('Sentence Ids')['Pred Labels'].apply(list).tolist()


In [None]:
annotator = EntityAnnotator(y_true, y_pred)
# annotate_entity_info assigns the new columns on the frame it is given and
# returns that same frame, so analysis_data itself gains the columns too.
updated_analysis_data = annotator.annotate_entity_info(analysis_data)


In [None]:
updated_analysis_data[['True Entities', 'TR Entity']]

In [None]:
(updated_analysis_data['True Entities'] != updated_analysis_data['TR Entity']).value_counts()

In [None]:
updated_analysis_data[updated_analysis_data['True Entities'] != updated_analysis_data['Strict True Entities']][['Sentence Ids', 'True Entities', 'TR Entity']]

In [None]:
# Sentence to inspect; used as a positional index into y_true.
sen_id = 15
# Entities of that sentence as returned by get_entities; elsewhere in this
# notebook each result is unpacked as (type, start, end).
get_entities(y_true[sen_id])

In [None]:
# Rows of the annotated table that belong to the sentence under inspection.
ts = updated_analysis_data[updated_analysis_data['Sentence Ids'] == sen_id]

# Hide the '[CLS]' / '[SEP]' / 'IGNORED' rows and show the four entity columns
# next to the raw predicted and gold labels.
ts[~ts['True Labels'].isin(['[CLS]', '[SEP]', 'IGNORED'])][['Sentence Ids', 'True Entities', 'Strict True Entities', 'Pred Entities', 'Strict Pred Entities','Pred Labels', 'True Labels']].reset_index()

In [None]:
# Detect the tagging scheme once and reuse it for both label sets.
scheme = auto_detect(y_true, False)
# Pass the scheme explicitly: calling extract_entities with the labels alone
# does not match its two-argument definition and raises TypeError.
entities_true = extract_entities(y_true, scheme)
entities_pred = extract_entities(y_pred, scheme)
# Non-strict entities over all sentences of y_true.
true_entities = get_entities(y_true)

In [None]:
def process_strict_entities(y_true, entities_true, sen_id):
    """
    Return one entity type (or 'O') per token of the sentence at ``sen_id``.

    Args:
    y_true: Indexable collection of per-sentence label lists; only the length
        of ``y_true[sen_id]`` is used.
    entities_true: Indexable collection of per-sentence entity lists. Each
        entity must provide ``to_tuple()`` returning
        ``(sentence_id, type, start, end)`` with an exclusive ``end``.
    sen_id (int): Index of the sentence to process in both collections.

    Returns:
    list: One entry per token; positions covered by an entity hold that
        entity's type, all other positions hold 'O'.
    """
    # Start with every position marked as outside any entity.
    results = ['O'] * len(y_true[sen_id])

    # A single pass over the entities is sufficient: every entity carries its
    # own start and end, so there is no need to re-scan them per token index.
    for entity in entities_true[sen_id]:
        _, tag, start, end = entity.to_tuple()
        if end > start:
            for position in range(start, end):
                results[position] = tag
        elif end == start:
            # Zero-length span: mark the single position it points at.
            results[start] = tag

    return results


In [None]:
analysis_data.columns

In [None]:
# Script version of the strict annotation pass: one annotated Series per sentence.
entity_annotations = []
for sentence_id, sentence_df in analysis_data.groupby('Sentence Ids'):
    # sentence_id doubles as the positional index into y_true and
    # entities_true.entities.
    results = process_strict_entities(y_true, entities_true.entities, sentence_id)
    original_series = sentence_df['True Labels']
    # NOTE: despite its name, this mask is True for the rows that are NOT
    # '[CLS]' / '[SEP]' / 'IGNORED', i.e. the rows that get overwritten below.
    is_metadata = original_series.apply(lambda x: x not in ['[CLS]', '[SEP]', 'IGNORED'])
    new_series = original_series.copy()
    # results is expected to hold exactly one value per selected row.
    new_series.loc[is_metadata] = results
    entity_annotations.append(new_series)


In [None]:
pd.concat(entity_annotations)

In [None]:
# Sentence to inspect in this and the following cells.
sen_id = 18
# Rows whose 'TR Entity' value is not one of the special markers.
data = analysis_data[~analysis_data['TR Entity'].isin(['[CLS]', '[SEP]', 'IGNORED'])]
# Restrict to the chosen sentence; reset_index() moves the old index into a column.
sen_data = data[data['Sentence Ids'] == sen_id].reset_index()
sen_data[['Sentence Ids', 'True Labels', 'TR Entity']]

In [None]:
lresults

In [None]:
# Strict entity types for the sentence selected by sen_id.
max_len = len(y_true[sen_id])

# Initialize results with 'O' for all expected indices
results = ['O'] * max_len

# One pass over the sentence's entities is enough; each entity carries its own
# start and (exclusive) end.
for entity in entities_true.entities[sen_id]:
    # The first tuple element is the entity's sentence id. It is discarded so
    # the notebook-wide sen_id is not silently rebound by this loop.
    _, t, s, e = entity.to_tuple()
    if (e-s) > 0:
        for i in range(s, e):
            results[i] = t
    elif (e-s) == 0:
        # Zero-length span: mark the single position it points at.
        results[s] = t


In [None]:
# Non-strict entity types for the sentence selected by sen_id.
max_len = len(y_true[sen_id])

# Initialize results with 'O' for all expected indices
lresults = ['O'] * max_len

# get_entities yields (type, start, end) with an inclusive end. A single pass
# is enough; wrapping this in a per-index loop only repeats the same writes.
for entity in get_entities(y_true[sen_id]):
    t, s, e = entity
    # range(s, e + 1) also covers single-token entities where s == e.
    for i in range(s, e + 1):
        lresults[i] = t
        


In [None]:
lresults

In [None]:
# 'True Labels' of the sentence currently selected by sen_id.
original_series = analysis_data[analysis_data["Sentence Ids"] == sen_id]['True Labels']
# NOTE: despite the name, True marks the rows that are NOT special markers.
is_metadata = original_series.apply(lambda x: x not in ['[CLS]', '[SEP]', 'IGNORED'])


In [None]:
# Work on a copy so original_series stays available for comparison.
new_series = original_series.copy()
# Index labels of the rows selected by the mask.
non_metadata_indices = original_series.index[is_metadata]
# Overwrite those rows with `results`, i.e. whatever list the most recently
# run cell assigned to that name.
new_series.loc[non_metadata_indices] = results


In [None]:
original_series

In [None]:
sen_data['TR Entity']

In [None]:
new_series

In [None]:
get_entities(y_true[sen_id])

In [None]:
entities_true.entities[sen_id]


In [None]:
# Print every position of lresults next to the value stored there.
for position, value in enumerate(lresults):
    print(position, value)

In [None]:
# Strict entity types for the first sentence (index 0).
max_len = len(y_true[0])

# Initialize results with 'O' for all expected indices
results = ['O'] * max_len

# One pass over the sentence's entities is enough; each entity carries its own
# start and (exclusive) end.
for entity in entities_true.entities[0]:
    # The first tuple element is the entity's sentence id. It is discarded so
    # the notebook-wide sen_id (set in an earlier cell) is not overwritten.
    _, t, s, e = entity.to_tuple()
    if (e-s) > 0:
        for i in range(s, e):
            results[i] = t
    elif (e-s) == 0:
        # Zero-length span: mark the single position it points at.
        results[s] = t


In [None]:
results

In [None]:
errors[errors['Error Type'] == 'Type'][['True Labels', 'Pred Labels', 'Error Type']].sample(60)

In [None]:
errors[['True Labels', 'Pred Labels', 'Error Type']].sample(60)

In [None]:
def annotate_error(true_label, pred_label):
    """Classify the disagreement between a gold tag and a predicted tag.

    Args:
        true_label: Gold tag, e.g. ``'B-PER'`` or ``'O'``.
        pred_label: Predicted tag in the same format.

    Returns:
        ``"No Errors"`` when the tags are equal, ``"Chunk"`` when an entity is
        predicted where the gold tag is ``'O'`` or when only the position
        prefix differs, ``"Exclusion"`` when the gold entity is predicted as
        ``'O'``, ``"Type"`` when only the entity type differs, and
        ``"Type and Chunk"`` when both the type and the first character of
        the tags differ.
    """

    def _entity_type(label):
        # Everything after the FIRST hyphen is the entity type, so hyphenated
        # types (e.g. 'B-GPE-LOC' -> 'GPE-LOC') are kept whole. Labels without
        # a hyphen are their own type.
        _, separator, remainder = label.partition("-")
        return remainder if separator else label

    # If both are the same, it's correct (no error)
    if true_label == pred_label:
        return "No Errors"

    # Handle cases where one or both labels are 'O'
    if true_label == 'O' and pred_label != 'O':
        return "Chunk"  # False entity predicted
    if true_label != 'O' and pred_label == 'O':
        return "Exclusion"  # Missed entity and chunk boundary

    # If entity types are different (e.g., LOC vs. PER)
    if _entity_type(true_label) != _entity_type(pred_label):
        # The first character is the position prefix (B / I); when it differs
        # as well, both the type and the chunk boundary are wrong.
        return "Type and Chunk" if true_label[0] != pred_label[0] else "Type"

    # If entity types are the same but position tags (B- vs I-) are wrong
    return "Chunk"


In [None]:
import json

# Evaluation output written by the fine-tuning run.
# NOTE(review): user-specific absolute path; adjust before running elsewhere.
file_name = '/Users/ay227/Library/CloudStorage/GoogleDrive-ahmed.younes.sam@gmail.com/My Drive/Final Year Experiments/Thesis-Experiments/Experiments/BaseLineExperiment/ANERCorp_CamelLab_arabertv02/fine_tuning/evaluation_metrics.json'
# Read as UTF-8 explicitly so the result does not depend on the platform's
# default text encoding.
with open(file_name, 'r', encoding='utf-8') as file:
    entity_outputs = json.load(file)  # Use json.load() to read file, not json.loads()


In [None]:
# Take the label sequences stored in the evaluation file. This rebinds the
# notebook-wide y_true / y_pred names that earlier cells derived from
# analysis_data.
y_true = entity_outputs['entity_outputs']['y_true']
y_pred = entity_outputs['entity_outputs']['y_pred']

In [None]:
# f1_score was listed twice in the original import; each name is imported once.
from seqeval.metrics import classification_report, f1_score
from seqeval.scheme import IOB2

# Per-type report in strict mode, printed with four decimals.
print(classification_report(y_true, y_pred, mode='strict', digits=4))

# Micro-averaged strict F1 with the IOB2 scheme; shown as the cell's output.
f1_score(y_true, y_pred, mode='strict', scheme=IOB2,  average='micro')


In [None]:
from seqeval.metrics import f1_score, classification_report

# Per-type report with mode=None, for comparison with the mode='strict' report
# printed by the previous cell.
print(classification_report(y_true, y_pred, mode=None, digits=4))


In [None]:
from seqeval.metrics import f1_score

# Toy example comparing the F1 computed with mode=None against mode='strict'.
# NOTE: this rebinds the notebook-wide y_true / y_pred, so cells run afterwards
# see these two short sentences instead of the real data.

# Ground truth (true labels)
y_true = [
    ['O', 'B-PER', 'I-PER', 'O', 'B-LOC', 'O'],
    ['O', 'B-ORG', 'I-ORG', 'O']
]

# Predicted labels (with minor errors)
y_pred = [
    ['O', 'B-PER', 'O', 'O', 'B-LOC', 'O'],  # Misses I-PER
    ['O', 'B-ORG', 'O', 'O']                # Misses I-ORG
]

# Default mode
f1_default = f1_score(y_true, y_pred, average='micro', mode=None)

# Strict mode
f1_strict = f1_score(y_true, y_pred, average='micro', mode='strict')

print("Default Mode F1 Score:", f1_default)
print("Strict Mode F1 Score:", f1_strict)


In [None]:
from seqeval.metrics import f1_score, classification_report
from seqeval.scheme import IOB1
# Define data
# NOTE: this rebinds the notebook-wide y_true / y_pred to a one-sentence toy
# example in which the second token gets the wrong entity type.
y_true = [['B-PER', 'I-PER', 'O', 'B-ORG', 'I-ORG', 'O']]
y_pred = [['B-PER', 'I-ORG', 'O', 'B-ORG', 'I-ORG', 'O']]

# Strict mode, no scheme
# f1_no_scheme_strict = f1_score(y_true, y_pred, average='micro', mode='strict', scheme=None)
# zero_division is spelled in lower case: 'warn' is the documented value,
# while 'Warn' is not one of the accepted settings.
f1_no_scheme_strict = classification_report(y_true, y_pred, mode='strict', zero_division='warn')
# Same data evaluated with mode=None.
no_strict = classification_report(y_true, y_pred, mode=None)

# Both variables hold full text reports, not single F1 values.
print("Strict Mode with No Scheme F1 Score:", f1_no_scheme_strict)

print("No Strict Mode with F1 Score:", no_strict)




In [None]:

# Non-strict entities for whatever y_true / y_pred currently hold.
entity_y_true = get_entities(y_true)
entity_y_pred = get_entities(y_pred)

# Example usage
# conf_matrix = calculate_confusion_matrix([e.to_tuple()[1:] for sen in entities_true.entities for e in sen], [e.to_tuple()[1:] for sen in entities_pred.entities for e in sen])



# NOTE: calculate_confusion_matrix, compute_false_negatives and
# compute_false_positives are defined in cells further down this notebook, so
# those cells must have been run before this one.
conf_matrix = calculate_confusion_matrix(entity_y_true, entity_y_pred)
print(conf_matrix)
fn_errors = compute_false_negatives(entity_y_true, entity_y_pred)
fp_errors = compute_false_positives(entity_y_true, entity_y_pred)

print("False Negatives:", dict(fn_errors))
print("False Positives:", dict(fp_errors))
# NOTE(review): the bare literal below has no effect other than becoming the
# cell's displayed value; it looks like a leftover.
1

In [None]:
entity_y_true

In [None]:
entity_y_pred

In [None]:


# Strict entities for whatever y_true / y_pred currently hold. `scheme` and
# flatten_strict_entities must already be bound by other cells.
entities_true = Entities(y_true, scheme, False)
entities_pred = Entities(y_pred, scheme, False)
true_entity_type = flatten_strict_entities(entities_true)
pred_entity_type = flatten_strict_entities(entities_pred)
# Example usage
# conf_matrix = calculate_confusion_matrix([e.to_tuple()[1:] for sen in entities_true.entities for e in sen], [e.to_tuple()[1:] for sen in entities_pred.entities for e in sen])



# Same analysis as for the non-strict entities, run on the flattened strict tuples.
conf_matrix = calculate_confusion_matrix(true_entity_type, pred_entity_type)
print(conf_matrix)
fn_errors = compute_false_negatives(true_entity_type, pred_entity_type)
fp_errors = compute_false_positives(true_entity_type, pred_entity_type)

print("False Negatives:", dict(fn_errors))
print("False Positives:", dict(fp_errors))
# NOTE(review): the bare literal below has no effect other than becoming the
# cell's displayed value; it looks like a leftover.
1

In [None]:
entities_true.entities

In [None]:
entities_pred.entities

In [None]:
print(no_strict)

In [None]:
from seqeval.scheme import IOB2, Tokens, auto_detect


In [None]:
scheme = auto_detect(y_true, False)

In [None]:
scheme

In [None]:
tokens = Tokens(['B-PER', 'I-PER', 'O', 'B-LOC'], scheme)

In [None]:
# outputs = {
#     'y_true': [['O', 'O', 'B-MISC', 'I-MISC', 'B-MISC', 'O', 'O'], ['B-PER', 'I-PER', 'O']],
# 	'y_pred': [['O', 'O', 'B-MISC', 'I-LOC', 'B-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']]

# }


In [None]:
from seqeval.metrics.sequence_labeling import get_entities
# Example usage
# Non-strict entities over the label sequences stored in the evaluation file.
entity_y_true = get_entities(entity_outputs['entity_outputs']['y_true'])
entity_y_pred = get_entities(entity_outputs['entity_outputs']['y_pred'])

from seqeval.scheme import Entities
# Strict entities for the same sequences; `scheme` must already be bound by a
# previously run cell.
entities_true = Entities(entity_outputs['entity_outputs']['y_true'], scheme, False)
entities_pred = Entities(entity_outputs['entity_outputs']['y_pred'], scheme, False)



In [None]:
from collections import defaultdict, Counter

def flatten_strict_entities(entities):
    """Flatten per-sentence strict entities into one list of tuples.

    ``entities.entities`` is iterated sentence by sentence. For every entity
    the result of ``to_tuple()`` is taken without its first element (the
    sentence id elsewhere in this notebook), so the remaining positions are
    no longer tied to a particular sentence.
    """
    flattened = []
    for sentence_entities in entities.entities:
        for entity in sentence_entities:
            flattened.append(entity.to_tuple()[1:])
    return flattened

def calculate_confusion_matrix(y_true, y_pred):
    """Count exact-match TP / FP / FN per entity type.

    Args:
        y_true: Iterable of gold ``(type, start, end)`` entities.
        y_pred: Iterable of predicted ``(type, start, end)`` entities.

    Returns:
        ``{type: {'TP': int, 'FP': int, 'FN': int}}`` with one entry for every
        type that occurs in either input. A prediction is a true positive only
        when type, start and end all equal those of a gold entity, and every
        prediction can match at most one gold entity.
    """
    # Treat both sides as multisets of entities: duplicates are matched one to
    # one, which is what scanning the predictions once per gold entity did, but
    # without the nested loop over both lists.
    true_counts = Counter(tuple(ent) for ent in y_true)
    pred_counts = Counter(tuple(ent) for ent in y_pred)

    types = {ent[0] for ent in true_counts} | {ent[0] for ent in pred_counts}
    confusion_matrix = {typ: {'TP': 0, 'FP': 0, 'FN': 0} for typ in types}

    for ent, n_true in true_counts.items():
        matched = min(n_true, pred_counts.get(ent, 0))
        confusion_matrix[ent[0]]['TP'] += matched
        # Gold entities left without an identical prediction.
        confusion_matrix[ent[0]]['FN'] += n_true - matched

    for ent, n_pred in pred_counts.items():
        # Predictions left without an identical gold entity.
        confusion_matrix[ent[0]]['FP'] += n_pred - min(n_pred, true_counts.get(ent, 0))

    return confusion_matrix

# Strict entities for the label sequences stored in the evaluation file.
entities_true = Entities(entity_outputs['entity_outputs']['y_true'], scheme, False)
entities_pred = Entities(entity_outputs['entity_outputs']['y_pred'], scheme, False)
# Flattened tuples with the leading element of to_tuple() removed.
true_entity_type = flatten_strict_entities(entities_true)
pred_entity_type = flatten_strict_entities(entities_pred)
# Example usage
# conf_matrix = calculate_confusion_matrix([e.to_tuple()[1:] for sen in entities_true.entities for e in sen], [e.to_tuple()[1:] for sen in entities_pred.entities for e in sen])




In [None]:
# Per-type TP / FP / FN counts computed from the get_entities() tuples.
conf_matrix = calculate_confusion_matrix(entity_y_true, entity_y_pred)
print(conf_matrix)

In [None]:
(627+151+751+338) / ((627+92)+(151+154)+(751+49)+(338+121))

In [None]:
1867 / 2201

In [None]:
# Per-type TP / FP / FN counts computed from the flattened strict tuples; this
# rebinds conf_matrix.
conf_matrix = calculate_confusion_matrix(true_entity_type, pred_entity_type)
print(conf_matrix)

In [None]:
# Sum each count (TP, FP, FN) over all entity types in conf_matrix.
total_metrics = {
    metric: sum(details[metric] for details in conf_matrix.values())
    for metric in ['TP', 'FP', 'FN']
}


In [None]:
total_metrics

In [None]:
1976 / (1976+189)

In [None]:
entity_false_positives

In [None]:

from collections import defaultdict, Counter

def compute_false_negatives(y_true, y_pred):
    """Tally, per gold type, what was predicted on each missed gold span.

    Both inputs hold ``(type, start, end)`` entities and are first indexed by
    ``(start, end)``; when several entities share a span, the last one wins.
    A gold entity counts as missed when no prediction has the same span or the
    prediction on that span has another type. The inner key is the predicted
    type, or ``'Boundary'`` when no prediction has the same span.

    Returns:
        ``defaultdict(Counter)`` mapping gold type -> Counter of inner keys.
    """
    gold_by_span = dict(((ent[1], ent[2]), ent[0]) for ent in y_true)
    pred_by_span = dict(((ent[1], ent[2]), ent[0]) for ent in y_pred)

    misses = defaultdict(Counter)
    for span, gold_type in gold_by_span.items():
        if span in pred_by_span and pred_by_span[span] == gold_type:
            # Same span and same type: found, nothing to record.
            continue
        misses[gold_type][pred_by_span.get(span, 'Boundary')] += 1

    return misses

def compute_false_positives(y_true, y_pred):
    """Tally, per predicted type, what the gold data holds on each wrong span.

    Both inputs hold ``(type, start, end)`` entities and are first indexed by
    ``(start, end)``; when several entities share a span, the last one wins.
    A prediction counts as wrong when no gold entity has the same span or the
    gold entity on that span has another type. The inner key is the gold
    type, or ``'Boundary'`` when no gold entity has the same span.

    Returns:
        ``defaultdict(Counter)`` mapping predicted type -> Counter of inner keys.
    """
    gold_by_span = dict(((ent[1], ent[2]), ent[0]) for ent in y_true)
    pred_by_span = dict(((ent[1], ent[2]), ent[0]) for ent in y_pred)

    spurious = defaultdict(Counter)
    for span, pred_type in pred_by_span.items():
        if span in gold_by_span and gold_by_span[span] == pred_type:
            # Same span and same type: correct, nothing to record.
            continue
        spurious[pred_type][gold_by_span.get(span, 'Boundary')] += 1

    return spurious

# Example usage
# Run both tallies on the get_entities() tuples.
fn_errors = compute_false_negatives(entity_y_true, entity_y_pred)
fp_errors = compute_false_positives(entity_y_true, entity_y_pred)

# dict(...) only converts the outer defaultdict; the values stay Counters.
print("False Negatives:", dict(fn_errors))
print("False Positives:", dict(fp_errors))
# NOTE(review): the bare literal below has no effect other than becoming the
# cell's displayed value; it looks like a leftover.
1

In [None]:
import plotly.express as px
import pandas as pd

# Your original data
# NOTE(review): the loop below unpacks every key as an (actual, predicted)
# pair and uses the value as a count. The calculate_confusion_matrix defined in
# this notebook returns {type: {'TP': ..., 'FP': ..., 'FN': ...}} instead, so
# confirm which object conf_matrix is expected to hold when this cell runs.
# This also rebinds the notebook-wide name `data`.
data = conf_matrix

# Prepare lists for DataFrame construction
actual = []
predicted = []
counts = []

for (act, pred), count in data.items():
    actual.append(act)
    predicted.append('None' if pred is None else pred)  # Replace None with 'None' for better visualization
    counts.append(count)

# Create DataFrame
df = pd.DataFrame({'Actual': actual, 'Predicted': predicted, 'Count': counts})

# Pivot to format suitable for heatmap
# Pairs that never occur become NaN in the pivot and are filled with 0.
pivot_table = df.pivot(index='Actual', columns='Predicted', values='Count').fillna(0)

# Generate heatmap
fig = px.imshow(pivot_table,
                labels=dict(x="Predicted Entity Type", y="Actual Entity Type", color="Count"),
                x=pivot_table.columns,
                y=pivot_table.index,
                text_auto=True,
                aspect="auto")

fig.update_layout(
    title="Entity Recognition Confusion Matrix",
    xaxis_title="Predicted Entity Type",
    yaxis_title="Actual Entity Type"
)

fig.show()


In [None]:
errors

In [None]:
import plotly.express as px
import pandas as pd

# Your original data
# NOTE(review): conf_matrix1 is not assigned anywhere in the visible cells;
# it must come from a cell that is not part of this export. The loop below
# expects a mapping of (actual, predicted) pairs to counts.
# This also rebinds the notebook-wide name `data`.
data = conf_matrix1

# Prepare lists for DataFrame construction
actual = []
predicted = []
counts = []

for (act, pred), count in data.items():
    actual.append(act)
    predicted.append('None' if pred is None else pred)  # Replace None with 'None' for better visualization
    counts.append(count)

# Create DataFrame
df = pd.DataFrame({'Actual': actual, 'Predicted': predicted, 'Count': counts})

# Pivot to format suitable for heatmap
# Pairs that never occur become NaN in the pivot and are filled with 0.
pivot_table = df.pivot(index='Actual', columns='Predicted', values='Count').fillna(0)

# Generate heatmap
fig = px.imshow(pivot_table,
                labels=dict(x="Predicted Entity Type", y="Actual Entity Type", color="Count"),
                x=pivot_table.columns,
                y=pivot_table.index,
                text_auto=True,
                aspect="auto")

fig.update_layout(
    title="Entity Recognition Confusion Matrix",
    xaxis_title="Predicted Entity Type",
    yaxis_title="Actual Entity Type"
)

fig.show()


# Debugging

In [None]:
# --- False negatives for one entity type ---------------------------------
ENTITY = 'LOC'
entity_false_negatives = {ENTITY: Counter()}
# Gold 'LOC' entities that have no identical (type, start, end) prediction.
# NOTE(review): the type is spelled out as 'LOC' here instead of using ENTITY.
false_negatives = set([e for e in entity_y_true if e[0] == 'LOC']) - set([e for e in entity_y_pred if e[0] == 'LOC'])
for e in false_negatives:
    t_type, t_start, t_end = e
    for pred_ent in entity_y_pred:
        p_type, p_start, p_end = pred_ent
        # NOTE(review): this compares the gold START with the predicted END, so
        # only predictions that begin and end on the gold start offset match.
        # The false-positive loop below compares t_end == p_end instead --
        # confirm which condition is intended.
        if t_start == p_start and t_start == p_end:
            if p_type == 'LOC':
                print(pred_ent)
            # Count which predicted type was found for this missed gold entity.
            entity_false_negatives[t_type][p_type]+=1
            

# --- False positives for the same entity type -----------------------------
ENTITY = 'LOC'
entity_false_positives = {ENTITY: Counter()}
# Predicted ENTITY entities that have no identical (type, start, end) gold entity.
false_positive = set([e for e in entity_y_pred if e[0] == ENTITY]) - set([e for e in entity_y_true if e[0] == ENTITY]) 
for e in false_positive:
    p_type, p_start, p_end = e
    for true_ent in entity_y_true:
        t_type, t_start, t_end = true_ent
        if t_start == p_start and t_end == p_end:
            # if p_type == 'ORG':
            #     # if t_type == 'ORG':
            #         print(true_ent)
            # NOTE(review): every e in false_positive has type ENTITY and no
            # identical gold tuple, so a gold entity with the same span cannot
            # also have the same type. This branch therefore looks unreachable
            # and the counter stays empty -- confirm the intended condition.
            if p_type == t_type:
                entity_false_positives[p_type][t_type]+=1

In [None]:
entity_false_positives

In [None]:
# Offset to inspect. Named target_idx rather than `id` so the built-in id()
# is not shadowed for the rest of the notebook session.
target_idx = 8786
# Gold entities that start at the offset.
for entity in entity_y_true:
    t, s, e = entity
    if s == target_idx:
        print(entity)
# Predicted entities that start at the offset or end right after it.
for entity in entity_y_pred:
    t, s, e = entity
    if s == target_idx or e == target_idx+1:
        print(entity)

In [None]:
# Print the false-negative entities that start at offset 8786.
for entity in false_negatives:
    t, s, e = entity
    # if t == 'LOC':
    #     print(entity)
    if s == 8786:
        print(entity)

In [None]:
# Print the false-positive entities that start at 16466 or end at 16467.
for entity in false_positive:
    t, s, e = entity
   
    if s == 16466 or e == 16467:
        print(entity)

In [None]:
# Print the gold entities that start at offset 16963.
for entity in entity_y_true:
    t, s, e = entity
    if s == 16963:
        print(entity)

In [None]:
# Print the predicted entities that start at offset 16963.
for entity in entity_y_pred:
    t, s, e = entity
    if s == 16963:
        print(entity)

In [None]:
analysis_data.iloc[8780:8790]