In [1]:
import sys
import os
from pathlib import Path
# This appends the directory one level up (the root of your project) to the sys.path.
# Modify the path depending on the location of modules you want to import.
sys.path.append(os.path.abspath('../../'))

from config.config_managers import DashboardConfigManager
from dataManager import DataManager
from dash import Dash
import pandas as pd
import plotly.express as px
from abc import ABC, abstractmethod
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

2025-03-22 14:54:15 - INFO - PyTorch version 2.2.2 available.


In [2]:
CONFIG_PATH = Path("/Users/ay227/Desktop/Final-Year/Thesis-Experiments/Online-Dashboard-Phase/dashboard-config.yaml")
config_manager = DashboardConfigManager(CONFIG_PATH)
dev_config = config_manager.development_config    

app = Dash(__name__, suppress_callback_exceptions=True)

app_config = config_manager.app_config
server = app.server  # Flask server instance for caching
variants_data = None

data_manager = DataManager(config_manager, server)
dash_data = data_manager.load_data()

In [14]:
from dataclasses import dataclass, field
from typing import List, Dict, Set
from abc import ABC, abstractmethod
from collections import defaultdict
from seqeval.scheme import Entities, IOB2, IOB1
from seqeval.metrics.sequence_labeling import get_entities
pd.set_option("display.max_rows", 200)  # Display all rows

class EntityErrorAnalyzer(ABC):
    """Abstract base class for entity analysis."""

    def __init__(self, df):
        self.df = df
        self.y_true, self.y_pred = self.prepare_data(df)
        self.true_entities = []
        self.pred_entities = []

    @abstractmethod
    def extract_entities(self, y_data):
        """Extract entities based on the specific mode (strict or non-strict)."""
        pass

    @abstractmethod
    def prepare_entities(self):
        """Prepare true and predicted entities for analysis."""
        pass
    
    def prepare_data(self, df):
        core_data = df[df['Labels'] !=-100]
        y_true = core_data.groupby('Sentence Ids')['True Labels'].apply(list).tolist()
        y_pred = core_data.groupby('Sentence Ids')['Pred Labels'].apply(list).tolist()
        return y_true, y_pred
    
    def compute_false_negatives(self, entity_type):
        """Compute false negatives for a specific entity type."""
        return set(
            [e for e in self.true_entities if e[1] == entity_type]
        ) - set([e for e in self.pred_entities if e[1] == entity_type])

    def compute_false_positives(self, entity_type):
        """Compute false positives for a specific entity type."""
        return set(
            [e for e in self.pred_entities if e[1] == entity_type]
        ) - set([e for e in self.true_entities if e[1] == entity_type])

    def analyze_sentence_errors(self, target_entities, comparison_entities):
        """Analyze errors and return sentence IDs by error type."""
        error_sentences = defaultdict(set)  # Dictionary to hold sentence IDs for each error type
        non_o_errors = set()
        indexed_entities = defaultdict(list)

        # Index comparison entities by sentence
        for entity in comparison_entities:
            sen, entity_type, start, end = entity
            indexed_entities[sen].append(entity)

        # First pass: entity errors
        for target_entity in target_entities:
            t_sen, t_type, t_start, t_end = target_entity

            for comp_entity in indexed_entities[t_sen]:
                c_type, c_start, c_end = comp_entity[1:]

                if (
                    t_start == c_start
                    and t_end == c_end
                    and t_type != c_type
                    and target_entity not in non_o_errors
                ):
                    non_o_errors.add(target_entity)
                    error_sentences["Entity"].add(target_entity)

        # Second pass: boundary errors
        for target_entity in target_entities - non_o_errors:
            t_sen, t_type, t_start, t_end = target_entity

            for comp_entity in indexed_entities[t_sen]:
                c_sen, c_type, c_start, c_end = comp_entity

                if (
                    t_type == c_type
                    and (t_start <= c_start <= t_end or t_start <= c_end <= t_end)
                    and target_entity not in non_o_errors
                ):
                    non_o_errors.add(target_entity)
                    error_sentences["Boundary"].add(target_entity)

        # Third pass: combined entity and boundary errors
        for target_entity in target_entities - non_o_errors:
            t_sen, t_type, t_start, t_end = target_entity

            for comp_entity in indexed_entities[t_sen]:
                c_sen, c_type, c_start, c_end = comp_entity

                if (
                    c_type != t_type
                    and (t_start <= c_start <= t_end or t_start <= c_end <= t_end)
                    and target_entity not in non_o_errors
                ):
                    non_o_errors.add(target_entity)
                    error_sentences["Entity and Boundary"].add(target_entity)
                    # print(t_sen, t_start, t_end, c_sen, c_start, c_end)
                    # print(f' ({t_start} <= {c_start} <= {t_end} or {t_start} <= {c_end} <= {t_end})')
                    

        # Remaining unmatched errors are "O errors"
        for target_entity in target_entities - non_o_errors:
            t_sen, t_type, t_start, t_end = target_entity
            error_sentences["O"].add(target_entity)

        return {error_type: list(s_ids) for error_type, s_ids in error_sentences.items()}


    def analyze_component(self, error_type, entity_type=None):
        
        """Analyze errors (FP or FN) for a specific or all entity types."""
        self.prepare_entities()
        error_analysis = {}
        entity_types = (
            [entity_type]
            if entity_type
            else set(e[1] for e in self.true_entities + self.pred_entities)
        )

        for etype in entity_types:
            if error_type == "false_negatives":
                target_entities = self.compute_false_negatives(etype)
            elif error_type == "false_positives":
                target_entities = self.compute_false_positives(etype)
            else:
                raise ValueError("Error type must be 'false_negative' or 'false_positive'.")

            error_analysis[etype] = self.analyze_sentence_errors(
                target_entities, self.pred_entities if error_type == "false_negatives" else self.true_entities
            )

        return error_analysis
    
    def analyze_errors(self):
        self.prepare_entities()
        """Analyze both false positives and false negatives."""
        error_components = {"false_positives": defaultdict(set), "false_negatives": defaultdict(set)}

        for error_component in error_components.keys():
            results = self.analyze_component(error_component)
            for entity_type, errors in results.items():
                for error_type, sentences in errors.items():
                    error_components[error_component][error_type].update(sentences)

        # Convert sets to lists for consistency
        return {k: {etype: set(ids) for etype, ids in v.items()} for k, v in error_components.items()}
    
    


class StrictEntityAnalyzer(EntityErrorAnalyzer):
    """Analyzer for strict entity processing."""

    def extract_entities(self, y_data):
        """Extract entities in strict mode."""
        entities = Entities(y_data, IOB2, False)
        return self.adjust_end_index(entities)

    def prepare_entities(self):
        """Prepare true and predicted entities for strict mode."""
        self.true_entities = self.flatten_entities(self.extract_entities(self.y_true))
        self.pred_entities = self.flatten_entities(self.extract_entities(self.y_pred))

    def print_sentence(self, sen_id):
        """Print entities for a specific sentence ID."""
        true_entities = self.extract_entities(self.y_true).entities
        pred_entities = self.extract_entities(self.y_pred).entities
        print(f"True: {true_entities[sen_id]}")
        print(f"Pred: {pred_entities[sen_id]}")
        error = set(pred_entities[sen_id]) - set(true_entities[sen_id])
        print(f"Error in Pred: {error}")
        core_data = self.df[self.df['Labels'] !=-100]
        sentence_data = core_data[core_data['Sentence Ids']  == sen_id].copy()
        print(sentence_data[['Words', 'Sentence Ids', 'True Labels', 'Pred Labels', 'Strict True Entities', 'Strict Pred Entities', 'True Entities', 'Pred Entities']].head(60).to_string())

    @staticmethod
    def flatten_entities(entities):
        """Flatten strict entities into tuples."""
        return [e for sen in entities.entities for e in sen]
    
    @staticmethod
    def adjust_end_index(entities):
        """Adjust the end index for IOB2 entities to make them inclusive."""
        adjusted_entities = []
        for sentence_entities in entities.entities:  # Iterate through sentences
            adjusted_sentence = []
            for entity in sentence_entities:  # Iterate through entities in each sentence
                sentence_id, entity_type, start, end = entity.to_tuple()
                # Adjust end index
                adjusted_sentence.append((sentence_id, entity_type, start, end - 1))
            adjusted_entities.append(adjusted_sentence)
        entities.entities = adjusted_entities  # Replace with adjusted entities
        return entities
    
    
    
    
class NonStrictEntityAnalyzer(EntityErrorAnalyzer):
    """Analyzer for non-strict entity processing."""

    def extract_entities(self, y_data):
        """Extract entities in non-strict mode."""
        return [
            [(sen_id,) + entity for entity in get_entities(sen)]
            for sen_id, sen in enumerate(y_data)
        ]

    def prepare_entities(self):
        """Prepare true and predicted entities for non-strict mode."""
        self.true_entities = self.flatten_entities(self.extract_entities(self.y_true))
        self.pred_entities = self.flatten_entities(self.extract_entities(self.y_pred))

    def print_sentence(self, sen_id):
        """Print entities for a specific sentence ID."""
        true_entities = self.extract_entities(self.y_true)
        pred_entities = self.extract_entities(self.y_pred)
        print(f"True: {true_entities[sen_id]}")
        print(f"Pred: {pred_entities[sen_id]}")
        error = set(pred_entities[sen_id]) - set(true_entities[sen_id])
        print(f"Error in Pred: {error}")
        core_data = self.df[self.df['Labels'] !=-100]
        sentence_data = core_data[core_data['Sentence Ids']  == sen_id].copy()
        print(sentence_data[['Words', 'Sentence Ids', 'True Labels', 'Pred Labels', 'Strict True Entities', 'Strict Pred Entities', 'True Entities', 'Pred Entities']].head(60).to_string())
        
    @staticmethod
    def flatten_entities(entities):
        """Flatten non-strict entities into tuples."""
        return [e for sen in entities for e in sen]

class ErrorAnalysisManager:
    """Manages all error analysis workflows and stores results."""

    def __init__(self, df):
        """
        Initialize the manager with the dataset.

        Args:
            df (pd.DataFrame): The dataset containing y_true and y_pred.
        """
        self.df = df
        self.strict_analyzer = StrictEntityAnalyzer(df)
        self.non_strict_analyzer = NonStrictEntityAnalyzer(df)
        self.results = {
            "IOB2": {"false_negatives": None, "false_positives": None, "errors": None},
            "IOB": {"false_negatives": None, "false_positives": None, "errors": None},
        }

    def run_workflows(self):
        """Run all error analysis workflows."""
        self.results["IOB2"]["false_negatives"] = self.strict_analyzer.analyze_component("false_negatives")
        self.results["IOB2"]["false_positives"] = self.strict_analyzer.analyze_component("false_positives")
        self.results["IOB2"]["errors"] = self.strict_analyzer.analyze_errors()

        self.results["IOB"]["false_negatives"] = self.non_strict_analyzer.analyze_component("false_negatives")
        self.results["IOB"]["false_positives"] = self.non_strict_analyzer.analyze_component("false_positives")
        self.results["IOB"]["errors"] = self.non_strict_analyzer.analyze_errors()

    def get_results(self):
        """Get the results of all workflows."""
        return self.results

class SchemeComparator:
    """Facilitator for comparing annotation schemes."""

    def __init__(self, results):
        """
        Initialize the comparator with results from error analysis.

        Args:
            results (dict): Results from the manager's workflows, structured by scheme.
        """
        self.results = results

    def compare_component(self, component, entity_type):
        """
        Compare all error types for a specific entity across schemes.

        Args:
            entity_type (str): The entity type to compare (e.g., "MISC").

        Returns:
            dict: A dictionary with set operation results for all error types.
        """
        schemes = list(self.results.keys())
        if len(schemes) != 2:
            raise ValueError("Comparator requires exactly two schemes for comparison.")

        scheme_1, scheme_2 = schemes
        component_1 = self.results[scheme_1][component]
        component_2 = self.results[scheme_2][component]

        results = {}
        entity_1 = component_1.get(entity_type, {})
        entity_2 = component_2.get(entity_type, {})

        # Compare all error types under the given entity
        all_error_types = set(entity_1.keys()).union(set(entity_2.keys()))
        for error_type in all_error_types:
            set_1 = set(entity_1.get(error_type, []))
            set_2 = set(entity_2.get(error_type, []))

            results[error_type] = {
                "overlap": set_1 & set_2,
                f"{scheme_1} Only": set_1 - set_2,
                f"{scheme_2} Only": set_2 - set_1,
            }

        return results

    def compare_errors(self, component, error_type):
        """
        Compare errors across all entities and error types for both schemes.

        Returns:
            dict: A dictionary with set operation results for all error types.
        """
        schemes = list(self.results.keys())
        if len(schemes) != 2:
            raise ValueError("Comparator requires exactly two schemes for comparison.")

        schemes_map = {'scheme_1': 'IOB', 'scheme_2': 'IOB2'}
        errors_1 = self.results[schemes_map['scheme_1']]["errors"][component]
        errors_2 = self.results[schemes_map['scheme_2']]["errors"][component]

       
       
        comparison_result = ComparisonResult.from_lists(errors_1, errors_2, error_type, schemes_map)

        return comparison_result.to_dict()


@dataclass
class ComparisonResult:
    """Dataclass to store comparison results."""
    scheme_1_name: str
    scheme_2_name: str
    set_1_errors: Set[int] = field(default=set)
    set_2_errors: Set[int] = field(default=set)
    overlap: Set[int] = field(default_factory=set)
    scheme_1_only: Set[int] = field(default_factory=set)
    scheme_2_only: Set[int] = field(default_factory=set)

    @staticmethod
    def from_lists(errors_1: Dict, errors_2: Dict, error_type: str, schemes_map: Dict) -> "ComparisonResult":
        """
        Create a ComparisonResult from two lists.

        Args:
            lst_1: List of values from scheme 1.
            lst_2: List of values from scheme 2.

        Returns:
            ComparisonResult: Dataclass containing the comparison and statistics.
        """
        set_1 = set(errors_1.get(error_type, []))
        
        set_2 = set(errors_2.get(error_type, []))
        
        sentence_lst_1 = [error[0] for error in errors_1.get(error_type, [])]
        sentence_lst_2 = [error[0] for error in errors_2.get(error_type, [])]
        sentence_set_1 = set(sentence_lst_1)
        sentence_set_2 = set(sentence_lst_2)
        
        overlap = sentence_set_1 & sentence_set_2
        scheme_1_only = sentence_set_1 - sentence_set_2
        scheme_2_only = sentence_set_2 - sentence_set_1

        return ComparisonResult(
            scheme_1_name=schemes_map['scheme_1'],
            scheme_2_name=schemes_map['scheme_2'],
            set_1_errors= set_1,
            set_2_errors= set_2,
            overlap=overlap,
            scheme_1_only=scheme_1_only,
            scheme_2_only=scheme_2_only,
        )
        
    def to_dict(self) -> Dict[str, Dict[str, Set[int]]]:
        """R"Overlap": self.overlap, comparison results as a dictionary."""
        return {
            f"{self.scheme_1_name} Errors": self.set_1_errors,
            f"{self.scheme_2_name} Errors": self.set_2_errors,
            "Overlap": self.overlap,
            f"{self.scheme_1_name} Only Errors": self.scheme_1_only,
            f"{self.scheme_2_name} Only Errors": self.scheme_2_only,
        }


In [4]:
ar = dash_data['ANERCorp_CamelLab_arabertv02']
en = dash_data['conll2003_bert']

In [5]:
df = ar.analysis_data
df.columns


Index(['Sentence Ids', 'Token Positions', 'Words', 'Tokens', 'Word Pieces',
       'Core Tokens', 'True Labels', 'Token Selector Id', 'Pred Labels',
       'Agreements', 'X', 'Y', 'Labels', 'Losses', 'Token Ids', 'Global Id',
       'True Silhouette', 'Pred Silhouette', 'K=3', 'Boundary Clusters', 'K=4',
       'Entity Clusters', 'K=9', 'Token Clusters', 'Consistency Count',
       'Inconsistency Count', 'Total Train Occurrences', 'Local Token Entropy',
       'Token Max Entropy', 'Dataset Token Entropy', 'Local Word Entropy',
       'Word Max Entropy', 'Dataset Word Entropy', 'Tokenization Rate',
       'Error Type', 'O Confidence', 'B-PER Confidence', 'I-PER Confidence',
       'B-ORG Confidence', 'I-ORG Confidence', 'B-LOC Confidence',
       'I-LOC Confidence', 'B-MISC Confidence', 'I-MISC Confidence',
       'Prediction Entropy', 'Prediction Max Entropy', 'Token Confidence',
       'Variability', 'Pre X', 'Pre Y', 'Strict True Entities',
       'Strict Pred Entities', 'True Entiti

In [None]:
df

In [6]:
manager = ErrorAnalysisManager(df)
manager.run_workflows()
results = manager.get_results()

In [17]:
df[df['Sentence Ids'] == 189]

Unnamed: 0,Sentence Ids,Token Positions,Words,Tokens,Word Pieces,Core Tokens,True Labels,Token Selector Id,Pred Labels,Agreements,...,Strict Pred Entities,True Entities,Pred Entities,True Aligned Scheme,Pred Aligned Scheme,Consistency Ratio,Inconsistency Ratio,Token Entropy,Word Entropy,Prediction Uncertainty
6240,189,0,[CLS],[CLS],[CLS],[CLS],[CLS],[CLS]@#0@#189,[CLS],True,...,[CLS],[CLS],[CLS],True,True,0.0,0.0,-1.0,-1.0,0.003186
6241,189,1,البنكي,البنكي,البنكي,البنكي,O,البنكي@#1@#189,O,True,...,O,O,O,True,True,1.0,0.0,0.0,0.0,0.042588
6242,189,2,-,-,-,-,O,-@#2@#189,O,True,...,O,O,O,True,True,1.0,0.0,0.0,0.0,0.00142
6243,189,3,استيف,است,"است, ##يف",است,B-PER,است@#3@#189,B-PER,True,...,PERS,PERS,PERS,True,True,0.0,1.0,0.8113,-1.0,0.382725
6244,189,4,استيف,##يف,"است, ##يف",IGNORED,IGNORED,IGNORED@#4@#189,IGNORED,True,...,IGNORED,IGNORED,IGNORED,True,True,0.0,0.0,-1.0,-1.0,0.565191
6245,189,5,بوجول,بوج,"بوج, ##ول",بوج,I-PER,بوج@#5@#189,I-PER,True,...,PERS,PERS,PERS,True,True,0.0,1.0,0.9183,-1.0,0.40935
6246,189,6,بوجول,##ول,"بوج, ##ول",IGNORED,IGNORED,IGNORED@#6@#189,IGNORED,True,...,IGNORED,IGNORED,IGNORED,True,True,0.0,0.0,-1.0,-1.0,0.422411
6247,189,7,اسبانيا,اسبانيا,اسبانيا,اسبانيا,B-LOC,اسبانيا@#7@#189,B-LOC,True,...,LOC,LOC,LOC,True,True,1.0,0.0,0.0,0.0,0.039433
6248,189,8,كي,كي,كي,كي,B-MISC,كي@#8@#189,B-ORG,False,...,ORG,MISC,ORG,True,True,0.095238,0.904762,0.791119,0.791119,0.053661
6249,189,9,تي,تي,تي,تي,I-MISC,تي@#9@#189,I-ORG,False,...,ORG,MISC,ORG,True,True,0.0,1.0,0.89495,0.886814,0.074387


In [21]:
manager.non_strict_analyzer.print_sentence(128)

True: [(128, 'ORG', 1, 1), (128, 'LOC', 5, 5), (128, 'LOC', 9, 11), (128, 'ORG', 13, 13), (128, 'ORG', 22, 24), (128, 'ORG', 26, 26), (128, 'MISC', 48, 48), (128, 'LOC', 50, 50), (128, 'LOC', 55, 55)]
Pred: [(128, 'LOC', 5, 5), (128, 'LOC', 9, 11), (128, 'ORG', 13, 13), (128, 'ORG', 22, 24), (128, 'ORG', 26, 26), (128, 'LOC', 48, 48), (128, 'LOC', 50, 50), (128, 'LOC', 55, 55)]
Error in Pred: {(128, 'LOC', 48, 48)}
           Words  Sentence Ids True Labels Pred Labels Strict True Entities Strict Pred Entities True Entities Pred Entities
4441          عن           128           O           O                    O                    O             O             O
4442     البوينغ           128       B-ORG           O                  ORG                    O           ORG             O
4444        كانت           128           O           O                    O                    O             O             O
4445      ستتوقف           128           O           O                    O      

In [10]:
results['IOB2']

{'false_negatives': {'MISC': {'Entity': [(187, 'MISC', 4, 6),
    (128, 'MISC', 48, 48),
    (186, 'MISC', 3, 5),
    (189, 'MISC', 5, 7),
    (830, 'MISC', 17, 17),
    (946, 'MISC', 15, 15),
    (508, 'MISC', 36, 36),
    (188, 'MISC', 4, 6),
    (886, 'MISC', 0, 0),
    (574, 'MISC', 3, 3),
    (604, 'MISC', 7, 7),
    (191, 'MISC', 4, 6),
    (173, 'MISC', 12, 13)],
   'Boundary': [(171, 'MISC', 48, 53),
    (649, 'MISC', 25, 28),
    (626, 'MISC', 39, 40),
    (593, 'MISC', 11, 13),
    (166, 'MISC', 26, 26),
    (921, 'MISC', 24, 27),
    (167, 'MISC', 8, 9),
    (488, 'MISC', 7, 9),
    (486, 'MISC', 13, 16),
    (878, 'MISC', 12, 13),
    (851, 'MISC', 34, 35),
    (163, 'MISC', 14, 15),
    (851, 'MISC', 26, 31),
    (645, 'MISC', 15, 18),
    (379, 'MISC', 17, 19)],
   'Entity and Boundary': [(180, 'MISC', 5, 5),
    (250, 'MISC', 12, 14),
    (253, 'MISC', 4, 6),
    (618, 'MISC', 25, 27),
    (625, 'MISC', 26, 28),
    (586, 'MISC', 7, 9),
    (175, 'MISC', 4, 4),
    (857,

In [18]:
comparator = SchemeComparator(results)
component_comparison = comparator.compare_component("false_negatives", "LOC")
component_comparison
overall_comparison = comparator.compare_errors('false_negatives', 'Entity and Boundary')


In [20]:
component_comparison

{'O': {'overlap': {(0, 'LOC', 0, 0),
   (116, 'LOC', 0, 0),
   (116, 'LOC', 9, 9),
   (124, 'LOC', 0, 0),
   (130, 'LOC', 0, 0),
   (169, 'LOC', 34, 34),
   (183, 'LOC', 2, 2),
   (232, 'LOC', 0, 0),
   (238, 'LOC', 0, 0),
   (239, 'LOC', 0, 0),
   (240, 'LOC', 0, 0),
   (250, 'LOC', 0, 0),
   (260, 'LOC', 0, 0),
   (286, 'LOC', 0, 0),
   (315, 'LOC', 25, 26),
   (318, 'LOC', 21, 21),
   (360, 'LOC', 0, 0),
   (400, 'LOC', 20, 20),
   (403, 'LOC', 12, 12),
   (406, 'LOC', 10, 10),
   (473, 'LOC', 0, 0),
   (768, 'LOC', 0, 0),
   (863, 'LOC', 0, 0)},
  'IOB2 Only': {(678, 'LOC', 7, 7)},
  'IOB Only': {(79, 'LOC', 0, 0),
   (320, 'LOC', 0, 0),
   (382, 'LOC', 0, 0),
   (445, 'LOC', 0, 0),
   (574, 'LOC', 0, 0),
   (731, 'LOC', 0, 0)}},
 'Entity': {'overlap': {(95, 'LOC', 4, 4),
   (160, 'LOC', 25, 25),
   (282, 'LOC', 4, 4),
   (345, 'LOC', 0, 0),
   (479, 'LOC', 27, 27),
   (678, 'LOC', 6, 6),
   (700, 'LOC', 4, 4)},
  'IOB2 Only': set(),
  'IOB Only': set()},
 'Entity and Boundary': {'

In [19]:
overall_comparison

{'IOB Errors': {(20, 'ORG', 41, 41),
  (35, 'MISC', 1, 3),
  (166, 'ORG', 25, 25),
  (175, 'MISC', 4, 4),
  (175, 'ORG', 3, 3),
  (179, 'MISC', 6, 6),
  (179, 'ORG', 5, 5),
  (180, 'MISC', 5, 5),
  (180, 'ORG', 4, 4),
  (181, 'MISC', 5, 5),
  (181, 'ORG', 4, 4),
  (184, 'MISC', 5, 7),
  (196, 'PER', 1, 2),
  (232, 'PER', 19, 20),
  (250, 'MISC', 12, 14),
  (320, 'ORG', 17, 17),
  (584, 'LOC', 4, 4),
  (586, 'MISC', 7, 9),
  (618, 'MISC', 25, 27),
  (619, 'MISC', 12, 14),
  (625, 'MISC', 26, 28),
  (630, 'MISC', 10, 16),
  (693, 'LOC', 2, 2),
  (695, 'LOC', 30, 30),
  (845, 'PER', 0, 0),
  (857, 'MISC', 7, 10),
  (879, 'MISC', 2, 3),
  (915, 'ORG', 2, 2),
  (917, 'ORG', 2, 2),
  (921, 'MISC', 18, 19)},
 'IOB2 Errors': {(20, 'ORG', 41, 41),
  (124, 'ORG', 26, 29),
  (166, 'ORG', 25, 25),
  (175, 'MISC', 4, 4),
  (175, 'ORG', 3, 3),
  (179, 'MISC', 6, 6),
  (179, 'ORG', 5, 5),
  (180, 'MISC', 5, 5),
  (180, 'ORG', 4, 4),
  (181, 'MISC', 5, 5),
  (181, 'ORG', 4, 4),
  (196, 'PER', 1, 2),
 

In [18]:
strict_entity_misclassifications

Unnamed: 0,O,PERS,MISC,LOC,ORG
O,0,78,23,21,64
PERS,63,0,5,9,14
MISC,122,6,0,17,23
LOC,27,2,3,0,5
ORG,103,23,9,26,0


In [17]:
analysis_data['Strict True Entities'].value_counts()

Strict True Entities
O          21700
IGNORED     2798
PERS        1442
[CLS]        961
[SEP]        961
LOC          743
ORG          716
MISC         390
Name: count, dtype: int64

In [None]:
lang_df = en
analysis_data = lang_df.analysis_data
train_data = lang_df.train_data
attention_similarity_heatmap = lang_df.attention_similarity_heatmap
attention_similarity_matrix = lang_df.attention_similarity_matrix
attention_weights_similarity_heatmap = lang_df.attention_weights_similarity_heatmap
attention_weights_similarity_matrix = lang_df.attention_weights_similarity_matrix
centroids_avg_similarity_matrix = lang_df.centroids_avg_similarity_matrix
entity_non_strict_confusion_data = lang_df.entity_non_strict_confusion_data
strict_entity_misclassifications = lang_df.strict_entity_misclassifications
non_strict_entity_misclassifications = lang_df.non_strict_entity_misclassifications
entity_non_strict_report = lang_df.entity_non_strict_report
entity_strict_confusion_data = lang_df.entity_strict_confusion_data
entity_strict_report = lang_df.entity_strict_report
kmeans_results = lang_df.kmeans_results 

In [10]:
non_strict_entity_misclassifications

Unnamed: 0,O,MISC,LOC,ORG,PER
O,0,99,36,78,22
MISC,51,0,33,58,10
LOC,20,29,0,66,9
ORG,39,47,77,0,21
PER,13,3,15,30,0
