In [1]:
import sys
import os
from pathlib import Path
# This appends the directory one level up (the root of your project) to the sys.path.
# Modify the path depending on the location of modules you want to import.
sys.path.append(os.path.abspath('../../'))

from config.config_managers import DashboardConfigManager
from dataManager import DataManager
from dash import Dash
import pandas as pd
import plotly.express as px
from abc import ABC, abstractmethod
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

2025-09-21 21:13:57 - INFO - PyTorch version 2.2.2 available.


In [2]:
from seqeval.metrics.sequence_labeling import get_entities
from seqeval.scheme import IOB2, Entities
true_labels = ["O", "B-PER", "I-PER", "O", "B-LOC", "I-LOC"]\

entities = get_entities(true_labels)
print(entities)

strict_entities = Entities([true_labels], IOB2)
print(strict_entities.entities)

[('PER', 1, 2), ('LOC', 4, 5)]
[[(0, PER, 1, 3), (0, LOC, 4, 6)]]


In [63]:
class Visualization(ABC):
    def __init__(self, data, mappings):
        self.data = data
        self.tag_mapping = mappings['tag_mapping']
        self.dataset_mapping = mappings['dataset_mapping']

    @abstractmethod
    def prepare_data(self):
        pass

    @abstractmethod
    def visualize(self):
        pass

    def replace_mappings(self, data):
        data['Tag'] = data['Tag'].replace(self.tag_mapping)
        data['Model'] = data['Model'].replace(self.dataset_mapping)
        return data
    
    def process_entity_confusion(self, entity_confusion, o_error):
        """
        Processes the entity confusion matrix into high-level error categories
        and a separate DataFrame for entity and exclusion errors.
        
        Parameters:
            entity_confusion (dict): A dictionary representing entity confusion components.
            
        Returns:
            renamed_df (DataFrame): High-level error categories (Entity, Boundary, Entity and Boundary, Exclusion).
            entity_errors (DataFrame): DataFrame containing Entity Errors and Exclusion Errors only.
        """
        # Step 1: Create DataFrame
        df = pd.DataFrame(entity_confusion).fillna(0).astype(int).T

        # Step 2: Rename columns into high-level categories
        errors = df.copy()
        errors[o_error] = errors.pop('O')  # Rename 'O' to 'Exclusion'
        errors['Entity'] = errors.drop(columns=['Boundary', 'Entity and Boundary', o_error], errors='ignore').sum(axis=1)
        errors = errors[['Entity', 'Boundary', 'Entity and Boundary', o_error]]

        # Step 3: Create a separate DataFrame for Entity and Exclusion only
        entity_errors = df.drop(columns=['Boundary', 'Entity and Boundary', 'O'], errors='ignore')

        return errors, entity_errors



mappings = {
    'tag_mapping': {'PERS': 'PER'},
    'dataset_mapping': {'ANERCorp_CamelLab_arabertv02': 'Arabic', 'conll2003_bert': 'English'}
}


class ConfusionBarChart(Visualization):
    def prepare_data(self):
        matrix_data = []
        for data_name, data_content in self.data.items():
            entity_matrix = pd.DataFrame(data_content.entity_non_strict_confusion_data['confusion_matrix']).T 
            entity_strict_matrix = pd.DataFrame(data_content.entity_strict_confusion_data['confusion_matrix']).T
            entity_matrix['Model'] = data_name
            entity_matrix['Scheme'] = 'IOB1'
            entity_strict_matrix['Model'] = data_name
            entity_strict_matrix['Scheme'] = 'IOB2'
            matrix_data.append(pd.concat([
				entity_matrix, 
				entity_strict_matrix
			]))
        matrix_df = pd.concat(matrix_data)
        matrix_df.reset_index(inplace=True)
        matrix_df.rename(columns={'index': 'Tag'}, inplace=True)
        matrix_data = self.replace_mappings(matrix_df)
        
        grouped = matrix_data.groupby(['Tag', 'Model', 'Scheme']).sum()
        grouped['Total'] = grouped['TP'] + grouped['FP'] + grouped['FN']
        
        matrix_data = matrix_data.merge(grouped['Total'], on=['Tag', 'Model', 'Scheme'], how='left')
        
        matrix_data['TP_Count'] = matrix_data['TP']
        matrix_data['FP_Count'] = matrix_data['FP']
        matrix_data['FN_Count'] = matrix_data['FN']
        
        matrix_data['TP'] = matrix_data['TP'] / matrix_data['Total']
        matrix_data['FP'] = matrix_data['FP'] / matrix_data['Total']
        matrix_data['FN'] = matrix_data['FN'] / matrix_data['Total']
        
        confusion_scaled_df = matrix_data.melt(id_vars=["Tag", "Model", "Scheme"], value_vars=["TP", "FP", "FN"], var_name="Metric", value_name="Scale")
        confusion_count_df = matrix_data.melt(id_vars=["Tag", "Model", "Scheme"], value_vars=["TP_Count", "FP_Count", "FN_Count"], var_name="Metric", value_name="Count")
        confusion_count_df['Metric'] = confusion_count_df['Metric'].str.replace('_Count', '')
        confusion_data = confusion_scaled_df.merge(confusion_count_df, on=["Tag", "Model", "Scheme", "Metric"])
        return confusion_data

    def visualize(self):
        confusion_df = self.prepare_data()
        
        confusion_df['Tag'] = pd.Categorical(confusion_df['Tag'], categories=["LOC", "MISC", "ORG", "PER"], ordered=True)
        print(confusion_df)
        fig = px.bar(confusion_df, x="Tag", y="Scale", color="Metric",
            facet_row="Scheme", facet_col="Model",
            title="Breakdown of Confusion Matrix Components: by Entity Span, Categorized by Model and Tagging Scheme",
            labels={"Scale": "Scaled Counts"},
            barmode='group',
            template="plotly_white",
            facet_row_spacing=0.1,  # Adjusted spacing
            facet_col_spacing=0.08,
            text='Count',  # Display the actual Count on top of each bar
            width=1500,
            category_orders={"Tag": ["LOC", "MISC", "ORG", "PER"]}  # Enforce the order in the plot

            )
        
        
        fig.show()

class ConfusionHeatmap(Visualization):
    def prepare_data(self):
        matrix_data = []
        for data_name, data_content in self.data.items():
            entity_matrix = pd.DataFrame(data_content.entity_non_strict_confusion_data['confusion_matrix']).T 
            entity_strict_matrix = pd.DataFrame(data_content.entity_strict_confusion_data['confusion_matrix']).T
            entity_matrix['Model'] = data_name
            entity_matrix['Scheme'] = 'IOB1'
            entity_strict_matrix['Model'] = data_name
            entity_strict_matrix['Scheme'] = 'IOB2'
            matrix_data.append(pd.concat([
				entity_matrix, 
				entity_strict_matrix
			]))
        matrix_df = pd.concat(matrix_data)
        matrix_df.reset_index(inplace=True)
        matrix_df.rename(columns={'index': 'Tag'}, inplace=True)
        return self.replace_mappings(matrix_df)

    def visualize(self):
        matrix_df = self.prepare_data()
        confusion_df = matrix_df.melt(id_vars=['Tag', 'Model', 'Scheme'], value_vars=['FP', 'FN'], 
                            var_name='Metric', value_name='Count')
        
        unique_schemes = confusion_df['Scheme'].unique()
        unique_datasets = confusion_df['Model'].unique()
        
        fig = make_subplots(rows=len(unique_schemes), cols=len(unique_datasets),
                            subplot_titles=[f"{dataset} - {scheme}" for scheme in unique_schemes for dataset in unique_datasets],
                            shared_yaxes=True, horizontal_spacing=0.02, vertical_spacing=0.1)
        
        max_value = confusion_df['Count'].max()
        
        for idx, scheme in enumerate(unique_schemes):
            for jdx, dataset in enumerate(unique_datasets):
                filtered_data = confusion_df[(confusion_df['Scheme'] == scheme) & (confusion_df['Model'] == dataset)]
                heatmap_data = filtered_data.pivot_table(index='Metric', columns='Tag', values='Count', fill_value=0)
                text_data = filtered_data.pivot_table(index='Metric', columns='Tag', values='Count', fill_value=0).astype(int)

                
                
                fig.add_trace(
                    go.Heatmap(
                        z=heatmap_data,
                        x=heatmap_data.columns,
                        y=heatmap_data.index,
                        colorscale='RdBu_r',
                        coloraxis="coloraxis",  # Use a unified color axis
                        text=text_data,  # Add text annotations
                        texttemplate="%{text}",  # Use the text values directly
                        hovertemplate="Metric: %{y}<br>Tag: %{x}<br>Count: %{text}<extra></extra>",
                    ),
                    row=idx + 1, col=jdx + 1
                )
                
        fig.update_layout(
            coloraxis=dict(colorscale='RdBu_r', cmin=0, cmax=max_value, colorbar=dict(title="Counts")),
            title_text="Confusion Matrix Heatmap Categorized by Dataset and Tagging Scheme",
            template="plotly_white",
            height=600, width=700,
        )
        fig.show()




class ErrorTypeHeatmap(Visualization):
    def prepare_data(self, component):
        matrix_data = []
        o_error = "Inclusion" if component == 'false_positives' else "Exclusion"
        # Step 1: Collect general error data
        for data_name, data_content in self.data.items():
            for scheme, entity_confusion in [
                # ('IOB1', data_content.entity_non_strict_confusion_data), 
                                             ('IOB2', data_content.entity_strict_confusion_data)]:
                # Process general errors (Entity, Boundary, Entity+Boundary, Exclusion)
                error_types, _ = self.process_entity_confusion(entity_confusion[component], o_error)

                # Annotate with Model and Scheme
                error_types['Model'] = data_name
                error_types['Scheme'] = scheme
                matrix_data.append(error_types)
        
        # Step 2: Combine and process data
        matrix_df = pd.concat(matrix_data)
        matrix_df.reset_index(inplace=True)
        matrix_df.rename(columns={'index': 'Tag'}, inplace=True)
        matrix_df = self.replace_mappings(matrix_df)
        
        # Step 3: Melt raw counts for visualization
        melted_df = matrix_df.melt(
            id_vars=['Tag', 'Model', 'Scheme'],
            value_vars=['Entity', 'Boundary', 'Entity and Boundary', o_error],
            var_name="Error Type",
            value_name="Raw Count"
        )
        return melted_df

    def visualize(self, component):
        general_errors_df = self.prepare_data(component)
        
        title_component = "False Positives" if component == 'false_positives' else "False Negatives"
        
        # Step 4: Create heatmap for raw errors
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=[f"{scheme} - {model}" for scheme in general_errors_df['Scheme'].unique() 
                            for model in general_errors_df['Model'].unique()],
            shared_xaxes=True, shared_yaxes=True, horizontal_spacing=0.05, vertical_spacing=0.1
        )
        
        unique_schemes = general_errors_df['Scheme'].unique()
        unique_models = general_errors_df['Model'].unique()
        
        for i, scheme in enumerate(unique_schemes):
            for j, model in enumerate(unique_models):
                filtered_data = general_errors_df[(general_errors_df['Scheme'] == scheme) &
                                                  (general_errors_df['Model'] == model)]
                
                pivot_data = filtered_data.pivot(index='Error Type', columns='Tag', values='Raw Count')
                print(pivot_data)
                fig.add_trace(
                    go.Heatmap(
                        z=pivot_data.values,
                        x=pivot_data.columns,
                        y=pivot_data.index,
                        coloraxis="coloraxis",
                        text=pivot_data.values,
                        texttemplate="%{text}",
                        hovertemplate="Tag: %{x}<br>Error Type: %{y}<br>Count: %{text}<extra></extra>"
                    ),
                    row=i + 1, col=j + 1
                )
       
        fig.update_layout(
            coloraxis=dict(colorscale='RdBu_r', colorbar=dict(title="Error Count")),
            title_text=f"{title_component} Error Type Heatmap: by Entity Span, Categorized by Model and Tagging Scheme",
            template="plotly_white",
            height=600, width=1000,
        )
        fig.show()
    
    def visualize_table(self, component):
        """
        Generates tables showing raw counts and percentages for each error type, 
        categorized by Scheme and Model.
        """
        # Step 1: Prepare the data
        errors_type = self.prepare_data(component)
        
        # Aggregate totals for error types
        pivot_data = errors_type.groupby(["Error Type", "Scheme", "Model"], as_index=False).agg(
            Total_Count=("Raw Count", "sum")
        )

        # Step 2: Calculate percentages across all errors within each Scheme and Model
        pivot_data['Percentage'] = (
            pivot_data.groupby(['Scheme', 'Model'])['Total_Count']
            .transform(lambda x: (x / x.sum()) * 100)  # Use transform to maintain row alignment
        )
        pivot_data['Percentage'] = pivot_data['Percentage'].round(2)  # Round percentages for display

        # Step 3: Print tables for each Scheme and Model
        unique_schemes = pivot_data['Scheme'].unique()
        unique_models = pivot_data['Model'].unique()

        for scheme in unique_schemes:
            for model in unique_models:
                print(f"\n### Table for Scheme: {scheme}, Model: {model} ###\n")
                filtered_data = pivot_data[
                    (pivot_data['Scheme'] == scheme) & 
                    (pivot_data['Model'] == model)
                ].copy()
                display_df = filtered_data[['Error Type', 'Total_Count', 'Percentage']].copy()
                display_df.rename(
                    columns={"Error Type": "Error Type", "Total_Count": "Raw Count", "Percentage": "Percentage (%)"},
                    inplace=True
                )
                print(display_df.to_string(index=False))  # Display as a clean table
                

class EntityErrorsHeatmap(Visualization):
    def prepare_data(self, component):
        matrix_data = []
        o_error = "Inclusion" if component == 'false_positives' else "Exclusion"
        # Step 1: Collect general error data
        for data_name, data_content in self.data.items():
            for scheme, entity_confusion in [('IOB1', data_content.entity_non_strict_confusion_data), 
                                             ('IOB2', data_content.entity_strict_confusion_data)]:
                # Process general errors (Entity, Boundary, Entity+Boundary, Exclusion)
                _, entity_errors = self.process_entity_confusion(entity_confusion[component], o_error)

                # Annotate with Model and Scheme
                entity_errors['Model'] = data_name
                entity_errors['Scheme'] = scheme
                entity_errors = entity_errors.rename(columns=self.tag_mapping)
                matrix_data.append(entity_errors)
        
        # Step 2: Combine and process data
        matrix_df = pd.concat(matrix_data)
        matrix_df.reset_index(inplace=True)
        matrix_df.rename(columns={'index': 'Tag'}, inplace=True)
        matrix_df = self.replace_mappings(matrix_df)
        
        
        
        # Step 3: Melt raw counts for visualization
        melted_df = melted_df = matrix_df.melt(
            id_vars=['Tag', 'Model', 'Scheme'],
            var_name="Error Type",
            value_name="Raw Count"
        )
        return melted_df

    def visualize(self, component):
        entity_errors_df = self.prepare_data(component)
        
        title_component = "False Positives" if component == 'false_positives' else "False Negatives"
        # Step 4: Create heatmap for raw errors
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=[f"{scheme} - {model}" for scheme in entity_errors_df['Scheme'].unique() 
                            for model in entity_errors_df['Model'].unique()],
            shared_xaxes=True, shared_yaxes=True, horizontal_spacing=0.05, vertical_spacing=0.1
        )
        
        unique_schemes = entity_errors_df['Scheme'].unique()
        unique_models = entity_errors_df['Model'].unique()
        
        for i, scheme in enumerate(unique_schemes):
            for j, model in enumerate(unique_models):
                entity_errors = entity_errors_df[(entity_errors_df['Scheme'] == scheme) &
                                                  (entity_errors_df['Model'] == model)]
                
                pivot_data = entity_errors.pivot(index='Error Type', columns='Tag', values='Raw Count')
                print(pivot_data)
                
                fig.add_trace(
                    go.Heatmap(
                        z=pivot_data.values,
                        x=pivot_data.columns,
                        y=pivot_data.index,
                        coloraxis="coloraxis",
                        text=pivot_data.values,
                        texttemplate="%{text}",
                        hovertemplate="Tag: %{x}<br>Error Type: %{y}<br>Count: %{text}<extra></extra>"
                    ),
                    row=i + 1, col=j + 1
                )
                if i>0 or j>1:
                    fig.update_xaxes(title_text="True Entity", row=i + 1, col=j + 1)
                fig.update_yaxes(title_text="Predicted Entity", row=i + 1, col=j + 1)
        
        fig.update_layout(
            coloraxis=dict(colorscale='RdBu_r', colorbar=dict(title="Error Count")),
            title_text=f"{title_component} Entity Errors Heatmap: by Entity Span, Categorized by Model and Tagging Scheme",
            template="plotly_white",
            height=600, width=1000,
        )
        fig.show()
    

from abc import ABC, abstractmethod
from collections import defaultdict
from seqeval.scheme import Entities, IOB2, IOB1
from seqeval.metrics.sequence_labeling import get_entities
pd.set_option("display.max_rows", None)  # Display all rows


class EntityErrorAnalyzer(ABC):
    """Abstract base class for entity analysis."""

    def __init__(self, df):
        self.df = df
        self.y_true, self.y_pred = self.prepare_data(df)
        self.true_entities = []
        self.pred_entities = []

    @abstractmethod
    def extract_entities(self, y_data):
        """Extract entities based on the specific mode (strict or non-strict)."""
        pass

    @abstractmethod
    def prepare_entities(self):
        """Prepare true and predicted entities for analysis."""
        pass
    
    def prepare_data(self, df):
        core_data = df[df['Labels'] !=-100]
        y_true = core_data.groupby('Sentence Ids')['True Labels'].apply(list).tolist()
        y_pred = core_data.groupby('Sentence Ids')['Pred Labels'].apply(list).tolist()
        return y_true, y_pred
    
    def compute_false_negatives(self, entity_type):
        """Compute false negatives for a specific entity type."""
        return set(
            [e for e in self.true_entities if e[1] == entity_type]
        ) - set([e for e in self.pred_entities if e[1] == entity_type])

    def compute_false_positives(self, entity_type):
        """Compute false positives for a specific entity type."""
        return set(
            [e for e in self.pred_entities if e[1] == entity_type]
        ) - set([e for e in self.true_entities if e[1] == entity_type])

    def analyze_sentence_errors(self, target_entities, comparison_entities):
        """Analyze errors and return sentence IDs by error type."""
        error_sentences = defaultdict(set)  # Dictionary to hold sentence IDs for each error type
        non_o_errors = set()
        indexed_entities = defaultdict(list)

        # Index comparison entities by sentence
        for entity in comparison_entities:
            sen, entity_type, start, end = entity
            indexed_entities[sen].append(entity)

        # First pass: entity errors
        for target_entity in target_entities:
            t_sen, t_type, t_start, t_end = target_entity

            for comp_entity in indexed_entities[t_sen]:
                c_type, c_start, c_end = comp_entity[1:]

                if (
                    t_start == c_start
                    and t_end == c_end
                    and t_type != c_type
                    and target_entity not in non_o_errors
                ):
                    non_o_errors.add(target_entity)
                    error_sentences["Entity"].add(target_entity)

        # Second pass: boundary errors
        for target_entity in target_entities - non_o_errors:
            t_sen, t_type, t_start, t_end = target_entity

            for comp_entity in indexed_entities[t_sen]:
                c_sen, c_type, c_start, c_end = comp_entity

                if (
                    t_type == c_type
                    and (t_start <= c_start <= t_end or t_start <= c_end <= t_end)
                    and target_entity not in non_o_errors
                ):
                    non_o_errors.add(target_entity)
                    error_sentences["Boundary"].add(target_entity)

        # Third pass: combined entity and boundary errors
        for target_entity in target_entities - non_o_errors:
            t_sen, t_type, t_start, t_end = target_entity

            for comp_entity in indexed_entities[t_sen]:
                c_sen, c_type, c_start, c_end = comp_entity

                if (
                    c_type != t_type
                    and (t_start <= c_start <= t_end or t_start <= c_end <= t_end)
                    and target_entity not in non_o_errors
                ):
                    non_o_errors.add(target_entity)
                    error_sentences["Entity and Boundary"].add(target_entity)
                    # print(t_sen, t_start, t_end, c_sen, c_start, c_end)
                    # print(f' ({t_start} <= {c_start} <= {t_end} or {t_start} <= {c_end} <= {t_end})')
                    

        # Remaining unmatched errors are "O errors"
        for target_entity in target_entities - non_o_errors:
            t_sen, t_type, t_start, t_end = target_entity
            error_sentences["O"].add(target_entity)

        return {error_type: list(s_ids) for error_type, s_ids in error_sentences.items()}


    def analyze_component(self, error_type, entity_type=None):
        
        """Analyze errors (FP or FN) for a specific or all entity types."""
        self.prepare_entities()
        error_analysis = {}
        entity_types = (
            [entity_type]
            if entity_type
            else set(e[1] for e in self.true_entities + self.pred_entities)
        )

        for etype in entity_types:
            if error_type == "false_negatives":
                target_entities = self.compute_false_negatives(etype)
            elif error_type == "false_positives":
                target_entities = self.compute_false_positives(etype)
            else:
                raise ValueError("Error type must be 'false_negative' or 'false_positive'.")

            error_analysis[etype] = self.analyze_sentence_errors(
                target_entities, self.pred_entities if error_type == "false_negatives" else self.true_entities
            )

        return error_analysis
    
    def analyze_errors(self):
        self.prepare_entities()
        """Analyze both false positives and false negatives."""
        error_components = {"false_positives": defaultdict(set), "false_negatives": defaultdict(set)}

        for error_component in error_components.keys():
            results = self.analyze_component(error_component)
            for entity_type, errors in results.items():
                for error_type, sentences in errors.items():
                    error_components[error_component][error_type].update(sentences)

        # Convert sets to lists for consistency
        return {k: {etype: set(ids) for etype, ids in v.items()} for k, v in error_components.items()}
    
    


class StrictEntityAnalyzer(EntityErrorAnalyzer):
    """Analyzer for strict entity processing."""

    def extract_entities(self, y_data):
        """Extract entities in strict mode."""
        entities = Entities(y_data, IOB2, False)
        return self.adjust_end_index(entities)

    def prepare_entities(self):
        """Prepare true and predicted entities for strict mode."""
        self.true_entities = self.flatten_entities(self.extract_entities(self.y_true))
        self.pred_entities = self.flatten_entities(self.extract_entities(self.y_pred))

    def print_sentence(self, sen_id):
        """Print entities for a specific sentence ID."""
        true_entities = self.extract_entities(self.y_true).entities
        pred_entities = self.extract_entities(self.y_pred).entities
        print(f"True: {true_entities[sen_id]}")
        print(f"Pred: {pred_entities[sen_id]}")
        error = set(pred_entities[sen_id]) - set(true_entities[sen_id])
        print(f"Error in Pred: {error}")
        core_data = self.df[self.df['Labels'] !=-100]
        sentence_data = core_data[core_data['Sentence Ids']  == sen_id].copy()
        print(sentence_data[['Words', 'Sentence Ids', 'True Labels', 'Pred Labels', 'Strict True Entities', 'Strict Pred Entities', 'True Entities', 'Pred Entities']].head(60).to_string())

    @staticmethod
    def flatten_entities(entities):
        """Flatten strict entities into tuples."""
        return [e for sen in entities.entities for e in sen]
    
    @staticmethod
    def adjust_end_index(entities):
        """Adjust the end index for IOB2 entities to make them inclusive."""
        adjusted_entities = []
        for sentence_entities in entities.entities:  # Iterate through sentences
            adjusted_sentence = []
            for entity in sentence_entities:  # Iterate through entities in each sentence
                sentence_id, entity_type, start, end = entity.to_tuple()
                # Adjust end index
                adjusted_sentence.append((sentence_id, entity_type, start, end - 1))
            adjusted_entities.append(adjusted_sentence)
        entities.entities = adjusted_entities  # Replace with adjusted entities
        return entities
    
    
    
    
class NonStrictEntityAnalyzer(EntityErrorAnalyzer):
    """Analyzer for non-strict entity processing."""

    def extract_entities(self, y_data):
        """Extract entities in non-strict mode."""
        return [
            [(sen_id,) + entity for entity in get_entities(sen)]
            for sen_id, sen in enumerate(y_data)
        ]

    def prepare_entities(self):
        """Prepare true and predicted entities for non-strict mode."""
        self.true_entities = self.flatten_entities(self.extract_entities(self.y_true))
        self.pred_entities = self.flatten_entities(self.extract_entities(self.y_pred))

    def print_sentence(self, sen_id):
        """Print entities for a specific sentence ID."""
        true_entities = self.extract_entities(self.y_true)
        pred_entities = self.extract_entities(self.y_pred)
        print(f"True: {true_entities[sen_id]}")
        print(f"Pred: {pred_entities[sen_id]}")
        error = set(pred_entities[sen_id]) - set(true_entities[sen_id])
        print(f"Error in Pred: {error}")
        core_data = self.df[self.df['Labels'] !=-100]
        sentence_data = core_data[core_data['Sentence Ids']  == sen_id].copy()
        print(sentence_data[['Words', 'Sentence Ids', 'True Labels', 'Pred Labels', 'Strict True Entities', 'Strict Pred Entities', 'True Entities', 'Pred Entities']].head(60).to_string())
        
    @staticmethod
    def flatten_entities(entities):
        """Flatten non-strict entities into tuples."""
        return [e for sen in entities for e in sen]

class ErrorAnalysisManager:
    """Manages all error analysis workflows and stores results."""

    def __init__(self, df):
        """
        Initialize the manager with the dataset.

        Args:
            df (pd.DataFrame): The dataset containing y_true and y_pred.
        """
        self.df = df
        self.strict_analyzer = StrictEntityAnalyzer(df)
        self.non_strict_analyzer = NonStrictEntityAnalyzer(df)
        self.results = {
            "IOB2": {"false_negatives": None, "false_positives": None, "errors": None},
            "IOB": {"false_negatives": None, "false_positives": None, "errors": None},
        }

    def run_workflows(self):
        """Run all error analysis workflows."""
        self.results["IOB2"]["false_negatives"] = self.strict_analyzer.analyze_component("false_negatives")
        self.results["IOB2"]["false_positives"] = self.strict_analyzer.analyze_component("false_positives")
        self.results["IOB2"]["errors"] = self.strict_analyzer.analyze_errors()

        self.results["IOB"]["false_negatives"] = self.non_strict_analyzer.analyze_component("false_negatives")
        self.results["IOB"]["false_positives"] = self.non_strict_analyzer.analyze_component("false_positives")
        self.results["IOB"]["errors"] = self.non_strict_analyzer.analyze_errors()

    def get_results(self):
        """Get the results of all workflows."""
        return self.results

class SchemeComparator:
    """Facilitator for comparing annotation schemes."""

    def __init__(self, results):
        """
        Initialize the comparator with results from error analysis.

        Args:
            results (dict): Results from the manager's workflows, structured by scheme.
        """
        self.results = results

    def compare_component(self, component, entity_type):
        """
        Compare all error types for a specific entity across schemes.

        Args:
            entity_type (str): The entity type to compare (e.g., "MISC").

        Returns:
            dict: A dictionary with set operation results for all error types.
        """
        schemes = list(self.results.keys())
        if len(schemes) != 2:
            raise ValueError("Comparator requires exactly two schemes for comparison.")

        scheme_1, scheme_2 = schemes
        component_1 = self.results[scheme_1][component]
        component_2 = self.results[scheme_2][component]

        results = {}
        entity_1 = component_1.get(entity_type, {})
        entity_2 = component_2.get(entity_type, {})

        # Compare all error types under the given entity
        all_error_types = set(entity_1.keys()).union(set(entity_2.keys()))
        for error_type in all_error_types:
            set_1 = set(entity_1.get(error_type, []))
            set_2 = set(entity_2.get(error_type, []))

            results[error_type] = {
                "overlap": set_1 & set_2,
                f"{scheme_1} Only": set_1 - set_2,
                f"{scheme_2} Only": set_2 - set_1,
            }

        return results

    def compare_errors(self, component, error_type):
        """
        Compare errors across all entities and error types for both schemes.

        Returns:
            dict: A dictionary with set operation results for all error types.
        """
        schemes = list(self.results.keys())
        if len(schemes) != 2:
            raise ValueError("Comparator requires exactly two schemes for comparison.")

        schemes_map = {'scheme_1': 'IOB', 'scheme_2': 'IOB2'}
        errors_1 = self.results[schemes_map['scheme_1']]["errors"][component]
        errors_2 = self.results[schemes_map['scheme_2']]["errors"][component]

       
       
        comparison_result = ComparisonResult.from_lists(errors_1, errors_2, error_type, schemes_map)

        return comparison_result.to_dict()


from dataclasses import dataclass, field
from typing import List, Dict, Set

@dataclass
class ComparisonResult:
    """Dataclass to store comparison results."""
    scheme_1_name: str
    scheme_2_name: str
    set_1_errors: Set[int] = field(default=set)
    set_2_errors: Set[int] = field(default=set)
    overlap: Set[int] = field(default_factory=set)
    scheme_1_only: Set[int] = field(default_factory=set)
    scheme_2_only: Set[int] = field(default_factory=set)

    @staticmethod
    def from_lists(errors_1: Dict, errors_2: Dict, error_type: str, schemes_map: Dict) -> "ComparisonResult":
        """
        Create a ComparisonResult from two lists.

        Args:
            lst_1: List of values from scheme 1.
            lst_2: List of values from scheme 2.

        Returns:
            ComparisonResult: Dataclass containing the comparison and statistics.
        """
        set_1 = set(errors_1.get(error_type, []))
        
        set_2 = set(errors_2.get(error_type, []))
        
        sentence_lst_1 = [error[0] for error in errors_1.get(error_type, [])]
        sentence_lst_2 = [error[0] for error in errors_2.get(error_type, [])]
        sentence_set_1 = set(sentence_lst_1)
        sentence_set_2 = set(sentence_lst_2)
        
        overlap = sentence_set_1 & sentence_set_2
        scheme_1_only = sentence_set_1 - sentence_set_2
        scheme_2_only = sentence_set_2 - sentence_set_1

        return ComparisonResult(
            scheme_1_name=schemes_map['scheme_1'],
            scheme_2_name=schemes_map['scheme_2'],
            set_1_errors= set_1,
            set_2_errors= set_2,
            overlap=overlap,
            scheme_1_only=scheme_1_only,
            scheme_2_only=scheme_2_only,
        )
        
    def to_dict(self) -> Dict[str, Dict[str, Set[int]]]:
        """R"Overlap": self.overlap, comparison results as a dictionary."""
        return {
            f"{self.scheme_1_name} Errors": self.set_1_errors,
            f"{self.scheme_2_name} Errors": self.set_2_errors,
            "Overlap": self.overlap,
            f"{self.scheme_1_name} Only Errors": self.scheme_1_only,
            f"{self.scheme_2_name} Only Errors": self.scheme_2_only,
        }





In [3]:
# CONFIG_PATH = Path("/Users/ay227/Desktop/Final-Year/Thesis-Experiments/Online-Dashboard-Phase/dashboard-config.yaml")
CONFIG_PATH = Path("/Users/ahmed/Desktop/Dashboard/analysis-config.yaml")

config_manager = DashboardConfigManager(CONFIG_PATH)
dev_config = config_manager.development_config    

app = Dash(__name__, suppress_callback_exceptions=True)

app_config = config_manager.app_config
server = app.server  # Flask server instance for caching
variants_data = None

data_manager = DataManager(config_manager, server)
dash_data = data_manager.load_data()

2025-09-21 21:14:09 - INFO - Found Google Drive directory for account ahmed.younes.sam@gmail.com: /Users/ahmed/Library/CloudStorage/GoogleDrive-ahmed.younes.sam@gmail.com
2025-09-21 21:14:09 - INFO - Loading Dashboard Data from  /Users/ahmed/Library/CloudStorage/GoogleDrive-ahmed.younes.sam@gmail.com/My Drive/Final Year Experiments/Thesis-Experiments/Experiments/BaseLineExperiment/ANERCorp_CamelLab_arabertv02


  0%|          | 0/18 [00:00<?, ?it/s]

2025-09-21 21:14:12 - INFO - Found Google Drive directory for account ahmed.younes.sam@gmail.com: /Users/ahmed/Library/CloudStorage/GoogleDrive-ahmed.younes.sam@gmail.com
2025-09-21 21:14:12 - INFO - Found Google Drive directory for account ahmed.younes.sam@gmail.com: /Users/ahmed/Library/CloudStorage/GoogleDrive-ahmed.younes.sam@gmail.com
2025-09-21 21:14:12 - INFO - Found Google Drive directory for account ahmed.younes.sam@gmail.com: /Users/ahmed/Library/CloudStorage/GoogleDrive-ahmed.younes.sam@gmail.com
2025-09-21 21:14:12 - INFO - Loading Dashboard Data from  /Users/ahmed/Library/CloudStorage/GoogleDrive-ahmed.younes.sam@gmail.com/My Drive/Final Year Experiments/Thesis-Experiments/Experiments/BaseLineExperiment/conll2003_bert


  0%|          | 0/18 [00:00<?, ?it/s]

2025-09-21 21:14:22 - INFO - Found Google Drive directory for account ahmed.younes.sam@gmail.com: /Users/ahmed/Library/CloudStorage/GoogleDrive-ahmed.younes.sam@gmail.com
2025-09-21 21:14:22 - INFO - Found Google Drive directory for account ahmed.younes.sam@gmail.com: /Users/ahmed/Library/CloudStorage/GoogleDrive-ahmed.younes.sam@gmail.com


In [None]:
from typing import Dict, Iterator, Iterable, Tuple, List, Optional
import pandas as pd
import numpy as np

from seqeval.metrics.sequence_labeling import get_entities
from seqeval.scheme import Entities, auto_detect
from collections import Counter, defaultdict


_SPLIT_LABEL    = {"train": "Train", "test": "Test", "validation": "Validation", "dev": "Validation"}
_DATASET_LABEL  = {"ANERCorp_CamelLab": "ANERCorp", "conll2003": "CoNLL-2003"}


# pretty/variant names -> raw corpus keys (extend as needed)
_DATASET_MAP    = {
    "ANERCorp": "ANERCorp_CamelLab",
    "CoNLL-2003": "conll2003",
    "ANERCorp_CamelLab_arabertv02": "ANERCorp_CamelLab",
    "conll2003_bert": "conll2003",
}

LANGUAGE_MAP = {
    "ANERCorp_CamelLab": "Arabic",
    "conll2003": "English",
    "ANERCorp_CamelLab_arabertv02": "Arabic",
    "conll2003_bert": "English",
}


_TAG_NORMALIZE  = {"B-PERS": "B-PER", "I-PERS": "I-PER"}

_DEFAULT_TAGS = ['B-LOC','I-LOC','B-PER','I-PER','B-ORG','I-ORG','B-MISC','I-MISC']

BASE_DATASETS = {"ANERCorp_CamelLab", "conll2003"}


_DEFAULT_ENTITY_SPANS = ["LOC", "PER", "ORG", "MISC"]

_ENTITY_NORMALIZE = {"PERS": "PER"}  # unify ANER's PERS→PER


EXCLUDE_TAGS = {"micro", "macro", "weighted"}
ROW_ORDER = ["Precision", "Recall", "F1-score"]

class BaseDatasetHelper:
    """Only cross-cutting utilities. No analysis-specific logic here."""

    def __init__(self, corpora: Dict):
        self.corpora = corpora

    # ---------- variant → dataset keys ----------
    def resolve_language_keys(self, variant: str) -> List[str]:
        """Return raw corpus keys for a given variant or 'combined'."""
        if variant == "combined":
            # choose the ones you want to include in 'combined'
            # return [k for k in self.corpora.keys() if k in ("ANERCorp_CamelLab", "conll2003")]
            return [k for k in self.corpora.keys() if k in BASE_DATASETS]
        # pretty/alias → raw
        if variant in _DATASET_MAP:
            return [_DATASET_MAP[variant]]
        # # assume it's already a raw key
        return [variant]

    # ---------- labels ----------
    def lang_label(self, ds_key: str) -> str:
        return LANGUAGE_MAP.get(ds_key, ds_key)

    def split_label(self, split_key: str) -> str:
        return _SPLIT_LABEL.get(split_key.lower(), split_key.title())

    # ---------- iterate splits ----------
    def iter_splits(self, ds_key: str) -> Iterator[Tuple[str, str, pd.DataFrame]]:
        """
        Yields (split_key, split_label, df) for a dataset key.
        - Prefers train/test if present
        - Drops validation for conll2003 (customize rules here)
        """
        splits = self.corpora[ds_key]["splits"]
        order = [k for k in ("train", "test") if k in splits] or list(splits.keys())
        if ds_key == "conll2003":
            order = [k for k in order if k.lower() != "validation"]
        for sk in order:
            yield sk, self.split_label(sk), splits[sk]

    # ---------- tag normalization (optional) ----------
    def normalize_tag_column(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Ensure a 'Tag' column exists and normalize label variants.
        Accepts 'True Labels' or 'Tag' as source.
        """
        tag_col = "True Labels" if "True Labels" in df.columns else ("Tag" if "Tag" in df.columns else None)
        if tag_col is None:
            raise ValueError("Expected a tag column: 'True Labels' or 'Tag'.")
        out = df.copy()
        out[tag_col] = out[tag_col].replace(_TAG_NORMALIZE)
        return out.rename(columns={tag_col: "Tag"})
    
    @staticmethod
    def _normalize_tag(tag: str) -> str:
        return _TAG_NORMALIZE.get(tag, tag)




class BaseDashDataProcessor:
    def __init__(self, dash_data: Dict[str, "DashboardData"]):
        self.dash_data = dash_data  # raw objects with .train_data / .analysis_data
        self.corpora = {}           # filled by build_corpora()

    # labels (optional pretty names)
    # def ds_label(self, variant_key: str) -> str:
    #     """
    #     Example:
    #       variant_key='ANERCorp_CamelLab_arabertv02'
    #       DATA_MAP[variant_key] -> 'ANERCorp_CamelLab'
    #       _DATASET_LABEL['ANERCorp_CamelLab'] -> 'ANERCorp'
    #     Falls back gracefully if mappings are missing.
    #     """
    #     ds_key = _DATASET_MAP.get(variant_key, variant_key)
    #     return _DATASET_LABEL.get(ds_key, ds_key)
    def ds_label(self, variant_key: str) -> str:
        return LANGUAGE_MAP.get(variant_key, variant_key)


    def normalise_data(self, df: pd.DataFrame) -> pd.DataFrame:
        df = df[df['Labels'] != -100].copy()
        if 'True Labels' in df.columns:
            df['True Labels'] = df['True Labels'].replace({'B-PERS': 'B-PER', 'I-PERS': 'I-PER'})
        return df

    def _resolve_keys_for_variant(self, variant: str) -> list[str]:
        """Map a variant to concrete keys in dash_data."""
        
        if variant == "combined":
            return list(self.dash_data.keys())
        return [variant]

    def build_corpora(self, variant: str) -> dict:
        """
        selected_variant:
          - '<variant_name>' e.g. 'ANERCorp_CamelLab_arabertv02'
          - 'combined'       -> include all variants present in dash_data

        Populate self.corpora for the requested variant.
        Output shape:
          self.corpora[<Dataset Label>]['splits'] = {'Train': df, 'Test': df}
        """
        self.corpora = {}
        variant_keys = self._resolve_keys_for_variant(variant)

        for variant_key in variant_keys:
            ds_lbl = self.ds_label(variant_key)

            # ---- get & normalize splits ----
            train_df = self.normalise_data(self.dash_data[variant_key].train_data)
            test_df  = self.normalise_data(self.dash_data[variant_key].analysis_data)

            # ensure expected columns exist
            if "Core Tokens" not in train_df.columns and "core_tokens" in train_df.columns:
                train_df = train_df.rename(columns={"core_tokens": "Core Tokens"})
            if "Core Tokens" not in test_df.columns and "core_tokens" in test_df.columns:
                test_df  = test_df.rename(columns={"core_tokens": "Core Tokens"})

            self.corpora[ds_lbl] = {
                "splits": {
                    "Train": train_df,
                    "Test":  test_df,
                }
            }

        return self.corpora
    
    def iter_splits(self, ds_key: str, only_test: bool = False) -> Iterator[Tuple[str, str, pd.DataFrame]]:
        """
        Yield dataset splits.
        Set only_test=True to restrict to Test split (ignore Train).
        """
        splits = ("Test",) if only_test else ("Train", "Test")
        for sk in splits:
            if sk in self.corpora[ds_key]["splits"]:
                yield sk, sk, self.corpora[ds_key]["splits"][sk]


class EntitySpanF1Helper(BaseDashDataProcessor):
    """
    Extract F1 scores per entity type from both IOB1 and IOB2 reports.
    Output DataFrame columns: Language, Scheme, Tag, F1
    """

    def generate_df(self, selected_variant: str, round_to: int = 3) -> pd.DataFrame:
        self.build_corpora(selected_variant)
        rows = []

        for ds_lbl, content in self.dash_data.items():
            lang_lbl = self.ds_label(ds_lbl)

            # IOB1 (non-strict)
            iob1 = content.entity_non_strict_report.copy()
            iob1 = iob1[~iob1["Tag"].isin(["micro", "macro", "weighted"])]
            for _, row in iob1.iterrows():
                rows.append({
                    "Language": lang_lbl,
                    "Scheme": "IOB1",
                    "Tag": row["Tag"],
                    "F1-score": round(float(row["F1"]), round_to)
                })

            # IOB2 (strict)
            iob2 = content.entity_strict_report.copy()
            iob2 = iob2[~iob2["Tag"].isin(["micro", "macro", "weighted"])]
            for _, row in iob2.iterrows():
                rows.append({
                    "Language": lang_lbl,
                    "Scheme": "IOB2",
                    "Tag": row["Tag"],
                    "F1-score": round(float(row["F1"]), round_to)
                })

        return pd.DataFrame(rows)


In [16]:
helper = EntitySpanF1Helper(dash_data)

helper.generate_df("combined")

Unnamed: 0,Language,Scheme,Tag,F1-score
0,Arabic,IOB1,LOC,0.909
1,Arabic,IOB1,MISC,0.674
2,Arabic,IOB1,ORG,0.749
3,Arabic,IOB1,PERS,0.856
4,Arabic,IOB2,LOC,0.913
5,Arabic,IOB2,MISC,0.696
6,Arabic,IOB2,ORG,0.767
7,Arabic,IOB2,PERS,0.852
8,English,IOB1,LOC,0.926
9,English,IOB1,MISC,0.807


In [22]:
@staticmethod
def normalize_spans(df: pd.DataFrame, col: str = "Tag") -> pd.DataFrame:
        """
        Normalize BIO span tags (e.g., PERS -> PER).
        Operates in-place on a copy of the DataFrame.
        """
        
        out = df.copy()
        
        if col in out.columns:
            out[col] = out[col].replace(_TAG_NORMALIZE)
            print(out)
        return out
normalize_spans(dash_data['ANERCorp_CamelLab_arabertv02'].entity_non_strict_report)

        Tag  Precision  Recall      F1  Support                         Model  \
0       LOC     0.8919  0.9275  0.9094      676  ANERCorp_CamelLab_arabertv02   
1      MISC     0.7366  0.6214  0.6741      243  ANERCorp_CamelLab_arabertv02   
2       ORG     0.7630  0.7364  0.7494      459  ANERCorp_CamelLab_arabertv02   
3      PERS     0.8835  0.8298  0.8558      905  ANERCorp_CamelLab_arabertv02   
4     micro     0.8483  0.8178  0.8327     2283  ANERCorp_CamelLab_arabertv02   
5     macro     0.8187  0.7788  0.7972     2283  ANERCorp_CamelLab_arabertv02   
6  weighted     0.8461  0.8178  0.8310     2283  ANERCorp_CamelLab_arabertv02   

  Scheme  
0   IOB1  
1   IOB1  
2   IOB1  
3   IOB1  
4   IOB1  
5   IOB1  
6   IOB1  


Unnamed: 0,Tag,Precision,Recall,F1,Support,Model,Scheme
0,LOC,0.8919,0.9275,0.9094,676,ANERCorp_CamelLab_arabertv02,IOB1
1,MISC,0.7366,0.6214,0.6741,243,ANERCorp_CamelLab_arabertv02,IOB1
2,ORG,0.763,0.7364,0.7494,459,ANERCorp_CamelLab_arabertv02,IOB1
3,PERS,0.8835,0.8298,0.8558,905,ANERCorp_CamelLab_arabertv02,IOB1
4,micro,0.8483,0.8178,0.8327,2283,ANERCorp_CamelLab_arabertv02,IOB1
5,macro,0.8187,0.7788,0.7972,2283,ANERCorp_CamelLab_arabertv02,IOB1
6,weighted,0.8461,0.8178,0.831,2283,ANERCorp_CamelLab_arabertv02,IOB1


In [None]:
report_data = []
for data_name, data_content in dash_data.items():
	entity_report = data_content.entity_non_strict_report
	entity_strict_report = data_content.entity_strict_report
	entity_report['Model'] = data_name
	entity_report['Scheme'] = 'IOB1'
	entity_strict_report['Model'] = data_name
	entity_strict_report['Scheme'] = 'IOB2'
	report_data.append(pd.concat([
		entity_report, 
		entity_strict_report
	]))
report_df = pd.concat(report_data)
# report_df = report_df[~report_df['Tag'].isin(['micro', 'macro', 'weighted'])]    
tag_mapping = {
    'PERS': 'PER'
}

dataset_mapping = {
    'ANERCorp_CamelLab_arabertv02': 'Arabic',
    'conll2003_bert': 'English'
}

report_df['Tag'] = report_df['Tag'].replace(tag_mapping)
report_df['Model'] = report_df['Model'].replace(dataset_mapping)
report_df


Unnamed: 0,Tag,Precision,Recall,F1,Support,Model,Scheme
0,LOC,0.8919,0.9275,0.9094,676,Arabic,IOB1
1,MISC,0.7366,0.6214,0.6741,243,Arabic,IOB1
2,ORG,0.763,0.7364,0.7494,459,Arabic,IOB1
3,PER,0.8835,0.8298,0.8558,905,Arabic,IOB1
4,micro,0.8483,0.8178,0.8327,2283,Arabic,IOB1
5,macro,0.8187,0.7788,0.7972,2283,Arabic,IOB1
6,weighted,0.8461,0.8178,0.831,2283,Arabic,IOB1
0,LOC,0.8927,0.9341,0.9129,668,Arabic,IOB2
1,MISC,0.772,0.634,0.6963,235,Arabic,IOB2
2,ORG,0.7842,0.7511,0.7673,450,Arabic,IOB2


In [41]:
class ReportBarChart(Visualization):
    def __init__(self, data_dict, dataset_names, mappings: dict, scheme: str = 'IOB1'):
        """
        ReportBarChart for multiple datasets using one tagging scheme.

        Parameters:
            data_dict (dict): A dict of dataset_name -> DashData instances.
            dataset_names (list): List of dataset names to include.
            mappings (dict): Tag and dataset name mappings.
            scheme (str): Either 'IOB1' (non-strict) or 'IOB2' (strict).
        """
        self.datasets = {name: data_dict[name] for name in dataset_names}
        self.model_names = {name: mappings.get('dataset_mapping')[name] for name in dataset_names}
        super().__init__(data_dict, mappings)

        if scheme not in ['IOB1', 'IOB2']:
            raise ValueError("Scheme must be 'IOB1' or 'IOB2'")
        self.scheme = scheme

    def prepare_data(self):
        dfs = []

        for name, dataset in self.datasets.items():
            if self.scheme == 'IOB1':
                report_df = dataset.entity_non_strict_report.copy()
            else:
                report_df = dataset.entity_strict_report.copy()

            report_df = report_df[~report_df['Tag'].isin(['micro', 'macro', 'weighted'])]
            report_df['Scheme'] = self.scheme
            report_df['Model'] = self.model_names[name]

            dfs.append(self.replace_mappings(report_df))

        return pd.concat(dfs, ignore_index=True)

    def visualize(self):
        df = self.prepare_data()
        melted = df.melt(
            id_vars=["Tag", "Support", "Model", "Scheme"],
            value_vars=["Precision", "Recall"],
            var_name="Metric",
            value_name="Value"
        )
        melted['Value'] = melted['Value'].round(3)

        fig = px.bar(
            melted,
            x="Tag",
            y="Value",
            color="Metric",
            barmode="group",
            facet_row="Model",
            facet_row_spacing=0.1,
            text="Value",
            template="plotly_white",
            title=f"Precision and Recall by Entity Span ({self.scheme})"
        )

        fig.update_layout(height=800, width=1000)
        fig.show()

    def visualize_f1(self):
        df = self.prepare_data()
        df['F1'] = df['F1'].round(3)

        fig = px.bar(
            df,
            x="Tag",
            y="F1",
            color="Model",
            barmode="group",
            text="F1",
            template="plotly_white",
            title=f"F1 Score by Entity Type ({self.scheme})"
        )

        fig.update_layout(height=600, width=1000)
        fig.show()

    def visualize_support(self):
        df = self.prepare_data()

        fig = px.bar(
            df,
            x="Tag",
            y="Support",
            color="Model",
            barmode="group",
            text="Support",
            template="plotly_white",
            title=f"Support Counts by Entity Type ({self.scheme})"
        )

        fig.update_layout(height=600, width=1000)
        fig.show()


In [42]:
chart = ReportBarChart(
    dash_data, 
    ['ANERCorp_CamelLab_arabertv02', 'conll2003_bert'], 
    mappings, 
    scheme='IOB2'
)
chart.visualize()



In [18]:
mappings

{'tag_mapping': {'PERS': 'PER'},
 'dataset_mapping': {'ANERCorp_CamelLab_arabertv02': 'AraBERTv02',
  'conll2003_bert': 'BERT'}}

In [6]:
class ReportBarChart(Visualization):
    def __init__(self, data, name, mappings: dict, scheme: str = 'IOB1'):
        """
        ReportBarChart for a single dataset using one tagging scheme.

        Parameters:
            data (DashData): A dataclass object for a single dataset.
            mappings (dict): Tag and dataset name mappings.
            scheme (str): Either 'IOB1' (non-strict) or 'IOB2' (strict).
        """
        self.model_name = mappings.get('dataset_mapping')[name]
        super().__init__(data[name], mappings)

        if scheme not in ['IOB1', 'IOB2']:
            raise ValueError("Scheme must be 'IOB1' or 'IOB2'")
        self.scheme = scheme

    def prepare_data(self):
        if self.scheme == 'IOB1':
            report_df = self.data.entity_non_strict_report.copy()
        else:
            report_df = self.data.entity_strict_report.copy()

        report_df = report_df[~report_df['Tag'].isin(['micro', 'macro', 'weighted'])]
        report_df['Scheme'] = self.scheme
        report_df['Model'] = self.model_name
        

        return self.replace_mappings(report_df)

    def visualize(self):
        df = self.prepare_data()
        melted = df.melt(
            id_vars=["Tag", "Support", "Model", "Scheme"],
            value_vars=["Precision", "Recall"],
            var_name="Metric",
            value_name="Value"
        )
        melted['Value'] = melted['Value'].round(3)

        fig = px.bar(
            melted,
            x="Tag",
            y="Value",
            color="Metric",
            barmode="group",
            text="Value",
            template="plotly_white",
            title=f"Precision and Recall by Entity Type ({self.scheme})"
        )

        fig.update_layout(height=600, width=1000)
        fig.show()

    def visualize_f1(self):
        df = self.prepare_data()
        df['F1'] = df['F1'].round(3)

        fig = px.bar(
            df,
            x="Tag",
            y="F1",
            color="Model",
            barmode="group",
            text="F1",
            template="plotly_white",
            title=f"F1 Score by Entity Type ({self.scheme})"
        )

        fig.update_layout(height=600, width=1000)
        fig.show()

    def visualize_support(self):
        df = self.prepare_data()

        fig = px.bar(
            df,
            x="Tag",
            y="Support",
            color="Model",
            barmode="group",
            text="Support",
            template="plotly_white",
            title=f"Support Counts by Entity Type ({self.scheme})"
        )

        fig.update_layout(height=600, width=1000)
        fig.show()


In [7]:


chart = ReportBarChart(dash_data, 'ANERCorp_CamelLab_arabertv02', mappings, scheme='IOB2')
chart.visualize()



In [19]:

mappings = {
    'tag_mapping': {'PERS': 'PER'},
    'dataset_mapping': {'ANERCorp_CamelLab_arabertv02': 'AraBERTv02', 'conll2003_bert': 'BERT'}
}


class ConfusionBarChart(Visualization):
    def prepare_data(self):
        matrix_data = []
        for data_name, data_content in self.data.items():
            entity_matrix = pd.DataFrame(data_content.entity_non_strict_confusion_data['confusion_matrix']).T 
            entity_strict_matrix = pd.DataFrame(data_content.entity_strict_confusion_data['confusion_matrix']).T
            entity_matrix['Model'] = data_name
            entity_matrix['Scheme'] = 'IOB1'
            entity_strict_matrix['Model'] = data_name
            entity_strict_matrix['Scheme'] = 'IOB2'
            matrix_data.append(pd.concat([
				entity_matrix, 
				entity_strict_matrix
			]))
        matrix_df = pd.concat(matrix_data)
        matrix_df.reset_index(inplace=True)
        matrix_df.rename(columns={'index': 'Tag'}, inplace=True)
        matrix_data = self.replace_mappings(matrix_df)
        
        grouped = matrix_data.groupby(['Tag', 'Model', 'Scheme']).sum()
        grouped['Total'] = grouped['TP'] + grouped['FP'] + grouped['FN']
        
        matrix_data = matrix_data.merge(grouped['Total'], on=['Tag', 'Model', 'Scheme'], how='left')
        
        matrix_data['TP_Count'] = matrix_data['TP']
        matrix_data['FP_Count'] = matrix_data['FP']
        matrix_data['FN_Count'] = matrix_data['FN']
        
        matrix_data['TP'] = matrix_data['TP'] / matrix_data['Total']
        matrix_data['FP'] = matrix_data['FP'] / matrix_data['Total']
        matrix_data['FN'] = matrix_data['FN'] / matrix_data['Total']
        
        confusion_scaled_df = matrix_data.melt(id_vars=["Tag", "Model", "Scheme"], value_vars=["TP", "FP", "FN"], var_name="Metric", value_name="Scale")
        confusion_count_df = matrix_data.melt(id_vars=["Tag", "Model", "Scheme"], value_vars=["TP_Count", "FP_Count", "FN_Count"], var_name="Metric", value_name="Count")
        confusion_count_df['Metric'] = confusion_count_df['Metric'].str.replace('_Count', '')
        confusion_data = confusion_scaled_df.merge(confusion_count_df, on=["Tag", "Model", "Scheme", "Metric"])
        return confusion_data

    def visualize(self):
        confusion_df = self.prepare_data()
        anercorp = confusion_df[(confusion_df['Model'] == 'AraBERTv02')&(confusion_df['Scheme'] == 'IOB2')].copy()
        anercorp['Tag'] = pd.Categorical(anercorp['Tag'], categories=["LOC", "MISC", "ORG", "PER"], ordered=True)
        print(anercorp)
        fig = px.bar(anercorp, x="Tag", y="Scale", color="Metric",
            # facet_row="Scheme", facet_col="Model",
            title="Breakdown of Confusion Matrix Components: by Entity Span, Categorized by Model and Tagging Scheme",
            labels={"Scale": "Scaled Counts"},
            barmode='group',
            template="plotly_white",
            facet_row_spacing=0.1,  # Adjusted spacing
            facet_col_spacing=0.08,
            text='Count',  # Display the actual Count on top of each bar
            # width=1500,
            category_orders={"Tag": ["LOC", "MISC", "ORG", "PER"]}  # Enforce the order in the plot

            )
        
        
        fig.show()

In [20]:
confusion_bar = ConfusionBarChart(dash_data, mappings)
confusion_bar.visualize()

     Tag       Model Scheme Metric     Scale  Count
4    ORG  AraBERTv02   IOB2     TP  0.622468    338
5    LOC  AraBERTv02   IOB2     TP  0.839838    624
6   MISC  AraBERTv02   IOB2     TP  0.534050    149
7    PER  AraBERTv02   IOB2     TP  0.741803    724
20   ORG  AraBERTv02   IOB2     FP  0.171271     93
21   LOC  AraBERTv02   IOB2     FP  0.100942     75
22  MISC  AraBERTv02   IOB2     FP  0.157706     44
23   PER  AraBERTv02   IOB2     FP  0.120902    118
36   ORG  AraBERTv02   IOB2     FN  0.206262    112
37   LOC  AraBERTv02   IOB2     FN  0.059219     44
38  MISC  AraBERTv02   IOB2     FN  0.308244     86
39   PER  AraBERTv02   IOB2     FN  0.137295    134


In [None]:
# Confusion matrix heatmap example
report_bar = ReportBarChart(dash_data, mappings)
report_bar.visualize_support()

In [33]:
# Confusion matrix heatmap example
confusion_heatmap = ConfusionHeatmap(dash_data, mappings)
confusion_heatmap.visualize()

In [None]:
import plotly.express as px
import pandas as pd

class ErrorTypeBarChart(Visualization):
    def prepare_data(self):
        matrix_data = []
        error_components = ["false_positives", "false_negatives"]

        for component in error_components:
            o_error = "Inclusion" if component == 'false_positives' else "Exclusion"

            for data_name, data_content in self.data.items():
                for scheme, entity_confusion in [
                    # ('IOB1', data_content.entity_non_strict_confusion_data), 
                                                ('IOB2', data_content.entity_strict_confusion_data)]:
                    error_types, _ = self.process_entity_confusion(entity_confusion[component], o_error)

                    # 🔹 Rename Inclusion/Exclusion to "O Errors"
                    error_types.rename(columns={o_error: "O Errors"}, inplace=True)

                    error_types['Model'] = data_name
                    error_types['Scheme'] = scheme
                    error_types['Component'] = "False Positives" if component == "false_positives" else "False Negatives"
                    matrix_data.append(error_types)

        # Combine into a single DataFrame
        matrix_df = pd.concat(matrix_data)
        matrix_df.reset_index(inplace=True)
        matrix_df.rename(columns={'index': 'Tag'}, inplace=True)
        matrix_df = self.replace_mappings(matrix_df)

        # Melt for visualization
        melted_df = matrix_df.melt(
            id_vars=['Model', 'Scheme', 'Component'],
            value_vars=['Entity', 'Boundary', 'Entity and Boundary', "O Errors"],
            var_name="Error Type",
            value_name="Raw Count"
        )

        # Aggregate totals for error types per Scheme & Model
        pivot_data = melted_df.groupby(["Error Type", "Scheme", "Model", "Component"], as_index=False).agg(
            Total_Count=("Raw Count", "sum")
        )
        

        # Compute percentages per scheme/model
        pivot_data['Percentage'] = (
            pivot_data.groupby(['Scheme', 'Model', 'Component'])['Total_Count']
            .transform(lambda x: (x / x.sum()) * 100)
        )

        pivot_data['Percentage'] = pivot_data['Percentage'].round(2)
        

        return pivot_data


    def visualize(self):
        """
        Generates a single bar chart with both False Positives & False Negatives.
        """
        error_data = self.prepare_data()

        # 🔹 Define consistent colors for False Positives & False Negatives
        color_map = {
            "False Positives": "#E74C3C",  # Red (FP)
            "False Negatives": "#00CC96",  # Teal (FN)
        }

        # Create a bar chart with facet_row and facet_col
        fig = px.bar(
            error_data,
            x="Error Type",
            y="Percentage",
            color="Component",
            text="Total_Count",
            facet_row="Scheme",
            facet_col="Model",
            facet_col_spacing=0.1,
            barmode="group",
            title="Distribution of Error Types within False Positives and False Negatives Across Models and Annotation Schemes.",
            labels={"Percentage": "Percentage (%)", "Total_Count": "Raw Count"},
            # height=700,
            # width=1000,
            color_discrete_map=color_map  # Assign colors to error types
        )

        # Update text position
        fig.update_traces(textposition='auto')

        # Adjust layout for better readability
        fig.update_layout(
            template="plotly_white",
            showlegend=True,
            margin=dict(t=100, b=50, l=50, r=50)
        )

        fig.show()


In [62]:
error_bar_chart = ErrorTypeBarChart(dash_data, mappings)
error_bar_chart.visualize()  # For False Positives




In [45]:
error_type_heatmap = ErrorTypeHeatmap(dash_data, mappings)
error_type_heatmap.visualize_table('false_positives')


### Table for Scheme: IOB1, Model: Arabic ###

         Error Type  Raw Count  Percentage (%)
           Boundary        116           34.73
             Entity         83           24.85
Entity and Boundary         29            8.68
          Inclusion        106           31.74

### Table for Scheme: IOB1, Model: English ###

         Error Type  Raw Count  Percentage (%)
           Boundary         78           14.44
             Entity        233           43.15
Entity and Boundary         86           15.93
          Inclusion        143           26.48

### Table for Scheme: IOB2, Model: Arabic ###

         Error Type  Raw Count  Percentage (%)
           Boundary        101           30.61
             Entity         76           23.03
Entity and Boundary         22            6.67
          Inclusion        131           39.70

### Table for Scheme: IOB2, Model: English ###

         Error Type  Raw Count  Percentage (%)
           Boundary         69           13.88
       

In [64]:
error_type_heatmap = ErrorTypeHeatmap(dash_data, mappings)
error_type_heatmap.visualize('false_positives')

Tag                  LOC  MISC  ORG  PER
Error Type                              
Boundary              14    16   27   44
Entity                29    12   23   12
Entity and Boundary   12     1    2    7
Inclusion             20    15   41   55
Tag                  LOC  MISC  ORG  PER
Error Type                              
Boundary               9    23   23   14
Entity                71    44   92   24
Entity and Boundary   24    18   19    7
Inclusion             25    50   38   16


In [47]:
entity_errors_heatmap = EntityErrorsHeatmap(dash_data, mappings) 
entity_errors_heatmap.visualize('false_positives')



Tag         LOC  MISC  ORG  PER
Error Type                     
LOC           0     3    3    1
MISC          6     0    9    0
ORG          20     6    0   11
PER           6     5   13    0
Tag         LOC  MISC  ORG  PER
Error Type                     
LOC           0    14   43    5
MISC         19     0   31    3
ORG          40    28    0   16
PER          12     2   20    0
Tag         LOC  MISC  ORG  PER
Error Type                     
LOC           0     3    3    1
MISC          4     0    9    0
ORG          19     6    0   11
PER           6     3   11    0
Tag         LOC  MISC  ORG  PER
Error Type                     
LOC           0    14   42    5
MISC         19     0   30    3
ORG          40    28    0   16
PER          12     2   20    0


In [48]:
error_type_heatmap = ErrorTypeHeatmap(dash_data, mappings)
error_type_heatmap.visualize_table('false_negatives')

error_type_heatmap = ErrorTypeHeatmap(dash_data, mappings)
error_type_heatmap.visualize('false_negatives')

entity_errors_heatmap = EntityErrorsHeatmap(dash_data, mappings) 
entity_errors_heatmap.visualize('false_negatives')



### Table for Scheme: IOB1, Model: Arabic ###

         Error Type  Raw Count  Percentage (%)
           Boundary        128           30.77
             Entity         83           19.95
Entity and Boundary         32            7.69
          Exclusion        173           41.59

### Table for Scheme: IOB1, Model: English ###

         Error Type  Raw Count  Percentage (%)
           Boundary         81           17.76
             Entity        233           51.10
Entity and Boundary         77           16.89
          Exclusion         65           14.25

### Table for Scheme: IOB2, Model: Arabic ###

         Error Type  Raw Count  Percentage (%)
           Boundary        113           30.05
             Entity         76           20.21
Entity and Boundary         32            8.51
          Exclusion        155           41.22

### Table for Scheme: IOB2, Model: English ###

         Error Type  Raw Count  Percentage (%)
           Boundary         76           16.49
       

Tag         LOC  MISC  ORG  PER
Error Type                     
LOC           0     6   20    6
MISC          3     0    6    5
ORG           3     9    0   13
PER           1     0   11    0
Tag         LOC  MISC  ORG  PER
Error Type                     
LOC           0    19   40   12
MISC         14     0   28    2
ORG          43    31    0   20
PER           5     3   16    0
Tag         LOC  MISC  ORG  PER
Error Type                     
LOC           0     4   19    6
MISC          3     0    6    3
ORG           3     9    0   11
PER           1     0   11    0
Tag         LOC  MISC  ORG  PER
Error Type                     
LOC           0    19   40   12
MISC         14     0   28    2
ORG          42    30    0   20
PER           5     3   16    0


In [None]:
df = dash_data['conll2003_bert'].analysis_data

In [None]:
dash_data['conll2003_bert'].token_confusion_matrix

In [None]:
manager = ErrorAnalysisManager(df)
manager.run_workflows()
results = manager.get_results()

In [None]:
len(results['IOB2']['false_negatives']['LOC']['Boundary'])

In [None]:
comparator = SchemeComparator(results)
component_comparison = comparator.compare_component("false_negatives", "LOC")
component_comparison
overall_comparison = comparator.compare_errors('false_negatives', 'Entity and Boundary')


In [None]:
overall_comparison

In [None]:
manager.non_strict_analyzer.print_sentence(2068)

In [None]:
manager.strict_analyzer.print_sentence(2068)

In [None]:
core_data = df[df['Labels']!=-100].copy()
y_true = core_data.groupby('Sentence Ids')['True Labels'].apply(list).tolist()
y_pred = core_data.groupby('Sentence Ids')['Pred Labels'].apply(list).tolist()

In [None]:
ids = 84
print(get_entities(y_true[ids]))
print(get_entities(y_pred[ids]))
print('######')
print(manager.strict_analyzer.adjust_end_index(Entities([y_true[ids]], IOB2, False)).entities)
print(manager.strict_analyzer.adjust_end_index(Entities([y_pred[ids]], IOB2, False)).entities)



In [None]:
manager.strict_analyzer.adjust_end_index(Entities([y_true[ids]], IOB2, False)).entities