In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

# Purpose: find tweets that BERT misclassified but the Azure model did not
# misclassify, then check which of those also appear in the Azure error-tweet
# file, and save that overlap to a CSV in the working directory.

# 1. Setup paths
# The per-model file names below are built from the name of the current
# working directory, so the notebook must be run from inside that folder.
current_dir       = os.path.basename(os.getcwd())
bert_path         = os.path.join('..', 'BERT_misclassified_tweets.csv')
azure_path        = f'{current_dir}_misclassified_tweets.csv'
error_tweets_path = f'{current_dir}_error_tweets.csv'

# 2. Load data
bert_df   = pd.read_csv(bert_path)
azure_df  = pd.read_csv(azure_path)
errors    = pd.read_csv(error_tweets_path)

# 3. Normalize columns
# Rename the Azure and error-tweet headers so all three frames share the
# 'text' key used for matching below. The BERT file is used as-is; steps 4-5
# read its 'text', 'true_label' and 'predicted_label' columns directly.
azure_df = azure_df.rename(columns={
    'Original full Tweet': 'text',
    'True label':          'true_label',
    'Predicted label':     'predicted_label'
})
errors = errors.rename(columns={
    'Original tweet':    'text',
    'True class':        'true_label_error',
    'Violation reasons': 'error_reason',
    'Severity label':    'severity_label'
})

# 4. Filter to misclassifications
# Keep only rows where the prediction disagrees with the true label.
bert_df   = bert_df[bert_df['predicted_label']  != bert_df['true_label']]
azure_df  = azure_df[azure_df['predicted_label'] != azure_df['true_label']]

# 5. Identify only-BERT misclassifications
# Rows of bert_df whose text does not appear among the Azure misclassifications
# (exact string match on 'text').
only_bert = bert_df[~bert_df['text'].isin(azure_df['text'])].copy()

# 6. Find overlap with error tweets
# Inner merge keeps only tweets present in both frames.
# NOTE(review): if 'text' is duplicated on either side, the merge multiplies
# rows and the count printed below is inflated -- confirm 'text' is unique.
error_only_bert = only_bert.merge(
    errors[['text', 'true_label_error', 'error_reason', 'severity_label']],
    on='text', how='inner'
)

# 7. Conditional message if no overlap
if error_only_bert.empty:
    print("No common tweets between BERT misclassifications and Azure error tweets.")
else:
    print(f"Found {len(error_only_bert)} BERT misclassifications that failed in Azure.")

# 8. Detailed error-tweet analysis
# a) Save the subset (written even when the overlap is empty, in which case
#    the file contains only the header row).
err_csv = f'{current_dir}_only_bert_error_tweets.csv'
error_only_bert[['text', 'true_label_error', 'error_reason', 'severity_label']].to_csv(err_csv, index=False)
print(f"Saved only-BERT error tweets to {err_csv}")



Found 2 BERT misclassifications that failed in Azure.
Saved only-BERT error tweets to AzureAI_4o-mini_only_bert_error_tweets.csv
