# placeholder {.hidden}

## Convergence with other categorization schemes

We turn next to a comparison of our _attribute_-centered scheme with two existing _group_ categorization schemes, focusing on the schemes by @thau_how_2019 and @horne_using_2025.
These analyses illustrate patterns of alignment between our and existing schemes but will also highlight important differences between them.
Our goal is not to compare schemes' accuracy but to illustrate how our attribute-centered, hierarchical approach adds analytical value.

In [None]:
#| eval: false
import os
from pathlib import Path
import gc

import pandas as pd
import numpy as np

import re
from utils.fighting_words import bayes_compare_language as compute_fighting_words
from sklearn.feature_extraction.text import CountVectorizer
from nltk.corpus import stopwords


import sys
base_path = Path.cwd().parent.parent
sys.path.append(str(base_path / 'paper'))

from reporting import *

from IPython.display import HTML
def latex_table(x, *args, **kwargs):
    """Development stand-in for the table printer: show ``x`` as an HTML table without its index.

    Any further positional or keyword arguments are accepted and ignored.
    """
    return HTML(x.to_html(index=False))


# silence every warning so the rendered output stays clean
import warnings
warnings.filterwarnings('ignore')

# Project directories (paper/secs -> the project root is two levels above the working directory)
base_path = Path.cwd().parent.parent
data_path = base_path / 'data'
labeled_path, intermediate_path, manifestos_path, annotations_path = (
    data_path / subdir
    for subdir in ('labeled', 'intermediate', 'manifestos', 'annotations')
)

### Comparison to Thau's social group category classifications

In [None]:
# Load Thau's (2019) annotated group mentions: use the local CSV cache when it
# exists, otherwise download the raw annotations, clean the mention strings and
# write the cache.
fp = annotations_path / "exdata" / 'thau2019_appeals_appeal.csv'

if not fp.exists():
    # Inference on Mads Thau's annotations
    df_thau = pd.read_csv("https://github.com/haukelicht/group_mention_detection/raw/refs/heads/main/replication/data/exdata/thau2019/thau2019_appeals_appeal.csv", encoding='latin1')
    df_thau = df_thau[['objid', 'objtype', 'objdim']].drop_duplicates()
    df_thau.columns = ['mention', 'group_type', 'group_category']

    # NOTE: every `str.replace` below states `regex=` explicitly. The literal
    #  patterns '(' and '...' are only safe when treated as plain text; relying
    #  on the pandas default makes the cleaning depend on the installed version.
    # NOTE: `na=False` keeps the boolean masks usable if a mention is missing.

    # NOTE: Thau's coders used (..) to separate nested mentions
    idxs = df_thau.mention.str.contains('(', regex=False, na=False)
    # print(df_thau[idxs])
    df_thau.loc[idxs, 'mention'] = (
        df_thau.loc[idxs, 'mention']
        .str.replace('(', '', regex=False)
        .str.replace(')', '', regex=False)
    )

    # NOTE: Thau's coders used .... to fix interrupted mentions
    idxs = df_thau.mention.str.contains('...', regex=False, na=False)
    # print(df_thau[idxs])
    df_thau.loc[idxs, 'mention'] = df_thau.loc[idxs, 'mention'].str.replace('...', ' ', regex=False)
    df_thau.loc[idxs, 'mention'] = df_thau.loc[idxs, 'mention'].str.replace('…', ' ', regex=False)

    # NOTE: Thau's coders used [...] to resolve coreferences
    idxs = df_thau.mention.str.contains('[', regex=False, na=False)
    # print(df_thau[idxs])
    df_thau.loc[idxs, 'mention'] = df_thau.loc[idxs, 'mention'].str.replace(r'\[[^\]]+?\]', ' ', regex=True)
    # collapse the whitespace runs left behind by the replacements above
    df_thau['mention'] = df_thau.mention.str.replace(r'\s+', ' ', regex=True)

    # create dir if needed
    fp.parent.mkdir(parents=True, exist_ok=True)
    df_thau.reset_index(drop=True, inplace=True)
    df_thau.to_csv(fp, index=False)
else:
    df_thau = pd.read_csv(fp)

We begin with a comparison to the group categorization scheme developed by @thau_how_2019 for his analysis of group-based appeals in British party manifestos [cf. @thau_social_2021].
His scheme differentiates between five group types, including the types _Social group_ and _Professional group_.
Within _Social group_ mentions, he further distinguishes between nine group categories (including one _Other_ category).

Below, we use the group category classifications of the 4,018 social and professional group mentions recorded in Thau's data, which he collected from trained annotators.
To show how our multilabel attribute annotations compare to Thau's single-label group category annotations, we have applied our classifiers to this sample of social group mentions.


In [None]:
# Attribute predictions for Thau's mentions: read the pickled cache when it is
# there, otherwise run both SetFit classifiers and create the cache.
fp = intermediate_path / 'thau2019_attribute_classifications.pkl'

if fp.exists():
    # cached run: label columns are taken from the attribute name map
    label_cols = list(attribute_category_names_map.keys())
    df_thau_preds = pd.read_pickle(fp)
else:
    import torch
    from setfit import SetFitModel

    # apply our classifiers (prefer CUDA, then Apple MPS, then CPU)
    if torch.cuda.is_available():
        device = "cuda"
    elif torch.backends.mps.is_available():
        device = "mps"
    else:
        device = "cpu"

    model_name = "haukelicht/all-mpnet-base-v2_economic-attributes-classifier"
    # TODO: add revision number to ensure reproducibility
    econ_classifier = SetFitModel.from_pretrained(model_name, force_download=True)
    econ_classifier.to(device)

    model_name = "haukelicht/all-mpnet-base-v2_noneconomic-attributes-classifier"
    # TODO: add revision number to ensure reproducibility
    nonecon_classifier = SetFitModel.from_pretrained(model_name, force_download=True)
    nonecon_classifier.to(device)

    # economic labels first, then non-economic ones (same order as the
    # column-wise concatenation in predict_attributes)
    label_cols = econ_classifier.labels + nonecon_classifier.labels

    def predict_attributes(x: list[str]) -> pd.DataFrame:
        """Predict with both classifiers and return one column per label in ``label_cols``."""
        with torch.no_grad():
            per_model = [
                clf.predict(x, as_numpy=True)
                for clf in (econ_classifier, nonecon_classifier)
            ]
        return pd.DataFrame(np.concatenate(per_model, axis=1), columns=label_cols)

    preds = predict_attributes(df_thau.mention.to_list())
    # index is reset so the prediction rows line up positionally with the mentions
    df_thau_preds = pd.concat([df_thau.reset_index(drop=True), preds], axis=1)

    fp.parent.mkdir(parents=True, exist_ok=True)
    df_thau_preds.to_pickle(fp)

    # drop the model references and trigger garbage collection
    del nonecon_classifier
    del econ_classifier

    gc.collect()

In [None]:
# compute conditional probability: Pr( attribute present | Thau's category )
these_thau_cats = ["Professional group", "Social group"]
# Take an explicit copy: the rows are relabeled in place below, and assigning
# into a bare `query()` result is a chained assignment (the resulting
# SettingWithCopyWarning is hidden by the global warnings filter in this file).
cpr = df_thau_preds.query("group_type in @these_thau_cats").copy()
# professional groups are treated as one additional category of their own
cpr.loc[cpr.group_type == 'Professional group', 'group_category'] = 'Professional group'
# share of mentions per category that carry each attribute label
cpr = cpr.groupby('group_category')[label_cols].mean()
# blank out negligible shares so they are not plotted
cpr[cpr <= 0.009] = np.nan

In [None]:
# Thau's categories in sorted order, with the catch-all 'Other' moved to the end
thau_group_cats = cpr.index.sort_values().unique().tolist()
thau_group_cats.append(thau_group_cats.pop(thau_group_cats.index('Other')))

In [None]:
#| label: fig-thau_categories_vs_our_attributes
#| output: true
#| fig-cap: "Correspondence of Thau's social group categorizations with our group attribute classifications. Numbers report the probability that a mention assigned by Thau's coders to one of his social group categories (shown on y-axis) has been labeled as featuring a given social attribute in our classification scheme (shown on x-axis). *Note:* Values below 0.009 are not plotted to ease readability."

# heatmap of Pr(attribute | Thau category), columns shown under their display names
heatmap_data = cpr.rename(columns=attribute_category_names_map)
fig, axes = plot_heatmap(
    heatmap_data,
    panel_groups=[None, (econ_attr_names, nonecon_attr_names)],
    cmap='RdPu',  # cmap='YlOrRd',
    clims=(0, 1.0),
    clegend_title="conditional probability\nPr(our classification | Thau's classification)",
    figsize_multiplier=(0.44, 0.46),
)
# use a smaller font for the tick labels of every panel so they fit better
for ax in axes:
    y_labels = ax.get_yticklabels()
    ax.set_yticklabels(y_labels, fontsize=9)
    x_labels = ax.get_xticklabels()
    ax.set_xticklabels(x_labels, fontsize=9)
axes[0].set_ylabel("Thau's group categories", fontsize=10, labelpad=10, fontweight='bold')
plt.show()

@fig-thau_categories_vs_our_attributes shows how Thau's group category and our attribute category classifications co-occur in this sample.
Specifically, @fig-thau_categories_vs_our_attributes reports the conditional probability that our classifiers have labeled a given mention as featuring the attribute category reported on the x-axis, given that the mention was labeled as belonging to the respective group category (y-axis labels) in Thau's data.
For example, 91% of mentions in Thau's _Age/generation_ category were labeled as featuring the non-economic attribute _age_ by our classifier; and because we adopt a multilabel logic, 19% of mentions in Thau's _Age/generation_ category were (also) labeled as featuring the attribute _employment status_.

This comparison reveals two general patterns.
On the one hand, Thau's categories _Age/generation_, _Geography_, _Health_, _Nationality_, _Professional group_ and _Religion_ show a strong correspondence to specific attribute categories in our scheme.
We argue that this validates our scheme since these group categories in Thau's scheme are already attribute-centered.
<!-- For example, 93% of the group mentions his coders labeled as Age/Generation are also marked as featuring the attribute age, 82% of his Geography-based mentions are tagged as place/location instances by our classifir. -->

However, on the other hand, other categories in Thau's scheme are characterized by more heterogeneous attribute expression patterns.
For example, group mentions in Thau's _Economic class_ category are not particularly strongly associated with any single attribute category but with several, especially _employment status_ (29%), _income/wealth/economic status_ (36%), and _occupation/profession_ (23%).
Similarly, instances in Thau's _Gender_ category are frequently labeled as featuring _gender/sexuality_ (49%) and/or _family_ (31%) as attributes.

The examples of _Economic class_ and _Gender_ in Thau's scheme point to an important strength of our hierarchical, attribute-centered taxonomy.
The case of _Economic class_ shows how we disaggregate this abstract sociological construct into more manifest indicators. <!-- by quantifying communication at the attribute level.-->
Accordingly, by adding granularity, our taxonomy affords new possibilities for detailed analysis of how notions of social class and status are communicated in concrete social group references.
The same applies to instances in the broad _Gender_ category, which are frequently labeled more specifically as _gender/sexuality_ or _family_ (or both) in our measurements.

In the Supporting Materials @sec-thau_crossvalidation, we present further analyses relying on Thau's annotated data that empirically substantiate our argument that multi-label classification of group mentions is crucial to capture the intersectional nature of many group mentions like "low-income families".
First, we analyze the 9.4% of social group mentions recorded in Thau's data that were classified into different group categories despite being verbatim identical.
Second, we discuss how our attribute annotations distribute across selected social group categories in Thau's data, showing that typically up to one third of Thau's singly-labeled mentions are intersectional according to our scheme.
These analyses underscore the relevance of intersectionality in social group mentions. <!-- and the consequences of ignoring it.-->

In [None]:
# #| eval: false
# # NOTE: imperfect overlap of conceptually matching categories is usually explained by concrete coding decision
# print(df_thau_preds.query("group_category=='Age/generation' and noneconomic__age==0")['mention'].tolist())
# print(df_thau_preds.query("group_category=='Geography' and noneconomic__place_location==0")['mention'].tolist())
# # NOTE: and/or classification error in our classifier's prediction =(
# print(df_thau_preds.query("group_category=='Ethnicity/race' and noneconomic__ethnicity==0")['mention'].tolist())


### Comparison to Horne et al.'s group categorizations

In [None]:
# Predictions of Horne et al.'s group-category classifier for (samples of) our
# labeled mentions. All three result files are cached as pickles; if any of
# them is missing, every sample is predicted again.
fps = [
    intermediate_path / 'horne_predictions_sample.pkl',
    intermediate_path / 'horne_predictions_econ_occ-prof_attribute_sample.pkl',
    intermediate_path / 'horne_predictions_nonecon_gender-sexuality_attribute_sample.pkl'
]

need_to_predict = not all(fp.exists() for fp in fps)

if need_to_predict:
    from transformers import pipeline, AutoConfig

    # TODO: this should be one dataset, created before the analysis
    fp = labeled_path /  'manifesto_sentences_predicted_social_group_mentions_with_economic_attributes_classifications.tsv'
    df_econ = pd.read_csv(fp, sep='\t')

    fp = labeled_path /  'manifesto_sentences_predicted_social_group_mentions_with_noneconomic_attributes_classifications.tsv'
    df_nonecon = pd.read_csv(fp, sep='\t')


    model_id = "rwillh11/mdeberta_groups_2.0"
    config = AutoConfig.from_pretrained(model_id)

    # Use a pipeline as a high-level helper
    # print(config.problem_type)
    pipe = pipeline("text-classification", model=model_id, top_k=None, function_to_apply="sigmoid")

    label_cols_horne = list(config.label2id.keys())

    def predict_horne(x: list[str]) -> pd.DataFrame:
        """Classify the texts in ``x`` and return 0/1 indicators, one column per label.

        A label is set to 1 when its pipeline score is at least 0.5. Columns
        follow the order of ``label_cols_horne``; rows follow the order of ``x``.
        """
        preds = pipe(x, batch_size=16)
        # One DataFrame built from all rows at once; creating and concatenating
        # a single-row frame per text gives the same table but is needlessly slow.
        flags = pd.DataFrame(
            [{lab['label']: lab['score'] >= 0.5 for lab in e} for e in preds]
        )
        return flags[label_cols_horne].astype(int)

    # get sample and apply to all
    df_sample = df_econ.copy()
    df_sample = df_sample.sample(frac=0.1, random_state=42)
    # NOTE(review): `nonecon_label_cols` is not defined in this cell -- presumably
    #  provided by the `reporting` star import; confirm.
    # NOTE(review): the lookup by index assumes df_econ and df_nonecon list the
    #  same mentions under the same row index -- TODO confirm.
    df_sample[nonecon_label_cols] = df_nonecon.loc[df_sample.index, nonecon_label_cols]
    df_sample.reset_index(drop=True, inplace=True)
    df_sample[label_cols_horne] = predict_horne(df_sample.text.to_list())
    df_sample.to_pickle(fps[0])

    # apply to occupation/profession instance (keeps only the first six columns)
    horne_predictions_econ_occprof_attribute_sample = df_econ.query('economic__occupation_profession==1').iloc[:, :6].copy()
    horne_predictions_econ_occprof_attribute_sample = horne_predictions_econ_occprof_attribute_sample.sample(5_000, random_state=42).reset_index(drop=True)
    horne_predictions_econ_occprof_attribute_sample[label_cols_horne] = predict_horne(horne_predictions_econ_occprof_attribute_sample.text.to_list())
    horne_predictions_econ_occprof_attribute_sample.to_pickle(fps[1])

    # apply to gender/sexuality instances (keeps only the first six columns)
    horne_predictions_nonecon_gender_attribute_sample = df_nonecon.query('noneconomic__gender_sexuality==1').iloc[:, :6].copy()
    horne_predictions_nonecon_gender_attribute_sample = horne_predictions_nonecon_gender_attribute_sample.sample(5_000, random_state=42).reset_index(drop=True)
    horne_predictions_nonecon_gender_attribute_sample[label_cols_horne] = predict_horne(horne_predictions_nonecon_gender_attribute_sample.text.to_list())
    horne_predictions_nonecon_gender_attribute_sample.to_pickle(fps[2])
else:
    df_sample = pd.read_pickle(fps[0])
    horne_predictions_econ_occprof_attribute_sample = pd.read_pickle(fps[1])
    horne_predictions_nonecon_gender_attribute_sample = pd.read_pickle(fps[2])

    # Horne et al.'s label columns are recovered as the columns whose name
    # starts with an upper-case letter
    label_cols_horne = [c for c in df_sample.columns if re.match(r'^[A-Z]', c)]

In [None]:
# print(*label_cols_horne, sep='\n')

Next, we compare the annotations produced with our attribute-centered group mention labeling scheme to a more recent group classification scheme proposed by @horne_using_2025.
Horne et al.'s group categorization scheme features 44 group categories.
Notably, Horne et al. also account for intersectionality by allowing for the classification of group mentions into multiple categories -- similar to our approach.
However, their scheme also differs in important ways from ours.
First, like Thau's scheme, their scheme is centered on group _categories_ rather than _attributes_.
Second, their scheme comprises many more group categories than our scheme (12) and Thau's (10).
Third and related, Horne et al. do not explicitly organize their group categories in a hierarchical taxonomy.
This means that some of their categories, like _Ethnic And National Communities_, are rather broad and combine attributes we separate into different attribute categories.
Other group categories in their scheme, like _Health Professionals_, are more specific.
Overall, this leads to a "ragged" taxonomy with categories that vary in their level of abstraction/breadth, and in some instances even leads to hierarchical "nesting" of categories, such as _Health Professionals_ being an instance of the _White Collar Workers_ category.

Below, we examine how these differences affect how their and our approaches label and partition the group mentions in a sample drawn from our data.
Specifically, we have sampled 10% of the social group mentions in our machine-labeled corpus and applied Horne et al.'s group category multi-label classifier to them.[^fn:horne_classifier]

[^fn:horne_classifier]: Using model `rwillh11/mdeberta_groups_2.0` hosted on the Hugging Face model hub (accessed on Jan 23, 2026).

In [None]:
def label_assoc_matrices(df, label_cols_a, label_cols_b, log_base=np.e, eps=1e-12):
    """
    Compute co-occurrence counts and association measures between two multilabel schemes.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per item; the label columns are cast to int and used as
        0/1 indicators.
    label_cols_a, label_cols_b : list of str
        Indicator columns of scheme A (rows of the outputs) and scheme B
        (columns of the outputs).
    log_base : float, optional
        Base of the logarithm used for (n)PMI. Defaults to e; ``None`` is
        treated as e. Any positive base other than 1 is supported.
    eps : float, optional
        Smoothing constant added to the probabilities before taking logs, so
        that zero co-occurrence yields a large negative (finite) PMI.

    Returns dict with:
      - cooc_counts: DataFrame (|A| x |B|)
      - pmi:         DataFrame
      - ppmi:        DataFrame
      - npmi:        DataFrame (in [-1, 1])
      - p_b_given_a: DataFrame of P(B_j | A_i)
      - p_a_given_b: DataFrame of P(A_i | B_j)
      - marginals:   dict with Series for counts/probs

    Notes
    -----
    Association with a label that never occurs is undefined. For such
    rows/columns PMI, PPMI and nPMI are reported as 0.0 (no evidence of
    association) and the conditional probability given that label as NaN.
    Because of the eps smoothing, cells with zero co-occurrence between two
    observed labels get an nPMI that is negative but above -1.

    Raises
    ------
    ValueError
        If ``log_base`` is not a positive number different from 1.
    """
    # Indicator matrices
    A = df[label_cols_a].astype(int).to_numpy()
    B = df[label_cols_b].astype(int).to_numpy()
    N = A.shape[0]

    # Counts
    C = A.T @ B                              # joint counts: |A| x |B|
    C_A = A.sum(axis=0).astype(float)        # counts for A labels: |A|
    C_B = B.sum(axis=0).astype(float)        # counts for B labels: |B|

    # Probabilities (max(N, 1) avoids a division by zero for an empty frame)
    n_items = max(N, 1)
    Pab = C / n_items
    Pa = C_A / n_items
    Pb = C_B / n_items

    # Use chosen log base: dedicated numpy routines for 2 and 10, change of
    # base for everything else (a divisor of 1.0 leaves the natural log as is)
    if log_base is None or log_base == np.e:
        log_divisor = 1.0
    elif log_base > 0 and log_base != 1:
        log_divisor = np.log(log_base)
    else:
        raise ValueError(f"log_base must be a positive number other than 1, got {log_base!r}")

    def _log(x):
        if log_base == 2:
            return np.log2(x)
        if log_base == 10:
            return np.log10(x)
        return np.log(x) / log_divisor

    # cells for which both labels occur at least once
    has_a = C_A[:, None] > 0
    has_b = C_B[None, :] > 0
    both_observed = has_a & has_b

    # PMI: log( P(A,B) / (P(A)P(B)) )
    # Without the mask, a never-observed label would get a positive PMI,
    # because the eps terms in numerator and denominator cancel.
    PMI = _log((Pab + eps) / ((Pa[:, None] + eps) * (Pb[None, :] + eps)))
    PMI = np.where(both_observed, PMI, 0.0)

    # PPMI: max(PMI, 0)
    PPMI = np.maximum(PMI, 0.0)

    # nPMI: PMI / (-log P(A,B))
    nPMI = PMI / (-_log(Pab + eps))

    # Conditional probabilities (interpretable); NaN where the conditioning
    # label never occurs (smoothing would turn 0/0 into eps/eps = 1.0)
    P_b_given_a = np.divide(C, C_A[:, None], out=np.full(C.shape, np.nan), where=has_a)   # P(B|A)
    P_a_given_b = np.divide(C, C_B[None, :], out=np.full(C.shape, np.nan), where=has_b)   # P(A|B)

    # Wrap into DataFrames
    idx = pd.Index(label_cols_a, name="A_label")
    cols = pd.Index(label_cols_b, name="B_label")

    out = {
        "cooc_counts": pd.DataFrame(C, index=idx, columns=cols),
        "pmi":         pd.DataFrame(PMI, index=idx, columns=cols),
        "ppmi":        pd.DataFrame(PPMI, index=idx, columns=cols),
        "npmi":        pd.DataFrame(nPMI, index=idx, columns=cols),
        "p_b_given_a": pd.DataFrame(P_b_given_a, index=idx, columns=cols),
        "p_a_given_b": pd.DataFrame(P_a_given_b, index=idx, columns=cols),
        "marginals": {
            "N": N,
            "count_a": pd.Series(C_A, index=idx),
            "count_b": pd.Series(C_B, index=cols),
            "p_a": pd.Series(Pa, index=idx),
            "p_b": pd.Series(Pb, index=cols),
        }
    }
    return out

In [None]:
# association between our attribute labels (rows) and Horne et al.'s categories (columns)
assoc = label_assoc_matrices(df_sample, label_cols, label_cols_horne)

# nPMI in [-1, 1]; PPMI (>=0); co-occurrence counts; P(Horne category | our attribute)
npmi, ppmi, counts, p_b_given_a = (
    assoc[key] for key in ("npmi", "ppmi", "cooc_counts", "p_b_given_a")
)

In [None]:
#| eval: false
from scipy.stats import chi2_contingency

# chi2_contingency rejects tables whose expected frequencies contain zeros,
# which happens when a row or column sums to zero; drop never-observed labels
observed = counts.loc[counts.sum(axis=1) > 0, counts.sum(axis=0) > 0]

# compute a chi2 test on the co-occurrence counts matrix
chi2, p, dof, ex = chi2_contingency(observed)

# compute cramer's V = sqrt(chi2 / (n * (min(r, c) - 1)))
# n must be the grand total of the table the chi2 statistic was computed on.
# With multilabel data this total differs from the number of sampled mentions,
# so normalising by the latter does not give a V bounded in [0, 1].
# NOTE: statistics quoted in the text from an earlier run need to be re-checked.
n_table = observed.to_numpy().sum()
cramers_v = np.sqrt(chi2 / (n_table * (min(observed.shape) - 1)))

print(f"Chi2 test on co-occurrence counts matrix: chi2={chi2:0.2f}, p={p:0.3e}, dof={dof}, Cramér's V={cramers_v:0.3f}")

First, it is notable that the annotations produced with both schemes show strong convergence.
A $\chi^2$-test computed on the label category co-occurrence count matrix indicates a highly significant overall association between the two classification schemes ($\chi^2$ = 108,311.55, p < 0.001, Cramér's V = 0.631).


In [None]:
#| label: fig-heatmap_horne_vs_ours
#| output: true
#| fig-cap: "Patterns of convergence between group mention classifications of our attribute-centered classifier and Horne et al.'s group category classifier. The figure reports normalized Pointwise Mutual Information (nPMI) values that measure the strength of association of label classes on a scale ranging from -1 (systematic disassociation) through 0 (independence) to +1 (perfect co-occurrence). y-axis labels indicate the attribute category in our scheme; x-axis labels indicate the group categories in Horne et al.'s scheme. Plot panels separated by economic and non-economic attributes. *Note:* heatmap rows and columns sorted (for each attribute dimension) using hierarchical clustering to better reveal conceptual overlap."


def _order_by_clustering(block):
    """Cluster ``block`` with seaborn and return (cluster grid, block in dendrogram order)."""
    grid = sns.clustermap(block)
    plt.close()  # only the row/column ordering is needed, not the plot
    row_order = grid.dendrogram_row.reordered_ind
    col_order = grid.dendrogram_col.reordered_ind
    return grid, block.iloc[row_order, col_order]


def _labels_on_top(ax):
    """Move the x tick labels above the heatmap, rotate them, and drop axis titles."""
    ax.xaxis.tick_top()
    ax.xaxis.set_label_position('top')
    plt.setp(ax.get_xticklabels(), rotation=45, ha='left')
    ax.set_xlabel(None)
    ax.set_ylabel(None)


# Cluster economic attributes
cl, pdat_econ = _order_by_clustering(
    npmi.loc[econ_attrs].rename(index=attribute_category_names_map)
)

# Cluster non-economic attributes
cl, pdat_nonecon = _order_by_clustering(
    npmi.loc[nonecon_attrs].rename(index=attribute_category_names_map)
)

# Combined figure: two stacked panels; the lower one gets extra height
n_econ, n_nonecon = len(econ_attrs), len(nonecon_attrs)
heights = [n_econ, n_nonecon + 3.3]
fig = plt.figure(figsize=(12, (n_econ + n_nonecon) * 0.5))
gs = fig.add_gridspec(2, 1, height_ratios=heights, hspace=1.2)

# Axes for the two heatmaps (top: economic, bottom: non-economic)
ax1, ax2 = (fig.add_subplot(gs[row, 0]) for row in range(2))

# settings shared by both panels
heatmap_kws = dict(
    fmt='.2f',
    square=True,
    cmap='PiYG',
    vmin=-1, vmax=1,
    linewidths=1,
    linecolor='white',
    xticklabels=True,
    yticklabels=True,
)

# Economic attributes: no colorbar
sns.heatmap(pdat_econ, cbar=False, ax=ax1, **heatmap_kws)
_labels_on_top(ax1)

# Non-economic attributes: carries the horizontal colorbar
sns.heatmap(
    pdat_nonecon,
    cbar=True,
    cbar_kws={'orientation': 'horizontal', 'shrink': 0.6, 'pad': 0.1},
    ax=ax2,
    **heatmap_kws,
)
_labels_on_top(ax2)

# label the colorbar
cbar = ax2.collections[0].colorbar
cbar.set_label('normalized Pointwise Mutual Information (nPMI)', rotation=0, labelpad=6, ha='center')

plt.show()

@fig-heatmap_horne_vs_ours visually illustrates how this overall strong correspondence arises.
The heatmaps show normalized pointwise mutual information (nPMI) values to quantify the strength of association between our economic and non-economic attribute categories (y-axis) and Horne et al.'s group categories (x-axis).
In theory, nPMI values range from -1 (systematic disassociation) through 0 (independence) to +1 (perfect co-occurrence).
Positive nPMI values (shaded green) in @fig-heatmap_horne_vs_ours reveal expected convergence patterns, such as the strong association between our _occupation/profession_ attribute and Horne et al.'s occupation-related categories (e.g., _Civil Servants_, _Farmers_, _Health Professionals_, etc.), between our _gender/sexuality_ attribute and their _Women_, _LGBTQI_, and _Men_ categories, or between our _health_ attribute category and their group categories _Patients_ and _People With Disabilities_.

Negative nPMI values (shaded magenta), on the other hand, indicate systematic disassociation where specific attribute categories do not co-occur with certain group categories in Horne et al.'s scheme.
For example, non-economic attributes like _religion_, _ethnicity_, and _family_ show consistent negative associations with occupation- and class-based group categories, because the group mentions sampled from our corpus are typically categorized as either occupation/class-based or non-economic but rarely both at the same time.
These negative associations are analytically meaningful because they reveal systematic patterns in how the two classification schemes partition the social group mention space: attributes that Horne et al.'s classifier treats as separate categories (e.g., occupation versus family status) show mutual exclusivity in their co-occurrence patterns.

The strong association and disassociation patterns revealed in @fig-heatmap_horne_vs_ours indicate that both classification schemes capture similar underlying patterns in how social group mentions are categorized.
However, it also shows that our attribute-centered approach groups some of Horne et al.'s more granular categories together (e.g., all occupation-related categories) while featurizing some of their broader categories with more specific attributes (e.g., _Ethnic And National Communities_ as mixture of _ethnicity_, _nationality_, _religion_, and _place/location_).
<!-- The hierarchical clustering applied to both rows and columns further reveals natural groupings within each classification scheme, with occupation-related categories clustering together and demographic categories forming distinct blocks, suggesting that while both approaches capture similar underlying patterns, our attribute-based framework provides a more granular decomposition that can accommodate mentions featuring multiple intersecting social dimensions. -->

In [None]:
# long format: one row per (our attribute, Horne category) pair with P(B | A) in "value"
cpr_df = p_b_given_a.melt(ignore_index=False).reset_index()
# TODO: show top five by our attribute category
# Five highest conditional probabilities per attribute. Sorting first and then
# taking `head` per group avoids `groupby.apply` on a frame that still contains
# the grouping column, which newer pandas versions deprecate.
top_cpr_vals_by_label_col = (
    cpr_df
    .sort_values(["A_label", "value"], ascending=[True, False])
    .groupby("A_label")
    .head(5)
    .reset_index(drop=True)
)

In the Supplementary Materials @sec-horne_crossvalidation, we present further analyses focusing on occupation/profession and gender/sexuality-related mentions.
These analyses demonstrate that our attribute-centered framework, while strongly aligning with Horne et al.'s scheme, also allows capturing mentions related to these attributes that are not covered by pre-defined group categories.

# APPENDIX



<!-- put in additional results section or so -->

## Cross-validation against Thau's group category annotations  {#sec-thau_crossvalidation}

We use Thau's human-annotated social group categorization data to underscore the added value of taking intersectionality into account in group mention labeling.
The idea of intersectionality is that social group mentions often involve multiple group attributes at the same time and that from a methodological point of view, this requires multi-label classification.
Thau's coding scheme, however, is single-label, meaning that each group mention is assigned to one and only one group category.

In [None]:
# keep verbatim 'Social group' mentions that were assigned to multiple group categories
social_mentions = df_thau.query('group_type == "Social group"')
mention_keys = ['mention', 'group_type']
conflicting = social_mentions.groupby(mention_keys).filter(
    lambda grp: grp.group_category.nunique() > 1
)
# one row per mention with the set of categories it received
category_sets = conflicting.groupby(mention_keys)['group_category'].agg(set).sort_index()
thau_ambigous_labels_examples = category_sets.reset_index(name='categories')
# readable, alphabetically ordered label for each category combination
thau_ambigous_labels_examples['categories_str'] = [
    ' + '.join(sorted(cats)) for cats in thau_ambigous_labels_examples['categories']
]

In [None]:
#| eval: false
# number of 'Social group' rows in Thau's data, and the number (and share) of
# verbatim mentions among them with conflicting category labels
n_social = (df_thau.group_type == "Social group").sum()
n_conflicting = len(thau_ambigous_labels_examples)
print('N =', n_social)
print('N =', n_conflicting, f'; share = {n_conflicting/n_social:0.3f}')

In [None]:
#| label: tbl-thau_conflicting_group_categorization_examples
#| output: true
#| tbl-cap: "Examples of verbatim group mentions with conflicting group category annotations in data by Thau (2019) for category combinations that were confused more than 10 times. Each row shows (at most) three examples of verbatim group mentions that have been categorized into one of the following categories in different sentence contexts in the corpus and thus have \"conflicting\" labels. $N$ reports the total number of mentions that exhibit the shown category confusion (in all 3,571 mentions)."

# TODO: consider moving to APPENDIX and only mention examples in text body

expls_df = thau_ambigous_labels_examples.copy()
# number of categories per mention and the (alphabetically) first two of them
expls_df['n_attrs'] = expls_df['categories'].apply(len)
expls_df[['cat1', 'cat2']] = expls_df['categories'].apply(lambda cats: pd.Series(sorted(list(cats))[:2]))
# Per category combination: record how many mentions show it (n) and keep at
# most three seeded example mentions. Iterating over the groups keeps the
# grouping column in every piece, unlike `groupby.apply` in newer pandas.
expls_df = pd.concat(
    [
        grp.assign(n=len(grp)).sample(min(len(grp), 3), random_state=42)
        for _, grp in expls_df.groupby('categories_str')
    ],
    ignore_index=True,
)
# one row per combination with the examples joined into a single quoted string
expls_df = expls_df.groupby(['categories_str', 'cat1', 'cat2', 'n_attrs', "n"]).agg({'mention': lambda x : '; '.join(f"``{m}''"for m in x)}).reset_index()
# NOTE: a `sort_values` and a `drop_duplicates(...).query(...)` call stood here
#  whose results were never assigned; they had no effect and were removed.
tab = expls_df.query('n> 10').sort_values(['cat1', 'cat2'])
tab = tab[['categories_str', 'n', 'mention']]
tab.columns = ["attribute combination", "$N$", "examples"]
latex_table(tab, column_format='p{2.2in} l p{3in}')

@tbl-thau_conflicting_group_categorization_examples illustrates the consequence of this annotation design choice by giving examples from Thau's data where the same verbatim identical group mentions were categorized into different categories at different times of the annotation process.
For example, the group mention "homeless 16 and 17 year olds" has been classified as both _Economic class_ and _Age/generation_ in this data, and this label "confusion" occurred for 50 group mentions in total.

Many of the examples in @tbl-thau_conflicting_group_categorization_examples are arguably clearly intersectional, such as "homeless 16 and 17 year olds" (economic status and age), "the richest and poorest areas in our country" (economic status and location or nationality), and "low income families" (economic status and family).
In fact, about 9.4% of social group mentions in Thau's data are affected by this problem.

Importantly, this observation does not indicate annotation errors _per se_.
Instead, it illustrates why forcing a single-label classification scheme on a phenomenon that is inherently multi-dimensional can be problematic.
Group mentions that exhibit multiple attributes, like "the richest and poorest areas in our country" (economic status and location or nationality) and "low income families" (economic status and family), are simply more accurately labeled if assigned to multiple categories _because_ they are intersectional.


### Examples from comparison to Thau's social group category classifications


Next, we zoom in on three of Thau's group categories: _Economic class_, _Age/generation_, and his _Other_ group category.
We focus on these three for different reasons.
The category _Economic class_ is interesting because it is an abstract, multifaceted sociological construct that rarely manifests explicitly in social group mentions but is rather communicated through various indicators, such as occupation, income, or employment status.
The category _Age/generation_ is interesting because it appears conceptually more clearly delineated but is arguably an attribute-centered category in disguise, making it very prone to intersectionality with other attributes.
The _Other_ category is interesting because it is a catch-all category that is often used for mentions that do not fit well into the other categories, and thus is likely to be highly heterogeneous and intersectional.

#### Economic class

In [None]:
# Mentions Thau's coders placed in 'Economic class', described by the
# combination of attributes our classifiers assigned to them.
# `.copy()`: a column is added below, which must not be done on a bare
# `query()` result (chained assignment).
thau_econ_class_mentions = df_thau_preds.query("group_category=='Economic class'").copy()
# display names of all attributes predicted for a mention, joined by ' + '
thau_econ_class_mentions['labels'] = thau_econ_class_mentions.apply(lambda row: ' + '.join([l for c, l in attribute_category_names_map.items() if row[c]==1]), axis=1)
# per attribute combination: number of mentions and up to five seeded examples
tmp = (
    thau_econ_class_mentions
    .groupby('labels')
    .agg({'group_type': 'count', 'mention': lambda x: list(x.sample(min(len(x), 5), random_state=1 ))})
    .reset_index()
    .rename(columns={'group_type': 'count'})
    .sort_values('count', ascending=False)
)
# drop mentions without any predicted attribute
tmp = tmp[tmp['labels']!=''].copy()
# raw string: `str.count` takes a regex and '+' must be escaped; in a normal
# string literal '\+' is an invalid escape sequence
tmp['n_attributes'] = tmp['labels'].str.count(r' \+ ')+1
tmp['intersectional'] = tmp['n_attributes'] > 1
tmp['examples'] = tmp['mention'].apply(lambda ms: '; '.join([f"``{m}''" for m in ms]))

In [None]:
# mention counts for single-attribute vs. intersectional combinations, and their shares
cnts = tmp.groupby('intersectional')[['count']].sum()
props = cnts.div(cnts.sum())
props

Having applied our multi-attribute classification approach to social group mentions categorized according to Thau's group category scheme, we estimate that about 32% of mentions in his _Economic class_ category are intersectional references.
@tbl-thau_econ_class_mentions_single_attr_examples shows that among the 68% of single-attribute mentions in this subset, most are classified as _income/wealth/economic status_, _occupation/profession_, or _employment status_ instances in our scheme.

In [None]:
#| label: tbl-thau_econ_class_mentions_single_attr_examples
#| output: true
#| tbl-cap: "Social group mentions in Thau's _Economic class_ group category that are assigned to a single attribute category according to our scheme and their absolute frequency. *Note:* Table only reports results for the six most prevalent attribute categories."

# Six most frequent single-attribute label strings (`tmp` is sorted by count, descending).
# NOTE: unclear whether these counts can be read as the relative prevalence of attribute co-occurrences
tab = tmp.loc[tmp['n_attributes'] == 1, ['labels', 'count', 'examples']].head(6)
tab.columns = ["attribute category", "$N$", "examples"]
latex_table(tab, column_format='p{2.2in} l p{3in}')

In [None]:
#| label: tbl-thau_econ_class_mentions_multi_attr_examples
#| output: true
#| tbl-cap: "Social group mentions in Thau's _Economic class_ that are assigned to two or more attribute categories according to our scheme and their absolute frequency. *Note:* Table only reports results for the six most prevalent attribute combinations."

# Six most frequent label strings that combine two or more attributes (`tmp` is sorted by count, descending).
# NOTE: unclear whether these counts can be read as the relative prevalence of attribute co-occurrences
tab = tmp.loc[tmp['n_attributes'] >= 2, ['labels', 'count', 'examples']].head(6)
tab.columns = ["attribute combination", "$N$", "examples"]
latex_table(tab, column_format='p{2.2in} l p{3in}')

However, other social group mentions in this subset of Thau's data are often labeled as combinations among these economic attributes or with other attributes,  often non-economic ones like _family_, _age_, or _place/location_.
@tbl-thau_econ_class_mentions_multi_attr_examples shows, for example, that mentions in Thau's _economic class_ group category are often labeled as intersectional references featuring the attributes _income/wealth/economic status_ and _family_ or _employment status_ and _age_, respectively.

#### Gender

In [None]:
# Mentions that Thau's annotators placed in his "Gender" group category.
# `.copy()` detaches the subset from `df_thau_preds`, so adding the `labels`
# column below is a plain assignment instead of a chained assignment on a view.
thau_gender_mentions = df_thau_preds.query("group_category=='Gender'").copy()

# One label string per mention: the names of all attribute categories whose
# indicator column equals 1, joined by ' + ' ('' if no attribute is flagged).
thau_gender_mentions['labels'] = thau_gender_mentions.apply(
    lambda row: ' + '.join([name for col, name in attribute_category_names_map.items() if row[col] == 1]),
    axis=1,
)

# Per label string: number of mentions (non-missing `group_type` values) and up
# to five example mentions drawn with a fixed seed, most frequent label first.
tmp = (
    thau_gender_mentions
    .groupby('labels')
    .agg({'group_type': 'count', 'mention': lambda x: list(x.sample(min(len(x), 5), random_state=1))})
    .reset_index()
    .rename(columns={'group_type': 'count'})
    .sort_values('count', ascending=False)
)

# Drop mentions without any attribute label; copy so that the column
# assignments below do not write into a slice of the aggregated frame.
tmp = tmp[tmp['labels'] != ''].copy()
# Raw string: '\+' is a regex escape here, not a Python string escape.
tmp['n_attributes'] = tmp['labels'].str.count(r' \+ ') + 1
tmp['intersectional'] = tmp['n_attributes'] > 1
# Wrap every example mention in ``...'' quotes and join them with '; '.
tmp['examples'] = tmp['mention'].apply(lambda ms: '; '.join([f"``{m}''" for m in ms]))

In [None]:
#| eval: false
# Mention counts for single-attribute (False) vs. intersectional (True) label strings ...
cnts = tmp['count'].groupby(tmp['intersectional']).sum()
# ... converted into shares of all labeled mentions in this subset
props = cnts / cnts.sum()
props

In [None]:
#| label: tbl-thau_gender_mentions_single_attr_examples
#| output: true
#| tbl-cap: "Social group mentions in Thau's _gender_ group category that are assigned to only one attribute category according to our scheme and their absolute frequency. *Note:* Table only reports the results for the six most prevalent attribute categories."
# TODO: make this a table
# Six most frequent single-attribute label strings (`tmp` is sorted by count, descending).
tab = tmp.loc[tmp['n_attributes'] == 1, ['labels', 'count', 'examples']].head(6)

# NOTE: this reveals some misclassifications
#  - "women prisoners" => also crime
#  - "boys" => also gender/sexuality
#  - "men who work in the public services" => also gender/sexuality

tab.columns = ["attribute", "$N$", "examples"]
latex_table(tab, column_format='p{2.2in} l p{3in}')

Thau's _Gender_ group category is an example where our scheme is slightly broader because it also includes the aspect of sexual orientation.
Mentions in this subset of Thau's data that are assigned to only one attribute in our scheme (55.1%) are typically categorized as _family_ or _gender/sexuality_ instances (see @tbl-thau_gender_mentions_single_attr_examples).

In [None]:
#| label: tbl-thau_gender_mentions_multi_attr_examples
#| output: true
#| tbl-cap: "Social group mentions in Thau's _gender_ group category that are assigned to two attribute categories according to our scheme and their absolute frequency. *Note:* Table only reports the results for the six most prevalent attribute combinations."
# TODO: make this a table
# Six most frequent label strings that combine exactly two attributes (`tmp` is sorted by count, descending).
tab = tmp.loc[tmp['n_attributes'] == 2, ['labels', 'count', 'examples']].head(6)
tab.columns = ["attribute combination", "$N$", "examples"]
latex_table(tab, column_format='p{2.2in} l p{3in}')

Yet, although our gender-related category is already broader than Thau's, we still find that about 44.9% of the group mentions classified into Thau's _Gender_ category are intersectional according to our annotations.
@tbl-thau_gender_mentions_multi_attr_examples shows that the most common attribute combinations are _family_ and _gender/sexuality_ and _occupation/profession_ + _gender/sexuality_, respectively.
This is in part driven by how we treat gendered references to familial roles, like father, mother, husband, wife, etc., which we consider intersectional.
Other instances where we find intersectionality are not explained by this difference in coding approaches, however, such as mentions that feature gender/sexuality alongside occupation/profession ("service women"), employment status ("women part-time workers"), or nationality ("British women married to foreign husbands").


#### "Other"

Another interesting point of comparison to Thau's group category classifications are mentions in his _Other_ category.
Overall, about 22.5% of social group mentions have been classified into this category by his annotators.

In [None]:
#| eval: false
# Share of Thau's "Social group" mentions that his annotators placed in the _Other_ category
(df_thau_preds[df_thau_preds['group_type'] == 'Social group']['group_category'] == 'Other').mean()

In [None]:
# Mentions that Thau's annotators placed in his catch-all "Other" group category.
# `.copy()` detaches the subset from `df_thau_preds`, so adding the `labels`
# column below is a plain assignment instead of a chained assignment on a view.
thau_other_mentions = df_thau_preds.query("group_category=='Other'").copy()

# One label string per mention: the names of all attribute categories whose
# indicator column equals 1, joined by ' + ' ('' if no attribute is flagged).
thau_other_mentions['labels'] = thau_other_mentions.apply(
    lambda row: ' + '.join([name for col, name in attribute_category_names_map.items() if row[col] == 1]),
    axis=1,
)

# Per label string: number of mentions (non-missing `group_type` values) and up
# to five example mentions drawn with a fixed seed, most frequent label first.
tmp = (
    thau_other_mentions
    .groupby('labels')
    .agg({'group_type': 'count', 'mention': lambda x: list(x.sample(min(len(x), 5), random_state=1))})
    .reset_index()
    .rename(columns={'group_type': 'count'})
    .sort_values('count', ascending=False)
)

# Unlike for the other categories, unlabeled mentions (labels == '') are kept
# here and flagged, because their share is reported as well.
# Raw string: '\+' is a regex escape here, not a Python string escape.
tmp['n_attributes'] = tmp['labels'].str.count(r' \+ ') + 1
tmp['any_attributes'] = tmp['labels'] != ''
# The separator count + 1 would report one attribute for the empty label string.
tmp.loc[~tmp['any_attributes'], 'n_attributes'] = 0
tmp['intersectional'] = tmp['n_attributes'] > 1
# Wrap every example mention in ``...'' quotes and join them with '; '.
tmp['examples'] = tmp['mention'].apply(lambda ms: '; '.join([f"``{m}''" for m in ms]))

In [None]:
#| eval: false
# Mention counts by (has any attribute, is intersectional) ...
cnts = tmp['count'].groupby([tmp['any_attributes'], tmp['intersectional']]).sum()
# ... converted into shares of all mentions in Thau's _Other_ category
props = cnts / cnts.sum()
props

In [None]:
#| eval: false
# Among mentions with at least one attribute: counts of single-attribute (False)
# vs. intersectional (True) label strings ...
cnts = tmp[tmp['any_attributes']].groupby('intersectional')['count'].sum()
# ... converted into shares of the labeled mentions
props = cnts / cnts.sum()
props

Our multilabel attribute classification approach assigns 80.8% of mentions in Thau's _Other_ category to at least one attribute in our scheme, while the remaining 19.2% of mentions in this category are not labeled as featuring any of our attributes and thus considered "universal" group references.
As shown in @tbl-thau_other_mentions_examples, our approach thus makes 4 out of 5 mentions in Thau's general _Other_ group analytically accessible by labeling them as featuring one (71.1%) or more attributes (28.9%) in our scheme.

In [None]:
#| label: tbl-thau_other_mentions_examples
#| output: true
#| tbl-cap: "Social group mentions in Thau's _Other_ group category that are assigned to one or more attribute categories according to our scheme and their absolute frequency. *Note:* Table only reports the results for the six most prevalent attribute categories or combinations."
# TODO: make this a table
# Six most frequent label strings with at least one attribute (`tmp` is sorted by count, descending).
tab = tmp.loc[tmp['n_attributes'] >= 1, ['labels', 'count', 'examples']].head(6)
tab.columns = ["attribute(s)", "$N$", "examples"]
latex_table(tab, column_format='p{2.2in} l p{3in}')

## Cross-validation against Horne et al.'s group category annotations  {#sec-horne_crossvalidation}

To shed more light on the convergence and divergences between our attribute classification and the group categorization scheme by @horne_using_2025, we analyze two random samples of 5,000 mentions each labeled as featuring the _occupation/profession_ attribute and mentions labeled as featuring the _gender/sexuality_ attribute, respectively.
These samples allow us to assess convergent validity by examining whether mentions our group attribute classifiers identify as featuring occupation-related or gender-related attributes are similarly categorized by Horne et al.'s group category classifier.
The dual classification creates overlapping annotations that reveal both areas of agreement (where both schemes identify similar patterns) and divergence (where our attribute-based approach captures distinctions not present in their categorical scheme).
This comparison helps assess to what extent our attribute-centered framework aligns with established categorization but also adds analytical value.
Importantly, we examine these examples not to suggest that our classifiers have a higher accuracy but to illustrate how our attribute-centered approach seems to be better suited to accommodate intersectionality because it puts all attribute categories on the same analytical level.

##### Occupation profession

In [None]:
# Columns of Horne et al.'s classifier output that we treat as occupation-related group categories
occupation_cats = [
    'Caregivers',
    'Civil Servants',
    'Education Professionals',
    'Employees And Workers',
    'Employers And Business Owners',
    'Farmers',
    'Health Professionals',
    'Investors And Stakeholders',
    'Law Enforcement Personnel',
    'Manual And Service Workers',
    'Military Personnel',
    'Politicians',
    'Sociocultural Professionals',
    'White Collar Workers',
]

# Work on a copy of the occupation/profession sample so the original frame stays untouched
tmp = horne_predictions_econ_occprof_attribute_sample.copy()


In [None]:
#| eval: false
# Number of occupation-related categories
len(occupation_cats)
# Category names in italics, comma-separated (the format used in the footnote listing them)
', '.join(f"_{name}_" for name in occupation_cats)

In [None]:
#| eval: false
# Share of mentions in the occupation/profession sample that Horne et al.'s
# classifier assigns to at least one of the occupation-related categories
tmp[occupation_cats].any(axis=1).mean()

First, we focus on categories in Horne et al.'s classification scheme that capture references to specific occupational groups.
In total, we identified 14 such categories in their scheme, ranging from _Caregivers_ to _White Collar Workers_.[^fn:horne_occupation_cats]

Regarding convergence between their and our annotation scheme, we expect that our focus on occupation/profession-related attributes will lead to a high degree of overlap with mentions categorized into one or several of these categories by Horne et al.'s classifiers.
This is confirmed by our analysis, which finds that the share of mentions labeled as expressing an _occupation/profession_ attribute by our classifier that are classified into an occupation group category by Horne et al.'s classifier is 83.8% -- a high degree of correspondence.

[^fn:horne_occupation_cats]: _Caregivers_, _Civil Servants_, _Education Professionals_, _Employees And Workers_, _Employers And Business Owners_, _Farmers_, _Health Professionals_, _Investors And Stakeholders_, _Law Enforcement Personnel_, _Manual And Service Workers_, _Military Personnel_, _Politicians_, _Sociocultural Professionals_, _White Collar Workers_


In [None]:
# Count uni- to trigrams, without English stop words; `max_df=0.8` drops
# n-grams that occur in more than 80% of the mentions.
vectorizer = CountVectorizer(stop_words=stopwords.words('english'), ngram_range=(1, 3), max_df=0.8)

# True for mentions that Horne et al.'s classifier assigns to at least one occupation-related category
idxs = horne_predictions_econ_occprof_attribute_sample[occupation_cats].any(axis=1)

# Contrast the wording of mentions with (l1) and without (l2) such a classification
fw = compute_fighting_words(
    l1=horne_predictions_econ_occprof_attribute_sample.loc[idxs, 'text'].tolist(),
    l2=horne_predictions_econ_occprof_attribute_sample.loc[~idxs, 'text'].tolist(),
    cv=vectorizer,
)

# One row per n-gram, highest score first
fw_ours_vs_horne_occupation = pd.DataFrame(fw, columns=['word', 'score']).sort_values(by='score', ascending=False)

In [None]:
#| label: fig-ours_vs_horne_occupation_fighting_words
#| output: true
#| fig-cap: "Most distinctive words for mentions labeled as featuring occupation/profession as an attribute by our classifier depending on whether Horne et al.'s classifier has classified them into (at least) one of their occupation/profession-related categories (left) or not (right). Values plotted are $z$-scores from \"fighting words\" on a sample of 5000 occupation/profession mentions. Values beyond ±1.96 (vertical dashed line) can be considered significantly distinctive."

# Mirrored pair of horizontal bar charts of the 20 highest- and 20 lowest-scoring n-grams.
# Each selection is re-sorted so that its most extreme score is drawn last,
# i.e. at the highest y position of its panel.
top_negative = fw_ours_vs_horne_occupation.nsmallest(20, 'score').sort_values('score', ascending=False)
top_positive = fw_ours_vs_horne_occupation.nlargest(20, 'score').sort_values('score', ascending=True)

# Create two-column layout
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(11, 4), sharey=False)

# Left plot: positive scores (distinctive for classified by Horne et al.)
ttl = r"mentions categorized by both as occupation/profession-related"
# dashed reference line at z = 1.96
ax1.axvline(x=1.96, color='black', linestyle='--', linewidth=0.8, zorder=1)
ax1.barh(range(len(top_positive)), top_positive['score'], color='#1b9e77', zorder=2)
ax1.set_yticks(range(len(top_positive)))
ax1.set_yticklabels(top_positive['word'])
ax1.set_xlabel('z-score', fontsize=11)
ax1.set_title(ttl, fontweight='bold', fontsize=12)
ax1.axvline(x=0, color='black', linestyle='-', linewidth=0.8)
# put the n-gram labels on the right-hand side, i.e. between the two panels
ax1.yaxis.tick_right()
ax1.yaxis.set_label_position('right')
plt.setp(ax1.get_yticklabels(), ha='left')
ax1.set_xlim(0, 10)
ax1.invert_xaxis()  # flip the axis: zero sits at the right edge and the (positive) bars grow leftwards

# Right plot: negative scores (distinctive for NOT classified by Horne et al.)
ttl = r"mentions categorized only by ours as occupation/profession-related"
# dashed reference line at z = -1.96
ax2.axvline(x=-1.96, color='black', linestyle='--', linewidth=0.8, zorder=1)
ax2.barh(range(len(top_negative)), top_negative['score'], color='#d95f02', zorder=2)
ax2.set_yticks(range(len(top_negative)))
ax2.set_yticklabels(top_negative['word'])
ax2.set_xlabel('z-score', fontsize=11)
ax2.set_title(ttl, fontweight='bold', fontsize=12)
ax2.set_xlim(-10, 0)
ax2.invert_xaxis()  # flip the axis: zero sits at the left edge and the (negative) bars grow rightwards

plt.tight_layout()
plt.show()

In [None]:
#| label: tbl-ours_vs_horne_occupation_fighting_words_examples
#| output: true
#| tbl-cap: "Examples of social group mentions labeled as featuring occupation/profession as an attribute by our classifier that were not assigned to any of Horne et al.'s occupation-related group categories by their classifier. Values computed by summing \"fighting words\" scores as weights of mentions' tokens, normalized by number of tokens."
# Score every mention that Horne et al.'s classifier did NOT assign to an
# occupation-related category by the mean "fighting words" score of the
# n-grams it contains, then show a score-weighted sample of the most
# distinctive mentions.

# n-gram -> score, aligned with the vectorizer's feature (column) order
fw_lookup = {r['word']: r['score'] for r in fw_ours_vs_horne_occupation.to_dict(orient='records')}
fw_vals = np.array([fw_lookup[f] for f in vectorizer.get_feature_names_out()])
analyzer = vectorizer.build_analyzer()

# vectorize mentions
mentions = horne_predictions_econ_occprof_attribute_sample[~idxs].reset_index(drop=True)
mentions_texts = mentions['text']
X_mentions = vectorizer.transform(mentions_texts.tolist())
# binarize: every n-gram counts at most once per mention
X_mentions[X_mentions>0] = 1
# number of distinct in-vocabulary n-grams per mention, as an explicit
# (n_mentions, 1) ndarray (a sparse `.sum(axis=1)` may return an np.matrix)
n_ngrams = np.asarray(X_mentions.sum(axis=1)).reshape(-1, 1)
# apply z-score values to each row in `X_mentions` as weights
X_mentions_scores = np.asarray(X_mentions @ fw_vals[:, np.newaxis])
# normalize for mention length; mentions without any in-vocabulary n-gram
# yield NaN (0/0) and are removed by the threshold filter below
with np.errstate(divide='ignore', invalid='ignore'):
    X_mentions_scores = X_mentions_scores / n_ngrams

mention_scores = X_mentions_scores[:, 0]
# ascending order: most negative scores first
rank = mention_scores.argsort()

n_ = 20
tab = horne_predictions_econ_occprof_attribute_sample.loc[~idxs, ['text', *label_cols_horne]].iloc[rank]
tab['score'] = mention_scores[rank]
# keep mentions whose mean score lies below -1.96; copy so that the column
# added next is not written into a slice
tab = tab[tab['score']<-1.96].copy()
# de-duplicate on the analyzer's normalized n-gram sequence of each mention
tab['text_norm'] = tab['text'].apply(lambda x: ' '.join(analyzer(x)).strip())
tab = tab.drop_duplicates('text_norm')
# Sample proportionally to |score|. `sample` raises if asked for more rows
# than are available, so only sample when there are more than `n_` candidates.
if len(tab) > n_:
    tab = tab.sample(n_, weights=tab['score'].abs(), random_state=42)
# names of all of Horne et al.'s categories predicted (== 1) for the mention
tab['horne'] = tab[list(label_cols_horne)].apply(lambda row: '; '.join([l for l in label_cols_horne if row[l]==1]), axis=1)
tab = tab[['text', 'score', 'horne']].sort_values('score')
tab.columns = ["Mention", "$z$-score", "Horne et al. classification"]
# TODO: make latex table
latex_table(tab)

However, we also expect that due to being more encompassing and abstract than some of Horne et al.'s categories, such as _Civil servants_, _Farmers_, _Health professionals_, etc., our _occupation/profession_ attribute will capture a broader set of mentions.
We assess this question through a "fighting words" analysis [@monroe_fightin_2008], a method for identifying $n$-gram patterns that distinguish the 18% of social group mentions labeled as featuring the attribute of _occupation/profession_ by our classifier that are not classified into any of Horne et al.'s occupation-related categories.
@fig-ours_vs_horne_occupation_fighting_words and @tbl-ours_vs_horne_occupation_fighting_words_examples show that our _occupation/profession_ classifications capture occupational and professional groups not covered by Horne et al.'s schemes, including broad categories like "experts" and "practitioners"  as well as specific categories like "drivers".

##### Gender / Sexuality

In [None]:
# Columns of Horne et al.'s classifier output that we treat as gender/sexuality-related group categories
gender_sexuality_cats = ['Lgbtqi', 'Men', 'Women']

# Work on a copy of the gender/sexuality sample so the original frame stays untouched
tmp = horne_predictions_nonecon_gender_attribute_sample.copy()

In [None]:
#| eval: false
# Share of mentions in the gender/sexuality sample that Horne et al.'s
# classifier assigns to at least one of the gender/sexuality-related categories
tmp.loc[:, gender_sexuality_cats].any(axis='columns').mean()

Turning to mentions that feature attributes related to gender and/or sexuality, we perform a similar cross-validation exercise by comparing their overlap with classifications into Horne et al.'s group categories _LGBTQI_, _Men_, and _Women_.
Again, we find a high overlap between classifications.
91.4% of mentions labeled as expressing a _gender/sexuality_ attribute by our classifier are classified into (at least) one of Horne et al.'s _LGBTQI_, _Men_, or _Women_ group categories by their classifier.

In [None]:
# Count uni- to trigrams, without English stop words; `max_df=0.8` drops
# n-grams that occur in more than 80% of the mentions.
vectorizer = CountVectorizer(stop_words=stopwords.words('english'), ngram_range=(1, 3), max_df=0.8)

# True for mentions that Horne et al.'s classifier assigns to at least one gender/sexuality-related category
idxs = horne_predictions_nonecon_gender_attribute_sample[gender_sexuality_cats].any(axis=1)

# Contrast the wording of mentions with (l1) and without (l2) such a classification
fw = compute_fighting_words(
    l1=horne_predictions_nonecon_gender_attribute_sample.loc[idxs, 'text'].tolist(),
    l2=horne_predictions_nonecon_gender_attribute_sample.loc[~idxs, 'text'].tolist(),
    cv=vectorizer,
)

# One row per n-gram, highest score first
fw_ours_vs_horne_gender = pd.DataFrame(fw, columns=['word', 'score']).sort_values(by='score', ascending=False)

In [None]:
#| label: fig-ours_vs_horne_gender_fighting_words
#| output: true
#| fig-cap: "Top 20 most distinctive words for gender/sexuality mentions classified by Horne et al. (left) vs. not classified (right)"

# Mirrored pair of horizontal bar charts of the 20 highest- and 20 lowest-scoring n-grams.
# Each selection is re-sorted so that its most extreme score is drawn last,
# i.e. at the highest y position of its panel.
top_negative = fw_ours_vs_horne_gender.nsmallest(20, 'score').sort_values('score', ascending=False)
top_positive = fw_ours_vs_horne_gender.nlargest(20, 'score').sort_values('score', ascending=True)

# Create two-column layout
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(11, 4), sharey=False)

# Left plot: positive scores (distinctive for classified by Horne et al.)
ttl = r"mentions categorized by both as gender/sexuality-related"
# dashed reference line at z = 1.96
ax1.axvline(x=1.96, color='black', linestyle='--', linewidth=0.8, zorder=1)
ax1.barh(range(len(top_positive)), top_positive['score'], color='#1b9e77', zorder=2)
ax1.set_yticks(range(len(top_positive)))
ax1.set_yticklabels(top_positive['word'])
ax1.set_xlabel('Score (z-score)', fontsize=11)
ax1.set_title(ttl, fontweight='bold', fontsize=12)
ax1.axvline(x=0, color='black', linestyle='-', linewidth=0.8)
# put the n-gram labels on the right-hand side, i.e. between the two panels
ax1.yaxis.tick_right()
ax1.yaxis.set_label_position('right')
plt.setp(ax1.get_yticklabels(), ha='left')
ax1.set_xlim(0, 25)
ax1.invert_xaxis()  # flip the axis: zero sits at the right edge and the (positive) bars grow leftwards

# Right plot: negative scores (distinctive for NOT classified by Horne et al.)
ttl = r"mentions categorized only by ours as gender/sexuality-related"
# dashed reference line at z = -1.96
ax2.axvline(x=-1.96, color='black', linestyle='--', linewidth=0.8, zorder=1)
ax2.barh(range(len(top_negative)), top_negative['score'], color='#d95f02', zorder=2)
ax2.set_yticks(range(len(top_negative)))
ax2.set_yticklabels(top_negative['word'])
ax2.set_xlabel('Score (z-score)', fontsize=11)
ax2.set_title(ttl, fontweight='bold', fontsize=12)
ax2.set_xlim(-25, 0)
ax2.invert_xaxis()  # flip the axis: zero sits at the left edge and the (negative) bars grow rightwards

plt.tight_layout()
plt.show()

In [None]:
#| label: tbl-ours_vs_horne_gender_fighting_words_examples
#| output: true
#| tbl-cap: "Examples of social group mentions labeled as featuring gender/sexuality as an attribute by our classifier that were not assigned to any of Horne et al.'s Men, Women, or LGBTQI categories by their classifier. Values computed by summing \"fighting words\" scores as weights of mentions' tokens, normalized by number of tokens."
# Score every mention that Horne et al.'s classifier did NOT assign to a
# gender/sexuality-related category by the mean "fighting words" score of the
# n-grams it contains, then show a score-weighted sample of the most
# distinctive mentions.

# n-gram -> score, aligned with the vectorizer's feature (column) order
fw_lookup = {r['word']: r['score'] for r in fw_ours_vs_horne_gender.to_dict(orient='records')}
fw_vals = np.array([fw_lookup[f] for f in vectorizer.get_feature_names_out()])
analyzer = vectorizer.build_analyzer()

# vectorize mentions
mentions = horne_predictions_nonecon_gender_attribute_sample[~idxs].reset_index(drop=True)
mentions_texts = mentions['text']
X_mentions = vectorizer.transform(mentions_texts.tolist())
# binarize: every n-gram counts at most once per mention
X_mentions[X_mentions>0] = 1
# number of distinct in-vocabulary n-grams per mention, as an explicit
# (n_mentions, 1) ndarray (a sparse `.sum(axis=1)` may return an np.matrix)
n_ngrams = np.asarray(X_mentions.sum(axis=1)).reshape(-1, 1)
# apply z-score values to each row in `X_mentions` as weights
X_mentions_scores = np.asarray(X_mentions @ fw_vals[:, np.newaxis])
# normalize for mention length; mentions without any in-vocabulary n-gram
# yield NaN (0/0) and are removed by the threshold filter below
with np.errstate(divide='ignore', invalid='ignore'):
    X_mentions_scores = X_mentions_scores / n_ngrams

mention_scores = X_mentions_scores[:, 0]
# ascending order: most negative scores first
rank = mention_scores.argsort()

n_ = 20
tab = horne_predictions_nonecon_gender_attribute_sample.loc[~idxs, ['text', *label_cols_horne]].iloc[rank]
tab['score'] = mention_scores[rank]
# keep mentions whose mean score lies below -1.64; copy so that the column
# added next is not written into a slice
tab = tab[tab['score']<-1.64].copy()
# de-duplicate on the analyzer's normalized n-gram sequence of each mention
tab['text_norm'] = tab['text'].apply(lambda x: ' '.join(analyzer(x)).strip())
tab = tab.drop_duplicates('text_norm')
# Sample proportionally to |score|. `sample` raises if asked for more rows
# than are available, so only sample when there are more than `n_` candidates.
if len(tab) > n_:
    tab = tab.sample(n_, weights=tab['score'].abs(), random_state=1)
# names of all of Horne et al.'s categories predicted (== 1) for the mention
tab['horne'] = tab[list(label_cols_horne)].apply(lambda row: '; '.join([l for l in label_cols_horne if row[l]==1]), axis=1)
tab = tab[['text', 'score', 'horne']].sort_values('score')
tab.columns = ["Mention", "$z$-score", "Horne et al. classification"]
# TODO: make latex table
latex_table(tab, column_format='p{3in} l l')

However, looking at the remaining mentions that our classifier labels as _gender/sexuality_-related but Horne et al.'s classifier does not, we again find interesting patterns.
Most common are mentions including the familial role "mother" (see @fig-ours_vs_horne_gender_fighting_words), some of which combine this attribute with qualifiers  (see @tbl-ours_vs_horne_gender_fighting_words_examples).