# placeholder {.hidden}

## Prevalence and intersectionality {#sec-prevalence_intersectionality}

In [None]:
#| eval: false
import os
from pathlib import Path
import gc

import pandas as pd
import numpy as np

import re
from utils.fighting_words import bayes_compare_language as compute_fighting_words
from sklearn.feature_extraction.text import CountVectorizer
from nltk.corpus import stopwords

import sys
base_path = Path.cwd().parent.parent
sys.path.append(str(base_path / 'paper'))

from reporting import *

# mock table printing functions for development
from IPython.display import HTML
latex_table = lambda x, *args, **kwargs: HTML(x.to_html(**kwargs))

# suppress warnings for cleaner output
import warnings
warnings.filterwarnings('ignore')


# Data directories, all resolved relative to `base_path`
# (set above to two levels up from the current working directory)
data_path = base_path / 'data'
labeled_path = data_path / 'labeled'
intermediate_path = data_path / 'intermediate'
manifestos_path = data_path / 'manifestos'
annotations_path = data_path / 'annotations' # / 'group_mention_categorization'

In [None]:
#| cache: true
# Load the labeled mentions used throughout this section
# (presumably one row per mention with party metadata attached, per the file name -- TODO confirm).
# NOTE(review): unpickling can execute arbitrary code; only load trusted, project-generated files.
fp = labeled_path / 'labeled_mentions_with__party_metadata.pkl'
df = pd.read_pickle(fp)

In [None]:
# inspect extreme examples: mentions that carry exactly 3 economic attributes
three_econ_rows = df.loc[df[econ_attrs].sum(axis=1) == 3, :]
# one formatted string per mention: bold text followed by a bullet list of its economic attributes
formatted_examples = three_econ_rows.apply(
    lambda row: f"'\033[1m{row['text']}\033[0m'\n  - "
    + '\n  - '.join([attribute_category_names_map[a] for a in econ_attrs if row[a] == 1]),
    axis=1,
)
print(*formatted_examples.sample(3, random_state=42).to_list(), sep='\n\n')

In [None]:
# inspect extreme examples: mentions that carry 3 or more non-economic attributes
# TODO: the original note asked for examples with ≥4 attributes -- confirm the intended threshold
multi_nonecon_rows = df.loc[df[nonecon_attrs].sum(axis=1) >= 3, :]
# one formatted string per mention: bold text followed by a bullet list of its non-economic attributes
formatted_examples = multi_nonecon_rows.apply(
    lambda row: f"'\033[1m{row['text']}\033[0m'\n  - "
    + '\n  - '.join([attribute_category_names_map[a] for a in nonecon_attrs if row[a] == 1]),
    axis=1,
)
print(*formatted_examples.sample(3, random_state=42).to_list(), sep='\n\n')

In [None]:
# Per-mention flags: at least one economic / at least one non-economic attribute.
# The Series are named because these names are used as axis labels in the crosstab below.
any_econ = df[econ_attrs].any(axis=1).rename("econ")
any_nonecon = df[nonecon_attrs].any(axis=1).rename("non-econ")

In [None]:
# shares of mentions with at least one economic attribute
any_econ.value_counts(normalize=True)

In [None]:
# shares of mentions with at least one non-economic attribute
any_nonecon.value_counts(normalize=True)

In [None]:
# shares of mentions with at least one economic and/or at least one non-economic attribute
tab = pd.crosstab(any_econ, any_nonecon, normalize=True)
tab

In [None]:
#| eval: false

# Cross-dimension intersectionality share among mentions with ≥2 attributes
attrs_per_mention = df[econ_attrs + nonecon_attrs].sum(axis=1)
mulit_attr_mentions = attrs_per_mention >= 2
df_mulit_attr_mentions = df[mulit_attr_mentions].copy()

# within that subset: does the mention have any economic / any non-economic attribute?
any_econ_2plus = df_mulit_attr_mentions[econ_attrs].any(axis=1).rename("econ")
any_nonecon_2plus = df_mulit_attr_mentions[nonecon_attrs].any(axis=1).rename("non-econ")

# joint shares (normalized over all cells) of the two flags
tab = pd.crosstab(any_econ_2plus, any_nonecon_2plus, normalize=True)
tab

In [None]:
# Counts behind the Venn diagram of attribute dimensions
any_econ = df[econ_attrs].any(axis=1)
any_nonecon = df[nonecon_attrs].any(axis=1)
any_attr = any_econ | any_nonecon

total = len(df)
only_econ = (any_econ & ~any_nonecon).sum()
only_nonecon = (any_nonecon & ~any_econ).sum()
both = (any_econ & any_nonecon).sum()
neither = (~any_attr).sum()  # no attribute at all = "universal" mentions

# (row label, count, description); the proportion is always count / total
_prevalence_rows = [
    ('neither', neither, 'Number and share of mentions with no predicted attributes ("universal" group references)'),
    ('has any attribute', any_attr.sum(), 'Number and share of mentions predicted to have at least one attribute (economic or non-economic)'),
    ('has economic', any_econ.sum(), 'Number and share of mentions predicted to have at least one economic attribute'),
    ('has non-economic', any_nonecon.sum(), 'Number and share of mentions predicted to have at least one non-economic attribute'),
    ('has only economic', only_econ, 'Number and share of mentions predicted to have only economic attributes'),
    ('has only non-economic', only_nonecon, 'Number and share of mentions predicted to have only non-economic attributes'),
    ('has both', both, 'Number and share of mentions predicted to have both economic and non-economic attributes'),
]

attr_prevalence_breakdown = {'total': [total, 1.0, 'Total number of predicted mentions']}
attr_prevalence_breakdown.update(
    {row_label: [n, n/total, desc] for row_label, n, desc in _prevalence_rows}
)
attr_prevalence_breakdown_tab = pd.DataFrame.from_dict(
    attr_prevalence_breakdown,
    orient='index',
    columns=['count', 'proportion', 'description'],
)

In [None]:
# Intersectionality breakdown (within- and cross-dimensional).
# All "multi"/"mixed" masks are restricted to rows in `any_attr`,
# i.e. mentions with at least one attribute (computed in the previous cell).
_econ_per_mention = df[econ_attrs].sum(axis=1)
_nonecon_per_mention = df[nonecon_attrs].sum(axis=1)

any_econ = df[econ_attrs].any(axis=1)
any_nonecon = df[nonecon_attrs].any(axis=1)

# 2+ attributes of any kind
any_inersectionality = (df[econ_attrs + nonecon_attrs].sum(axis=1) >= 2)[any_attr]

# economic dimension: 2+ economic attributes, with ("any") or without ("pure") non-economic ones
any_econ_multi = (_econ_per_mention >= 2)[any_attr]
pure_econ_multi = ((_econ_per_mention >= 2) & (~any_nonecon))[any_attr]

# non-economic dimension: 2+ non-economic attributes, with ("any") or without ("pure") economic ones
any_nonecon_multi = (_nonecon_per_mention >= 2)[any_attr]
pure_nonecon_multi = ((_nonecon_per_mention >= 2) & (~any_econ))[any_attr]

# cross-dimensional: at least one attribute from each dimension
any_mixed_multi = ((_econ_per_mention >= 1) & (_nonecon_per_mention >= 1))[any_attr]

n_any_attr = any_attr.sum()


def _share_row(mask, denominator, description):
    """Return [count, count / denominator, description] for a boolean mask."""
    return [mask.sum(), mask.sum()/denominator, description]


attr_intersectionality_breakdown = {
    'total': [n_any_attr, 1.0, 'mentions with at least one predicted attribute (economic or non-economic)'],
    'any intersectionality': _share_row(any_inersectionality, n_any_attr, 'mentions with at least one predicted attribute that exhibit any kind of intersectionality (within- and/or cross-dimensional)'),

    'any economic': _share_row(any_econ, n_any_attr, 'mentions with at least one predicted attribute that have at least one economic attribute'),
    'any economic within-dimensional': _share_row(any_econ_multi, any_econ.sum(), 'mentions with at least one economic attribute that have 2 or more economic attributes'),
    'pure economic within-dimensional': _share_row(pure_econ_multi, any_econ.sum(), 'mentions with at least one economic attribute and no non-economic attributes that have 2 or more economic attributes'),

    'any non-economic': _share_row(any_nonecon, n_any_attr, 'mentions with at least one predicted attribute that have at least one non-economic attribute'),
    'any non-economic within-dimensional': _share_row(any_nonecon_multi, any_nonecon.sum(), 'mentions with at least one non-economic attribute that have 2 or more non-economic attributes'),
    'pure non-economic within-dimensional': _share_row(pure_nonecon_multi, any_nonecon.sum(), 'mentions with at least one non-economic attribute and no economic attributes that have 2 or more non-economic attributes'),

    'any cross-dimensional': _share_row(any_mixed_multi, n_any_attr, 'mentions with at least one economic and at least one non-economic attribute that have 2 or more attributes across dimensions'),
}
attr_intersectionality_breakdown_tab = pd.DataFrame.from_dict(
    attr_intersectionality_breakdown,
    orient='index',
    columns=['count', 'proportion', 'description'],
)


In [None]:
#| echo: false
#| output: false

# Base colors: entries 0 and 1 of matplotlib's 'Set2' colormap
econ_base = mcolors.to_hex(plt.cm.Set2(0))
nonecon_base = mcolors.to_hex(plt.cm.Set2(1))

# 'within' shade per dimension: more saturated + darker variant of the base color
_econ_within = intensify_color(econ_base, sat_factor=2, val_factor=0.85)
_nonecon_within = intensify_color(nonecon_base, sat_factor=2, val_factor=0.85)
# 'cross' color is shared by both palettes: a blend of the two base colors
_cross_color = blend_hex_colors(econ_base, nonecon_base, 0.5)

# Color scheme per dimension; 'both' blends the dimension's 'within' shade with the shared 'cross' color
econ_colors = {
    'single': econ_base,
    'within': _econ_within,
    'cross': _cross_color,
    'both': blend_hex_colors(_econ_within, _cross_color, 0.5),
}
nonecon_colors = {
    'single': nonecon_base,
    'within': _nonecon_within,
    'cross': _cross_color,
    'both': blend_hex_colors(_nonecon_within, _cross_color, 0.5),
}

show_color_palette(econ_colors, title="Economic attribute colors")
show_color_palette(nonecon_colors, title="Non-economic attribute colors")

In [None]:
#| label: fig-attribute_dimensions_venn
#| output: true
#| fig-cap: "Distribution of social group mentions across attribute dimensions. The venn diagram shows the share of mentions featuring economic attributes, non-economic attributes, both types of attributes (intersection), or neither ('universal' mentions, gray). Areas are proportional to the relative frequencies in the data. Numbers show the share of mentions in each category. For example, 43.9% of mentions feature only non-economic attributes, not considering the 5.6% of mentions that feature at least one attribute of both attribute dimensions."

# Layout: a three-set Venn diagram in which "universal" mentions get their own circle.
#   A = economic, B = non-economic, C = no attributes (neither)
# venn3 takes the subset sizes in the order (Abc, aBc, ABc, abC, AbC, aBC, ABC).
# Every region overlapping C is impossible by construction (a mention cannot have
# attributes and no attributes at the same time), so those sizes are 0.

fig, ax = plt.subplots(figsize=(6, 3), dpi=300)

subsets = (
    only_econ,     # Abc (100): only economic
    only_nonecon,  # aBc (010): only non-economic
    both,          # ABc (110): economic and non-economic
    neither,       # abC (001): neither/universal
    0,             # AbC (101): impossible
    0,             # aBC (011): impossible
    0,             # ABC (111): impossible
)

v = venn3(
    subsets=subsets,
    set_labels=('economic attributes', 'non-economic attributes', '"universal" (no attributes)'),
    ax=ax,
)

# Fill color for each of the four populated regions
region_fill = {
    '100': econ_colors['single'],     # only economic
    '010': nonecon_colors['single'],  # only non-economic
    '110': nonecon_colors['cross'],   # both
    '001': 'lightgrey',               # neither
}
for region_id, fill in region_fill.items():
    patch = v.get_patch_by_id(region_id)
    if patch:
        patch.set_color(fill)
        patch.set_alpha(0.7)

# Replace the default region labels with the share of all mentions
label_map = {
    '100': (only_econ, 'Only economic'),
    '010': (only_nonecon, 'Only non-econ'),
    '110': (both, 'Both'),
    '001': (neither, 'Neither')
}

for region_id, (count, name) in label_map.items():
    label = v.get_label_by_id(region_id)
    if not label or not count > 0:
        continue
    pct = (count / total) * 100
    label.set_text(f'{pct:.1f}%')
    label.set_fontsize(9)
    label.set_fontweight('bold')

plt.tight_layout()
plt.show()

In [None]:
#| eval: false
HTML(attr_prevalence_breakdown_tab.round(3).to_html())

In [None]:
# NOTE(review): ad-hoc check with hard-coded counts (≈ 0.069, matching the 6.9% quoted in the text below).
# Presumably `both / any_attr.sum()` -- TODO derive from attr_prevalence_breakdown_tab instead of hard-coding.
10747/154837

@fig-attribute_dimensions_venn reveals that most social group mentions in the analyzed party manifestos are characterized by at least one economic or non-economic attribute.
While about every fourth social group mention (26%) is predicted to contain no attributes at all and is thus considered a "universal" group reference in our scheme,
30.3% of mentions feature at least one economic attribute,
and 48.8% feature at least one non-economic attribute.

Overall, we observe intersectionality in 15.6% of all mentions, including "universal" group references without attributes, and in 21% of all mentions that feature at least one attribute.
Further, 5.1% of mentions (6.9% of mentions with at least one attribute) are predicted to contain at least one attribute from both attribute dimensions, and thus exhibit cross-dimensional intersectionality.

In [None]:
# Calculate detailed intersectionality breakdowns for each dimension
def calculate_dimension_intersectionality(df, focal_attrs, other_attrs):
    """Count intersectionality patterns for one attribute dimension.

    Only mentions (rows) with at least one focal attribute are considered.

    Args:
        df: DataFrame with one indicator column per attribute
            (assumed to hold 0/1 or boolean values -- TODO confirm).
        focal_attrs: list of column names of the focal attribute dimension.
        other_attrs: list of column names of the other attribute dimension.

    Returns:
        dict of counts with keys
        - 'any_focal': rows with at least one focal attribute
        - 'multi_focal_only': rows with 2+ focal attributes and no other-dimension attribute
        - 'cross_dimensional': rows with at least one focal and at least one other-dimension attribute
        - 'overlap': rows with 2+ focal attributes and at least one other-dimension attribute
        - 'total': number of rows with at least one focal attribute
    """
    # Outer circle: any attribute from the focal dimension
    any_focal = df[focal_attrs].any(axis=1)

    # Build both masks on the same row subset so that every boolean operation
    # below combines identically indexed Series (no implicit index alignment).
    multi_focal = df.loc[any_focal, focal_attrs].sum(axis=1) >= 2
    has_other = df.loc[any_focal, other_attrs].any(axis=1)

    # First inner circle: 2+ attributes within the focal dimension (no other dimension)
    multi_focal_only = multi_focal & ~has_other

    # Second inner circle: cross-dimensional (focal + other dimension);
    # every row in the subset already has a focal attribute
    cross_dimensional = has_other

    # Overlap: 2+ focal attributes AND cross-dimensional
    overlap = multi_focal & has_other

    return {
        'any_focal': any_focal.sum(),
        'multi_focal_only': multi_focal_only.sum(),
        'cross_dimensional': cross_dimensional.sum(),
        'overlap': overlap.sum(),
        'total': len(multi_focal),
    }

# Calculate for both dimensions
econ_intersect = calculate_dimension_intersectionality(df, econ_attrs, nonecon_attrs)
nonecon_intersect = calculate_dimension_intersectionality(df, nonecon_attrs, econ_attrs)

In [None]:
#| label: fig-attribute_intersectionality_by_dim_venn
#| output: true
#| fig-cap: "Attribute intersectionality patterns by attribute dimension. The venn diagrams show, for each attribute dimension, the share of mentions that feature only one attribute (outer circle) versus those that feature multiple attributes, either of the same attribute dimension (within-dimensional intersectionality) or with at least one attribute category of the other attribute dimension (cross-dimensional intersectionality). The small intersecting area of these two inner circles in both diagrams represents mentions that feature both more than one attribute of the same dimension and at least one attribute of the other dimension."

def plot_dimension_venn(ax, data, dimension_name, colors):
    """Draw a Venn diagram of intersectionality patterns for one attribute dimension.

    The diagram uses three sets:
      A = any focal attribute (outer circle, drawn without a set label),
      B = cross-dimensional, C = within-dimensional.

    Args:
        ax: matplotlib axes to draw on.
        data: dict with the counts 'any_focal', 'multi_focal_only',
            'cross_dimensional', 'overlap' and 'total'.
        dimension_name: used in the panel title.
        colors: dict with a color for each of 'single', 'cross', 'within', 'both'.
    """
    n_overlap = data['overlap']                           # 2+ focal AND cross-dimensional
    n_within_only = data['multi_focal_only']              # 2+ focal, 0 other
    n_cross_only = data['cross_dimensional'] - n_overlap  # cross but not multi-focal
    # what is left of the focal mentions carries a single focal attribute only
    n_single = data['any_focal'] - n_within_only - data['cross_dimensional']

    # venn3 region id -> (count, key into `colors`); regions outside A are impossible
    regions = {
        '100': (n_single, 'single'),       # A only
        '110': (n_cross_only, 'cross'),    # A and B
        '101': (n_within_only, 'within'),  # A and C
        '111': (n_overlap, 'both'),        # A, B and C
    }

    # subset sizes in venn3 order: (Abc, aBc, ABc, abC, AbC, aBC, ABC)
    v = venn3(
        subsets=(n_single, 0, n_cross_only, 0, n_within_only, 0, n_overlap),
        set_labels=(None, 'cross-dimensional', 'within-dimensional'),
        ax=ax,
    )

    # smaller font for the set labels that exist
    for set_label in v.set_labels:
        if set_label is not None:
            set_label.set_fontsize(9)

    # region fill colors
    for region_id, (_, color_key) in regions.items():
        patch = v.get_patch_by_id(region_id)
        if patch:
            patch.set_color(colors[color_key])
            patch.set_alpha(0.7)

    # region labels: share among mentions with at least one focal attribute
    total = data['total']
    for region_id, (count, _) in regions.items():
        label = v.get_label_by_id(region_id)
        if label and count > 0:
            pct = (count / total) * 100
            label.set_text(f'{pct:.1f}%')
            label.set_fontsize(8)

    ax.set_title(f'{dimension_name} attributes')

# Create the visualization
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(7, 4), dpi=200)

# Plot economic dimension
plot_dimension_venn(ax1, econ_intersect, 'Economic', econ_colors)

# Plot non-economic dimension  
plot_dimension_venn(ax2, nonecon_intersect, 'Non-economic', nonecon_colors)

plt.tight_layout()
plt.show()

In [None]:
#| eval: false
HTML(attr_intersectionality_breakdown_tab.round(3).to_html())

In [None]:
# NOTE(review): ad-hoc check with hard-coded counts (≈ 0.156, matching the 15.6% quoted in the text above).
# Presumably mentions with 2+ attributes divided by all mentions -- TODO derive from the data instead of hard-coding.
32558/209351

@fig-attribute_intersectionality_by_dim_venn breaks down these numbers by attribute dimensions.
It shows that among the 30.3% of mentions featuring at least one economic attribute, 21.9% of mentions are intersectional and combine multiple economic attributes (5.0%), at least one economic with at least one non-economic attribute (16%), or both (0.9%).
Similarly, among the 48.8% of mentions featuring at least one non-economic attribute, 28.7% of mentions are intersectional and combine multiple attributes from within the same attribute dimension (18.2%), combine with at least one economic attribute (9.4%), or both (1.1%).

Interestingly, within-dimension intersectionality is more common for non-economic attributes.
In particular, for economic attributes, intersectionality is about four times more likely to be cross-dimensional than within-dimensional.
Viewed together with the higher prevalence of non-economic attributes (@fig-attribute_dimensions_venn), this underscores the importance of non-economic attributes in parties' group mentions.

<!-- These numbers highlight that intersectionality is an important phenomenon in parties' group mentions.
While in both attributes
Moreover, we will show further below that comparing intersectionality patterns across party families can reveals important differences in their group focus strategies. -->

We turn next to the prevalence of specific attribute categories, that is, the share of social group mentions that are predicted to feature a given attribute category.
We exclude universal group references from this analysis to focus on the prevalence of specific attributes.

In [None]:
# NOTE: subsetting to **non-universal mentions**, i.e. rows with at least one truthy value in `label_cols`
df_any = df[df[label_cols].any(axis=1)]

In [None]:
#| label: fig-attribute_prevalence_overall
#| output: true
#| fig-cap: "Prevalence of attribute categories across all social group mentions (excluding universal mentions with no specific attributes). Bars show the share of mentions featuring each attribute, with 95% confidence intervals. Top panel: economic attributes; bottom panel: non-economic attributes."

# Overall prevalence per attribute category, by dimension
econ_prev = compute_prevalence(df_any, econ_attrs)
nonecon_prev = compute_prevalence(df_any, nonecon_attrs)

# Replace attribute identifiers with readable category names
for prev_tab in (econ_prev, nonecon_prev):
    prev_tab['attribute'] = prev_tab['attribute'].map(attribute_category_names_map)

# Two stacked panels whose heights scale with the number of attributes shown
n_econ = econ_prev['attribute'].nunique()
n_nonecon = nonecon_prev['attribute'].nunique()
r_ = 0.175  # figure height added per attribute
fig_height = r_ * n_econ + r_ * n_nonecon + 2
fig, axes = plt.subplots(
    nrows=2,
    ncols=1,
    figsize=(10, fig_height),
    gridspec_kw={'height_ratios': [n_econ, n_nonecon], 'hspace': 0.2},
    sharex=True
)

# One panel per dimension, rows sorted by decreasing prevalence
panels = (
    (econ_prev, 'Economic attributes'),
    (nonecon_prev, 'Non-economic attributes'),
)
for panel_idx, (prev_tab, panel_title) in enumerate(panels):
    plot_prevalence_bars(
        prev_tab.sort_values("prevalence", ascending=False),
        axes[panel_idx],
        panel_title,
    )

plt.tight_layout()
plt.show()

@fig-attribute_prevalence_overall shows the overall prevalence of each attribute category across all social group mentions that contain at least one attribute.
Economic attributes are shown in the top panel, non-economic attributes in the bottom panel.
Overall, the most prevalent attribute categories are _occupation/profession_ and _nationality_.
In the economic dimension, the second and third most prevalent attributes -- _employment status_ and _income/wealth/economic status_ -- are relatively far less prevalent than the second and third most prevalent non-economic attributes, _age_ and _family_, which show substantially higher prevalence.
This disparity may partially reflect the lower prevalence of within-dimension intersectionality among economic attributes compared to non-economic attributes (see @fig-attribute_intersectionality_by_dim_venn), where, as shown further below, categories like _family_ and _age_ frequently co-occur in the same mentions.

To gain a more granular understanding of intersectionality patterns, we next compare attribute categories' co-occurrences.
In particular, we focus on how frequently each attribute category appears alone in a group mention versus in combination with other attributes and therefore _intersectionally_.

In [None]:
# Keep only mentions with ≥1 attribute (economic or non-economic)
all_attrs = econ_attrs + nonecon_attrs
available_attr_cols = [c for c in all_attrs if c in df.columns]
attr_presence = df[available_attr_cols].apply(binarize_column)
has_any_attrs = attr_presence.sum(axis=1) >= 1
df_with_attrs = df[has_any_attrs].copy()

# Co-occurrence breakdown across all attribute categories
cooc_breakdown = compute_cooccurrence_breakdown(df_with_attrs, all_attrs)

# Readable category names per dimension
econ_attr_names = [attribute_category_names_map[attr] for attr in econ_attrs]
nonecon_attr_names = [attribute_category_names_map[attr] for attr in nonecon_attrs]

In [None]:
#| label: fig-attribute_cooccurrence
#| output: true
#| fig-cap: "Co-occurrence patterns of attribute categories in social group mentions. Heatmap cells show the share of mentions where each focal attribute (rows) co-occurs with other attributes (columns). The first column ('mentioned alone') shows the share of mentions where the focal attribute appears without any other attributes. Top panel: economic attributes; bottom panel: non-economic attributes. Values below 0.01 are not displayed."

# Wide matrix: one row per focal attribute, one column per co-occurring attribute (plus 'alone');
# missing combinations become 0, and identifiers are replaced with readable names
heatmap_data = (
    cooc_breakdown
    .pivot_table(
        index='focal_attr',
        columns='cooccur_with',
        values='prevalence',
        aggfunc='first',
    )
    .fillna(0)
    .rename(index=attribute_category_names_map, columns=attribute_category_names_map)
    .rename(columns={'alone': 'mentioned alone'})
)

fig, axes = plot_heatmap(
    x=heatmap_data,
    panel_groups=(["mentioned alone"], econ_attr_names, nonecon_attr_names),
    mask_diagonal=False,
    cmin=0.01,
    cmap='RdPu',
    clims=(0, 1.0),
    clegend_title='Prevalence of co-occurrence',
)

# make first x-label bold
first_xtick_label = axes[0].get_xticklabels()[0]
first_xtick_label.set_fontweight('bold')

# set y-labels for panels
for panel_idx, panel_ylabel in enumerate(("economic\n", "non-economic\n")):
    axes[panel_idx].set_ylabel(panel_ylabel, fontweight='bold')
plt.show()

In [None]:
# NOTE(review): looks like leftover debugging (displays the type of the panel_groups argument) -- consider removing
panel_groups=(["mentioned alone"], econ_attr_names, nonecon_attr_names)
type(panel_groups)

@fig-attribute_cooccurrence reveals substantial variation in how different attributes tend to be used alone versus intersectionally.
Among economic attributes, _occupation/profession_ is mentioned most frequently as the only attribute of a social group (82%), which is about 1.3 times more often than the economic attribute that is most frequently used intersectionally: _employment status_ (63%).
Among non-economic attributes, _gender/sexuality_ is mentioned most frequently alone (82%) -- almost 2.5 times more often than _place/location_ (33%).
<!-- 
Viewed together with attribute categories' prevalence (@fig-attribute_prevalence_overall), we find that some attributes are more likely to be used in an intersectional way than others, and that this pattern varies across economic and non-economic dimensions.
For example, _family_ is featured in almost four times as many group mentions as _place/location_, but both attribute categories have a very similar probability of being used intersectionally.
In contrast, _gender/sexuality_ and _shared values/mentalities_ are featured in a similar number of group mentions, but the former is more likely to be used alone than the latter. -->

In [None]:
# Share of co-occurrence counts that stay within vs. cut across attribute dimensions,
# per dimension of the focal attribute.
# .copy(): columns are added below, so work on an independent frame instead of the query result
tmp = cooc_breakdown.query("cooccur_with!='alone'").copy()

# the dimension is the prefix before the double underscore in the attribute identifier
dim_pattern = r"^([^_]+)__.+"
tmp['dim'] = tmp['focal_attr'].str.extract(dim_pattern, expand=False)
cooccur_dim = tmp['cooccur_with'].str.extract(dim_pattern, expand=False)
tmp['occur_within'] = (tmp['dim'] == cooccur_dim).map({True: 'within', False: 'across'})

# total counts per (dimension, within/across), then normalize within each dimension
tmp = tmp.groupby(['dim', 'occur_within'])['count'].sum().reset_index()
tmp['share'] = tmp['count'] / tmp.groupby('dim')['count'].transform('sum')

tmp = tmp.pivot_table(index='dim', columns="occur_within", values="share")
tmp.columns.names = [None]
tmp.sort_values(['dim', 'across'])

In [None]:
# Share of co-occurrence counts that stay within vs. cut across attribute dimensions,
# per focal attribute.
# .copy(): columns are added below, so work on an independent frame instead of the query result
tmp = cooc_breakdown.query("cooccur_with!='alone'").copy()

# the dimension is the prefix before the double underscore in the attribute identifier
dim_pattern = r"^([^_]+)__.+"
tmp['dim'] = tmp['focal_attr'].str.extract(dim_pattern, expand=False)
cooccur_dim = tmp['cooccur_with'].str.extract(dim_pattern, expand=False)
tmp['occur_within'] = (tmp['dim'] == cooccur_dim).map({True: 'within', False: 'across'})

# total counts per (dimension, focal attribute, within/across), then normalize within each focal attribute
tmp = tmp.groupby(['dim', 'focal_attr', 'occur_within'])['count'].sum().reset_index()
tmp['share'] = tmp['count'] / tmp.groupby(['dim', 'focal_attr'])['count'].transform('sum')

tmp = tmp.pivot_table(index=['dim', 'focal_attr'], columns="occur_within", values="share").reset_index()
tmp.columns.names = [None]
tmp = tmp.sort_values(['dim', 'across'])

# Map focal_attr to readable names
tmp['focal_attr_label'] = tmp['focal_attr'].map(attribute_category_names_map)
tmp

@fig-attribute_cooccurrence also allows examining intersectionality patterns by focusing on the two right-most heatmap columns that show attributes' co-occurrence tendencies.[^fn:top_cpr_examples]
This view reveals that most economic within-dimension intersectionality is driven by the co-occurrence of _employment status_ with other economic attributes.
Conversely, we have noted above that within-dimensional intersectionality is more common for non-economic attributes than cross-dimensional intersectionality.
@fig-attribute_cooccurrence shows that this is driven by the very frequent co-occurrence of _age_ and _family_, and of _ethnicity_, _place/location_, or _religion_  with _nationality_.

[^fn:top_cpr_examples]: 
    @fig-attribute_interesectionality and @fig-attribute_cpr_overall report alternative visual presentations of co-occurrence patterns. 
    @fig-attribute_interesectionality analyzes co-occurrence rates in the population of "potentially intersectional" mentions by subsetting the data to all group mentions that feature at least two attributes.
    @fig-attribute_cpr_overall analyzes co-occurrence patterns using _conditional probabilities_ of co-occurrence for all attribute category pairs.
    @tbl-top_cpr_examples shows concrete examples sampled from our corpus.

Regarding cross-dimension intersectionality, economic attributes are frequently combined with _age_ (especially _education level_), _family_, _health_, and _nationality_. Conversely, the non-economic attribute _health_ is often combined with _income/wealth/economic status_, while _place/location_ and _shared values/mentalities_ are often combined with _occupation/profession_.

<!--
Further, @fig-attribute_interesectionality shows that the rate of within- vs. cross-dimension intersectionality varies substantially across attribute categories.
Among economic attributes, _employment status_ is about as likely to be combined with economic as with non-economic attributes.
In contrast, _education level_ is about four times more likely to be combined with an economic attribute than with a non-economic attribute.
Regarding non-economic attributes, the ratio of within- to cross-dimension intersectionality varies between 32:1 (_religion_) and 2:1 (_gender/sexuality_ and _health_, respectively).
The exception is the _shared values/mentalities_ attribute, which is mentioned about as frequently together with economic attributes as with other non-economic attributes.
-->

These patterns revealed by @fig-attribute_cooccurrence are particularly interesting because they add depth to the analysis of parties' group attribute focus.
For example, while social group mentions featuring descriptions of _shared values/mentalities_ are comparatively rare, their tendency to co-occur with _occupation/profession_ and _nationality_ suggests that parties use such attributes to differentiate their group focus strategies and specify which kind of people they mean exactly when referring to groups.

# APPENDIX

## Universal group references

In [None]:
#| cache: true
# Vocabulary of uni- to tri-grams (English stop words removed) used to
# contrast the two sets of mentions below
vectorizer = CountVectorizer(
    min_df=5,
    max_df=0.8,
    ngram_range=(1, 3),
    stop_words=stopwords.words('english'),
)

# A mention is "universal" when none of its label columns is set
no_attrs = ~df[label_cols].any(axis=1)
df_universal = df[["text"]].assign(is_universal=no_attrs)

# Split the texts into the two groups compared by the fighting-words analysis
universal_mask = df_universal["is_universal"]
texts_universal = df_universal.loc[universal_mask, 'text'].tolist()    # universal mentions
texts_with_attrs = df_universal.loc[~universal_mask, 'text'].tolist()  # mentions with any attributes

fw_scores = compute_fighting_words(l1=texts_universal, l2=texts_with_attrs, cv=vectorizer)

# One row per n-gram, most distinctive for universal mentions first
fw_universal = pd.DataFrame(fw_scores, columns=['word', 'score'])
fw_universal = fw_universal.sort_values('score', ascending=False)

In [None]:
#| label: fig-fw_universal_mentions
#| output: true
#| fig-cap: "Most distinctive words for mentions with no specific attributes (_universal_ mentions, left) vs. mentions with at least one economic or non-economic attribute (right). Values plotted are $z$-scores from \"fighting words\" analysis. Values above ±1.96 (vertical dashed line) can be considered significantly distinctive."

def _draw_fw_panel(ax, ranked, title, bar_color, z_crit):
    """Draw one horizontal-bar panel of fighting-words z-scores on `ax`.

    `ranked` supplies the bar lengths ('score') and tick labels ('word');
    `z_crit` is where the dashed reference line is drawn.
    """
    positions = range(len(ranked))
    ax.axvline(x=z_crit, color='black', linestyle='--', linewidth=0.8, zorder=1)
    ax.barh(positions, ranked['score'], color=bar_color, zorder=2)
    ax.set_yticks(positions)
    ax.set_yticklabels(ranked['word'])
    ax.set_xlabel('z-score', fontsize=11)
    ax.set_title(title, fontweight='bold', fontsize=12)


# The 20 most extreme n-grams at either end of the score distribution
top_negative = fw_universal.nsmallest(20, 'score').sort_values('score', ascending=False)
top_positive = fw_universal.nlargest(20, 'score').sort_values('score')

# Two panels side by side, each with its own y-axis labels
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(11, 4), sharey=False)

# Left panel: positive scores (distinctive for universal mentions)
_draw_fw_panel(
    ax1,
    top_positive,
    title=r"universal mentions (no specific attributes)",
    bar_color='#1b9e77',
    z_crit=1.96,
)
ax1.axvline(x=0, color='black', linestyle='-', linewidth=0.8)
# put the word labels on the inner (right-hand) side of the left panel
ax1.yaxis.tick_right()
ax1.yaxis.set_label_position('right')
plt.setp(ax1.get_yticklabels(), ha='left')
ax1.set_xlim(0, 120)
ax1.invert_xaxis()  # positive values extend to the left

# Right panel: negative scores (distinctive for mentions with attributes)
_draw_fw_panel(
    ax2,
    top_negative,
    title=r"mentions with specific attributes",
    bar_color='#d95f02',
    z_crit=-1.96,
)
ax2.set_xlim(-120, 0)
ax2.invert_xaxis()  # flip the axis direction after fixing the limits

plt.tight_layout()
plt.show()

We consider group mentions that are predicted to contain no attributes as "universal" group references, that is, references to groups in general without specifying which kind of people they mean exactly.
To validate that the predicted absence of attributes in these mentions indeed reflects this conceptual category, we examine the most distinctive $n$-grams of these mentions by applying the "fighting words" method [@monroe_fightin_2008] to all group mentions in our corpus, using a binary indicator of whether a mention is predicted to be a "universal" group reference as grouping variable.

@fig-fw_universal_mentions shows the top 20 most distinctive $n$-grams of "universal" group references and compares them to the top 20 most distinctive $n$-grams of mentions with at least one attribute.
The set of most distinctively "universal" terms includes tokens like "people", "society", "everyone", and similar terms that are used as generic collectivisms in the English language.
The most distinctively non-"universal" terms, in turn, include denotational group labels like "women", "students", "citizens", and "family".

In [None]:
#| label: tbl-fw_universal_mentions_examples
#| output: false
#| tbl-cap: "Examples of _universal_ group references. Values computed by summing \"fighting words\" scores as weights of mentions' tokens, normalized by number of tokens."
# z-score of every vocabulary n-gram, ordered like the vectorizer's feature columns
fw_lookup = dict(zip(fw_universal['word'], fw_universal['score']))
fw_vals = np.array([fw_lookup[f] for f in vectorizer.get_feature_names_out()])
analyzer = vectorizer.build_analyzer()

# vectorize the universal mentions
universal_mentions = df_universal[df_universal['is_universal']].reset_index(drop=True)
mentions_texts = universal_mentions['text']

X_mentions = vectorizer.transform(mentions_texts.tolist())
# binarize: every n-gram counts at most once per mention
X_mentions[X_mentions>0] = 1
# number of distinct vocabulary n-grams per mention, as a plain (n, 1) array
# so the division below does not mix ndarray and np.matrix operands
n_terms = np.asarray(X_mentions.sum(axis=1))
# sum of the z-scores of a mention's n-grams, divided by their number;
# mentions without any vocabulary n-gram yield NaN and are dropped by the
# threshold filter below
X_mentions_scores = np.asarray(X_mentions @ fw_vals[:, np.newaxis]) / n_terms

universal_mentions['score'] = X_mentions_scores[:, 0]

# copy, so that adding `text_norm` does not write into a slice of `universal_mentions`
tab = universal_mentions[universal_mentions['score']>1.96].copy()
# de-duplicate mentions that are identical after the vectorizer's normalization
tab['text_norm'] = tab['text'].apply(lambda x: ' '.join(analyzer(x)).strip())
tab = tab.drop_duplicates('text_norm').sort_values('score', ascending=False).head(400)

n_ = 20
# draw a score-weighted sample; if there are not more than `n_` candidates,
# keep all of them instead of letting `sample` raise
if len(tab) > n_:
    tab = tab.sample(n_, weights=tab['score'].abs()**2, random_state=42)

tab = tab[['text', 'score']].sort_values('score', ascending=False).reset_index(drop=True)
tab.columns = ["Mention", "$z$-score"]

# TODO: make latex table
latex_table(tab)

## Attribute category co-occurrence analyses

In [None]:
#| eval: true
#| label: tbl-n_attributes_distribution
#| output: true
#| tbl-cap: "Distribution of social group mentions by number of attributes. Rows indicate the number of attributes predicted for each mention. Columns report absolute counts ($N$) and proportions (share) for overall attributes (combining economic and non-economic), economic attributes only, and non-economic attributes only."

def _attr_count_distribution(cols):
    """Tabulate how many mentions in `df` carry 0, 1, 2, ... of the attributes in `cols`."""
    dist = df[cols].sum(axis=1).value_counts().sort_index().to_frame(name='count')
    dist['prop'] = (dist['count'] / dist['count'].sum()).round(3)
    return dist


n_attrs_stats = _attr_count_distribution(econ_attrs + nonecon_attrs)
n_econ_attrs_stats = _attr_count_distribution(econ_attrs)
n_nonecon_attrs_stats = _attr_count_distribution(nonecon_attrs)

# TODO: inspect extreme examples with ≥4 attributes
# Put the three distributions side by side; attribute counts that do not occur
# in one of them are reported as zero
combined_stats = pd.concat(
    [n_attrs_stats, n_econ_attrs_stats, n_nonecon_attrs_stats],
    axis=1,
).fillna(0)
# filling the gaps leaves the count columns as floats; cast all three back to integers
combined_stats["count"] = combined_stats["count"].astype(int)
combined_stats.columns = pd.MultiIndex.from_product(
    [
        ["overall", "economic", "non-economic"],
        ["$N$", "share"],
    ],
)
combined_stats.index.name = "$N$ attributes"

latex_table(combined_stats, index=True)

We argue that intersectionality in parties' group mentions is an interesting facet of their group focus strategies.
In this context, the question arises how to compare intersectionality patterns between groups.
In our analysis, a key question along this line is whether PRR and Green parties combine attributes differently.

There are multiple ways to quantify and compare attribute co-occurrence patterns.
Each approach has its strengths and weaknesses.
Below, we discuss three possible approaches and provide recommendations for their use.

- **Comparing conditional probabilities**:
    We can compute $\Pr(\text{attribute B} \mid \text{attribute A})$ by party family and compare the values.
    Conditional probabilities have the advantage that they are very interpretable, allowing statements like "When PRRP mentions class, 12% also mention gender."
    They thus directly answer substantive questions about co-occurrence patterns.
    Further, they do not suffer from base rate sensitivity issues like the PMI (see below).
    Subtracting the values for Green parties from those for PRR parties, for example, we obtain an indicator that is positive if PRR parties tend to combine the given attributes more frequently.
    Conditional probability differences can thus be compared across parties through simple subtraction, and the approach works well even with sparse data.
    
    The downside is that the measure is asymmetric, requiring careful interpretation. 
    Further, it does not account for statistical significance of observed differences.
    
    We therefore use it solely for _descriptive_ comparison of party families' attribute combination strategies.

- **Comparing statistical significance**:
    We can apply $\chi^2$ or Fisher's exact tests for each attribute pair to determine whether co-occurrence patterns differ significantly between party families.
    These tests provide formal hypothesis testing and control for sampling variability. 
    Effect size measures, such as Cramér's $V$, in turn, allow assessing practical significance beyond mere statistical significance.
    <!-- , and Fisher's exact test works reliably even with small cell counts. -->
    
    However, multiple comparison problems arise when testing many pairs simultaneously, requiring correction procedures like Bonferroni adjustment. 
    The tests are also sensitive to sample size, meaning that with large N, nearly everything becomes statistically significant. 
    Further, binary yes/no decisions do not capture the magnitude of differences.
    
    We therefore use significance testing for determining which attribute pair differences are statistically robust.

- **Comparing normalized Pointwise Mutual Information (nPMI)**:
    We can compute nPMI values by party family, which compare observed to expected co-occurrence under statistical independence.
    nPMI identifies unexpected patterns in both directions (positive associations where attributes co-occur more than expected, and negative associations where they co-occur less than expected). 
    Being normalized to a [-1, +1] scale, it allows comparing different attribute pairs.
    This makes the nPMI metric useful for exploratory analysis.
    
    However, the measure is hard to interpret substantively in terms of party strategy. 
    It is sensitive to base rates, and negative values tend to dominate in sparse data (as we observed in our analysis). 
    Additionally, differences between parties can be small even when the underlying patterns differ substantially.
    
    We therefore use nPMI primarily for identifying which attribute pairs warrant further investigation.


In [None]:
#| label: fig-attribute_interesectionality
#| output: true
#| fig-cap: "Co-occurrence patterns of attribute categories in intersectional social group mentions, that is, mentions that combine at least two attributes. Heatmap cells show the share of mentions where each focal attribute (rows) co-occurs with other attributes (columns). Top panel: economic attributes; bottom panel: non-economic attributes. *Note:* Values below 0.01 are not displayed."

# TODO: consider relocating this analysis (the original note was left unfinished)
# Keep only mentions that carry at least two attributes of any kind
attr_presence = df[[c for c in econ_attrs + nonecon_attrs if c in df.columns]].apply(binarize_column)
has_multi_attrs = attr_presence.sum(axis=1).ge(2)
df_with_attrs = df.loc[has_multi_attrs].copy()

# Co-occurrence breakdown for these mentions, without the 'alone' rows
cooc_breakdown = (
    compute_cooccurrence_breakdown(df_with_attrs, econ_attrs + nonecon_attrs)
    .query("cooccur_with!='alone'")
)

# Wide layout for the heatmap: focal_attr (rows) × cooccur_with (columns);
# pairs without an entry are shown as 0
prevalence_wide = cooc_breakdown.pivot_table(
    values='prevalence',
    index='focal_attr',
    columns='cooccur_with',
    aggfunc='first',
)
heatmap_data = prevalence_wide.fillna(0)
# swap column identifiers for readable category names on both axes
heatmap_data = heatmap_data.rename(
    index=attribute_category_names_map,
    columns=attribute_category_names_map,
)

fig, axes = plot_heatmap(
    x=heatmap_data,
    panel_groups=(econ_attr_names, nonecon_attr_names),
    mask_diagonal=True,
    cmin=0.01,
    cmap='RdPu',  # alternative: 'YlOrRd'
    clims=(0, 1.0),
    clegend_title='Prevalence of co-occurrence',
)
# label the two panels with their dimension
for panel_idx, dim_label in enumerate(("economic\n", "non-economic\n")):
    axes[panel_idx].set_ylabel(dim_label, fontweight='bold')
plt.show()

@fig-attribute_interesectionality allows examining specific intersectionality patterns by focusing on the 21.6% of non-universal mentions that feature at least two attributes and are thus intersectional.
The heatmap reveals several notable patterns in how parties combine different attribute categories when making intersectional group references.

Among economic attributes, _employment status_ emerges as the most "connective" attribute, frequently co-occurring with other economic categories such as _income/wealth/economic status_ and _occupation/profession_, as well as with non-economic attributes like _age_ and _health_.
In contrast, _education level_ shows more selective co-occurrence patterns, primarily combining with _age_ and _nationality_.

The non-economic attribute patterns show even stronger associations.
_Age_ and _family_ exhibit particularly high co-occurrence rates, potentially reflecting parties' tendency to frame generational concerns within family contexts (e.g., "young families").
Similarly, _ethnicity_, _nationality_, and _religion_ form a tight cluster of frequently combined attributes, suggesting parties often invoke multiple aspects of cultural identity simultaneously.
_Place/location_ also frequently appears with _nationality_, indicating geographic and national identity are often linked in party discourse.

Cross-dimensional intersectionality patterns reveal strategic combinations that bridge economic and non-economic concerns.
_Health_ serves as a bridge attribute, frequently combined with _income/wealth/economic status_, possibly reflecting parties' attention to health inequalities.
_Shared values/mentalities_ often co-occurs with _occupation/profession_, suggesting parties frame certain occupational groups through ideological lenses.

Notably, some attributes rarely appear together even in intersectional mentions.
_Gender/sexuality_ shows relatively low co-occurrence with most economic attributes, potentially indicating that parties treat gender concerns as distinct from economic policy domains.
These patterns suggest that while parties do engage in intersectional group appeals, they follow certain discursive templates that systematically combine some attributes while keeping others separate.



In [None]:
# Restrict the data to mentions that carry at least one attribute
attr_presence = df[[c for c in econ_attrs + nonecon_attrs if c in df.columns]].apply(binarize_column)
has_any_attrs = attr_presence.sum(axis=1).ge(1)
df_with_attrs = df.loc[has_any_attrs].copy()

# Pairwise association measures for all attributes (within and across dimensions)
assoc_all = compute_attribute_associations(df_with_attrs, econ_attrs + nonecon_attrs)

# Pull out the individual measures:
#   cpr_all  - stored under "p_b_given_a"
#   pmi_all  - PMI
#   ppmi_all - positive PMI (co-occurrence above chance)
#   npmi_all - normalized PMI (range [-1, 1])
cpr_all, pmi_all, ppmi_all, npmi_all = (
    assoc_all[measure] for measure in ("p_b_given_a", "pmi", "ppmi", "npmi")
)

### Conditional probabilities of co-occurrence

An alternative approach to quantitatively describing attribute co-occurrence patterns is to compute the conditional probabilities of co-occurrence.
We can compute $\Pr(\text{attribute B} \mid \text{attribute A})$ for attribute category pairs and compare these values.
Conditional probabilities have the advantage that they are very interpretable, allowing statements like "When a party uses economic status to describe a group, the probability that it also uses gender/sexuality in the group mention is 0.12."
Conditional probabilities thus directly answer substantive questions about co-occurrence patterns.
Further, they do not suffer from base rate sensitivity issues.
    
The downside is that the measure is asymmetric, requiring careful interpretation. 
Further, it does not account for statistical significance of observed differences.
We therefore use it solely for _descriptive_ comparison of party families' attribute combination strategies.

In [None]:
#| label: fig-attribute_cpr_overall
#| output: true
#| fig-cap: 'Conditional probabilities of attribute co-occurrence in social group mentions. Heatmap cells show P(B|A), the probability of mentioning attribute B given that attribute A is mentioned. Rows indicate "attribute A" (the conditioning attribute) and columns indicate "attribute B" (the outcome attribute). *Note:* Values below 0.01 are not displayed.'

# Show readable category names instead of column identifiers on both axes
cpr_all_renamed = cpr_all.rename(
    index=attribute_category_names_map,
    columns=attribute_category_names_map,
)

# Heatmap settings: keep the given row/column order (no clustering)
_cpr_heatmap_kwargs = dict(
    panel_groups=(econ_attr_names, nonecon_attr_names),
    cluster_rows=False,
    cluster_cols=False,
    cmin=0.01,
    cmap='RdPu',  # alternative: 'YlOrRd'
    clims=(0, 1),
    clegend_title='conditional probability',
)
fig, axes = plot_heatmap(x=cpr_all_renamed, **_cpr_heatmap_kwargs)

# label the two panels with their dimension
for panel_idx, dim_label in enumerate(("economic\n", "non-economic\n")):
    axes[panel_idx].set_ylabel(dim_label, fontweight='bold')
plt.show()

In [None]:
# Pivot the conditional-probability matrix to long format, one row per attribute pair.
# Only the strictly lower triangle is kept so that every pair of attributes
# appears once. The mask is applied with `where` instead of writing through
# `.values`, which is not guaranteed to modify the underlying frame.
# NOTE(review): Pr(b | a) is asymmetric, so this keeps a single direction per
# pair — confirm that this is intended.
keep_cells = np.tril(np.ones(cpr_all.shape, dtype=bool), k=-1)
cpr_values = cpr_all.where(keep_cells)
cpr_values = cpr_values.reset_index().melt(id_vars='attr_a', var_name='attr_b', value_name='value')
cpr_values = cpr_values.dropna(subset=['value']).sort_values('value', ascending=False)

# Collect example mentions for the (up to) 10 pairs with the highest
# conditional probability above 0.1
max_examples = 5
example_cols = [f'example {i}' for i in range(1, max_examples + 1)]

# Loop-invariant: mentions that feature exactly two attributes overall, so
# that sampled examples contain the pair in question and nothing else
has_two_labels = df[label_cols].sum(axis=1) == 2

top_cpr_examples = []

for a, b, v in cpr_values.query("value > 0.1").head(10).itertuples(index=False):

    # Mentions where both attributes are present and no other attribute is
    has_both = df[[a, b]].apply(binarize_column).sum(axis=1) == 2
    idxs = has_both & has_two_labels
    n_examples = min(max_examples, int(idxs.sum()))

    # Sample random examples
    examples = df.loc[idxs].sample(n=n_examples, random_state=42)['text'].tolist()
    # pad with empty strings so pairs with fewer qualifying mentions still fill every column
    examples += [''] * (max_examples - len(examples))

    top_cpr_examples.append({
        'attribute a': attribute_category_names_map.get(a, a),
        'attribute b': attribute_category_names_map.get(b, b),
        'Pr(b | a)': v,
        **dict(zip(example_cols, examples)),
    })

# Passing the column names explicitly keeps the table layout even if no pair qualified
df_top_cpr_examples = pd.DataFrame(
    top_cpr_examples,
    columns=['attribute a', 'attribute b', 'Pr(b | a)'] + example_cols,
)

In [None]:
#| label: tbl-top_cpr_examples
#| output: true
#| tbl-cap: "Top attribute combinations by conditional probability $Pr(b | a)$, that is, the probability of attribute $b$ being present in a mention given that attribute $a$ is present. Only combinations with $Pr(b | a) > 0.1$ are included. Example mentions are randomly sampled from mentions that feature both attributes and no other attributes (i.e., mentions that only feature these two attributes)."

# Render the table of top attribute pairs and their example mentions (`df_top_cpr_examples`)
latex_table(df_top_cpr_examples)

<!--
**Strong positive associations (green cells, nPMI > 0.4)** indicate that when parties mention groups with one attribute, they systematically combine it with another:
The strongest association is between *place/location* and *religion* (nPMI = 0.65), suggesting parties often characterize religious groups by geographic identity (e.g., "Muslims in our cities").
*Ethnicity* and *religion* also strongly co-occur (nPMI = 0.54), reflecting how parties conflate ethnic and religious identities in their group appeals.
Among economic attributes, *income/wealth/economic status* and *employment status* co-occur (nPMI = 0.44), indicating parties link material conditions to labor market position.
*Ecology of group* and *shared values/mentalities* associate positively (nPMI = 0.41), suggesting parties frame group characteristics through both environmental and ideological lenses.
*Age* and *family* co-occur (nPMI = 0.36), as parties often connect generational identities to family structures.

**Strong negative associations (magenta cells, nPMI < -0.6)** reveal systematic mutual exclusion, where attributes rarely appear together:
*Ecology of group* shows strong negative associations with multiple attributes including *family* (-0.75), *education level* (-0.70), *class membership* (-0.66), and *ethnicity* (-0.66), suggesting ecological characterizations constitute a distinct mode of group appeals that excludes other dimensions.
*Gender/sexuality* and *class membership* are mutually exclusive (-0.74), indicating parties rarely combine gender/sexual identity with class-based characterizations.
Economic status dimensions avoid religious characterizations: *income/wealth/economic status* ↔ *religion* (-0.71) and *employment status* ↔ *place/location* (-0.72).
*Ethnicity* and *class membership* rarely co-occur (-0.68), suggesting parties treat ethnic and class identities as alternative frames rather than intersecting dimensions.

**Implications for party communication strategies:**
These patterns reveal that parties employ distinct "templates" for group characterization: religious-geographic appeals, economic-material appeals, and ecological-values appeals operate largely independently.
The mutual exclusion of gender/sexuality from economic attributes suggests parties compartmentalize identity politics and economic grievances rather than acknowledging their intersection.
The strong positive associations indicate established discursive shortcuts: when parties invoke certain attributes, they predictably combine them with specific others, reflecting broader cultural associations (e.g., ethnicity-religion) or analytical frameworks (e.g., income-employment).
The systematic avoidance of certain combinations (e.g., ecology with class, gender with economic status) points to blind spots in how parties conceptualize group diversity, potentially missing important intersectional identities like working-class women or economically marginalized ethnic minorities.
-->