# Bill Position Dataset Stats

In [1]:
import pandas as pd

In [2]:
# Load the bill position dataset from its CSV file (path is relative to the
# notebook's working directory).
bill_position_dataset = pd.read_csv(
    'bill_position_dataset_input/bill_position_dataset.csv'
)

In [3]:
def _ordered_label_counts(labels, preferred_order):
    """Count label occurrences, listing the `preferred_order` labels first.

    Labels in `preferred_order` that never occur are reported with a count of
    0. Labels that occur but are missing from `preferred_order` are appended
    after the preferred ones (in `value_counts` order) rather than being
    silently dropped, so no observed category disappears from the table.
    """
    counts = labels.value_counts()
    unexpected = [label for label in counts.index if label not in preferred_order]
    return counts.reindex(list(preferred_order) + unexpected, fill_value=0)


def _print_size_section(header, frame):
    """Print `header` followed by the distinct `lob_id` / `bill_id` counts of `frame`."""
    num_lob = frame['lob_id'].nunique()
    num_bill = frame['bill_id'].nunique()
    print(header)
    print(f"Unique Interest Groups: {num_lob}")
    print(f"Unique Bills:           {num_bill}")


def print_annotation_summary(df):
    """Print dataset-size statistics per annotation source and overall.

    For each annotation source ('llm', then 'gnn') this prints the number of
    distinct `lob_id` and `bill_id` values among that source's rows, followed
    by label counts: `label_5_class` for 'llm' rows and `label_3_class` for
    'gnn' rows. A final section reports the same statistics over all rows of
    `df`, using `label_3_class`.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns `annotation_source`, `lob_id`, `bill_id`,
        `label_5_class` and `label_3_class`.

    Returns
    -------
    None
        Everything is written to stdout.

    Raises
    ------
    KeyError
        If one of the required columns is missing from `df`.
    """
    # Display order for the label tables.
    # NOTE(review): the "5-class" ordering lists only four labels -- confirm
    # whether a fifth class is intentionally absent from this dataset. Any
    # label not listed here is still printed (after the listed ones).
    label_5_order = ['Support', 'Oppose', 'Amend', 'Monitor']
    label_3_order = ['Support', 'Oppose', 'Engage']

    # annotation_source value -> (label column, display order, table heading)
    per_source = {
        'llm': ('label_5_class', label_5_order, "\nLabel Counts (5-class):"),
        'gnn': ('label_3_class', label_3_order, "\nLabel Counts (3-class):"),
    }

    for source, (column, order, heading) in per_source.items():
        # Restrict to the rows produced by this annotation source.
        df_source = df[df['annotation_source'] == source]
        _print_size_section(
            f"\n=== Annotation Source: {source.upper()} ===", df_source
        )

        label_counts = _ordered_label_counts(df_source[column], order)
        print(heading)
        print(label_counts.to_string())

    # Combined summary across all rows; the 3-class label is the one used
    # here for both sources.
    _print_size_section("\n=== Annotation Source: OVERALL (Combined) ===", df)

    overall_counts = _ordered_label_counts(df['label_3_class'], label_3_order)
    print("\nLabel Counts (3-class overall):")
    print(overall_counts.to_string())


## Table 1. Size of bill position dataset

In [4]:
# Print the per-source and combined size statistics reported in Table 1.
print_annotation_summary(df=bill_position_dataset)


=== Annotation Source: LLM ===
Unique Interest Groups: 6495
Unique Bills:           22820

Label Counts (5-class):
label_5_class
Support    25099
Oppose      5706
Amend       9317
Monitor    42299

=== Annotation Source: GNN ===
Unique Interest Groups: 11185
Unique Bills:           36824

Label Counts (3-class):
label_3_class
Support     54204
Oppose       5456
Engage     137023

=== Annotation Source: OVERALL (Combined) ===
Unique Interest Groups: 12032
Unique Bills:           42475

Label Counts (3-class overall):
label_3_class
Support     79303
Oppose      11162
Engage     188639
