In [None]:
import pandas as pd
import json
import os

In [None]:
# Directory holding the exported analysis artifacts (relative to the
# notebook's working directory).
data_dir = "../topics"

# Load the main dataframe; the first CSV column is used as the index.
topics_per_party_df = pd.read_csv(os.path.join(data_dir, "topics_per_party_analysis.csv"), index_col=0)

# Load the topic-party to speech_id mapping.
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale encoding.
with open(os.path.join(data_dir, "topic_party_to_speech_id_mapping.json"), 'r', encoding="utf-8") as f:
    topic_party_mapping_raw = json.load(f)

# Convert the flat string keys back to (topic, party) tuple keys.
topic_party_to_speech_id = {}
for key, speech_ids in topic_party_mapping_raw.items():
    # Keys have the form "topic_X_party_Y". Split at most three times so a
    # party label that itself contains underscores is kept whole instead of
    # being truncated at its first underscore.
    parts = key.split('_', 3)
    topic = int(parts[1])
    party = parts[3]
    topic_party_to_speech_id[(topic, party)] = speech_ids

# Load other mappings.
# JSON object keys are always strings, so topic numbers are cast back to int.
with open(os.path.join(data_dir, "topic_to_speech_id_mapping.json"), 'r', encoding="utf-8") as f:
    topic_to_speech_id = {int(k): v for k, v in json.load(f).items()}

# NOTE: keys of this mapping stay strings, exactly as json.load returns them.
with open(os.path.join(data_dir, "speech_id_to_party_mapping.json"), 'r', encoding="utf-8") as f:
    speech_id_to_party = json.load(f)

with open(os.path.join(data_dir, "analysis_metadata.json"), 'r', encoding="utf-8") as f:
    metadata = json.load(f)

# Helper functions
def get_speech_ids_by_topic(topic_num):
    """Return the speech_ids recorded for ``topic_num``.

    Looks the topic up in the module-level ``topic_to_speech_id`` mapping
    and returns the stored value as-is (not a copy). A topic that is not
    present yields a new empty list.
    """
    try:
        return topic_to_speech_id[topic_num]
    except KeyError:
        return []

def get_speech_ids_by_topic_and_party(topic_num, party):
    """Return the speech_ids stored for one (topic, party) pair.

    The pair is looked up as a tuple key in the module-level
    ``topic_party_to_speech_id`` mapping. A pair that is not present
    yields a new empty list.
    """
    lookup_key = (topic_num, party)
    if lookup_key in topic_party_to_speech_id:
        return topic_party_to_speech_id[lookup_key]
    return []

def get_speech_ids_by_party_in_topic(topic_num):
    """Group the speech_ids of one topic by party.

    Reads the topic's speech_ids from the module-level ``topic_to_speech_id``
    mapping and resolves each one through ``speech_id_to_party``.

    Args:
        topic_num: Topic number used as key into ``topic_to_speech_id``.

    Returns:
        A dict mapping each party to the list of speech_ids (in their
        original order and original type) attributed to it. speech_ids with
        no known party are skipped; an unknown topic yields an empty dict.
    """
    result = {}

    for speech_id_val in topic_to_speech_id.get(topic_num, []):
        if speech_id_val in speech_id_to_party:
            party = speech_id_to_party[speech_id_val]
        else:
            # speech_id_to_party is loaded from JSON, where object keys are
            # always strings. If the ids in the topic list are numbers, the
            # direct lookup can never match, so retry with the string form
            # rather than silently dropping every speech.
            text_key = str(speech_id_val)
            if text_key not in speech_id_to_party:
                continue
            party = speech_id_to_party[text_key]
        result.setdefault(party, []).append(speech_id_val)

    return result

# Report a short summary of what was loaded. Each value is computed right
# before its line is printed.
print("Data loaded successfully!")

df_shape = topics_per_party_df.shape
print(f"Topics per party dataframe shape: {df_shape}")

n_topic_party_pairs = len(topic_party_to_speech_id)
print(f"Total topic-party combinations: {n_topic_party_pairs}")

n_topics = len(topic_to_speech_id)
print(f"Total topics: {n_topics}")

congress_range = metadata['analysis_info']['congress_range']
print(f"Analysis covers Congress {congress_range}")