## 0) Imports
We import the libraries needed for exploration: `pandas` for data handling and `textwrap` for readable printing.

In [None]:
import pandas as pd
import textwrap

## 1) Check Progress of `gpt_score`
Loads the GPT-scored file, counts how many rows have a filled vs. missing `gpt_score`, and then converts the column to numeric.

In [None]:
# Read the GPT-scored speeches (repo-relative path)
csv_path = r"../../data/electoralTerm_19_scored.csv"
df = pd.read_csv(csv_path)

# Progress tally on the raw column, i.e. before the numeric coercion below:
# any non-null entry counts as "filled", the remainder as "missing".
filled_count = df['gpt_score'].count()
empty_count = len(df) - filled_count

print(f"✅ gpt_score filled: {filled_count} rows")
print(f"⏳ gpt_score missing: {empty_count} rows")

# Coerce gpt_score to a numeric dtype; values that cannot be parsed become NaN
df['gpt_score'] = pd.to_numeric(df['gpt_score'], errors='coerce')

## 2) Check Progress of `gemini_score`
Loads the Gemini-scored file, counts how many rows have a filled vs. missing `gemini_score`, and prints a sample of rows for inspection.

In [None]:
# Load the Gemini-scored CSV (repo-relative path).
# NOTE: this rebinds `df`, so the cells below operate on the Gemini-scored frame.
csv_path = r"../../data/electoralTerm_19_scored_gemini.csv"
df = pd.read_csv(csv_path)

# Count non-empty and empty gemini_score values
filled_count = df['gemini_score'].notna().sum()
empty_count = df['gemini_score'].isna().sum()

print(f"✅ gemini_score filled: {filled_count} rows")
print(f"⏳ gemini_score missing: {empty_count} rows")

# Preview a sample of rows (.loc slices by label, so both 1000 and 1030 are included).
# gemini_score is shown next to the GPT columns so this section's own results
# are visible in the preview instead of only the GPT scores.
print(df.loc[1000:1030, ['Party', 'gpt_score', 'gpt_4.1_score', 'gemini_score']])

## 3) Inspect Speech Content
Example of wrapping long text to view a single speech more comfortably.

In [None]:
# Pull the text of a single speech (row label 509) out of the frame
speech = df.loc[509, 'speechContent']

# Break the text at word boundaries into lines of at most 100 characters,
# re-join them with newlines, and print the result
wrapped = "\n".join(textwrap.wrap(speech, width=100))
print(wrapped)

## 4) Example Analysis – Average Score for CDU/CSU
Converts `gemini_score` to numeric and computes the mean for CDU/CSU speeches.

In [None]:
# Make sure gemini_score is numeric; values that cannot be parsed become NaN
df['gemini_score'] = pd.to_numeric(df['gemini_score'], errors='coerce')

# Keep only rows whose Party is exactly "CDU/CSU" (not "CDU") and that have a score
cdu_df = df[(df['Party'] == "CDU/CSU") & (df['gemini_score'].notna())]

# Average the Gemini scores; the printed label names Gemini because the value
# is computed from gemini_score (it previously said "GPT", which was misleading)
average_score = cdu_df['gemini_score'].mean()
print(f"📊 Average Gemini score for CDU/CSU: {average_score:.2f}")