# Data Analysis & Output – Notebook

Combines GPT and Gemini scores, computes derived metrics, explores distributions/correlations, and produces figures.



## 0) Imports

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import pearsonr, spearmanr, kruskal
import itertools

# Global plot settings (optional): raise the figure resolution for every
# plot drawn later in the notebook.
_figure_rc = {"figure.dpi": 120}
sns.set(rc=_figure_rc)

## 1) Load & Merge Data
Reads the analysis, base, and Gemini score files, left-merges the Gemini scores onto the base table by `id` to form `df_combined`, and adds derived columns.

In [None]:
## Load the analysis table.
## NOTE(review): `df` is not referenced again in the cells visible here --
## confirm whether this load is still needed.
file_path = r"../../data/electoralTerm_19_analysis.csv"
df = pd.read_csv(file_path)

## Load the base dataframe. It must already carry 'gpt_score', 'gpt_4.1_score',
## 'Party' and 'count_words', because the Gemini files only contribute one
## score column each and those names are read from df_combined below.
base_path = r"../../data/electoralTerm_19_scored.csv"
df_base = pd.read_csv(base_path)

## Load the Gemini 2.0 scores, keeping only the join key and the score,
## renamed so the two Gemini versions do not collide after merging.
gemini_2_path = r"../../data/electoralTerm_19_scored_gemini.csv"
df_gemini_2 = (
    pd.read_csv(gemini_2_path)[['id', 'gemini_score']]
    .rename(columns={'gemini_score': 'gemini_2.0_score'})
)

## Load the Gemini 2.5 scores (same shape as above).
gemini_2_5_path = r"../../data/electoralTerm_19_scored_gemini_2_5.csv"
df_gemini_2_5 = (
    pd.read_csv(gemini_2_5_path)[['id', 'gemini_score']]
    .rename(columns={'gemini_score': 'gemini_2.5_score'})
)

## Merge all dataframes on 'id'.
## how='left' keeps every base row even when a Gemini score is missing.
## validate='many_to_one' makes pandas raise MergeError if a Gemini file has
## duplicate ids; without it such duplicates would silently duplicate base
## rows and bias every statistic computed below.
df_combined = df_base.merge(df_gemini_2, on='id', how='left', validate='many_to_one')
df_combined = df_combined.merge(df_gemini_2_5, on='id', how='left', validate='many_to_one')

## Create calculated columns.
## mean(axis=1) uses pandas' default NaN skipping, so a row with only one of
## the two scores gets that score as its average; the deviations are NaN
## whenever either input is missing.
df_combined['gpt_average'] = df_combined[['gpt_score', 'gpt_4.1_score']].mean(axis=1)
df_combined['gpt_deviation'] = (df_combined['gpt_score'] - df_combined['gpt_4.1_score']).abs()

df_combined['gemini_average'] = df_combined[['gemini_2.0_score', 'gemini_2.5_score']].mean(axis=1)
df_combined['gemini_deviation'] = (df_combined['gemini_2.0_score'] - df_combined['gemini_2.5_score']).abs()

## Unlike the two columns above, this one is a SIGNED difference
## (positive = GPT average is higher than the Gemini average).
df_combined['gpt_gemini_deviation'] = df_combined['gpt_average'] - df_combined['gemini_average']

# Peek at structure
display(df_combined.shape)
display(df_combined[['gpt_score', 'gpt_4.1_score', 'gemini_2.0_score', 'gemini_2.5_score']].describe())
display(df_combined['Party'].value_counts())
print(df_combined.columns.tolist())

## 2) Descriptive Statistics per Model

In [None]:
## Per-model summary statistics
# The four model score columns analysed in this notebook.
model_cols = [
    'gpt_score',
    'gpt_4.1_score',
    'gemini_2.0_score',
    'gemini_2.5_score',
]

# describe() returns one column per model; transpose so that each model
# becomes a row and the statistics become columns.
descriptive_stats = df_combined.loc[:, model_cols].describe().transpose()
display(descriptive_stats)

## 3) Distributions

In [None]:
## Overlaid kernel-density estimates, one curve per model
plt.figure(figsize=(12, 6))

# Each curve is drawn by its own kdeplot call on a single score column.
for score_name, score_values in df_combined[model_cols].items():
    sns.kdeplot(score_values, label=score_name, fill=True, common_norm=False)

plt.gca().set(
    title='Ideological Score Distributions by Model',
    xlabel='Ideological Score (1–10)',
    ylabel='Density',
)
plt.legend()
plt.tight_layout()
plt.show()

## 4) Inter-Model Correlation

In [None]:
## Inter-model correlation
# Full Pearson correlation matrix across the model score columns.
correlation_matrix = df_combined[model_cols].corr(method='pearson')
print(correlation_matrix)

# For every unordered pair of models, report r, its p-value and the degrees
# of freedom (n - 2), using only the rows where both scores are present.
results = []
for first, second in itertools.combinations(model_cols, 2):
    paired = df_combined[[first, second]].dropna()
    r_value, p_value = pearsonr(paired[first], paired[second])
    results.append((first, second, r_value, p_value, len(paired) - 2))

corr_df = pd.DataFrame(
    results,
    columns=['Variable 1', 'Variable 2', 'r', 'p-value', 'df'],
)
print(corr_df)

# Annotated heatmap; the colour scale is fixed to the 0.8-1.0 range.
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    vmin=0.8,
    vmax=1,
)
plt.title('Pearson Correlation Matrix: Model Ideological Scores')
plt.tight_layout()
plt.show()

## 5) Model Agreement & Variability

In [None]:
## Model agreement & variability
# Row-wise standard deviation across the model score columns: one value per
# speech describing how much the models disagree on it.
df_combined['model_std'] = df_combined[model_cols].std(axis='columns')

# Summary of the per-speech spread.
std_per_speech = df_combined['model_std']
print(std_per_speech.describe())

# Histogram (30 bins) with a KDE overlay.
sns.histplot(std_per_speech, bins=30, kde=True)
plt.gca().set(
    title='Distribution of Inter-Model Standard Deviation per Speech',
    xlabel='Standard Deviation Across Models',
    ylabel='Frequency',
)
plt.tight_layout()
plt.show()

## 6) Bland–Altman: GPT vs Gemini

In [None]:
## Bland-Altman plot of GPT vs Gemini
# Compares the two model families on their per-speech average scores.
model_a = df_combined['gpt_average']
model_b = df_combined['gemini_average']

# Per-speech mean of the two methods (x axis) and their difference (y axis).
mean_scores = (model_a + model_b) / 2
diff_scores = model_a - model_b

# Bias and spread of the differences. The limits of agreement use the SAMPLE
# standard deviation (ddof=1); np.std defaults to ddof=0, which slightly
# understates the limits. Series.mean/std skip NaN differences.
mean_diff = diff_scores.mean()
std_diff = diff_scores.std(ddof=1)
upper_limit = mean_diff + 1.96 * std_diff
lower_limit = mean_diff - 1.96 * std_diff

# Plot. The bias line and the limits get distinct colours so that the three
# legend entries can be told apart.
plt.figure(figsize=(10, 6))
plt.scatter(mean_scores, diff_scores, alpha=0.5)
plt.axhline(mean_diff, linestyle='--', color='black', label=f'Mean diff = {mean_diff:.2f}')
plt.axhline(upper_limit, linestyle='--', color='red', label='+1.96 SD')
plt.axhline(lower_limit, linestyle='--', color='red', label='-1.96 SD')
plt.title('Bland-Altman Plot: GPT vs Gemini')
plt.xlabel('Mean Score (GPT & Gemini)')
plt.ylabel('Difference (GPT - Gemini)')
plt.legend()
plt.tight_layout()
plt.show()

## 7) Speech Length vs Inter-Model Deviation

In [None]:
## Speech length vs inter-model deviation
# Scatter of word count against the per-speech model spread, with a linear
# trend line drawn on top (regplot with scatter=False adds only the fit).
plt.figure(figsize=(10, 6))
sns.scatterplot(data=df_combined, x='count_words', y='model_std', alpha=0.3)
sns.regplot(data=df_combined, x='count_words', y='model_std', scatter=False)
plt.title('Relationship Between Speech Length and Inter-Model Deviation')
plt.xlabel('Word Count (count_words)')
plt.ylabel('Standard Deviation Across Models (model_std)')
plt.tight_layout()
plt.show()

# Correlation analysis.
# 'model_std' is NaN for any speech with fewer than two model scores (the
# Gemini scores are left-merged and may be missing), and NaN inputs would
# otherwise make the correlation results NaN. Keep complete pairs only,
# matching the dropna used for the pairwise model correlations.
length_vs_std = df_combined[['count_words', 'model_std']].dropna()
pearson_corr, pearson_p = pearsonr(length_vs_std['count_words'], length_vs_std['model_std'])
spearman_corr, spearman_p = spearmanr(length_vs_std['count_words'], length_vs_std['model_std'])
print(f"Pearson correlation: r = {pearson_corr:.3f}, p = {pearson_p:.3e}")
print(f"Spearman correlation: r = {spearman_corr:.3f}, p = {spearman_p:.3e}")

## 8) Party-Level Analysis & Plots

In [None]:
## Party-level analysis
score_cols = ['gpt_score', 'gpt_4.1_score', 'gemini_2.0_score', 'gemini_2.5_score']

# Per party and per model: mean, standard deviation, median and the first
# and third quartiles. The quartiles stay as anonymous lambdas so the
# resulting column labels are the same as before this rewrite.
party_stats = df_combined.groupby('Party')[score_cols].agg({
    col: ['mean', 'std', 'median', lambda x: x.quantile(0.25), lambda x: x.quantile(0.75)]
    for col in score_cols
})
print(party_stats)

# Boxplot of the Gemini 2.5 scores by party. The title previously said
# "GPT-4.1" although the plotted column is 'gemini_2.5_score'.
plt.figure(figsize=(14, 8))
sns.boxplot(data=df_combined, x='Party', y='gemini_2.5_score')
plt.title('Distribution of Gemini 2.5 Ideological Scores by Party')
plt.ylabel('Ideological Score (1 = far left, 10 = far right)')
plt.xlabel('Political Party')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Party-level means of the GPT average, the Gemini average and their signed
# difference; reset_index() turns 'Party' back into a regular column.
model_diff = (
    df_combined
    .groupby('Party')[['gpt_average', 'gemini_average', 'gpt_gemini_deviation']]
    .mean()
    .reset_index()
)
print(model_diff)

# One line per model family across parties, with a dashed reference line at 5.
plt.figure(figsize=(10, 6))
party_axis = model_diff['Party']
for series_name in ('gpt_average', 'gemini_average'):
    plt.plot(party_axis, model_diff[series_name], marker='o', label=series_name)
plt.axhline(5, linestyle='--', alpha=0.7)
plt.gca().set(xlabel='Party', ylabel='Average Score')
plt.xticks(rotation=45)
plt.legend()
plt.tight_layout()
plt.show()

# Radar plot: one spoke per party, one closed polygon per model family.
labels = model_diff['Party'].tolist()
models = ['gpt_average', 'gemini_average']

# Evenly spaced spoke angles; the first angle is repeated at the end so the
# plotted line returns to its starting point.
spoke_angles = np.linspace(0, 2 * np.pi, len(labels), endpoint=False).tolist()
angles = spoke_angles + spoke_angles[:1]

fig, ax = plt.subplots(figsize=(8, 8), subplot_kw={'polar': True})
for model in models:
    party_means = model_diff[model].tolist()
    values = party_means + party_means[:1]  # close the polygon
    ax.plot(angles, values, label=model)
    ax.fill(angles, values, alpha=0.1)

# Rotate the zero angle by pi/2 and reverse the angular direction.
ax.set_theta_offset(np.pi / 2)
ax.set_theta_direction(-1)
ax.set_thetagrids(np.degrees(angles[:-1]), labels)
ax.set_title('Radar Plot of Average Ideological Scores by Party')
plt.legend(loc='upper right', bbox_to_anchor=(1.3, 1.1))
plt.tight_layout()
plt.show()

## 9) Normality Checks & Non-Parametric Test

In [None]:
## Check the normality assumption, then test party differences
# Histogram with KDE: GPT-4.1 Scores
sns.histplot(df_combined['gpt_4.1_score'], kde=True)
plt.title("Histogram with KDE: GPT-4.1 Scores")
plt.xlabel("Ideological Score")
plt.ylabel("Frequency")
plt.show()

# Q-Q plot for GPT-4.1 scores against a normal distribution.
# Missing scores are dropped first so they cannot distort the sorted
# sample quantiles.
import scipy.stats as stats
stats.probplot(df_combined['gpt_4.1_score'].dropna(), dist="norm", plot=plt)
plt.title("Q-Q Plot: GPT-4.1 Scores")
plt.show()

# Kruskal-Wallis across parties for GPT-4.1 (rank-based, so it does not rely
# on the normality checked above).
# Rows lacking a party or a score are removed before grouping: a NaN party
# would otherwise produce an empty group (NaN == NaN is False) and NaN scores
# would propagate into the statistic.
party_scores = df_combined[['Party', 'gpt_4.1_score']].dropna()
groups = [scores for _, scores in party_scores.groupby('Party')['gpt_4.1_score']]
stat, p = kruskal(*groups)
# Scientific notation keeps very small p-values from printing as 0.0000.
print(f"Kruskal-Wallis H = {stat:.3f}, p = {p:.3e}")