In [None]:
# Imports and configuration
import os
import pandas as pd
import numpy as np

# Database and backend imports
from personas_backend.db.db_handler import DatabaseHandler
from personas_backend import ACTIVE_SCHEMA

# Evaluations package imports
from evaluations import data_access, table_demographics

# Configuration: the schema name is read from the `default_schema` environment
# variable and falls back to "personality_trap" when the variable is unset.
SCHEMA = os.getenv("default_schema", "personality_trap")

# Echo the notebook-level schema next to the ACTIVE_SCHEMA value imported from
# personas_backend so both are visible in the cell output.
print(f"Using schema: {SCHEMA}", f"Active schema: {ACTIVE_SCHEMA}", sep="\n")

In [None]:
# Create the database handler and keep its connection object for the cells
# below. `DatabaseHandler` is already imported in the imports cell at the top
# of the notebook, so it is not imported a second time here.
db_handler = DatabaseHandler()
conn = db_handler.connection

# Show the connection object as the cell output.
conn

In [None]:
# Load the persona population with the schema and connection set up above.
#
# `data_access`, `SCHEMA` and `conn` come from the imports/config cell and the
# connection cell earlier in the notebook. They are deliberately not redefined
# here: hard-coding SCHEMA again would silently bypass the `default_schema`
# environment override that the config cell reports, rebinding ACTIVE_SCHEMA
# would shadow the value imported from personas_backend, and instantiating
# another DatabaseHandler would create a second handler next to the existing one.
#
# The loaded frame is expected to expose the `model_print` and
# `population_print` columns used for display names in the tables below.
with conn.connect() as connection:
    population_raw = data_access.load_population(connection, schema=SCHEMA)

# Basic data quality checks. A missing display column is reported in the
# output instead of raising, so the cell still shows what was loaded.
model_count = (
    population_raw["model_print"].nunique()
    if "model_print" in population_raw.columns
    else "model_print column not found"
)
population_count = (
    population_raw["population_print"].nunique()
    if "population_print" in population_raw.columns
    else "population_print column not found"
)
print(f"Total personas loaded: {len(population_raw)}")
print(f"Unique models: {model_count}")
print(f"Unique populations: {population_count}")

# Remove rows with missing description word counts (data quality). The raw
# frame is kept under its own name so this step can be inspected or re-run
# without reloading from the database.
population_df = population_raw.dropna(subset=["word_count_description"])
print(f"After removing missing word counts: {len(population_df)} personas")

population_df.head()

# Binary Demographic Analysis

This analysis treats each demographic value as an independent binary variable and performs statistical tests to compare proportions between populations.

## Approach:
1. **Transform categorical demographics into binary variables** - Each demographic value becomes a 0/1 variable
2. **Combine repetitions** - Concatenate all data from repetitions for each population
3. **Statistical testing** - Use z-test for proportions to compare binary outcomes between populations
4. **Handle edge cases** - Manage cases with zero variance appropriately

This approach provides clear, interpretable results for each demographic characteristic.

In [None]:
# Demographic attributes considered by the binary analysis.
demographic_columns = ['gender', 'political_orientation', 'race', 'religious_belief', 'sexual_orientation']

# Build the analysis frame as a single chain. Filtering with a boolean mask and
# then assigning a column to the filtered frame (the previous step-by-step
# form) is the pattern that triggers pandas' SettingWithCopyWarning; `.assign`
# works on its own copy, so `population_df` is left untouched and no warning
# is raised.
ttest_df = (
    population_df
    # Keep only rows whose `repetitions` value is at most 5.
    .loc[population_df['repetitions'] <= 5]
    # Fold the Buddhist and Hinduist labels into the shared 'Others' category.
    # NOTE(review): regex=True treats the keys as patterns and rewrites matching
    # substrings, not only exact values — confirm this is intended rather than
    # an exact-value mapping.
    .assign(
        religious_belief=lambda df: df['religious_belief'].replace(
            {'Buddhist': 'Others', 'Hinduist': 'Others'}, regex=True
        )
    )
)

# Row counts per model/population combination, shown as the cell output.
ttest_df.groupby(['model', 'population', 'population_print'], as_index=False).size()

# Tables 1-3: Demographics by Model and Condition

This section generates the demographic tables (Tables 1-3) for the paper using the enhanced `evaluations.table_demographics` package.

## Statistical Approach: Binary Demographic Analysis

The package now uses a **binary demographic analysis** approach:

1. **Binary variables**: Each demographic value is treated as an independent binary variable (0/1)
2. **Combined repetitions**: All repetitions are concatenated for each population
3. **Z-test for proportions**: Statistical testing uses z-test for proportions (not t-test for means)
4. **Edge cases handled**: Zero variance, empty groups, and perfect separation are managed appropriately

## Significance Markers:
- `*` = p < 0.05
- `†` = p < 0.01
- `‡` = p < 0.001

All comparisons are against the **Base** condition for each model.

In [None]:
# Prepare the frame used by the table cells below: keep rows whose
# `repetitions` value is at most 5 and clean the religious belief labels.
#
# The frame is built as one chain. Assigning a column to a boolean-filtered
# frame (the previous step-by-step form) is the pattern that triggers pandas'
# SettingWithCopyWarning; `.assign` works on its own copy, so `population_df`
# stays untouched and no warning is raised.
demo_df = (
    population_df
    .loc[population_df['repetitions'] <= 5]
    # Map Buddhist and Hinduist to Others for consistency with the paper.
    # NOTE(review): regex=True treats the keys as patterns and rewrites matching
    # substrings, not only exact values — confirm this is intended rather than
    # an exact-value mapping.
    .assign(
        religious_belief=lambda df: df['religious_belief'].replace(
            {'Buddhist': 'Others', 'Hinduist': 'Others'}, regex=True
        )
    )
)

# Summarise what will feed the tables: row count plus the model and condition
# display labels present after filtering.
print(f"Data prepared: {len(demo_df)} personas")
print(f"Models: {sorted(demo_df['model_print'].unique())}")
print(f"Conditions: {sorted(demo_df['population_print'].unique())}")

In [None]:
# Table 1 — GPT-4o and GPT-3.5.
# Banner: a rule of 80 '=' characters above and below the table title.
print("\n".join(["=" * 80, "TABLE 1: GPT-4o and GPT-3.5 Demographics", "=" * 80]))

# Request the paper table from table_demographics, passing the two GPT model
# labels through `models`.
TABLE1 = table_demographics.create_paper_table(demo_df, models=["GPT-4o", "GPT-3.5"])

# Report the table's shape, then render the table itself with rich display.
print(f"\nTable 1 shape: {TABLE1.shape}\n\nTable 1:")
display(TABLE1)

In [None]:
# Table 2 — Llama models (3.2-3B and 3.1-70B).
# Banner: a rule of 80 '=' characters above and below the table title.
print("\n".join(["=" * 80, "TABLE 2: Llama3.2-3B and Llama3.1-70B Demographics", "=" * 80]))

# Request the paper table from table_demographics, passing the two Llama model
# labels through `models`.
TABLE2 = table_demographics.create_paper_table(demo_df, models=["Llama3.2-3B", "Llama3.1-70B"])

# Report the table's shape, then render the table itself with rich display.
print(f"\nTable 2 shape: {TABLE2.shape}\n\nTable 2:")
display(TABLE2)

In [None]:
# Table 3 — Claude-3.5-Sonnet.
# Banner: a rule of 80 '=' characters above and below the table title.
print("\n".join(["=" * 80, "TABLE 3: Claude-3.5-s Demographics", "=" * 80]))

# Request the paper table from table_demographics, passing the single Claude
# model label through `models`.
TABLE3 = table_demographics.create_paper_table(demo_df, models=["Claude-3.5-s"])

# Report the table's shape, then render the table itself with rich display.
print(f"\nTable 3 shape: {TABLE3.shape}\n\nTable 3:")
display(TABLE3)

## Export Tables

All tables can be exported to CSV for external table generation tools or LaTeX conversion.

In [None]:
# Optional CSV export. The calls below are intentionally left disabled;
# uncomment them to write one CSV file per table:
#
# TABLE1.to_csv('table1_gpt_demographics.csv')
# TABLE2.to_csv('table2_llama_demographics.csv')
# TABLE3.to_csv('table3_claude_demographics.csv')

# Closing summary: completion notice, a blank line, then a reminder of the
# statistical method and the significance-marker legend.
print(
    "✅ All tables generated successfully!",
    "\nTables use binary demographic analysis with z-test for proportions",
    "Significance markers: * (p<0.05), † (p<0.01), ‡ (p<0.001)",
    sep="\n",
)