In [1]:
import json
import os
from collections import Counter

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.compose import ColumnTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import SVC
from sklearn.utils import shuffle


In [105]:
## Read in model data

def get_full_data(model_name, interval):
    """Load one model's generated characters, attach name metadata and save a CSV.

    For run ids 0-2 and every ``i`` in ``range(0, 400, interval)`` the file
    ``../openai_output/{model_name}_run{run_id}_names{i}.txt`` is parsed as
    JSON. For ``'llama3'`` the parsed document itself is iterated as the
    characters; for every other model the ``'characters'`` entry is used
    (an empty list when the key is absent). Each character is tagged with
    its ``run_id``.

    Files that are missing, unreadable or malformed are reported on stdout
    and skipped, so one bad file does not abort the whole load.

    The characters are flattened with ``pd.json_normalize`` and left-joined
    (character ``name`` == ``firstname``) with ``../data/3_name_groups.csv``.
    List-valued cells are converted to tuples so rows are hashable, exact
    duplicate rows are dropped, and the result is written to
    ``../data/10a_{model_name}_full.csv``.

    Args:
        model_name: File-name prefix of the model outputs (e.g. ``'claude'``).
        interval: Step between the ``names{i}`` indices that exist on disk.

    Returns:
        The merged, de-duplicated DataFrame (original row index retained).
    """
    data = []
    path = '../openai_output/'

    for run_id in range(0, 3):
        for i in range(0, 400, interval):
            filename = f'{model_name}_run{run_id}_names{i}.txt'
            filepath = os.path.join(path, filename)
            try:
                with open(filepath, 'r') as file:
                    json_content = json.load(file)

                if model_name == 'llama3':
                    characters = json_content
                else:
                    characters = json_content.get('characters', [])

                for character in characters:
                    character['run_id'] = run_id
                    data.append(character)
            except Exception as exc:
                # Best-effort load: keep going, but say *why* the file failed.
                # `Exception` (not a bare except) lets Ctrl-C still interrupt.
                print("error in file", filepath, f"({type(exc).__name__}: {exc})")

    results = pd.json_normalize(data)
    names = pd.read_csv('../data/3_name_groups.csv', on_bad_lines='warn').drop('Unnamed: 0', axis=1)

    # Temporary join key matching the column name used in the names table.
    results['firstname'] = results['name']

    merged_df = results.merge(names, on='firstname', how='left').drop(
        ['firstname', 'Cluster', 'Ethnicity Probability'], axis=1)

    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1; use
    # whichever this pandas version provides.
    elementwise = merged_df.map if hasattr(pd.DataFrame, 'map') else merged_df.applymap
    # Lists are unhashable; tuples let drop_duplicates compare whole rows.
    merged_df = elementwise(lambda x: tuple(x) if isinstance(x, list) else x)

    merged_df_cleaned = merged_df.drop_duplicates(keep="first")

    merged_df_cleaned.to_csv(f'../data/10a_{model_name}_full.csv')

    return merged_df_cleaned


In [106]:
# Load the 'claude' outputs (name-file indices step by 10) for inspection.
merged_df = get_full_data('claude',10)

In [107]:
# Peek at the last rows to sanity-check the merged columns.
merged_df.tail()

Unnamed: 0,name,age,personality_traits,negative_traits,hobbies,occupation,special_move,socioeconomic_status,sexual_orientation,religion,run_id,physical_characteristics.height,physical_characteristics.hair_colour,physical_characteristics.eye_colour,physical_characteristics.skin_colour,physical_characteristics.build,Ethnicity,Gender,Group
1201,Gijsbertus,61,"(Independent, Practical, Reliable)","(Inflexible, Judgmental, Blunt)","(Woodworking, Fishing, Watching soccer)",Retired Engineer,Problem-solving,Upper middle class,Heterosexual,Protestant,2,6.1,Gray,Blue,Fair,Sturdy,DUTCH,M,"('DUTCH', 'M')"
1202,Karoline,24,"(Creative, Curious, Energetic)","(Disorganized, Impulsive, Sensitive)","(Photography, Travel, Dancing)",Graphic Designer,Artistic vision,Working class,Pansexual,Agnostic,2,5.5,Red,Green,Freckled,Petite,GERMAN,F,"('GERMAN', 'F')"
1203,Brankica,39,"(Assertive, Resilient, Adaptable)","(Impatient, Sarcastic, Workaholic)","(Kickboxing, Cooking, Learning languages)",Journalist,Investigative instinct,Middle class,Heterosexual,Orthodox Christian,2,5.8,Brown,Hazel,Olive,Athletic,SLAV,F,"('SLAV', 'F')"
1204,Mareka,31,"(Empathetic, Intuitive, Nurturing)","(Overemotional, Indecisive, Procrastinator)","(Painting, Singing, Volunteering)",Nurse,Calming presence,Working class,Bisexual,Catholic,2,5.6,Black,Brown,Brown,Curvy,HUNGARIAN,F,"('HUNGARIAN', 'F')"
1205,Dorothea,57,"(Confident, Independent, Intelligent)","(Stubborn, Sarcastic, Intimidating)","(Reading, Chess, Theater)",Lawyer,Persuasive argument,Upper class,Lesbian,Atheist,2,5.9,Blonde,Blue,Fair,Slim,GERMAN,F,"('GERMAN', 'F')"


In [133]:
def run_classification_task(merged_df, axis):
    """Train an SVM to predict ``axis`` from character attributes and score it.

    Features are every column except ``name``, ``special_move``, ``run_id``
    and the three label columns (``Ethnicity``, ``Gender``, ``Group``):

    * ``personality_traits``, ``negative_traits`` and ``hobbies`` are joined
      into one space-separated string per row and TF-IDF vectorised;
    * all remaining columns are one-hot encoded (numbers are first turned
      into strings so they are treated as categories).

    The data is split 70/30 (stratified on the target, ``random_state=42``)
    and a default ``SVC`` is fitted on the training part.

    Args:
        merged_df: Character table, e.g. as returned by ``get_full_data``.
        axis: Name of the label column to predict.

    Returns:
        Accuracy on the held-out 30%, rounded to three decimals.
    """
    class ListJoinTransformer(TransformerMixin, BaseEstimator):
        """Join list-like cells into a single space-separated string."""

        # BaseEstimator supplies get_params/set_params (and estimator tags),
        # which scikit-learn expects of every pipeline step it clones.
        def fit(self, X, y=None):
            return self

        def transform(self, X, y=None):
            return X.apply(lambda x: ' '.join(x) if isinstance(x, (list, tuple)) else x)

    def _normalise_cell(value):
        # Tuples back to lists; numbers to strings so OneHotEncoder sees
        # uniformly typed categorical values.
        if isinstance(value, tuple):
            return list(value)
        if isinstance(value, (float, int)):
            return str(value)
        return value

    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1; use
    # whichever this pandas version provides (one pass instead of two).
    elementwise = merged_df.map if hasattr(pd.DataFrame, 'map') else merged_df.applymap
    merged_df = elementwise(_normalise_cell)

    # (transformer name, column) for the list-valued text features.
    text_features = [
        ('list_personality', 'personality_traits'),
        ('list_negative', 'negative_traits'),
        ('list_hobbies', 'hobbies'),
    ]

    # Identifier and label columns are never used as features; text columns
    # get their own TF-IDF branch instead of one-hot encoding.
    exclude_cols = {'name', 'special_move', 'run_id', 'Ethnicity', 'Gender', 'Group'}
    exclude_cols.update(column for _, column in text_features)
    categorical_cols = [col for col in merged_df.columns if col not in exclude_cols]

    transformers = [('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)]
    for step_name, column in text_features:
        text_pipeline = Pipeline([
            ('join', ListJoinTransformer()),
            ('vectorize', TfidfVectorizer()),
        ])
        transformers.append((step_name, text_pipeline, column))
    preprocessor = ColumnTransformer(transformers=transformers, remainder='drop')

    X = merged_df.drop([axis], axis=1)
    y = merged_df[axis]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=y)

    svm_pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('classifier', SVC())])
    svm_pipeline.fit(X_train, y_train)

    # Predictions and evaluation
    y_pred = svm_pipeline.predict(X_test)

    return round(accuracy_score(y_test, y_pred), 3)
    # cm = confusion_matrix(y_test, y_pred, labels=svm_pipeline.classes_)
    # plt.figure(figsize=(12, 10))
    # sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=svm_pipeline.classes_, yticklabels=svm_pipeline.classes_)
    # plt.ylabel('Actual')
    # plt.xlabel('Predicted')
    # plt.title('Confusion Matrix')
    # plt.show()

    # # Permutation Testing
    # original_score = accuracy_score(y_test, y_pred)
    # permuted_scores = []

    # for _ in range(10):
    #     y_permuted = shuffle(y_train).reset_index(drop=True)
    #     svm_pipeline.fit(X_train, y_permuted)
    #     y_pred_permuted = svm_pipeline.predict(X_test)
    #     permuted_scores.append(accuracy_score(y_test, y_pred_permuted))

    # # Compare original score to permuted scores
    # import numpy as np
    # p_value = np.mean(np.array(permuted_scores) >= original_score)
    # print(f"Permutation Test: p-value={p_value}")


In [134]:
# Accuracy of predicting each demographic label from the generated
# attributes, one table row per language model.
model_list = [('llama3',20), ('gpt3',20), ('claude', 10), ('gpt4o',20)]

results = {
    'Model': [],
    'Gender + Ethnicity Accuracy': [],
    'Ethnicity Accuracy': [],
    'Gender Accuracy': [],
}

for model, interval in model_list:
    print(model)
    merged_df = get_full_data(model, interval)

    # One classifier per target column.
    group_accuracy, gender_accuracy, ethnicity_accuracy = [
        run_classification_task(merged_df, target)
        for target in ('Group', 'Gender', 'Ethnicity')
    ]

    row = {
        'Model': model,
        'Gender + Ethnicity Accuracy': group_accuracy,
        'Ethnicity Accuracy': ethnicity_accuracy,
        'Gender Accuracy': gender_accuracy,
    }
    for column, value in row.items():
        results[column].append(value)

overall_df = pd.DataFrame(results)

# LaTeX version of the table for the write-up.
latex_format = overall_df.to_latex(index=False)
print(latex_format)

overall_df

llama3
gpt3
claude
gpt4o
\begin{tabular}{lrrr}
\toprule
 Model &  Gender + Ethnicity Accuracy &  Ethnicity Accuracy &  Gender Accuracy \\
\midrule
llama3 &                        0.183 &               0.306 &            0.833 \\
  gpt3 &                        0.217 &               0.322 &            0.889 \\
claude &                        0.264 &               0.361 &            0.919 \\
 gpt4o &                        0.333 &               0.386 &            0.939 \\
\bottomrule
\end{tabular}



  latex_format = overall_df.to_latex(index=False)


Unnamed: 0,Model,Gender + Ethnicity Accuracy,Ethnicity Accuracy,Gender Accuracy
0,llama3,0.183,0.306,0.833
1,gpt3,0.217,0.322,0.889
2,claude,0.264,0.361,0.919
3,gpt4o,0.333,0.386,0.939


###  Strongest Features

In [146]:
import pandas as pd
from sklearn.base import TransformerMixin
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

def run_classification_task(merged_df, axis, drop_col=None):
    """Train an SVM to predict ``axis``, optionally leaving one feature out.

    Features are every column except ``name``, ``special_move``, ``run_id``,
    the three label columns (``Ethnicity``, ``Gender``, ``Group``) and
    ``drop_col``:

    * ``personality_traits``, ``negative_traits`` and ``hobbies`` are joined
      into one space-separated string per row and TF-IDF vectorised;
    * all remaining columns are one-hot encoded (numbers are first turned
      into strings so they are treated as categories).

    The data is split 70/30 (stratified on the target, ``random_state=42``)
    and a default ``SVC`` is fitted on the training part.

    Args:
        merged_df: Character table, e.g. as returned by ``get_full_data``.
        axis: Name of the label column to predict.
        drop_col: Feature column to leave out. Defaults to ``None`` (use all
            features) so that two-argument calls written for the earlier
            definition of this function keep working after this cell
            redefines the name.

    Returns:
        Accuracy on the held-out 30%, rounded to three decimals.
    """
    class ListJoinTransformer(TransformerMixin, BaseEstimator):
        """Join list-like cells into a single space-separated string."""

        # BaseEstimator supplies get_params/set_params (and estimator tags),
        # which scikit-learn expects of every pipeline step it clones.
        def fit(self, X, y=None):
            return self

        def transform(self, X, y=None):
            return X.apply(lambda x: ' '.join(x) if isinstance(x, (list, tuple)) else x)

    def _normalise_cell(value):
        # Tuples back to lists; numbers to strings so OneHotEncoder sees
        # uniformly typed categorical values.
        if isinstance(value, tuple):
            return list(value)
        if isinstance(value, (float, int)):
            return str(value)
        return value

    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1; use
    # whichever this pandas version provides (one pass instead of two).
    elementwise = merged_df.map if hasattr(pd.DataFrame, 'map') else merged_df.applymap
    merged_df = elementwise(_normalise_cell)

    # (transformer name, column) for the list-valued text features.
    text_features = [
        ('list_personality', 'personality_traits'),
        ('list_negative', 'negative_traits'),
        ('list_hobbies', 'hobbies'),
    ]

    # Identifier and label columns are never used as features; text columns
    # get their own TF-IDF branch instead of one-hot encoding.
    exclude_cols = {'name', 'special_move', 'run_id', 'Ethnicity', 'Gender', 'Group'}
    exclude_cols.update(column for _, column in text_features)
    categorical_cols = [
        col for col in merged_df.columns
        if col not in exclude_cols and col != drop_col
    ]

    transformers = [('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)]
    for step_name, column in text_features:
        if column == drop_col:
            continue  # the ablated feature gets no branch
        text_pipeline = Pipeline([
            ('join', ListJoinTransformer()),
            ('vectorize', TfidfVectorizer()),
        ])
        transformers.append((step_name, text_pipeline, column))

    preprocessor = ColumnTransformer(transformers, remainder='drop')

    # The label columns stay in X but are never selected by a transformer.
    X = merged_df if drop_col is None else merged_df.drop([drop_col], axis=1)
    y = merged_df[axis]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42, stratify=y)

    svm_pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('classifier', SVC())])
    svm_pipeline.fit(X_train, y_train)

    y_pred = svm_pipeline.predict(X_test)

    return round(accuracy_score(y_test, y_pred), 3)
model_list = [('llama3',20), ('gpt3',20), ('claude', 10), ('gpt4o',20)]

# Column of `overall_df` that holds the all-features accuracy for each target.
BASELINE_COLUMNS = {
    'Group': 'Gender + Ethnicity Accuracy',
    'Ethnicity': 'Ethnicity Accuracy',
    'Gender': 'Gender Accuracy',
}


def evaluate_columns(merged_df, axis, model_name=None):
    """Leave-one-feature-out ablation for predicting ``axis``.

    For every feature column, the classifier is retrained without that
    column and the change in accuracy relative to the all-features accuracy
    stored in ``overall_df`` is recorded.

    Args:
        merged_df: Character table for one model.
        axis: Label column to predict; must be a key of ``BASELINE_COLUMNS``.
        model_name: Value in ``overall_df['Model']`` to take the baseline
            from. Defaults to the notebook-level loop variable ``model``.

    Returns:
        DataFrame with columns ``Column`` and ``Accuracy``. Despite its name,
        ``Accuracy`` holds the accuracy *change* in percentage points
        (negative = dropping the column hurt). Columns whose run failed are
        reported on stdout and get NaN, so the frame stays numeric and
        sortable.

    Raises:
        ValueError: If ``overall_df`` has no row for ``model_name``.
    """
    if model_name is None:
        model_name = model  # notebook-level loop variable

    baseline_rows = overall_df.loc[overall_df['Model'] == model_name, BASELINE_COLUMNS[axis]]
    if baseline_rows.empty:
        raise ValueError(f"no baseline accuracy for model {model_name!r} in overall_df")
    baseline = baseline_rows.values[0]

    skipped = {'name', 'special_move', 'run_id', 'Ethnicity', 'Gender', 'Group', axis}
    results = {}
    for col in merged_df.columns:
        if col in skipped:
            continue
        try:
            accuracy = run_classification_task(merged_df, axis, col)
        except Exception as e:
            print(f"{col}: Error: {str(e)}")
            results[col] = float('nan')
        else:
            # Accuracies are rounded to 3 decimals, so one decimal is exact here.
            results[col] = round(100 * (accuracy - baseline), 1)

    results_df = pd.DataFrame(list(results.items()), columns=['Column', 'Accuracy'])
    return results_df


axis = 'Ethnicity'  # Replace this with your target column name

for model, interval in model_list:
    merged_df = get_full_data(model, interval)
    print(model)
    results_df = evaluate_columns(merged_df, axis, model)
    print(results_df.sort_values(by='Accuracy'))



llama3
                                  Column  Accuracy
7                               religion     -11.4
10   physical_characteristics.eye_colour      -2.5
11  physical_characteristics.skin_colour      -0.3
2                        negative_traits       0.0
1                     personality_traits       0.2
12        physical_characteristics.build       0.2
4                             occupation       0.8
3                                hobbies       1.1
6                     sexual_orientation       1.1
5                   socioeconomic_status       1.3
8        physical_characteristics.height       1.6
9   physical_characteristics.hair_colour       2.2
0                                    age       2.5
gpt3
                                  Column  Accuracy
7                               religion      -6.4
11  physical_characteristics.skin_colour      -5.0
10   physical_characteristics.eye_colour      -4.4
9   physical_characteristics.hair_colour      -3.9
12        physical_

### Top Words

In [101]:
import pandas as pd
from collections import Counter
from shifterator import JSDivergenceShift


def get_word_distribution(df, group_label, axis, feature, is_group):
    """Count lower-cased values of ``feature`` inside or outside one group.

    Args:
        df: Character table.
        group_label: Value of ``df[axis]`` that defines the group.
        axis: Column holding the group labels.
        feature: Column whose values are counted. List or tuple cells
            contribute one count per element; any other cell is counted as
            a single token ``str(cell).lower()``.
        is_group: ``True`` to count rows where ``df[axis] == group_label``,
            ``False`` to count all other rows.

    Returns:
        dict mapping each lower-cased token to its number of occurrences.
        Empty sequences contribute nothing.
    """
    if is_group:
        mask = df[axis] == group_label
    else:
        mask = df[axis] != group_label
    selected = df.loc[mask, feature]

    def _normalise(value):
        # Lists are handled like tuples so the function also works on frames
        # that have not had their lists converted to tuples.
        if isinstance(value, (list, tuple)):
            return [str(item).lower() for item in value]
        return str(value).lower()

    # explode() turns an empty sequence into NaN; drop those placeholders so
    # they are not counted as a word.
    tokens = selected.apply(_normalise).explode().dropna()

    return dict(Counter(tokens))

def calculate_jsd_for_groups(df, axis, feature):
    """Find, per group, the words of ``feature`` that best separate it from the rest.

    For each distinct non-missing label in ``df[axis]`` the word counts of
    that group are compared with the counts of all other rows using
    shifterator's ``JSDivergenceShift``. The 25 words with the highest
    ``type2shift_score`` are kept only if they are also among the 200
    highest ``type2score_1`` words (presumably the in-group scores; confirm
    against the shifterator documentation), and the first three survivors
    are returned.

    Args:
        df: Character table.
        axis: Column holding the group labels.
        feature: Column whose words are compared.

    Returns:
        dict mapping each group label to a list of up to three
        ``(word, shift_score)`` tuples, highest score first.
    """
    all_results = {}

    # A missing label never equals any row, so its "in group" would be empty.
    for group in df[axis].dropna().unique():
        in_group = get_word_distribution(df, group, axis, feature, True)
        out_of_group = get_word_distribution(df, group, axis, feature, False)
        jsd_shift = JSDivergenceShift(type2freq_1=in_group,
                                      type2freq_2=out_of_group,
                                      weight_1=0.5,
                                      weight_2=0.5,
                                      base=2,
                                      alpha=1)

        #jsd_shift.get_shift_graph(title=f'Linguistic Signatures of {group} vs. Other', show_graph=False)
        top_shift = sorted(jsd_shift.type2shift_score.items(),
                           key=lambda item: item[1], reverse=True)[:25]
        # Built once per group (the ranking does not depend on the candidate
        # word) and stored as a set for O(1) membership tests.
        top_in_group = {
            word for word, _ in sorted(jsd_shift.type2score_1.items(),
                                       key=lambda item: item[1], reverse=True)[:200]
        }
        jsd_words = [(word, score) for word, score in top_shift if word in top_in_group]
        all_results[group] = jsd_words[:3]

    return all_results


def get_jsd_words(axis, feature, df=None):
    """Print the ten most group-distinctive words of ``feature``.

    Collects the per-group results of ``calculate_jsd_for_groups``, ranks
    all ``(group, word, score)`` entries by score and prints the top ten.

    Args:
        axis: Column holding the group labels.
        feature: Column whose words are compared.
        df: Character table to analyse. Defaults to the notebook-level
            ``merged_df``; pass it explicitly to avoid depending on which
            cell last assigned that global.

    Returns:
        None. The ranking is written to stdout.
    """
    if df is None:
        df = merged_df  # notebook-level frame

    jsd_results = calculate_jsd_for_groups(df, axis, feature)

    all_words = [
        (group, word, value)
        for group, words in jsd_results.items()
        for word, value in words
    ]

    top_10_words = sorted(all_words, key=lambda x: x[2], reverse=True)[:10]

    print("Top 10 differentiating words across all groups with associated group:")
    for group, word, value in top_10_words:
        # str() keeps the alignment spec valid for non-string labels/tokens.
        print(f"{str(group):<25} {str(word):<15} {round(value, 3)}")


In [149]:
# Full sweep would be: [('llama3',20), ('gpt3',20), ('claude', 10), ('gpt4o',20)]
# Only llama3 is analysed here.
model_list = [('llama3',20)]

# Columns for which no word ranking is printed.
skipped_columns = ('name', 'age', 'run_id', 'Gender', 'Ethnicity', 'Group')

for model, interval in model_list:
    # get_jsd_words reads the notebook-level `merged_df`, so it must be
    # (re)assigned here before the calls below.
    merged_df = get_full_data(model, interval)
    for column in merged_df.columns:
        if column in skipped_columns:
            continue
        print("\n" + column)
        get_jsd_words('Group', column)



personality_traits
Top 10 differentiating words across all groups with associated group:
('TURKISH', 'F')          charismatic     0.147
('BALTIC', 'F')           charismatic     0.134
('BALTIC', 'F')           ambitious       0.126
('FRENCH', 'F')           adventurous     0.113
('AFRICAN', 'M')          creative        0.111
('INDIAN', 'F')           adventurous     0.109
('CHINESE', 'M')          charismatic     0.103
('GREEK', 'M')            outgoing        0.1
('TURKISH', 'M')          creative        0.1
('HISPANIC', 'F')         spontaneous     0.098

negative_traits
Top 10 differentiating words across all groups with associated group:
('BALTIC', 'F')           arrogant        0.112
('JAPANESE', 'F')         manipulative    0.072
('CHINESE', 'F')          manipulative    0.072
('SLAV', 'F')             perfectionist   0.072
('FRENCH', 'M')           perfectionist   0.07
('ENGLISH', 'M')          pessimistic     0.069
('BALTIC', 'F')           manipulative    0.068
('AFRICAN', 