In [46]:
import pandas as pd #for reading and manipulating 
import re #Regular expressions
import numpy as np #for manipulating arrays

In [47]:
#Read CSV with evaluation data
data= 'human_evaluation/venezuelan_names_human_evaluation.csv'
df= pd.read_csv(data) #full dataframe


In [48]:
#Dropping duplicates 

columns= list(df.keys()) #See duplicated columns
filtered_items = [item for item in columns if re.search(r'\.\d+$', item)] # Filter items using regular expression

for i in filtered_items: #looks at all items in filtered list and drops columns
    df.drop(columns=[i])

In [49]:
total_sample= df.keys()[2:] #full sample with names
print(f'total number of names in sample:{len(total_sample)}')

total number of names in sample:169


In [50]:
#Translating answers to numeric data 

# Function to convert alphabetic data into numbers
def binarizing_data(label):
    if 'No' in label:
        return 0
    elif 'Sí' in label:
        return 1

for i in list(df.keys()[1:]):
    df[i] = df[i].apply(lambda x: binarizing_data(x))

In [51]:
#Splitting participants 
df_v= df[df['¿Eres de nacionalidad venezolana?']==1] #data from people who are venezuelan speakers. 67
df_f= df[df['¿Eres de nacionalidad venezolana?']==0] #data from people who are not venezuelan speakers. 14

In [52]:
#Calculates acceptability of names 

def acceptability(dataframe, list_of_names): 
    output = {} #all outputs
    for i in list_of_names:
        data= dataframe[i].values #takes values of a single name
        number= sum(data) / len(data) #calculates rate of acceptability in relation to number of participants
        output[i]=number
    return output  

v_acceptability= acceptability(df_v, total_sample)  
f_acceptability= acceptability(df_f, total_sample)   

In [53]:
#Mean of acceptability 

mean_v = np.asarray(list(v_acceptability.values())).mean() #for venezuelan speakers
mean_f = np.asarray(list(f_acceptability.values())).mean() #for non-venezuelan speakers (i.e. speakers of other (hispanic) dialects)
overall_mean= (67/81)*mean_v + (14/81)*mean_f

print(f'Acceptability by venezuelan speakers:{mean_v}')
print(f'Acceptability by non-venezuelan speakers:{mean_f}')
print(f'Overall acceptability:{overall_mean}')

Acceptability by venezuelan speakers:0.4645411993287997
Acceptability by non-venezuelan speakers:0.5097210481825866
Overall acceptability:0.47235006209365177
