In [1]:
from configs import OCCUPATION_LIST
from itertools import combinations
import pandas as pd 
import random
import numpy as np
from scipy import interpolate
from scipy.optimize import bisect

def find_pse(occ):
    """Return the rank at which the interpolated changepoint curve crosses 0.5.

    Reads ``data/{occ}.feather``, shifts its ``changepoints`` column down by
    0.5, interpolates the shifted values over the ``rank`` column and finds the
    root by bisection between the smallest and the largest rank.
    (PSE presumably stands for "point of subjective equality" -- confirm.)

    Parameters
    ----------
    occ : str
        Base name (no directory, no extension) of the feather file to read.

    Returns
    -------
    float
        The root reported by ``scipy.optimize.bisect``.

    Raises
    ------
    ValueError
        Raised by ``bisect`` when the shifted curve has the same sign at both
        ends of the rank range, i.e. there is no bracketed crossing.
    """
    trials = pd.read_feather(f'data/{occ}.feather')
    ranks = trials['rank']

    # Shifting by 0.5 turns "where does the curve equal 0.5" into root finding.
    centred = np.array(trials.changepoints) - 0.5
    crossing = interpolate.interp1d(ranks, centred)

    lower, upper = min(ranks), max(ranks)
    return bisect(crossing, lower, upper)


In [2]:
# Every unordered pair of occupations, in the order itertools yields them.
occupation_combinations = list(combinations(OCCUPATION_LIST, 2))

# Summary column -> (gender of the left speaker, gender of the right speaker).
gender_pairings = {
    "ff_pse": ("female", "female"),
    "mm_pse": ("male", "male"),
    "fm_pse": ("female", "male"),
    "mf_pse": ("male", "female"),
}

# Column-oriented accumulator: one list per column, one entry per pair.
summary = {"occupation": [], **{column: [] for column in gender_pairings}}

for lhs, rhs in occupation_combinations:
    summary["occupation"].append(f"{lhs}_{rhs}")

    # One PSE per gender pairing of the same (left, right) occupation pair.
    for column, (left_gender, right_gender) in gender_pairings.items():
        summary[column].append(
            find_pse(f"{left_gender}_{lhs}_{right_gender}_{rhs}")
        )



In [3]:
# Turn the dict of column lists into a frame: one row per occupation pair,
# with the pair label and its four PSE values as columns.
summary_df = pd.DataFrame(summary)

In [10]:
summary_df

Unnamed: 0,occupation,ff_pse,mm_pse,fm_pse,mf_pse
0,technician_accountant,-1.0,-5.0,-4.0,-1.0
1,technician_supervisor,-5.0,-5.0,-5.0,0.0
2,technician_engineer,-1.0,1.0,-1.0,1.0
3,technician_worker,3.0,3.5,-1.0,4.0
4,technician_educator,0.0,2.0,7.0,-2.0
...,...,...,...,...,...
1220,programmer_hygienist,-3.5,-2.0,-3.5,1.0
1221,programmer_scientist,-1.0,-3.5,-5.0,1.0
1222,paralegal_hygienist,-11.0,-2.0,-4.0,-1.0
1223,paralegal_scientist,0.0,-7.0,-5.0,-1.0


```
wl - wr + wgl - wgr = -pse
wl - wr + wgl - wgr = -pse
wl - wr + wgl - wgr = -pse
wl - wr + wgl - wgr = -pse

p - h + fp - fh = 11
p - h + mp - mh = 2
p - h + fp - mh = 4
p - h + mp - fh = 1


mh - fh = 7
mh - fh = -1
mp - fp = -2
mp - fp = -10
```


In [5]:
from numpy.linalg import inv


In [7]:
# Small 2x2 example used to sanity-check matrix inversion.
a = np.array([[1,2],[3,4]])
# The recorded output of this cell is the inverse of `a`; an assignment alone
# displays nothing, so end the cell with the expression that produces it.
inv(a)


array([[-2. ,  1. ],
       [ 1.5, -0.5]])

In [27]:
# Right word: average of the two PSE shifts seen when the right speaker goes
# from female to male while the left speaker is held fixed:
# (fm - ff) with a female left speaker and (mm - mf) with a male left speaker.
summary_df['right_word_bias'] = (
    summary_df['fm_pse']
    .sub(summary_df['ff_pse'])
    .add(summary_df['mm_pse'])
    .sub(summary_df['mf_pse'])
    .div(2)
)

# Left word: average of the two PSE shifts seen when the left speaker goes
# from male to female while the right speaker is held fixed:
# (ff - mf) with a female right speaker and (fm - mm) with a male right speaker.
summary_df['left_word_bias'] = (
    summary_df['ff_pse']
    .sub(summary_df['mf_pse'])
    .add(summary_df['fm_pse'])
    .sub(summary_df['mm_pse'])
    .div(2)
)


In [28]:
# Pair labels were built as "<left>_<right>"; recover each side as its own column.
summary_df['right_word'] = [label.split('_')[1] for label in summary_df['occupation']]
summary_df['left_word'] = [label.split('_')[0] for label in summary_df['occupation']]


In [29]:
# Stack the right-hand and left-hand observations into one long table so that
# each word contributes every bias estimate it took part in, whichever side it
# appeared on. `Series.append` was removed in pandas 2.0; `pd.concat` is the
# supported equivalent and produces the same stacked Series.
words = pd.concat([summary_df['right_word'], summary_df['left_word']])
bias = pd.concat([summary_df['right_word_bias'], summary_df['left_word_bias']])
bias_df = pd.DataFrame({"words": words, "bias": bias})

# Mean bias per word, most negative first.
bias_df.groupby("words").mean().sort_values("bias")

Unnamed: 0_level_0,bias
words,Unnamed: 1_level_1
clerk,-5.688776
scientist,-3.80102
architect,-3.790816
worker,-3.604592
plumber,-3.436224
veterinarian,-2.839286
surgeon,-2.331633
examiner,-2.329082
salesperson,-2.25
paralegal,-2.028061


In [35]:
# Tab-separated occupation statistics, ordered by the TotalEmployed column
# (ascending, the sort_values default).
tmp = pd.read_csv(
    'professionsBLS2015.tab',
    sep="\t",
).sort_values(by='TotalEmployed')

In [45]:
# Preview the employment table. The previous contents of this cell could not
# run: the column is named 'Women' (not 'Woman') and merge() was called without
# a right-hand frame. The recorded output is the full table, and the actual
# merge with the bias table is done in a later cell.
tmp

Unnamed: 0,TotalEmployed,Women,AfricanAmerican,Asian,HispanicLatino,none,label1,label2,label3,label4,label5
30,50,3.5,21.3,1.4,8.2,,engineer,operator,,,
62,51,9.4,14.2,5.5,21.5,,operator,,,,
21,51,2.8,10.4,0.0,11.0,,worker,,,,
65,51,10.3,11.5,0.2,29.0,,operator,setter,tender,,
56,52,7.7,18.6,1.7,2.1,,conductor,yardmaster,,,
...,...,...,...,...,...,...,...,...,...,...,...
111,17647,21.8,15.3,4.7,22.3,,,,,,
287,17897,72.2,13.9,4.7,15.6,,,,,,
227,25896,56.7,16.8,5.4,23.5,,,,,,
255,33598,61.5,12.3,5.2,15.3,,,,,,


In [64]:
# Mean bias per word, sorted by bias, with the word moved out of the index into
# an ordinary `label1` column so it can serve as the merge key.
tmp2 = (
    bias_df
    .groupby("words")
    .mean()
    .sort_values("bias")
    .assign(label1=lambda frame: frame.index)
    .reset_index(drop=True)
)

# Keep only labels present in both tables, then average rows sharing a label
# so that each label ends up with a single Women / bias pair.
tmp3 = (
    tmp[['label1', 'Women']]
    .merge(tmp2, on='label1', how='inner')
    .groupby('label1')
    .mean()
)

In [69]:
from scipy.stats import pearsonr, spearmanr

In [77]:
# Rank correlation between the absolute distance of the Women column from 50
# and the measured bias. The distance is zeroed wherever Women is not positive.
# NOTE(review): the distance is unsigned while the bias is signed, and the
# `> 0` gate only affects non-positive values -- confirm this is the intended
# statistic.
spearmanr(
    tmp3['Women'].gt(0).astype(int) * (tmp3['Women'] - 50).abs(),
    tmp3['bias'],
)

SpearmanrResult(correlation=0.15483825537079976, pvalue=0.3155802218202704)

In [73]:
tmp3

Unnamed: 0_level_0,Women,bias
label1,Unnamed: 1_level_1,Unnamed: 2_level_1
accountant,59.7,-0.673469
administrator,39.866667,0.084184
advisor,37.9,-1.326531
appraiser,39.3,-0.872449
architect,18.9,-3.790816
baker,60.8,1.096939
bartender,59.8,1.903061
carpenter,1.8,-0.836735
chemist,36.1,0.346939
clerk,67.955556,-5.688776


In [11]:
# Load the stored results for a single stimulus pair so the individual rows
# (one per audio file) can be inspected by hand.
h_e = pd.read_feather('data/female_electrician_female_hygienist.feather')

In [12]:
h_e

Unnamed: 0,filename,value,inputTranscript,interpretations,sessionId,res,rank,changepoints
0,female_electrician_female_hygienist-14.pcm,electrician,"""my occupation is electrician""","[{""nluConfidence"":{""score"":1.0},""intent"":{""nam...",5026921d-0a7c-4db0-bf03-3d9b6c12cb5d,{'ResponseMetadata': {'RequestId': '532ee3bf-2...,-14,0.0
1,female_electrician_female_hygienist-12.pcm,electrician,"""my occupation is electrician""","[{""nluConfidence"":{""score"":1.0},""intent"":{""nam...",7728ee44-e72e-431f-b1db-a0f59aab08d1,{'ResponseMetadata': {'RequestId': 'b65e4f28-d...,-12,0.0
2,female_electrician_female_hygienist-10.pcm,electrician,"""my occupation is electrician""","[{""nluConfidence"":{""score"":1.0},""intent"":{""nam...",154dec33-b6b5-476c-899e-22e139650327,{'ResponseMetadata': {'RequestId': '52f27693-5...,-10,0.0
3,female_electrician_female_hygienist-08.pcm,electrician,"""my occupation is electrician""","[{""nluConfidence"":{""score"":1.0},""intent"":{""nam...",2ae573dd-f9f2-4262-836c-82f8a11e22ac,{'ResponseMetadata': {'RequestId': '660bbcd1-4...,-8,0.0
4,female_electrician_female_hygienist-06.pcm,electrician,"""my occupation is electrician""","[{""nluConfidence"":{""score"":1.0},""intent"":{""nam...",add4e68f-a134-4467-a079-02da64a06c2f,{'ResponseMetadata': {'RequestId': '6f354e56-7...,-6,0.0
5,female_electrician_female_hygienist-04.pcm,electrician,"""my occupation is electrician""","[{""nluConfidence"":{""score"":1.0},""intent"":{""nam...",7a588840-da42-4e91-aa29-73f72ca4a06c,{'ResponseMetadata': {'RequestId': '3985a4d3-7...,-4,0.0
6,female_electrician_female_hygienist-02.pcm,electrician,"""my occupation is electrician""","[{""nluConfidence"":{""score"":1.0},""intent"":{""nam...",d4be815c-a2ce-4807-a495-ff76616532ff,{'ResponseMetadata': {'RequestId': '80a84bca-4...,-2,0.0
7,female_electrician_female_hygienist+00.pcm,,"""my occupation""","[{""nluConfidence"":{""score"":1.0},""intent"":{""nam...",2951db75-0726-4693-9541-cfe8d5ac99ce,{'ResponseMetadata': {'RequestId': '7860eac7-6...,0,0.5
8,female_electrician_female_hygienist+02.pcm,hygienist,"""my occupation is hygienist""","[{""nluConfidence"":{""score"":1.0},""intent"":{""nam...",7f0e3121-b376-4713-bbdf-bdd3324608c4,{'ResponseMetadata': {'RequestId': '68ab3708-d...,2,1.0
9,female_electrician_female_hygienist+04.pcm,hygienist,"""my occupation is hygienist""","[{""nluConfidence"":{""score"":1.0},""intent"":{""nam...",aad36fb5-cc2a-4f75-978b-bce9dda13a5b,{'ResponseMetadata': {'RequestId': '5f59ae54-0...,4,1.0


In [13]:
from preprocessing import generate_mixed_audio 

In [29]:
# Mix every gender pairing of the electrician / hygienist "favorite job" clips.
for left_clip, right_clip in [
    ("male_fav_electrician", "female_fav_hygienist"),
    ("female_fav_electrician", "male_fav_hygienist"),
    ("female_fav_electrician", "female_fav_hygienist"),
    ("male_fav_electrician", "male_fav_hygienist"),
]:
    generate_mixed_audio(left_clip, right_clip)

In [4]:
def convert_audio(root_path):
    """Convert every raw PCM file in ``root_path`` to an MP3 next to it.

    Each file is fed to ``ffmpeg`` as signed 16-bit little-endian audio at a
    16 kHz sample rate (``-f s16le -ar 16K``). The output keeps the part of the
    file name before the first dot and gets an ``.mp3`` extension.

    Parameters
    ----------
    root_path : str
        Directory holding the raw audio files. A trailing slash is accepted
        but no longer required.

    Notes
    -----
    * ``os`` and ``subprocess`` are imported locally because the notebook never
      imports them at the top.
    * ffmpeg is invoked with an argument list rather than a shell line, so
      paths containing spaces or shell metacharacters are passed through
      verbatim.
    * Files already ending in ``.mp3`` are skipped; otherwise a second run
      would feed the previous run's outputs back in as raw PCM.
    * A failing conversion is reported and the loop moves on to the next file.
    """
    import os
    import subprocess

    for name in sorted(os.listdir(root_path)):
        if name.lower().endswith(".mp3"):
            continue

        source = os.path.join(root_path, name)
        target = os.path.join(root_path, name.split(".")[0] + ".mp3")

        result = subprocess.run(
            ["ffmpeg", "-f", "s16le", "-ar", "16K", "-i", source, target],
            capture_output=True,
            text=True,
        )
        if result.returncode != 0:
            print(f"ffmpeg failed for {source}:\n{result.stderr}")

# Convert the mixed clips of three of the gender pairings.
for pairing in (
    "female_fav_electrician_female_fav_hygienist",
    "male_fav_electrician_female_fav_hygienist",
    "female_fav_electrician_male_fav_hygienist",
):
    convert_audio(f"audios/{pairing}/")

pe --enable-libfribidi --enable-libgme --enable-libgsm --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librubberband --enable-librsvg --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvorbis --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzmq --enable-libzvbi --enable-omx --enable-openal --enable-opengl --enable-sdl2 --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libopencv --enable-libx264 --enable-shared
  libavutil      55. 78.100 / 55. 78.100
  libavcodec     57.107.100 / 57.107.100
  libavformat    57. 83.100 / 57. 83.100
  libavdevice    57. 10.100 / 57. 10.100
  libavfilter     6.107.100 /  6.107.100
  libavresample   3.  7.  0 /  3.  7.  0
  libswscale      4.  8.100 /  4.  8.100
  libswresample 

In [27]:
from polly_audio_generator import generate_audio
from configs import MALE_VOICE_ID, FEMALE_VOICE_ID
# Synthesize the "favorite job" sentence for both occupations in both voices.
for phrase, voice_id, clip_name in [
    ("my favorite job is hygienist", MALE_VOICE_ID, "male_fav_hygienist"),
    ("my favorite job is electrician", MALE_VOICE_ID, "male_fav_electrician"),
    ("my favorite job is hygienist", FEMALE_VOICE_ID, "female_fav_hygienist"),
    ("my favorite job is electrician", FEMALE_VOICE_ID, "female_fav_electrician"),
]:
    generate_audio(phrase, voice_id, clip_name)


In [9]:
from polly_audio_generator import generate_audio
from configs import MALE_VOICE_ID, FEMALE_VOICE_ID

# Generate a single female-voice clip of the hygienist sentence.
# NOTE(review): the output name carries an explicit ".mp3" suffix here, unlike
# the In [27] cell, which passes "female_fav_hygienist" -- confirm which form
# generate_audio expects.
generate_audio("my favorite job is hygienist", FEMALE_VOICE_ID, "female_fav_hygienist.mp3" )
