In [296]:
from configs import OCCUPATION_LIST
from itertools import combinations
import pandas as pd 
import random
import numpy as np
from scipy import interpolate
from scipy.optimize import bisect

def find_pse(occ, threshold=0.5, data_dir='data'):
    """Find the rank at which the interpolated ``changepoints`` curve crosses ``threshold``.

    Reads ``{data_dir}/{occ}.feather``, interpolates the ``changepoints``
    column over the ``rank`` column (``interp1d``'s default linear
    interpolation), and bisects between the smallest and largest rank for
    the point where the interpolated value equals ``threshold``.
    ("PSE" presumably stands for point of subjective equality -- confirm.)

    Parameters
    ----------
    occ : str
        Stem of the feather file, e.g. ``"female_technician_male_accountant"``.
    threshold : float, default 0.5
        Value of the interpolated curve to solve for.
    data_dir : str, default 'data'
        Directory that contains the ``.feather`` files.

    Returns
    -------
    float
        A rank at which the interpolated curve equals ``threshold``. If the
        curve crosses the threshold more than once, bisection converges to
        one of the crossings.

    Raises
    ------
    ValueError
        From ``scipy.optimize.bisect`` when ``changepoints - threshold`` has
        the same sign at the minimum and maximum rank (no bracketed root).
    """
    df = pd.read_feather(f'{data_dir}/{occ}.feather')
    ranks = df['rank']
    # Shift the curve down by the threshold so the crossing becomes a root.
    shifted_curve = interpolate.interp1d(ranks, np.array(df['changepoints']) - threshold)
    return bisect(shifted_curve, ranks.min(), ranks.max())


In [300]:
# Every unordered pair of occupations, in the order itertools yields them.
occupation_combinations = list(combinations(OCCUPATION_LIST, 2))

# Summary column -> (prefix for the first occupation, prefix for the second).
# Insertion order fixes both the column order and the order of find_pse calls.
_pse_prefixes = {
    "ff_pse": ("female", "female"),
    "mm_pse": ("male", "male"),
    "fm_pse": ("female", "male"),
    "mf_pse": ("male", "female"),
}

summary = {"occupation": [], **{column: [] for column in _pse_prefixes}}

for lhs, rhs in occupation_combinations:
    summary["occupation"].append(f"{lhs}_{rhs}")
    for column, (lhs_prefix, rhs_prefix) in _pse_prefixes.items():
        summary[column].append(find_pse(f"{lhs_prefix}_{lhs}_{rhs_prefix}_{rhs}"))



In [302]:
# One row per occupation pair; columns come from the keys of `summary`.
summary_df = pd.DataFrame(summary)

In [303]:
# Show the per-pair PSE table (pandas elides the middle rows in the rendered output).
summary_df

Unnamed: 0,occupation,ff_pse,mm_pse,fm_pse,mf_pse
0,technician_accountant,-1.0,-5.0,-4.0,-1.0
1,technician_supervisor,-5.0,-5.0,-5.0,0.0
2,technician_engineer,-1.0,1.0,-1.0,1.0
3,technician_worker,3.0,3.5,-1.0,4.0
4,technician_educator,0.0,2.0,7.0,-2.0
...,...,...,...,...,...
1220,programmer_hygienist,-3.5,-2.0,-3.5,1.0
1221,programmer_scientist,-1.0,-3.5,-5.0,1.0
1222,paralegal_hygienist,-11.0,-2.0,-4.0,-1.0
1223,paralegal_scientist,0.0,-7.0,-5.0,-1.0


In [304]:
# 'did' = (ff - mm) - (fm - mf): the same-gender PSE gap minus the mixed-gender PSE gap.
same_gender_gap = summary_df['ff_pse'].sub(summary_df['mm_pse'])
mixed_gender_gap = summary_df['fm_pse'].sub(summary_df['mf_pse'])
summary_df['did'] = same_gender_gap.sub(mixed_gender_gap)

In [305]:
# Re-display the table, now including the 'did' column.
summary_df

Unnamed: 0,occupation,ff_pse,mm_pse,fm_pse,mf_pse,did
0,technician_accountant,-1.0,-5.0,-4.0,-1.0,7.0
1,technician_supervisor,-5.0,-5.0,-5.0,0.0,5.0
2,technician_engineer,-1.0,1.0,-1.0,1.0,0.0
3,technician_worker,3.0,3.5,-1.0,4.0,4.5
4,technician_educator,0.0,2.0,7.0,-2.0,-11.0
...,...,...,...,...,...,...
1220,programmer_hygienist,-3.5,-2.0,-3.5,1.0,3.0
1221,programmer_scientist,-1.0,-3.5,-5.0,1.0,8.5
1222,paralegal_hygienist,-11.0,-2.0,-4.0,-1.0,-6.0
1223,paralegal_scientist,0.0,-7.0,-5.0,-1.0,11.0


In [326]:
# 'ff_mm' = ff_pse - mm_pse, the first half of the 'did' expression kept as its own column.
summary_df['ff_mm'] = summary_df['ff_pse'].sub(summary_df['mm_pse'])

In [306]:
# `hist` is not imported in any visible cell (it only exists under %pylab-style
# namespace pollution), so plot through pandas to keep the cell runnable on a
# fresh kernel. bins=10 matches the matplotlib default the bare call used.
ax = summary_df['did'].plot.hist(bins=10, title="Distribution of did across occupation pairs")
ax.set_xlabel("did");

In [307]:
# Rank the occupation pairs from the most negative to the most positive 'did'.
summary_df.sort_values(by="did", ascending=True)

Unnamed: 0,occupation,ff_pse,mm_pse,fm_pse,mf_pse,did
258,educator_teacher,-5.0,9.0,7.0,-5.0,-26.0
203,worker_janitor,-7.0,5.0,4.0,-3.0,-19.0
1199,nutritionist_baker,-2.0,11.0,7.0,3.0,-17.0
644,librarian_lawyer,-14.0,3.0,3.0,3.0,-17.0
405,mechanic_manager,-5.0,1.0,3.0,-7.0,-16.0
...,...,...,...,...,...,...
347,counselor_plumber,3.0,-7.0,-1.0,7.0,18.0
53,accountant_clerk,5.0,-7.0,-5.0,3.0,20.0
590,salesperson_baker,7.0,-14.0,7.0,9.0,23.0
944,specialist_baker,7.0,-14.0,5.0,7.0,23.0


In [342]:
# Build the 'ff-(fm-mf)' column inline so this cell does not rely on another
# cell having created it first; `assign` returns a copy and leaves summary_df
# untouched.
# str.contains('') matches every row -- put a substring in the pattern to
# narrow the view to particular occupations.
(
    summary_df
    .assign(**{
        'ff-(fm-mf)': summary_df['ff_pse'] - (summary_df['fm_pse'] - summary_df['mf_pse'])
    })
    .loc[lambda frame: frame.occupation.str.contains('')]
    .sort_values("ff-(fm-mf)")
)

Unnamed: 0,occupation,ff_pse,mm_pse,fm_pse,mf_pse,did,ff_mm,ff-(fm-mf)
258,educator_teacher,-5.00,9.0,7.00,-5.00,-26.00,-14.00,-17.00
389,inspector_instructor,-11.00,-11.0,-7.00,-11.00,-4.00,0.00,-15.00
405,mechanic_manager,-5.00,1.0,3.00,-7.00,-16.00,-6.00,-15.00
203,worker_janitor,-7.00,5.0,4.00,-3.00,-19.00,-12.00,-14.00
644,librarian_lawyer,-14.00,3.0,3.00,3.00,-17.00,-17.00,-14.00
...,...,...,...,...,...,...,...,...
305,clerk_plumber,-1.00,5.0,-7.00,9.00,10.00,-6.00,15.00
318,clerk_programmer,3.00,3.0,-5.25,7.00,12.25,0.00,15.25
32,technician_plumber,7.00,1.0,-5.00,4.00,15.00,6.00,16.00
1072,practitioner_plumber,8.75,0.0,-3.00,5.25,17.00,8.75,17.00


In [336]:
# 'ff-(fm-mf)' = ff_pse minus the mixed-gender gap (fm_pse - mf_pse).
mixed_gap = summary_df['fm_pse'] - summary_df['mf_pse']
summary_df['ff-(fm-mf)'] = summary_df['ff_pse'] - mixed_gap

In [343]:
# Inspect the raw rows behind one pair: find_pse reads the `rank` and
# `changepoints` columns of files like this one.
pd.read_feather("data/male_inspector_male_instructor.feather")

Unnamed: 0,filename,value,inputTranscript,interpretations,sessionId,res,rank,changepoints
0,male_inspector_male_instructor-14.pcm,inspector,"""my occupation is inspector""","[{""nluConfidence"":{""score"":1.0},""intent"":{""nam...",74dffea9-e434-4804-b41d-aaa21881595f,{'ResponseMetadata': {'RequestId': '07a576c6-7...,-14,0.0
1,male_inspector_male_instructor-12.pcm,inspector,"""my occupation is inspector""","[{""nluConfidence"":{""score"":1.0},""intent"":{""nam...",c1e2906d-006b-4dcc-9dff-da3f12c1b620,{'ResponseMetadata': {'RequestId': '2f3432bc-9...,-12,0.0
2,male_inspector_male_instructor-10.pcm,instructor,"""my occupation is instructor""","[{""nluConfidence"":{""score"":1.0},""intent"":{""nam...",15fc808f-2317-467c-80dd-82728fc85b70,{'ResponseMetadata': {'RequestId': '8dbf076f-7...,-10,1.0
3,male_inspector_male_instructor-08.pcm,instructor,"""my occupation is instructor""","[{""nluConfidence"":{""score"":1.0},""intent"":{""nam...",0e8f9e2c-46af-41cf-8352-816bb8ce465b,{'ResponseMetadata': {'RequestId': '90d2377d-5...,-8,1.0
4,male_inspector_male_instructor-06.pcm,instructor,"""my occupation is instructor""","[{""nluConfidence"":{""score"":1.0},""intent"":{""nam...",0fea3ebb-5c9a-4977-8d63-0b3add15fe40,{'ResponseMetadata': {'RequestId': '915f0606-4...,-6,1.0
5,male_inspector_male_instructor-04.pcm,instructor,"""my occupation is instructor""","[{""nluConfidence"":{""score"":1.0},""intent"":{""nam...",e84ae6b1-8f11-44cc-b586-b45e65b13ad9,{'ResponseMetadata': {'RequestId': '595821fa-a...,-4,1.0
6,male_inspector_male_instructor-02.pcm,instructor,"""my occupation is instructor""","[{""nluConfidence"":{""score"":1.0},""intent"":{""nam...",42b7c047-0384-4d3a-9221-2732b126ab4c,{'ResponseMetadata': {'RequestId': '8a7b3fb1-8...,-2,1.0
7,male_inspector_male_instructor+00.pcm,instructor,"""my occupation is instructor""","[{""nluConfidence"":{""score"":1.0},""intent"":{""nam...",134412a2-d5ae-4b0a-96b2-0d690961d57c,{'ResponseMetadata': {'RequestId': '43cb61b3-0...,0,1.0
8,male_inspector_male_instructor+02.pcm,instructor,"""my occupation is instructor""","[{""nluConfidence"":{""score"":1.0},""intent"":{""nam...",d62710a8-29c6-4970-9df8-9c8c55a841da,{'ResponseMetadata': {'RequestId': '9dcfbdc3-4...,2,1.0
9,male_inspector_male_instructor+04.pcm,instructor,"""my occupation is instructor""","[{""nluConfidence"":{""score"":1.0},""intent"":{""nam...",4d13cb75-95bd-4e23-9182-ab1111bfa3fd,{'ResponseMetadata': {'RequestId': '3f042723-4...,4,1.0


In [344]:
# NOTE(review): project-local import placed mid-notebook; conventionally this
# would sit with the other imports in the first cell.
from preprocessing import generate_mixed_audio
# Presumably regenerates the mixed audio for this pair -- the helper's
# behavior and side effects are not visible here; confirm in preprocessing.
generate_mixed_audio("male_inspector","male_instructor")