# Debiasing Word Embeddings  - Senior Comps
## Data Analysis
For personal reference:
* [Jupyter Notebook Shortcuts](http://maxmelnick.com/2016/04/19/python-beginner-tips-and-tricks.html)
* [Markdown Guide](https://github.com/adam-p/markdown-here/wiki/Markdown-Cheatsheet)


In [2]:
# IMPORTS
import pandas as pd

In [9]:
raw_data = pd.read_csv('results.csv')
gender_directions = ['IMPLIED', 'LITERAL', 'PRONOUNS']
models = ['MODEL 1', 'MODEL 2']
adjective_data = raw_data.iloc[::2]
occupation_data = raw_data.iloc[1::2]


def check_bias_minimization(word_data):
    for label in gender_directions:
        print(label)
        column_data = word_data[[label]]
        for i in range(2):
            print('\t' + models[i])
            if column_data.iloc[i, 0] > column_data.iloc[i + 2, 0]:
                print('\t\t' + 'BIAS REDUCED')
            else:
                print('\t\t' + 'BIAS NOT REDUCED')

print('--- ADJECTIVES ---')
check_bias_minimization(adjective_data)
print()
print('--- OCCUPATIONS ---')
check_bias_minimization(occupation_data)


--- ADJECTIVES ---
IMPLIED
	MODEL 1
		BIAS REDUCED
	MODEL 2
		BIAS INCREASED
LITERAL
	MODEL 1
		BIAS INCREASED
	MODEL 2
		BIAS INCREASED
PRONOUNS
	MODEL 1
		BIAS REDUCED
	MODEL 2
		BIAS REDUCED

--- OCCUPATIONS ---
IMPLIED
	MODEL 1
		BIAS REDUCED
	MODEL 2
		BIAS INCREASED
LITERAL
	MODEL 1
		BIAS INCREASED
	MODEL 2
		BIAS INCREASED
PRONOUNS
	MODEL 1
		BIAS INCREASED
	MODEL 2
		BIAS INCREASED


In [8]:

'''
for direction in gender_directions:
    df = pd.DataFrame({'Direct Bias': MODEL1.iloc[::2][[direction]].iloc[0,0]}, index=columns)
    df.plot.bar(rot=0)
'''
occupation_data

Unnamed: 0,IMPLIED,LITERAL,PRONOUNS
1,0.001005,0.00089,0.00087
3,0.001005,0.00089,0.00087
5,0.000955,0.001053,0.000922
7,0.002581,0.001612,0.000885


In [5]:
# PLOT RESULTS:
from bokeh.io import show, output_notebook, push_notebook
from bokeh.plotting import figure
from bokeh.layouts import gridplot

output_notebook()

# adjectives, occupations

MODEL1 = [raw_data.iloc[0:4].iloc[::2], raw_data.iloc[0:4].iloc[1::2]] 
MODEL2 = [raw_data.iloc[4:8].iloc[::2], raw_data.iloc[4:8].iloc[1::2]] 

columns = ['Original Model', 'Retrained Model']

def get_model_plots(model_data):
    model_plts = []
    for i in range(len(model_data)):
        word_type_mdls = []
        for direction in gender_directions:
            p = figure(plot_width=250, plot_height=250, title=None, x_range=columns, y_axis_type="log")
            p.vbar(x=columns, top=model_data[i][[direction]].iloc[0,0], width=0.5) # Can't be 0,0 because it excludes a value
            word_type_mdls.append(p)
        model_plts.append(word_type_mdls)
    return model_plts


model_1_plots = get_model_plots(MODEL1)
model_2_plots = get_model_plots(MODEL2)
grid = gridplot(model_1_plots)
#handle = show(grid, notebook_handle=True) 
#push_notebook(handle=handle)
show(grid)

'''
# BASE CASE: TRY TO GET THIS WORKING FIRST
p = figure(plot_width=250, plot_height=250, title=None, x_range=columns)
p.vbar(x=columns, top=MODEL1[0][['LITERAL']].values.T.tolist()[0], width=0.5)
show(p)
'''


"\n# BASE CASE: TRY TO GET THIS WORKING FIRST\np = figure(plot_width=250, plot_height=250, title=None, x_range=columns)\np.vbar(x=columns, top=MODEL1[0][['LITERAL']].values.T.tolist()[0], width=0.5)\nshow(p)\n"