In [1]:

import sys
import os

from sklearn.linear_model import SGDClassifier, LinearRegression, Lasso, Ridge
from sklearn.utils import shuffle
from sklearn.decomposition import PCA
import seaborn as sn
import random
from sklearn.metrics.pairwise import cosine_similarity
from collections import defaultdict
from sklearn.manifold import TSNE
import tqdm
import copy
from sklearn.svm import LinearSVC 

from sklearn.cross_decomposition import PLSRegression
from sklearn.decomposition import TruncatedSVD
import torch
from sklearn.linear_model import SGDClassifier

from sklearn.svm import LinearSVC

import sklearn
from sklearn.linear_model import LogisticRegression
import random
import pickle
import matplotlib.pyplot as plt
from sklearn import cluster
from sklearn import neural_network
from gensim.models.keyedvectors import Word2VecKeyedVectors
from gensim.models import KeyedVectors
import numpy as np
import warnings
import argparse
from sklearn.neural_network import MLPClassifier
from collections import defaultdict
import scipy
from scipy import stats
from scipy.stats import pearsonr
import pandas as pd
from collections import Counter

## BERT Experiment Procedure
The R-LACE experiment on BERT proceeds as follows:

- Finetune BERT to classify professions (BERT-finetuned)
	- Also, finetune BERT with two types of adversarial gender removal training (BERT-adv and BERT-mlp-adv) to use as a baseline

- Run Alg. 1 on representations
	- Take representations in last layer of BERT at the [CLS] token
	- Reduce dimensionality of representations to 300 using PCA
	- Run Alg. 1 with 5 random initializations
	
- Finetune linear profession-classification head after the projection

## Finetune BERT
In this step, BERT is fine-tuned to classify professions given a biographical text prompt. As baselines for the concept-removal experiment, the experimenters also fine-tune BERT with two kinds of adversarial head (linear and MLP) that are trained to predict gender while the gradient flowing back into BERT is reversed. Details can be found in the `finetune.py` file.

The resulting state dictionaries are saved in the `models` folder.

In [None]:
# Launch every fine-tuning run: five seeds (--run_id 0-4) for each of three
# configurations, rotating --device over 0-3 (presumably GPU indices — confirm in finetune.py).
# A trailing `&` asks the shell to background that run; a line without it blocks
# until the run returns. NOTE(review): confirm how this notebook's `!` handles `&`.

# Configuration 1: --adv 0.
!python3 finetune.py --run_id 0 --device 0 --adv 0 --opt sgd &
!python3 finetune.py --run_id 1 --device 1 --adv 0 --opt sgd &
!python3 finetune.py --run_id 2 --device 2 --adv 0 --opt sgd &
!python3 finetune.py --run_id 3 --device 3 --adv 0 --opt sgd
!sleep 10
!python3 finetune.py --run_id 4 --device 0 --adv 0 --opt sgd &

# Configuration 2: --adv 1.
!python3 finetune.py --run_id 0 --device 1 --adv 1 --opt sgd &
!python3 finetune.py --run_id 1 --device 2 --adv 1 --opt sgd &
!python3 finetune.py --run_id 2 --device 3 --adv 1 --opt sgd
!sleep 10
!python3 finetune.py --run_id 3 --device 0 --adv 1 --opt sgd &
!python3 finetune.py --run_id 4 --device 1 --adv 1 --opt sgd &

# Configuration 3: --adv 1 together with --mlp_adv 1.
!python3 finetune.py --run_id 0 --device 2 --adv 1 --mlp_adv 1 --opt sgd &
!python3 finetune.py --run_id 1 --device 3 --adv 1 --mlp_adv 1 --opt sgd
!sleep 10
!python3 finetune.py --run_id 2 --device 0 --adv 1 --mlp_adv 1 --opt sgd &
!python3 finetune.py --run_id 3 --device 1 --adv 1 --mlp_adv 1 --opt sgd &
!python3 finetune.py --run_id 4 --device 2 --adv 1 --mlp_adv 1 --opt sgd
# NOTE(review): the sleeps are fixed delays, not synchronisation. Nothing here waits
# for backgrounded runs, so encode.py may start before every checkpoint exists — verify.
!sleep 100
!python3 encode.py --device 0

## Encode & Save Representations From Final Layer

In [None]:
import sys
import os
from sklearn.utils import shuffle
import random
from collections import defaultdict
import tqdm
import copy
import torch

import sklearn
from sklearn.linear_model import LogisticRegression
import random
import pickle
import numpy as np
import argparse
from transformers import BertModel, BertTokenizer

# Default compute device read by encode(): the first CUDA GPU when one is available, else the CPU.
# The `__main__` cell further down rebinds this name from its --device argument.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
def load_bios(group, data_dir="/media/curttigges/project-files/datasets/bios/bios_data"):
    """Load the biography texts of one split from its pickle file.

    Args:
        group (str): Split name (train, dev, or test); the file read is
            ``<data_dir>/<group>.pickle``.
        data_dir (str): Directory holding the split pickles. The default keeps
            the original author's location; pass your own path on any other machine.

    Returns:
        list: The ``hard_text_untokenized`` field of every record, in file order.
    """
    pickle_path = os.path.join(data_dir, "{}.pickle".format(group))
    # NOTE: pickle.load can execute arbitrary code; only point this at trusted files.
    with open(pickle_path, "rb") as f:
        bios_data = pickle.load(f)

    return [d["hard_text_untokenized"] for d in bios_data]

In [None]:
def encode(bert, tokenizer, texts, batch_size=100):
    """Run texts through BERT in batches and collect the pooled [CLS] output.

    Each batch is tokenized (padded, truncated to 512 tokens), moved to the
    module-level ``device`` and passed through ``bert``; the model's
    ``pooler_output`` is gathered for every text. In Hugging Face's BertModel
    that is the last-layer [CLS] hidden state after the pooler's dense + tanh
    layer, not the raw hidden state.

    Args:
        bert (BertModel): A BERT model, already placed on ``device``.
        tokenizer (BertTokenizer): Tokenizer matching ``bert``.
        texts (list): Texts to encode; must not be empty.
        batch_size (int): Number of texts per forward pass. Defaults to 100.

    Returns:
        numpy.ndarray: Array of shape (len(texts), bert_hidden_size) with one
        pooled vector per text, in input order.

    Raises:
        ValueError: If ``texts`` is empty or ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")
    if len(texts) == 0:
        raise ValueError("texts must contain at least one item")

    all_H = []
    bert.eval()
    with torch.no_grad():

        print("Encoding...")
        with tqdm.tqdm(total=len(texts), ascii=True) as pbar:
            # One loop covers full batches and the shorter final batch. The earlier
            # range(0, len - batch_size, ...) + remainder split dropped the last
            # full batch whenever len(texts) was an exact multiple of batch_size.
            for start in range(0, len(texts), batch_size):
                batch_texts = texts[start : start + batch_size]

                batch_encoding = tokenizer.batch_encode_plus(
                    batch_texts, padding=True, max_length=512, truncation=True
                )
                model_inputs = {
                    key: torch.tensor(batch_encoding[key]).to(device)
                    for key in ("input_ids", "token_type_ids", "attention_mask")
                }
                H = bert(**model_inputs)["pooler_output"]
                assert len(H.shape) == 2
                all_H.append(H.detach().cpu().numpy())

                pbar.update(len(batch_texts))

    H_np = np.concatenate(all_H)
    assert len(H_np.shape) == 2
    assert len(H_np) == len(texts)
    return H_np

In [None]:
if __name__ == "__main__":
    # Runs the BIOS texts through every fine-tuned BERT checkpoint and saves the
    # pooled classification-token output of each split to disk under encodings/.
    random.seed(0)
    np.random.seed(0)

    parser = argparse.ArgumentParser(description="An argparse example")
    parser.add_argument("--device", type=int, default=-1, required=False)
    parser.add_argument("--run_id", type=int, default=-1, required=False)
    # parse_known_args instead of parse_args: inside a Jupyter kernel sys.argv
    # carries launcher options (e.g. `-f <connection file>`) that would otherwise
    # make argparse abort with SystemExit.
    args, _unrecognized = parser.parse_known_args()
    device = "cpu" if args.device == -1 else "cuda:{}".format(args.device)
    print(device)

    # Load BERT
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
    bert = BertModel.from_pretrained("bert-base-uncased")
    bert.to(device)
    bert.eval()

    # Fine-tuning type -> sub-directory used under both models/ and encodings/.
    subdir_by_type = {"adv": "linear-adv", "mlp_adv": "mlp-adv", "not-adv": "no-adv"}
    loading_message = {"adv": "Loading adv", "mlp_adv": "Loading MLP adv"}

    # Make directories
    for subdir in subdir_by_type.values():
        os.makedirs(os.path.join("encodings", subdir), exist_ok=True)

    # The texts do not depend on the checkpoint, so read each split once.
    split_texts = {mode: load_bios(mode) for mode in ["train", "dev", "test"]}

    # Loop over every checkpoint (type x seed) and encode all splits with it
    for finetuning_type, subdir in subdir_by_type.items():
        for rand_seed in range(5):
            if finetuning_type in loading_message:
                print(loading_message[finetuning_type])
            # map_location lets checkpoints saved on another GPU load on `device`.
            bert_params = torch.load(
                "models/{}/bert_{}.pt".format(subdir, rand_seed), map_location=device
            )
            bert.load_state_dict(bert_params)

            for mode, txts in split_texts.items():
                # Get model output
                H = encode(bert, tokenizer, txts)
                # Save model output
                print(H.shape)
                np.save("encodings/{}/{}_{}_cls.npy".format(subdir, mode, rand_seed), H)

## Run R-LACE on Representations
The next step is to perform R-LACE on the hidden states of the classification tokens in order to generate a projection matrix that will remove the gender representation directions. Before doing so, the experimenters use PCA to reduce the hidden representations to 300 dimensions.

### Imports

In [None]:
import sys
import os

sys.path.append("../../")
sys.path.append("../")
sys.path.append("/../")

from debias import get_debiasing_projection, get_rowspace_projection

from classifier import CovMaximizer
from sklearn.linear_model import SGDClassifier, LinearRegression, Lasso, Ridge
from sklearn.utils import shuffle
from sklearn.decomposition import PCA
import seaborn as sn
import random
from sklearn.metrics.pairwise import cosine_similarity
from collections import defaultdict
from sklearn.manifold import TSNE
import tqdm
import copy
from sklearn.svm import LinearSVC

from sklearn.cross_decomposition import PLSRegression
from sklearn.decomposition import TruncatedSVD
import torch
from sklearn.linear_model import SGDClassifier
from rlace import solve_adv_game

from sklearn.svm import LinearSVC

import sklearn
from sklearn.linear_model import LogisticRegression
import random
import pickle
import matplotlib.pyplot as plt
from sklearn import cluster
from sklearn import neural_network
from gensim.models.keyedvectors import Word2VecKeyedVectors
from gensim.models import KeyedVectors
import numpy as np
import warnings
import argparse

### Setup

In [None]:
# Seed Python's and NumPy's global RNGs before any data handling in this section.
random.seed(0)
np.random.seed(0)

# Device string handed to solve_adv_game in the R-LACE cell.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# Values of the `rank` argument swept by the R-LACE loop.
ranks = [1,4,8,16,32,50,64,100]
# Empty containers; nothing visible in this notebook writes to them.
# NOTE(review): possibly leftovers from the original script — confirm before removing.
rlace_projs = defaultdict(dict)
inlp_projs = defaultdict(dict)
# Which set of encodings to load; also embedded in the pca/, plots/ and interim/ paths below.
finetune_mode = "no-adv"

In [None]:


def set_seeds(seed):
    """Seed the NumPy, Python `random`, and PyTorch global generators with `seed`."""
    for seed_fn in (np.random.seed, random.seed, torch.manual_seed):
        seed_fn(seed)


def load_bios(group, finetune_mode, seed=None):
    """Load the encoded classification tokens and labels of one split, shuffled.

    Args:
        group (str): train, dev, or test
        finetune_mode (str): no-adv, mlp-adv, or linear-adv selects
            ``encodings/<finetune_mode>/<group>_<seed>_cls.npy``; any other value
            falls back to ``bios_data/<group>_cls.npy``.
        seed (int, optional): Random seed used for finetuning; part of the
            encoding file name. Defaults to None.

    Returns:
        tuple: (X, Y, txts, professions, bios_data) — encodings, binary gender
        labels (1 where ``g == "f"``), biography texts, profession labels and
        the raw records, all shuffled with the same permutation.

    Note:
        Reseeds the global `random` and NumPy generators with 0 on every call so
        that the shuffle is the same each time.
    """
    if finetune_mode in ("no-adv", "mlp-adv", "linear-adv"):
        encoding_path = "encodings/{}/{}_{}_cls.npy".format(finetune_mode, group, seed)
    else:
        encoding_path = "bios_data/{}_cls.npy".format(group)
    X = np.load(encoding_path)

    with open("bios_data/{}.pickle".format(group), "rb") as f:
        bios_data = pickle.load(f)

    Y = np.array([1 if d["g"] == "f" else 0 for d in bios_data])
    professions = np.array([d["p"] for d in bios_data])
    txts = [d["hard_text_untokenized"] for d in bios_data]

    # shuffle() draws from NumPy's global generator here, so the reseed fixes the permutation.
    random.seed(0)
    np.random.seed(0)
    X, Y, professions, txts, bios_data = sklearn.utils.shuffle(X, Y, professions, txts, bios_data)

    return X, Y, txts, professions, bios_data

### Load Encoded Classification Tokens

In [None]:
# Load the shuffled training split for the chosen fine-tuning mode and seed.
# NOTE(review): `args` is not defined in this section; it is created by the argparse
# cell of the encoding section, where --run_id defaults to -1 — confirm the intended
# seed is set before running this cell on a fresh kernel.
X, y, txts, professions, bios_data = load_bios("train", finetune_mode, args.run_id)
# Keep only the first 100000 examples of X and y; txts, professions and bios_data stay full length.
X, y = X[:100000], y[:100000]

### PCA

In [None]:
# Fit a 300-component PCA on the training encodings and persist it so that the
# analysis section can apply the same transform later.
if not os.path.exists("pca"):
    os.makedirs("pca")
pca = PCA(random_state=args.run_id, n_components=300)
pca.fit(X)
with open("pca/pca_{}_{}.pickle".format(finetune_mode, args.run_id), "wb") as f:
    pickle.dump(pca, f)
# Overwrites X with its PCA projection: re-running this cell without reloading X
# would fit a new PCA on already-projected data.
X = pca.transform(X)

# Project the dev split with the same PCA.
# NOTE(review): the R-LACE cell below passes (X, y) as both train and dev data, so
# X_dev / y_dev are not consumed in the code visible here.
X_dev, y_dev, txts_dev, professions_dev, bios_data_dev = load_bios("dev", finetune_mode, args.run_id)
X_dev = pca.transform(X_dev)

### R-LACE

In [None]:
# Create folders to store results and plots
# Templates filled with the run id only.
shared_dir_templates = (
    "plots/original/pca/run={}",
    "plots/original/tsne/run={}",
    "plots/inlp/pca/run={}",
    "plots/inlp/tsne/run={}",
    "plots/rlace/pca/run={}",
    "plots/rlace/tsne/run={}",
    "interim/rlace/run={}",
    "interim/inlp/run={}",
)
# Templates filled with the fine-tuning mode, then the run id.
per_mode_dir_templates = (
    "plots/{}/original/pca/run={}",
    "plots/{}/original/tsne/run={}",
    "plots/{}/inlp/pca/run={}",
    "plots/{}/inlp/tsne/run={}",
    "plots/{}/rlace/pca/run={}",
    "plots/{}/rlace/tsne/run={}",
    "interim/{}/rlace/run={}",
    "interim/{}/inlp/run={}",
)

# `random_run` stays bound after the loop; the R-LACE cell uses it in its output path.
for random_run in [args.run_id]:
    for template in shared_dir_templates:
        os.makedirs(template.format(random_run), exist_ok=True)
    for template in per_mode_dir_templates:
        os.makedirs(template.format(finetune_mode, random_run), exist_ok=True)

    set_seeds(random_run)

In [None]:
# Set up optimizers and result dictionaries
# Both dictionaries are keyed by rank and filled in by the R-LACE loop below.
Ps_rlace, accs_rlace = {}, {}

# Optimizer class and the two keyword-argument sets forwarded to solve_adv_game
# (as `optimizer_params_P` and `optimizer_params_predictor`).
optimizer_class = torch.optim.SGD
optimizer_params_P = {"lr": 0.005, "weight_decay": 1e-4, "momentum": 0.0}
optimizer_params_predictor = {"lr": 0.005, "weight_decay": 1e-5, "momentum": 0.9}

In [None]:
# Run RLACE for each rank
for rank in ranks:

    # The same (X, y) pair is passed in both the first and the second data position.
    # NOTE(review): presumably these are the train and dev arguments of solve_adv_game — confirm.
    output = solve_adv_game(X, y, X, y, rank=rank, device=device, out_iters=60000,
                                optimizer_class=optimizer_class, optimizer_params_P=optimizer_params_P,
                                optimizer_params_predictor=optimizer_params_predictor, epsilon=0.002,
                                batch_size=256)

    P = output["P"]
    Ps_rlace[rank] = P
    accs_rlace[rank] = output["score"]

    # Save resulting projection matrices
    # Rewritten after every rank, so the file always holds the results gathered so far.
    # `random_run` is the variable left over from the folder-creation loop above.
    with open("interim/{}/rlace/run={}/Ps_rlace.pickle".format(finetune_mode, random_run), "wb") as f:
        pickle.dump((Ps_rlace, accs_rlace), f)

## Apply R-LACE Projection to Model

### Imports

In [None]:
import sys
import os

from sklearn.linear_model import SGDClassifier, LinearRegression, Lasso, Ridge
from sklearn.utils import shuffle
from sklearn.decomposition import PCA
import seaborn as sn
import random
from sklearn.metrics.pairwise import cosine_similarity
from collections import defaultdict
from sklearn.manifold import TSNE
import tqdm
import copy
from sklearn.svm import LinearSVC 

from sklearn.cross_decomposition import PLSRegression
from sklearn.decomposition import TruncatedSVD
import torch
from sklearn.linear_model import SGDClassifier

from sklearn.svm import LinearSVC

import sklearn
from sklearn.linear_model import LogisticRegression
import random
import pickle
import matplotlib.pyplot as plt
from sklearn import cluster
from sklearn import neural_network
from gensim.models.keyedvectors import Word2VecKeyedVectors
from gensim.models import KeyedVectors
import numpy as np
import warnings
import argparse
from sklearn.neural_network import MLPClassifier
from collections import defaultdict
import scipy
from scipy import stats
from scipy.stats import pearsonr
import pandas as pd
from collections import Counter

### Loading Functions

In [None]:
def load_bios(group):
    """Read one split of the bios dataset and pull out labels and texts.

    Args:
        group (str): split name; the file read is ``bios_data/<group>.pickle``
            (this notebook calls it with "train", "dev" and "test")

    Returns:
        z (np.array): gender labels, 1 where the record's "g" is "f" and 0 otherwise
        txts (list): the "hard_text_untokenized" field of every record
        professions (np.array): the "p" (profession) field of every record
        bios_data (list): the unpickled records themselves
    """
    with open("bios_data/{}.pickle".format(group), "rb") as handle:
        bios_data = pickle.load(handle)

    gender_flags, profession_labels, txts = [], [], []
    for record in bios_data:
        gender_flags.append(1 if record["g"] == "f" else 0)
        profession_labels.append(record["p"])
        txts.append(record["hard_text_untokenized"])

    return np.array(gender_flags), txts, np.array(profession_labels), bios_data

def load_bios_representations(group, finetune_mode, seed=0):
    """Load saved classification-token encodings and project them with the stored PCA.

    Args:
        group (str): split name used in the encoding file name
        finetune_mode (str): sub-directory of ``encodings/``; "freezed" uses a file
            name without a seed, every other mode includes the seed
        seed (int, optional): seed in the encoding and PCA file names. Defaults to 0.

    Returns:
        X (np.array): the encodings after ``transform`` of the PCA object unpickled
            from ``pca/pca_<finetune_mode>_<seed>.pickle``
    """
    encoding_path = (
        "encodings//{}/{}_cls.npy".format(finetune_mode,group)
        if finetune_mode == "freezed"  # only 1 random seed for the pretrained bert
        else "encodings/{}/{}_{}_cls.npy".format(finetune_mode, group, seed)
    )
    raw_encodings = np.load(encoding_path)

    # Reuse the PCA that was fitted and pickled by the R-LACE section.
    with open("pca/pca_{}_{}.pickle".format(finetune_mode, seed), "rb") as pca_file:
        fitted_pca = pickle.load(pca_file)

    return fitted_pca.transform(raw_encodings)

def load_projections(proj_type, finetune_mode, seed=0):
    """Unpickle the stored projection results for one projection type, mode and seed.

    Args:
        proj_type (str): projection type (e.g. "inlp", "rlace"); used both as a
            directory name and in the file name ``Ps_<proj_type>.pickle``
        finetune_mode (str): directory under ``interim/``
        seed (int, optional): run id in the ``run=<seed>`` directory. Defaults to 0.

    Returns:
        The unpickled object. The R-LACE cell of this notebook stores a
        ``(projections_by_rank, scores_by_rank)`` tuple in this location.
    """
    pickle_path = "interim/{}/{}/run={}/Ps_{}.pickle".format(finetune_mode,proj_type,seed,proj_type)
    with open(pickle_path, "rb") as handle:
        return pickle.load(handle)

In [None]:
# Load bios data
z_train,txts_train,professions_train,train = load_bios("train")
z_test,txts_test,professions_test,test = load_bios("test")
z_dev,txts_dev,professions_dev,dev = load_bios("dev")

# Load encoded representations
# Reading the cached pickles was switched off in the original cell (`if False and ...`);
# the flag keeps that behavior while making it visible. Set it to True to reuse the cache.
use_cached_analysis = False
cache_paths = ("analysis/mode2x.pickle", "analysis/mode2p.pickle")

if use_cached_analysis and all(os.path.exists(path) for path in cache_paths):
    with open("analysis/mode2p.pickle", "rb") as f:
        mode2p = pickle.load(f)
    with open("analysis/mode2x.pickle", "rb") as f:
        mode2x = pickle.load(f)

else:
    os.makedirs("analysis", exist_ok=True)
    mode2x = defaultdict(dict)  # mode -> group -> seed -> PCA-projected encodings
    mode2p = defaultdict(dict)  # mode -> seed -> projection type -> unpickled projections

    for mode in ["freezed", "linear-adv", "mlp-adv", "no-adv"]:
        num_seeds = 1 if mode == "freezed" else 5

        for group in ["train", "dev", "test"]:
            mode2x[mode][group] = {}
            for seed in range(num_seeds):
                print(mode, group, seed)
                mode2x[mode][group][seed] = load_bios_representations(group, mode, seed=seed)

        # Projections depend only on (mode, seed), so load them once per seed
        # instead of once for every split.
        for seed in range(num_seeds):
            mode2p[mode][seed] = {
                projtype: load_projections(projtype, mode, seed=seed)
                for projtype in ["rlace", "inlp"]
            }

    with open("analysis/mode2x.pickle", "wb") as f:
        pickle.dump(mode2x, f)
    with open("analysis/mode2p.pickle", "wb") as f:
        pickle.dump(mode2p, f)

### Fine-tune Profession and Gender Classifiers

In [None]:
# Re-read the representations and projections written by the previous cell, so the
# classifier cells can start from the pickles on disk.
with open("analysis/mode2x.pickle", "rb") as f:
        mode2x = pickle.load(f)
with open("analysis/mode2p.pickle", "rb") as f:
        mode2p = pickle.load(f)

In [None]:
def train_classifier(X,y,X_dev, y_dev, X_test,y_test):
    """Fit a logistic-regression probe and report its dev and test accuracy.

    The global `random` and NumPy generators are reseeded with 0 before fitting.
    The probe is an L2-penalised multinomial LogisticRegression using the saga
    solver, capped at 10 iterations.

    Args:
        X (np.array): training data
        y (np.array): training labels
        X_dev (np.array): dev data
        y_dev (np.array): dev labels
        X_test (np.array): test data
        y_test (np.array): test labels

    Returns:
        clf (sklearn classifier): the fitted classifier
        score_dev (float): ``clf.score`` on the dev set
        score_test (float): ``clf.score`` on the test set
    """
    for reseed in (random.seed, np.random.seed):
        reseed(0)

    probe_settings = dict(
        warm_start=True,
        penalty='l2',
        solver="saga",
        multi_class='multinomial',
        fit_intercept=True,
        verbose=5,
        n_jobs=64,
        random_state=1,
        max_iter=10,
    )
    clf = LogisticRegression(**probe_settings)
    clf.fit(X, y)

    dev_accuracy = clf.score(X_dev, y_dev)
    test_accuracy = clf.score(X_test, y_test)
    return clf, dev_accuracy, test_accuracy

random.seed(0)
np.random.seed(0)

# mode -> seed -> do_projection -> result record (or projtype -> rank -> record when projecting)
prof_clfs = defaultdict(dict)
gender_clfs = defaultdict(dict)


def _as_record(clf, dev_score, test_score):
    """Bundle a fitted classifier with its dev and test accuracy."""
    return {"clf": clf, "dev_score": dev_score, "test_score": test_score}


for mode in ["freezed", "linear-adv", "mlp-adv", "no-adv"]:
    num_seeds = 5 if mode != "freezed" else 1
    for seed in range(num_seeds):
        prof_clfs[mode][seed] = {}
        gender_clfs[mode][seed] = {}
        print("============================")
        print("mode:", mode, "seed:", seed)
        for do_projection in [False, True]:

            X_train = mode2x[mode]["train"][seed]
            X_dev = mode2x[mode]["dev"][seed]
            X_test = mode2x[mode]["test"][seed]

            if not do_projection:
                # Baseline: probes trained on the unprojected representations.
                prof_clf, prof_score_dev, prof_score_test = train_classifier(
                    X_train, professions_train, X_dev, professions_dev, X_test, professions_test
                )
                prof_clfs[mode][seed][do_projection] = _as_record(prof_clf, prof_score_dev, prof_score_test)

                gender_clf, gender_score_dev, gender_score_test = train_classifier(
                    X_train, z_train, X_dev, z_dev, X_test, z_test
                )
                gender_clfs[mode][seed][do_projection] = _as_record(gender_clf, gender_score_dev, gender_score_test)
                print("here", gender_score_dev, prof_score_dev)
                continue

            # Projections are only applied to the non-adversarial modes.
            if mode in ["linear-adv", "mlp-adv"]:
                continue

            prof_clfs[mode][seed][do_projection] = defaultdict(dict)
            gender_clfs[mode][seed][do_projection] = defaultdict(dict)

            for projtype in ["rlace", "inlp"]:
                for rank in [1, 4, 8, 16, 32, 50, 64, 100]:
                    print(projtype, rank)
                    # Element 0 of the stored object holds the projections; "rlace" is
                    # looked up by rank, "inlp" by rank - 1.
                    projection_key = rank if projtype == "rlace" else rank-1
                    P = mode2p[mode][seed][projtype][0][projection_key]
                    X_train_proj, X_dev_proj, X_test_proj = X_train@P, X_dev@P, X_test@P

                    prof_clf, prof_score_dev, prof_score_test = train_classifier(
                        X_train_proj, professions_train, X_dev_proj, professions_dev, X_test_proj, professions_test
                    )
                    prof_clfs[mode][seed][do_projection][projtype][rank] = _as_record(prof_clf, prof_score_dev, prof_score_test)

                    gender_clf, gender_score_dev, gender_score_test = train_classifier(
                        X_train_proj, z_train, X_dev_proj, z_dev, X_test_proj, z_test
                    )
                    gender_clfs[mode][seed][do_projection][projtype][rank] = _as_record(gender_clf, gender_score_dev, gender_score_test)
                    print("here here", gender_score_dev, prof_score_dev)

In [None]:
# Persist the trained probes and their scores so the accuracy tables below can be
# rebuilt without retraining.
with open("analysis/gender_clfs.pickle", "wb") as f:
    pickle.dump(gender_clfs, f)
    
with open("analysis/prof_clfs.pickle", "wb") as f:
    pickle.dump(prof_clfs, f)

### Record Classifier Accuracy vs. Rank

In [None]:
# Reload the probe results written by the training section.
with open("analysis/gender_clfs.pickle", "rb") as f:
    gender_clfs = pickle.load(f)
    
with open("analysis/prof_clfs.pickle", "rb") as f:
    prof_clfs = pickle.load(f)

In [None]:
def create_df(clf_dict, mode, projtype, do_projection):
    """Tabulate test accuracy per seed, with the mean and standard deviation across seeds.

    Args:
        clf_dict (dict): nested results, ``clf_dict[mode][seed][do_projection]``;
            with projection this is further indexed by ``[projtype][rank]``. Each
            leaf is a dict holding a "test_score" entry.
        mode (str): fine-tuning mode; "freezed" has one seed, every other mode five.
        projtype (str): projection type; only read when ``do_projection`` is true.
        do_projection (bool): whether to read the per-rank projected results.

    Returns:
        pd.DataFrame: one row per rank with columns "rank", one column per seed,
        "avg" and "std". Without projection there is a single score per seed; it
        is repeated over ``len(ranks)`` rows and "rank" then holds the row
        position 0..7 rather than an actual rank.
    """
    num_seeds = 1 if mode == "freezed" else 5
    ranks = [1, 4, 8, 16, 32, 50, 64, 100]

    if do_projection:
        scores = pd.DataFrame(
            {
                seed: {
                    rank: clf_dict[mode][seed][do_projection][projtype][rank]["test_score"]
                    for rank in ranks
                }
                for seed in range(num_seeds)
            }
        )
    else:
        # Repeat the scalar explicitly; relying on pandas to stretch a length-1 list
        # over a longer index raises in current pandas versions.
        scores = pd.DataFrame(
            {
                seed: [clf_dict[mode][seed][do_projection]["test_score"]] * len(ranks)
                for seed in range(num_seeds)
            },
            index=range(len(ranks)),
        )

    df = scores.copy()
    # Both statistics are taken over the seed columns only. Computing std after
    # "avg" had been added to the frame used to count the mean as an extra sample.
    df["avg"] = scores.mean(numeric_only=True, axis=1)
    # A single seed has no sample standard deviation; report 0.0 so plots get a valid yerr.
    df["std"] = scores.std(numeric_only=True, axis=1) if num_seeds > 1 else 0.0
    return df.rename_axis("rank").reset_index()

def get_maj(Y):
    """Return the majority-class share of the labels in ``Y``.

    Args:
        Y (iterable): hashable class labels.

    Returns:
        float: count of the most frequent label divided by the number of labels.
        Works for any number of classes, including a single one.

    Raises:
        ValueError: If ``Y`` is empty.
    """
    label_counts = Counter(Y)
    if not label_counts:
        raise ValueError("Y must contain at least one label")
    return max(label_counts.values()) / sum(label_counts.values())

def plot(df_rlace, df_inlp, xlabel, ylabel, filename, baseline=None, baseline_label=None):
    """Plot mean accuracy against rank for R-LACE and INLP and save the figure.

    Args:
        df_rlace (pd.DataFrame): frame with "rank", "avg" and "std" columns for R-LACE.
        df_inlp (pd.DataFrame): frame with the same columns for INLP.
        xlabel (str): x-axis label.
        ylabel (str): y-axis label.
        filename (str): file name written inside ``analysis/analysis-results/``.
        baseline (float, optional): y value of a dashed horizontal reference line;
            drawn whenever it is not None (including 0.0).
        baseline_label (str, optional): legend label of the reference line.
    """
    sn.set()

    fig, ax = plt.subplots()
    plt.rcParams['font.family'] = 'Sans'

    df_rlace.plot('rank', 'avg', yerr='std', ax=ax, label="RLACE (ours)", marker="*")
    df_inlp.plot('rank', 'avg', yerr='std', ax=ax, label="INLP", marker="*")

    ax.set_ylabel(ylabel, fontsize=18)
    ax.set_xlabel(xlabel, fontsize=18)
    # `is not None` rather than truthiness, so that a baseline of 0.0 is still drawn.
    if baseline is not None:
        ax.axhline(baseline, label=baseline_label, color = "black", linestyle="--")

    ax.legend(fontsize=18)
    ax.tick_params(axis="y", labelsize=18)
    fig.subplots_adjust(bottom=0.17, left=0.15)

    # savefig does not create missing directories; make sure the target exists.
    os.makedirs("analysis/analysis-results", exist_ok=True)
    fig.savefig("analysis/analysis-results/{}".format(filename), dpi = 700)

    plt.show()

In [None]:
# mode -> projection type -> accuracy table, for the profession and gender probes.
mode2proj2profdf = defaultdict(dict)
mode2proj2genderdf = defaultdict(dict)

for mode in ["no-adv", "mlp-adv", "freezed", "linear-adv"]:
    for projtype in ["rlace", "inlp", "none"]:
        uses_projection = projtype != "none"
        # The adversarial modes only have unprojected ("none") results.
        if uses_projection and mode in ["mlp-adv", "linear-adv"]:
            continue

        for clf_results, table in (
            (prof_clfs, mode2proj2profdf),
            (gender_clfs, mode2proj2genderdf),
        ):
            df = create_df(clf_results, mode, projtype, uses_projection)
            table[mode][projtype] = df
                

In [None]:
# Gender-probe accuracy after projection for the "no-adv" fine-tuned model, with the
# majority-class share of the test gender labels as a dashed reference line.
plot(mode2proj2genderdf["no-adv"]["rlace"], mode2proj2genderdf["no-adv"]["inlp"], "Dimensions removed", "Post-Projection Accuracy", "gender-finetuned.pdf", 
     get_maj(z_test),
    baseline_label="Majority")

In [None]:
# Same plot for the "freezed" mode (a single seed), against the same majority-class reference.
plot(mode2proj2genderdf["freezed"]["rlace"], mode2proj2genderdf["freezed"]["inlp"], "Dimensions removed", "Post-Projection Accuracy", "gender-freezed.pdf", 
     get_maj(z_test),
    baseline_label="Majority")