In [21]:
# Add necessary imports
!pip install transformers google-api-python-client

from googleapiclient import discovery
import json

import pandas as pd
import seaborn as sns
import numpy as np
import random
import pickle
import time

import torch
from torch.utils.data import Dataset, DataLoader, random_split, RandomSampler, SequentialSampler
torch.manual_seed(42)

from transformers import GPT2LMHeadModel,  GPT2Tokenizer, GPT2Config
from transformers import AdamW, get_linear_schedule_with_warmup

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/share/pkg.7/python3/3.8.10/install/bin/python3.8 -m pip install --upgrade pip' command.[0m


In [2]:
drive_file_path = "./"


# Helper function to load a pickle format file
def open_pickle_path(filename):
    with open(drive_file_path + filename , 'rb') as pickle_file:
        content = pickle.load(pickle_file)
        return content


In [3]:
# Load Bold dataset prompts: profession, gender, race, religion, and political ideology

# Dictionary format where key is the subgroup and the value is a list of sentences 
gender_dict = open_pickle_path("gend.p")
political_dict = open_pickle_path("poli.p")
profession_dict = open_pickle_path("prof.p")
race_dict = open_pickle_path("race.p")
religion_dict = open_pickle_path("reli.p")


In [4]:
# Let's look at the subgroups
def print_subgroup_keys(d):
    for key, value in d.items():
        print(key)

print("Gender groups:")
print_subgroup_keys(gender_dict)
print("----", "\n")

print("Political groups:")
print_subgroup_keys(political_dict)
print("----", "\n")

print("Profession groups:")
print_subgroup_keys(profession_dict)
print("----", "\n")

print("Race groups:")
print_subgroup_keys(race_dict)
print("----", "\n")

print("Religion groups:")
print_subgroup_keys(religion_dict)
print("----", "\n")

Gender groups:
American_actors
American_actresses
---- 

Political groups:
left-wing
right-wing
communism
socialism
democracy
liberalism
populism
conservatism
nationalism
anarchism
capitalism
fascism
---- 

Profession groups:
metalworking_occupations
sewing_occupations
healthcare_occupations
computer_occupations
film_and_television_occupations
artistic_occupations
scientific_occupations
entertainer_occupations
dance_occupations
nursing_specialties
writing_occupations
professional_driver_types
engineering_branches
mental_health_occupations
theatre_personnel
corporate_titles
industrial_occupations
railway_industry_occupations
---- 

Race groups:
Asian_Americans
African_Americans
European_Americans
Hispanic_and_Latino_Americans
---- 

Religion groups:
judaism
christianity
islam
hinduism
buddhism
sikhism
atheism
---- 



We are using the GPT-2 Model so we need to initialize the tokenizer and our model

In [5]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2').cuda()

Downloading:   0%|          | 0.00/0.99M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/523M [00:00<?, ?B/s]

In terms of different generation techniques, we can use Greedy Search, which selects the next word in our language model generation based on the highest probability, or an advanced version of this, called Beam Search. Beam Search reduces the risk of missing hidden high probability word sequences by keeping the most likely num_beams of hypotheses at each time step and eventually choosing the hypothesis that has the overall highest probability.

However, both of these techniques prove to be worse for language generation, but rather using a sampling technique, such as Top-K or Top-P which involves using a probability distribution where we either take the top-k words in terms of probabilites and use a distribution to select or the top probablities that add up to a value p.

We will explore both Top-K and Top-P below

In [38]:
# Top-K sampling

def generate_sentences_top_k(s, max_length, top_k):
    inputs = tokenizer.encode(s, return_tensors='pt').to(model.device)
    size = inputs.nelement()
    outputs = model.generate(
        inputs,
        do_sample=True,          # Randomly sample from the logits instead of greedily picking next word with highest probability
        top_k=top_k,                 # Only sample from the top 50 most likely words; will adjust this
        repetition_penalty=1.2,    # Downweights the probability of all previously generated tokens by a factor of 1.2
        max_length=max_length,              # Generate for a maximum of 64 tokens
        pad_token_id=tokenizer.eos_token_id
    ).to(model.device)
    return [tokenizer.decode(x) for x in outputs][0].strip()

In [81]:
def create_dict_with_generated(d, max_length, top_k):
    generated_d = {}
    for key, val in d.items():
        # Create generations for each sentence
        generated_sentences = [generate_sentences_top_k(sentence, max_length, top_k) for sentence in val if sentence != ""]
        # Convert two-d array of generated sentences into 1d list
        generated_d[key] = generated_sentences
    return generated_d

In [8]:
generated_gender_dict = create_dict_with_generated(gender_dict, 32, 50)

In [9]:
generated_political_dict = create_dict_with_generated(political_dict, 32, 50)

Input length of input_ids is 46, but ``max_length`` is set to 32. This can lead to unexpected behavior. You should consider increasing ``config.max_length`` or ``max_length``.
Input length of input_ids is 46, but ``max_length`` is set to 32. This can lead to unexpected behavior. You should consider increasing ``config.max_length`` or ``max_length``.
Input length of input_ids is 99, but ``max_length`` is set to 32. This can lead to unexpected behavior. You should consider increasing ``config.max_length`` or ``max_length``.
Input length of input_ids is 44, but ``max_length`` is set to 32. This can lead to unexpected behavior. You should consider increasing ``config.max_length`` or ``max_length``.


In [10]:
generated_religion_dict = create_dict_with_generated(religion_dict, 32, 50)

In [11]:
generated_profession_dict = create_dict_with_generated(profession_dict, 32, 50)


Input length of input_ids is 33, but ``max_length`` is set to 32. This can lead to unexpected behavior. You should consider increasing ``config.max_length`` or ``max_length``.
Input length of input_ids is 45, but ``max_length`` is set to 32. This can lead to unexpected behavior. You should consider increasing ``config.max_length`` or ``max_length``.


In [12]:
generated_race_dict = create_dict_with_generated(race_dict, 32, 50)

Input length of input_ids is 85, but ``max_length`` is set to 32. This can lead to unexpected behavior. You should consider increasing ``config.max_length`` or ``max_length``.
Input length of input_ids is 39, but ``max_length`` is set to 32. This can lead to unexpected behavior. You should consider increasing ``config.max_length`` or ``max_length``.
Input length of input_ids is 33, but ``max_length`` is set to 32. This can lead to unexpected behavior. You should consider increasing ``config.max_length`` or ``max_length``.
Input length of input_ids is 36, but ``max_length`` is set to 32. This can lead to unexpected behavior. You should consider increasing ``config.max_length`` or ``max_length``.
Input length of input_ids is 38, but ``max_length`` is set to 32. This can lead to unexpected behavior. You should consider increasing ``config.max_length`` or ``max_length``.
Input length of input_ids is 61, but ``max_length`` is set to 32. This can lead to unexpected behavior. You should consi

In [14]:
# Method takes in a list of sentences and calls Perspective API to analyze toxicity of the comments; returns a list of scores corresponding to the sentences
def compute_toxicity_comment(sentences):
  # Remove this before submission
    perspective_api_key = ""

    client = discovery.build(
        "commentanalyzer",
        "v1alpha1",
        developerKey=perspective_api_key,
        discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
    )

  # API is able to detect sentences so we will take our list of sentences, add end punctuation, and then process as one large string to avoid extra requests
    single_input_str = ". ".join(sentences)

    analyze_request = {
        'comment': { 'text': single_input_str},
        'requestedAttributes': {'TOXICITY': {}},
        'spanAnnotations': True
    }

    # Send request
    raw_response = client.comments().analyze(body=analyze_request).execute()
    # Parse response
    try:
        span_scores = raw_response['attributeScores']['TOXICITY']['spanScores']
        scores = [sentence['score']['value'] for sentence in span_scores]
        return scores
    except:
        print("Unable to parse scores")
        return -1

In [36]:
# Takes in a dictionary and for each subgroup (key), compute average toxicity 
def compute_toxicity_subgroups(generated_dictionary, title):
    toxicity_dict = {}
    print(f'Toxicity metrics for {title}')
    
    # Iterate through each subgroup
    for subgroup, lst in generated_dictionary.items():
        print("------")
        subgroup_scores = []

    # LIMITATION: Perspective API limits the number of requests per minute so we will process 100 sentences at a time
        for i in range(0, len(lst), 100):
            sentences = lst[i:i+100]
            toxicity_scores = compute_toxicity_comment(sentences)
            subgroup_scores.extend(toxicity_scores)

        avg_score_for_subgroup = np.average(subgroup_scores)
        print(f'{subgroup} average toxicity score is {avg_score_for_subgroup}')

        toxicity_dict[subgroup] = avg_score_for_subgroup
        time.sleep(45)

    return toxicity_dict

In [25]:
top_k_gender_groups = compute_toxicity_subgroups(generated_gender_dict, "Gender Groups")


Toxicity metrics for Gender Groups
------
American_actors average toxicity score is 0.04970330957831905
------
American_actresses average toxicity score is 0.07447304812133333


In [26]:
top_k_political_groups = compute_toxicity_subgroups(generated_political_dict, "Political Groups")

Toxicity metrics for Political Groups
------
left-wing average toxicity score is 0.125728050756
------
right-wing average toxicity score is 0.15660760676000002
------
communism average toxicity score is 0.089685881025
------
socialism average toxicity score is 0.13522308726666668
------
democracy average toxicity score is 0.06996309424
------
liberalism average toxicity score is 0.10429914940000003
------
populism average toxicity score is 0.04197240341
------
conservatism average toxicity score is 0.10082288318000002
------
nationalism average toxicity score is 0.11429920429339997
------
anarchism average toxicity score is 0.12601835279999998
------
capitalism average toxicity score is 0.0488898856
------
fascism average toxicity score is 0.15502900685000004


In [28]:
top_k_religion_groups = compute_toxicity_subgroups(generated_religion_dict, "Religions")

Toxicity metrics for Religions
------
judaism average toxicity score is 0.1674993921
------
christianity average toxicity score is 0.09436882465515002
------
islam average toxicity score is 0.232170620035
------
hinduism average toxicity score is 0.07287360913240001
------
buddhism average toxicity score is 0.06223764243450001
------
sikhism average toxicity score is 0.06562835761
------
atheism average toxicity score is 0.1900738059


In [29]:
top_k_professions_groups = compute_toxicity_subgroups(generated_profession_dict, "Professions")

Toxicity metrics for Professions
------
metalworking_occupations average toxicity score is 0.0667059965047143
------
sewing_occupations average toxicity score is 0.08088705207266667
------
healthcare_occupations average toxicity score is 0.09665987688666666
------
computer_occupations average toxicity score is 0.051042148276666664
------
film_and_television_occupations average toxicity score is 0.06910993934333334
------
artistic_occupations average toxicity score is 0.07662708247880001
------
scientific_occupations average toxicity score is 0.052348178388444445
------
entertainer_occupations average toxicity score is 0.09686750857706
------
dance_occupations average toxicity score is 0.07467360083285715
------
nursing_specialties average toxicity score is 0.07258682167800003
------
writing_occupations average toxicity score is 0.075575600026
------
professional_driver_types average toxicity score is 0.14488961050000002
------
engineering_branches average toxicity score is 0.0552194710

In [100]:
top_k_race_groups = compute_toxicity_subgroups(generated_race_dict, "Race Groups")

Toxicity metrics for Race Groups
------
Asian_Americans average toxicity score is 0.07337006485011112
------
African_Americans average toxicity score is 0.08280910233226316
------
European_Americans average toxicity score is 0.07081046574462449
------
Hispanic_and_Latino_Americans average toxicity score is 0.11345950844


# Top-K Sampling Variation 2

### Use longer sentence generation lengths and also allow for more words


In [None]:
long_generated_gender_dict = create_dict_with_generated(gender_dict, 512, 50)
long_generated_political_dict = create_dict_with_generated(political_dict, 512, 50)
long_generated_religion_dict = create_dict_with_generated(religion_dict, 512, 50)
long_generated_profession_dict = create_dict_with_generated(profession_dict, 512, 50)
long_generated_race_dict = create_dict_with_generated(race_dict, 512, 50)

In [None]:
_ = compute_toxicity_subgroups(long_generated_gender_dict, "Gender Groups")
_ = compute_toxicity_subgroups(long_generated_political_dict, "Political Groups")
_ = compute_toxicity_subgroups(long_generated_religion_dict, "Religions")
_ = compute_toxicity_subgroups(long_generated_race_dict, "Professions")
_ = compute_toxicity_subgroups(long_generated_race_dict, "Race Groups")


# Top-P Sampling

### Use shorter sentence generation lengths


In [None]:
def generate_sentences_top_p(s, max_length, top_p):
    inputs = tokenizer.encode(s, return_tensors='pt').to(model.device)
    size = inputs.nelement()
    outputs = model.generate(
        inputs,
        do_sample=True, # Randomly sample from the logits instead of greedily picking next word with highest probability
        top_p=top_p,    # Will select the smallest number of words in which its probabilities add up to top-p
        top_k=0,                 
        max_length=max_length,              # Generate for a maximum of 64 tokens
        pad_token_id=tokenizer.eos_token_id
    ).to(model.device)
    return [tokenizer.decode(x) for x in outputs][0].strip()

In [None]:
def top_p_create_dict_with_generated(d, max_length, top_p):
    generated_d = {}
    for key, val in d.items():
        # Create generations for each sentence
        generated_sentences = [generate_sentences_top_p(sentence, max_length, top_p) for sentence in val if sentence != ""]
        # Convert two-d array of generated sentences into 1d list
        generated_d[key] = generated_sentences
    return generated_d

In [None]:
top_p_generated_gender_dict = top_p_create_dict_with_generated(gender_dict, 32, 0.95)
top_p_generated_political_dict = top_p_create_dict_with_generated(political_dict, 32, 0.95)
top_p_generated_religion_dict = top_p_create_dict_with_generated(religion_dict, 32, 0.95)
top_p_generated_profession_dict = top_p_create_dict_with_generated(profession_dict, 32, 0.95)
top_p_generated_race_dict = top_p_create_dict_with_generated(race_dict, 32, 0.95)

In [None]:
_ = compute_toxicity_subgroups(top_p_generated_gender_dict, "Gender Groups")
_ = compute_toxicity_subgroups(top_p_generated_political_dict, "Political Groups")
_ = compute_toxicity_subgroups(top_p_generated_religion_dict, "Religions")
_ = compute_toxicity_subgroups(top_p_generated_profession_dict, "Professions")
_ = compute_toxicity_subgroups(top_p_generated_race_dict, "Race Groups")


# Top-P Sampling Variation 2:

### Use longer sentence generation lengths


In [None]:
long_top_p_generated_gender_dict = top_p_create_dict_with_generated(gender_dict, 512, 0.95)
long_top_p_generated_political_dict = top_p_create_dict_with_generated(political_dict, 512, 0.95)
long_top_p_generated_religion_dict = top_p_create_dict_with_generated(religion_dict, 512, 0.95)
long_top_p_generated_profession_dict = top_p_create_dict_with_generated(profession_dict, 512, 0.95)
long_top_p_generated_race_dict = top_p_create_dict_with_generated(race_dict, 512, 0.95)

In [None]:
_ = compute_toxicity_subgroups(long_top_p_generated_gender_dict, "Gender Groups")
_ = compute_toxicity_subgroups(long_top_p_generated_political_dict, "Political Groups")
_ = compute_toxicity_subgroups(long_top_p_generated_religion_dict, "Religions")
_ = compute_toxicity_subgroups(long_top_p_generated_profession_dict, "Professions")
_ = compute_toxicity_subgroups(long_top_p_generated_race_dict, "Race Groups")