## 1. Prepare files


In [5]:
# Shared roots on the mounted Google Drive; every location below is derived from them.
_DRIVE_ROOT = "/content/drive/MyDrive/cpsc532"
_INFERENCE_ROOT = f"{_DRIVE_ROOT}/inference"

# Inputs. Despite the `_dir` suffix these are FILE paths: one single-column CSV of
# names per group, plus the numbered list of events.
male_names_dir = f"{_INFERENCE_ROOT}/male_names.csv"
female_names_dir = f"{_INFERENCE_ROOT}/female_names.csv"
unisex_names_dir = f"{_INFERENCE_ROOT}/unisex_names.csv"
events_dir = f"{_INFERENCE_ROOT}/events_extracted_processed.txt"

# Directory of the checkpoint handed to GPT2LMHeadModel.from_pretrained.
model_dir = f"{_DRIVE_ROOT}/comet-atomic-2020-master/models/comet_atomic2020_gpt2/gpt2xl-comet-atomic-2020-3"

# Prompt TSVs written by prepare_dataset and read back for inference (file paths).
female_pred_data_dir = f"{_INFERENCE_ROOT}/female_pred_data.tsv"
male_pred_data_dir = f"{_INFERENCE_ROOT}/male_pred_data.tsv"
unisex_pred_data_dir = f"{_INFERENCE_ROOT}/unisex_pred_data.tsv"

# Directory that receives the timestamped inference CSVs.
output_dir = _INFERENCE_ROOT

In [7]:
import random
import pandas as pd
import torch
import os
import numpy as np
import datetime
from tqdm import tqdm
import sys
sys.path.append('/content/drive/MyDrive/cpsc532/comet-atomic-2020-master')

In [8]:
# Mount Google Drive at /content/drive; the /content/drive/MyDrive/... paths used
# throughout this notebook are located under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Relation labels that prepare_dataset pairs with every "<name> <event>" head, so the
# prompt file holds len(events) * len(all_relations) rows.
# NOTE(review): both "HasSubEvent" and "HasSubevent" are listed (they differ only in
# capitalisation) — presumably inherited from the upstream relation vocabulary; confirm
# before de-duplicating.
all_relations = [
    "AtLocation",
    "CapableOf",
    "Causes",
    "CausesDesire",
    "CreatedBy",
    "DefinedAs",
    "DesireOf",
    "Desires",
    "HasA",
    "HasFirstSubevent",
    "HasLastSubevent",
    "HasPainCharacter",
    "HasPainIntensity",
    "HasPrerequisite",
    "HasProperty",
    "HasSubEvent",
    "HasSubevent",
    "HinderedBy",
    "InheritsFrom",
    "InstanceOf",
    "IsA",
    "LocatedNear",
    "LocationOfAction",
    "MadeOf",
    "MadeUpOf",
    "MotivatedByGoal",
    "NotCapableOf",
    "NotDesires",
    "NotHasA",
    "NotHasProperty",
    "NotIsA",
    "NotMadeOf",
    "ObjectUse",
    "PartOf",
    "ReceivesAction",
    "RelatedTo",
    "SymbolOf",
    "UsedFor",
    "isAfter",
    "isBefore",
    "isFilledBy",
    "oEffect",
    "oReact",
    "oWant",
    "xAttr",
    "xEffect",
    "xIntent",
    "xNeed",
    "xReact",
    "xReason",
    "xWant",
    ]

## 2. Prepare input dataset for inference


In [None]:
def set_seed(seed: int = 42):
    """Seed every random number generator this notebook draws from.

    Seeds Python's ``random``, NumPy, and PyTorch (default and CUDA generators),
    exports ``PYTHONHASHSEED``, and switches cuDNN to deterministic mode with
    benchmarking turned off.

    Args:
        seed: value handed to each generator.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)

    seeders = (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed)
    for seed_fn in seeders:
        seed_fn(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


# Fix the seed once, before any name is sampled.
set_seed(42)

In [None]:
def load_names(file_path):
    """Read a headered, single-column CSV and return its non-null values as a list.

    Args:
        file_path: path of the CSV file; its first row is treated as the header.

    Returns:
        The values of the only column, with missing entries removed.

    Raises:
        ValueError: if the file does not contain exactly one column.
    """
    frame = pd.read_csv(file_path)  # header row is assumed
    column_count = frame.shape[1]
    if column_count != 1:
        raise ValueError(f"Expected 1 column in {file_path}, but got {column_count}")

    only_column = frame.iloc[:, 0]
    return only_column.dropna().tolist()

def load_events(file_path):
    """Read a UTF-8 text file of numbered events and return the event texts.

    Each line is cut at its first ``". "``; the part after it, stripped of
    surrounding whitespace, is kept. Lines that contain no ``". "`` are skipped.

    Args:
        file_path: path of the text file, one ``"<n>. <event>"`` entry per line.

    Returns:
        List of event strings in file order.
    """
    separator = ". "
    events = []
    with open(file_path, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            _prefix, found, remainder = raw_line.partition(separator)
            if found:
                events.append(remainder.strip())
    return events

def prepare_dataset(names, events, output_dir):
    """Build the inference prompts for one group of names and save them as a TSV.

    For every event one name is drawn with ``random.choice`` and prefixed to the
    event text; that head is then paired with every entry of ``all_relations``
    and a placeholder tail.

    Args:
        names: sequence of candidate names to sample from.
        events: iterable of event strings.
        output_dir: despite the name, the path of the TSV FILE to write.
    """
    placeholder_tail = "[EMPTY]"  # Placeholder tail

    # One random draw per event, taken in event order.
    head_events = [f"{random.choice(names)} {event}" for event in events]

    pred_dataset = pd.DataFrame([
        {"head_event": head, "relation": relation, "tail_event": placeholder_tail}
        for head in head_events
        for relation in all_relations
    ])

    # Tab-separated, no index column: the layout the GPT-2 inference cells read back.
    pred_dataset.to_csv(output_dir, sep="\t", index=False)

    print(f"Saved pred dataset with {len(pred_dataset)} rows to: {output_dir}")


In [None]:
# load names and events
# Read the three name pools, then the event list shared by all of them.
male_names, female_names, unisex_names = map(
    load_names, (male_names_dir, female_names_dir, unisex_names_dir)
)
events = load_events(events_dir)

# Write one prompt TSV per group (male, female, unisex — in that order, which also
# fixes the order in which random names are drawn).
for group_names, group_tsv in (
    (male_names, male_pred_data_dir),
    (female_names, female_pred_data_dir),
    (unisex_names, unisex_pred_data_dir),
):
    prepare_dataset(group_names, events, group_tsv)

Saved pred dataset with 20400 rows to: /content/drive/MyDrive/cpsc532/inference/male_pred_data.tsv


## 3. Generate inference


### 3.1 Set up and load model
This part is largely adapted from the COMET-ATOMIC 2020 GPT-2 script linked below, with modifications for our use case.

 https://github.com/allenai/comet-atomic-2020/blob/master/models/comet_atomic2020_gpt2/comet_gpt2.py

In [10]:
#Partially from https://github.com/allenai/comet-atomic-2020/blob/master/models/comet_atomic2020_gpt2/comet_gpt2.py

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
import json
from typing import List
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import os
from torch import cuda
import re
import datetime




In [11]:
!ls /content/drive/MyDrive/cpsc532/comet-atomic-2020-master


 beaker_exp				   header.png	  models        requirements.txt   utils.py
 comet_gpt2_inferences.ipynb		   human_eval	  mosaic        scripts
 comet_gpt2_inferences_unisex.ipynb	   KGDataset.py   __pycache__   split
'Copy of Copy of comet_inferences.ipynb'   modeling.py	  README.md     system_eval


In [12]:
!cp /content/drive/MyDrive/cpsc532/comet-atomic-2020-master/KGDataset.py /content/

# Use the GPU when PyTorch reports CUDA as available; otherwise fall back to the CPU.
device = 'cuda' if cuda.is_available() else 'cpu'

def beam_generations(tokenizer, model, device, loader, top_k=40):
    """Generate beam-search continuations for every example yielded by ``loader``.

    Only the first source/target of each batch is recorded, so the loader is
    expected to use a batch size of 1.

    Args:
        tokenizer: object providing ``decode`` and ``eos_token_id``.
        model: object providing ``eval`` and ``generate``.
        device: device the batch tensors are moved to.
        loader: iterable of dicts with ``source_ids``, ``source_mask`` and
            ``target_ids`` tensors.
        top_k: forwarded to ``generate``; also selects the number of returned
            sequences (5 when ``top_k > 1``, otherwise 1).

    Returns:
        A list with one dict per batch: ``source`` (decoded prompt), ``target``
        (decoded target, or ``''`` if it cannot be decoded) and ``generations``
        (list of decoded sequences).

    Raises:
        KeyError: if the ``OUT_LEN`` environment variable is not set.
    """
    model.eval()
    records = []

    # Loop-invariant settings, resolved once so a missing OUT_LEN fails immediately.
    max_length = int(os.environ['OUT_LEN'])
    num_return_sequences = 5 if top_k > 1 else 1

    with torch.no_grad():
        for idx, data in enumerate(loader, 0):
            y = data['target_ids'].to(device, dtype=torch.long)
            ids = data['source_ids'].to(device, dtype=torch.long)
            mask = data['source_mask'].to(device, dtype=torch.long)

            # NOTE(review): with do_sample=False the sampling knobs (temperature, top_p,
            # top_k) are presumably ignored by beam search; they are kept so the call
            # stays identical to before — confirm against the generate() documentation.
            generated_ids = model.generate(
                input_ids=ids,
                attention_mask=mask,
                temperature=1.0,
                do_sample=False,
                max_length=max_length,
                top_p=0.9,
                top_k=top_k,
                repetition_penalty=1.0,
                num_return_sequences=num_return_sequences,
                num_beams=5,
                pad_token_id=tokenizer.eos_token_id
            )

            preds = [tokenizer.decode(g, clean_up_tokenization_spaces=True) for g in generated_ids]
            try:
                target = [tokenizer.decode(t, clean_up_tokenization_spaces=True) for t in y]
            except Exception:
                # Best effort: an undecodable target is recorded as empty. Unlike the
                # previous bare `except:`, KeyboardInterrupt/SystemExit still propagate.
                target = ['']
            source = [tokenizer.decode(s, clean_up_tokenization_spaces=True) for s in ids]

            records.append({
                'source': source[0],
                'target': target[0],
                'generations': preds
            })

            # Progress report every 10 examples.
            if (idx + 1) % 10 == 0:
                print(f"✅ Processed {idx + 1} examples...")

    return records

from KGDataset import KGDataset


In [13]:
def beam_generations_batch(tokenizer, model, device, loader, top_k=40, num_return=5):
    """Batched variant of beam-search generation: several prompts per ``generate`` call.

    Args:
        tokenizer: object providing ``decode`` and ``eos_token_id``.
        model: object providing ``eval`` and ``generate``.
        device: device the batch tensors are moved to.
        loader: iterable of dicts with ``source_ids`` and ``source_mask`` tensors
            whose first dimension is the batch.
        top_k: forwarded to ``generate``.
        num_return: number of generations kept per prompt (default 5, the value
            that used to be hard-coded).

    Returns:
        A list with one dict per prompt: ``source`` (decoded prompt) and
        ``generations`` (its ``num_return`` decoded sequences). Special tokens are
        skipped when decoding.

    Raises:
        KeyError: if the ``OUT_LEN`` environment variable is not set.
    """
    model.eval()
    records = []
    num_beams = max(5, num_return)  # beams must be >= returned sequences
    max_length = int(os.environ['OUT_LEN'])  # loop-invariant, parsed once

    with torch.no_grad():
        for batch_idx, data in enumerate(loader):
            ids = data['source_ids'].to(device, dtype=torch.long)
            mask = data['source_mask'].to(device, dtype=torch.long)

            batch_size = ids.size(0)

            generated_ids = model.generate(
                input_ids=ids,
                attention_mask=mask,
                max_length=max_length,
                num_beams=num_beams,
                num_return_sequences=num_return,
                do_sample=False,
                top_k=top_k,
                top_p=0.9,
                repetition_penalty=1.0,
                temperature=1.0,
                pad_token_id=tokenizer.eos_token_id
            )

            # The code below assumes generate() returns batch_size * num_return rows,
            # grouped per prompt.
            decoded_preds = [tokenizer.decode(g, skip_special_tokens=True) for g in generated_ids]

            for i in range(batch_size):
                # Slice out the generations that belong to prompt i.
                start_idx = i * num_return
                records.append({
                    "source": tokenizer.decode(ids[i], skip_special_tokens=True),
                    "generations": decoded_preds[start_idx:start_idx + num_return]
                })

            if batch_idx % 10 == 0:
                # len(records) is the true running total; the previous
                # batch_idx * batch_size printed 0 after the first batch and
                # miscounted when the final batch was smaller than the rest.
                print(f"Processed {len(records)} examples...")

    return records


In [14]:
import wandb

# Start a Weights & Biases run; its config object doubles as this notebook's settings
# store. Every value can be overridden through an environment variable of the same name.
wandb.init(project="gpt2_comet_atomic")

config = wandb.config

config.TRAIN_BATCH_SIZE = int(os.environ.get("TRAIN_BATCH_SIZE", 2))
config.VALID_BATCH_SIZE = int(os.environ.get("VALID_BATCH_SIZE", 2))
config.TRAIN_EPOCHS = int(os.environ.get("TRAIN_EPOCHS", 3))
config.VAL_EPOCHS = int(os.environ.get("VAL_EPOCHS", 1))
config.LEARNING_RATE = float(os.environ.get("LEARNING_RATE", "1e-5"))
config.SEED = int(os.environ.get("SEED", 42))
# IN_LEN is the prompt length given to KGDataset; OUT_LEN is the total max_length used
# for generation, so OUT_LEN - IN_LEN is the target length.
config.IN_LEN = int(os.environ.get("IN_LEN", 16))
config.OUT_LEN = int(os.environ.get("OUT_LEN", 20))
config.SUMMARY_LEN = 0 # Used for t5
config.OUT_DIR = os.environ.get("OUT_DIR", "/models")
# Boolean flags are enabled only by the literal string "True". DO_TRAIN previously
# compared against "False", which inverted it (the default "False" evaluated to True).
config.DO_TRAIN = os.environ.get("DO_TRAIN", "False") == "True"
config.DO_PRED = os.environ.get("DO_PRED", "True") == "True"
config.PRED_FILE = str(os.environ.get("PRED_FILE", ""))
config.TOP_K = int(os.environ.get("TOP_K", 40))
config.PRED_BATCH = 64
config.TOKENIZER = os.environ.get('TOKENIZER', "gpt2-xl")

torch.manual_seed(config.SEED)  # pytorch random seed
np.random.seed(config.SEED)  # numpy random seed
torch.backends.cudnn.deterministic = True


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mzhangxiyu100[0m ([33mzhangxiyu100-ubc[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [15]:
# Base tokenizer id: $GPT2_MODEL when set, otherwise the stock "gpt2" vocabulary.
model_name = os.environ.get('GPT2_MODEL', "gpt2")

try:
    tokenizer = GPT2Tokenizer.from_pretrained(model_name)
except Exception as load_error:
    # Fall back to the configured tokenizer, but say so instead of swallowing the
    # failure silently. Catching Exception (not a bare `except:`) lets
    # KeyboardInterrupt/SystemExit propagate.
    print(f"Could not load tokenizer '{model_name}' ({load_error}); falling back to '{config.TOKENIZER}'")
    tokenizer = GPT2Tokenizer.from_pretrained(config.TOKENIZER)

# Register the COMET vocabulary on top of the base tokenizer: '[EOS]' as the
# end-of-sequence token, every relation label plus the '[GEN]' marker as additional
# special tokens, and finally '[PAD]' as the padding token.
# NOTE(review): the ids of added tokens presumably follow this insertion order and must
# line up with the fine-tuned checkpoint's embedding rows — do not reorder or edit the
# list without confirming against the checkpoint.
tokenizer.add_special_tokens({
    'eos_token': '[EOS]',
    'additional_special_tokens': [
        'LocationOfAction',
        'HinderedBy',
        'HasFirstSubevent',
        'NotHasProperty',
        'NotHasA',
        'HasA',
        'AtLocation',
        'NotCapableOf',
        'CausesDesire',
        'HasPainCharacter',
        'NotDesires',
        'MadeUpOf',
        'InstanceOf',
        'SymbolOf',
        'xReason',
        'isAfter',
        'HasPrerequisite',
        'UsedFor',
        'MadeOf',
        'MotivatedByGoal',
        'Causes',
        'oEffect',
        'CreatedBy',
        'ReceivesAction',
        'NotMadeOf',
        'xWant',
        'PartOf',
        'DesireOf',
        'HasPainIntensity',
        'xAttr',
        'DefinedAs',
        'oReact',
        'xIntent',
        'HasSubevent',
        'oWant',
        'HasProperty',
        'IsA',
        'HasSubEvent',
        'LocatedNear',
        'Desires',
        'isFilledBy',
        'isBefore',
        'InheritsFrom',
        'xNeed',
        'xEffect',
        'xReact',
        'HasLastSubevent',
        'RelatedTo',
        'CapableOf',
        'NotIsA',
        'ObjectUse',
        '[GEN]'
    ]
})
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
# Load the three prompt TSVs (female, male, unisex — read in that order).
female_dataset, male_dataset, unisex_dataset = (
    pd.read_csv(tsv_path, sep="\t")
    for tsv_path in (female_pred_data_dir, male_pred_data_dir, unisex_pred_data_dir)
)

# Rewrite each frame in place: head becomes "<head> <relation> [GEN]" and the tail is
# closed with " [EOS]".
for dataset in (female_dataset, male_dataset, unisex_dataset):
    dataset.head_event = dataset.head_event + ' ' + dataset.relation + " [GEN]"
    dataset.tail_event = dataset.tail_event + ' [EOS]'

# Every dataset shares the same lengths and evaluation options.
_target_len = config.OUT_LEN - config.IN_LEN
_kg_options = dict(model="gpt2", is_eval=True)

female_set = KGDataset(female_dataset, tokenizer, config.IN_LEN, _target_len, **_kg_options)
male_set = KGDataset(male_dataset, tokenizer, config.IN_LEN, _target_len, **_kg_options)
unisex_set = KGDataset(unisex_dataset, tokenizer, config.IN_LEN, _target_len, **_kg_options)


# DataLoader settings shared by every prediction loader (previously defined twice in a
# row with identical contents). batch_size stays 1 because beam_generations records
# only the first element of each batch; shuffle is off so output rows keep input order.
val_params = {
    'batch_size': 1,
    'shuffle': False,
    'num_workers': 0
}

female_loader = DataLoader(female_set, **val_params, drop_last=False)
male_loader = DataLoader(male_set, **val_params, drop_last=False)
unisex_loader = DataLoader(unisex_set, **val_params, drop_last=False)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

In [16]:
# Report the checkpoint directory that is actually loaded. The message previously
# printed model_name (the tokenizer id, e.g. "gpt2"), which is not what is read here.
print("Loading model from {}".format(model_dir))
model = GPT2LMHeadModel.from_pretrained(model_dir)
print("Move model to device {}".format(device))
model = model.to(device)
# Size the embedding matrix to the tokenizer's vocabulary, which includes the special
# tokens added to it.
model.resize_token_embeddings(len(tokenizer))

# NOTE(review): no inference cell visible in this notebook uses the optimizer; it is
# kept so any code that still references `optimizer` continues to work.
optimizer = torch.optim.Adam(params=model.parameters(), lr=config.LEARNING_RATE)

Loading model from gpt2
Move model to device cpu


### 3.2 Generate inferences from prepared dataset

In [1]:
# Helper function to extract generated text between [GEN] and [EOS]
def extract_gen(text):
    """Return the stripped text found between a ``[GEN]`` marker and the nearest following ``[EOS]``.

    The match does not cross newlines. When no such pair exists, ``text`` is
    returned unchanged.
    """
    found = re.search(r'\[GEN\](.*?)\[EOS\]', text)
    if found is None:
        return text
    return found.group(1).strip()


In [3]:
def generate_inference(pred_data_dir, output_dir, gender, limit=None):
    """Run COMET generation over one prompt TSV and save the raw records as a CSV.

    Args:
        pred_data_dir: path of the prompt TSV (columns ``head_event``, ``relation``,
            ``tail_event``), as written by ``prepare_dataset``.
        output_dir: directory that receives the timestamped output CSV.
        gender: label embedded in the output file name (e.g. ``"female"``).
        limit: optional number of leading rows to process for a quick smoke test.
            ``None`` (default) processes the whole file; the previously hard-coded
            debug cap of 25 rows is available as ``limit=25``.

    Returns:
        Path of the CSV that was written.
    """
    # prepare_dataset writes this file with pandas' default UTF-8 encoding; reading it
    # as latin-1 (as before) would garble any non-ASCII character.
    pred_dataset = pd.read_csv(pred_data_dir, encoding='utf-8', sep="\t")
    if limit is not None:
        pred_dataset = pred_dataset.head(limit)

    pred_dataset = pred_dataset.drop_duplicates(['head_event', 'relation'], ignore_index=True)

    # Same prompt layout as the other dataset cells: "<head> <relation> [GEN]" / "<tail> [EOS]".
    pred_dataset.head_event = pred_dataset.head_event + ' ' + pred_dataset.relation + " [GEN]"
    pred_dataset.tail_event = pred_dataset.tail_event + ' [EOS]'

    pred_set = KGDataset(pred_dataset, tokenizer, config.IN_LEN, config.OUT_LEN - config.IN_LEN, model="gpt2", is_eval=True)
    pred_loader = DataLoader(pred_set, **val_params, drop_last=False)

    # beam_generations reads its max_length from this environment variable.
    os.environ['OUT_LEN'] = str(config.OUT_LEN)
    pred_generations = beam_generations(tokenizer, model, device, pred_loader, top_k=config.TOP_K)

    # Records are saved raw (source / target / generations); extract_gen can be applied
    # afterwards to keep only the text between [GEN] and [EOS].
    df = pd.DataFrame(pred_generations)

    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    output_file = os.path.join(output_dir, f"comet_inferences_gpt2_{gender}_{timestamp}.csv")

    # Save predictions
    df.to_csv(output_file, index=False)
    print(f"Inferences saved to '{output_file}'")
    return output_file


In [17]:
# try 25
# generate_inference(female_pred_data_dir, output_dir, "female")



✅ Processed 10 examples...
✅ Processed 20 examples...
Inferences saved to '/content/drive/MyDrive/cpsc532/inference/comet_inferences_gpt2_female_2025-03-24_19-52-09.csv'


In [None]:
# Generate inferences for the female-name prompts; the result is saved as a timestamped CSV under output_dir.
generate_inference(female_pred_data_dir, output_dir, "female")



✅ Processed 10 examples...
✅ Processed 20 examples...
✅ Processed 30 examples...
✅ Processed 40 examples...
✅ Processed 50 examples...
✅ Processed 60 examples...
✅ Processed 70 examples...
✅ Processed 80 examples...
✅ Processed 90 examples...
✅ Processed 100 examples...
✅ Processed 110 examples...
✅ Processed 120 examples...
✅ Processed 130 examples...
✅ Processed 140 examples...
✅ Processed 150 examples...
✅ Processed 160 examples...
✅ Processed 170 examples...
✅ Processed 180 examples...
✅ Processed 190 examples...
✅ Processed 200 examples...
✅ Processed 210 examples...
✅ Processed 220 examples...
✅ Processed 230 examples...
✅ Processed 240 examples...
✅ Processed 250 examples...
✅ Processed 260 examples...
✅ Processed 270 examples...
✅ Processed 280 examples...
✅ Processed 290 examples...
✅ Processed 300 examples...
✅ Processed 310 examples...
✅ Processed 320 examples...
✅ Processed 330 examples...
✅ Processed 340 e

In [None]:
# Generate inferences for the male-name prompts; the result is saved as a timestamped CSV under output_dir.
generate_inference(male_pred_data_dir, output_dir, "male")



✅ Processed 10 examples...
✅ Processed 20 examples...
✅ Processed 30 examples...
✅ Processed 40 examples...
✅ Processed 50 examples...
✅ Processed 60 examples...
✅ Processed 70 examples...
✅ Processed 80 examples...
✅ Processed 90 examples...
✅ Processed 100 examples...
✅ Processed 110 examples...
✅ Processed 120 examples...
✅ Processed 130 examples...
✅ Processed 140 examples...
✅ Processed 150 examples...
✅ Processed 160 examples...
✅ Processed 170 examples...
✅ Processed 180 examples...
✅ Processed 190 examples...
✅ Processed 200 examples...
✅ Processed 210 examples...
✅ Processed 220 examples...
✅ Processed 230 examples...
✅ Processed 240 examples...
✅ Processed 250 examples...
✅ Processed 260 examples...
✅ Processed 270 examples...
✅ Processed 280 examples...
✅ Processed 290 examples...
✅ Processed 300 examples...
✅ Processed 310 examples...
✅ Processed 320 examples...
✅ Processed 330 examples...
✅ Processed 340 e

In [None]:
# Generate inferences for the unisex-name prompts; the result is saved as a timestamped CSV under output_dir.
generate_inference(unisex_pred_data_dir, output_dir, "unisex")



✅ Processed 10 examples...
✅ Processed 20 examples...
✅ Processed 30 examples...
✅ Processed 40 examples...
✅ Processed 50 examples...
✅ Processed 60 examples...
✅ Processed 70 examples...
✅ Processed 80 examples...
✅ Processed 90 examples...
✅ Processed 100 examples...
✅ Processed 110 examples...
✅ Processed 120 examples...
✅ Processed 130 examples...
✅ Processed 140 examples...
✅ Processed 150 examples...
✅ Processed 160 examples...
✅ Processed 170 examples...
✅ Processed 180 examples...
✅ Processed 190 examples...
✅ Processed 200 examples...
✅ Processed 210 examples...
✅ Processed 220 examples...
✅ Processed 230 examples...
✅ Processed 240 examples...
✅ Processed 250 examples...
✅ Processed 260 examples...
✅ Processed 270 examples...
✅ Processed 280 examples...
✅ Processed 290 examples...
✅ Processed 300 examples...
✅ Processed 310 examples...
✅ Processed 320 examples...
✅ Processed 330 examples...
✅ Processed 340 e

Clean up the output

In [19]:
import pandas as pd
import re

def clean_generation(csv_path, path_out):
    """Strip the ``[GEN]`` marker and what surrounds it from a CSV's ``generations`` column.

    Args:
        csv_path: path of the CSV to read; it must contain a ``generations`` column.
        path_out: path the cleaned CSV is written to (without the index column).
    """
    # Load the CSV
    df = pd.read_csv(csv_path)

    def _strip_from_gen(text):
        """Drop ``[GEN]``, everything after it, and the single word before it.

        Non-string values and strings without ``[GEN]`` are returned unchanged.
        Renamed from ``clean_generation`` so it no longer shadows the outer function.
        """
        if not (isinstance(text, str) and '[GEN]' in text):
            return text

        words = text.split()
        if '[GEN]' not in words:
            # The marker is glued to another token, so there is no word boundary to
            # cut at; as before, only the whitespace is normalised.
            return ' '.join(words)

        gen_index = words.index('[GEN]')
        # Clamp at 0: when [GEN] is the first word, `gen_index - 1` is -1 and the old
        # slice `words[:-1]` kept the marker and dropped only the last word.
        return ' '.join(words[:max(gen_index - 1, 0)])

    # Apply the function to the 'generations' column
    df['generations'] = df['generations'].apply(_strip_from_gen)

    # Preview result
    print(df.head())

    # Save the cleaned CSV
    df.to_csv(path_out, index=False)
    print(f"Cleaned CSV saved to '{path_out}'")



In [21]:
# Path to your CSV file in Google Drive (modify this)
# Folder holding both the raw inference CSVs and their cleaned counterparts.
_CLEAN_IO_DIR = "/content/drive/MyDrive/cpsc532/inference/output"

# Raw inference CSVs (timestamped file names).
csv_path_unisex = f"{_CLEAN_IO_DIR}/comet_inferences_gpt2_unisex_2025-03-24_12-02-20.csv"
csv_path_female = f"{_CLEAN_IO_DIR}/comet_inferences_gpt2_female_2025-03-24_14-24-21.csv"
csv_path_male = f"{_CLEAN_IO_DIR}/comet_inferences_gpt2_male_2025-03-24_14-57-51.csv"

# Destinations for the cleaned CSVs.
path_out_unisex = f"{_CLEAN_IO_DIR}/comet_inferences_gpt2_unisex_cleaned.csv"
path_out_female = f"{_CLEAN_IO_DIR}/comet_inferences_gpt2_female_cleaned.csv"
path_out_male = f"{_CLEAN_IO_DIR}/comet_inferences_gpt2_male_cleaned.csv"



# Clean the unisex inference CSV: prints a preview of the first rows and writes the result to path_out_unisex.
clean_generation(csv_path_unisex, path_out_unisex)

                                              source  \
0  Emerson argued with the designer  AtLocation  ...   
1  Emerson argued with the designer  CapableOf   ...   
2   Emerson argued with the designer  Causes   [GEN]   
3  Emerson argued with the designer  CausesDesire...   
4  Emerson argued with the designer  CreatedBy   ...   

                                         generations  
0  store, conference, building, designer's hand, ...  
1  design, write letter, note design, write lette...  
2  none, argument, confused, disappointed, disapp...  
3  designer, design, designer, designs, designer'...  
4               design, designer, designer, designer  
Cleaned CSV saved to '/content/drive/MyDrive/cpsc532/inference/output/comet_inferences_gpt2_unisex_cleaned.csv'


In [22]:
# Clean the female inference CSV: prints a preview of the first rows and writes the result to path_out_female.
clean_generation(csv_path_female, path_out_female)

                                              source  \
0  Amanda argued with the designer  AtLocation   ...   
1  Amanda argued with the designer  CapableOf   [...   
2    Amanda argued with the designer  Causes   [GEN]   
3  Amanda argued with the designer  CausesDesire ...   
4  Amanda argued with the designer  CreatedBy   [...   

                                         generations  
0  fashion show, theater, store, designer's offic...  
1  object to design, object to idea, dismiss desi...  
2  none, argument, disappointed, the design to be...  
3      design, designer, designer, designs, designer  
4      design, designer, designer, designs, designer  
Cleaned CSV saved to '/content/drive/MyDrive/cpsc532/inference/output/comet_inferences_gpt2_female_cleaned.csv'


In [23]:
# Clean the male inference CSV: prints a preview of the first rows and writes the result to path_out_male.
clean_generation(csv_path_male, path_out_male)

                                              source  \
0  Logan argued with the designer  AtLocation   [...   
1  Logan argued with the designer  CapableOf   [GEN]   
2     Logan argued with the designer  Causes   [GEN]   
3  Logan argued with the designer  CausesDesire  ...   
4  Logan argued with the designer  CreatedBy   [GEN]   

                                         generations  
0  garage, store, building, designer's office, th...  
1  change design, design, note design, discover n...  
2  argument, none, disappointed, lose money, disa...  
3  design, designer, designer, design, designer's...  
4     design, designer, designer's opinion, designer  
Cleaned CSV saved to '/content/drive/MyDrive/cpsc532/inference/output/comet_inferences_gpt2_male_cleaned.csv'
