In [None]:
!pip install transformers[torch]
!pip install numba
!pip install accelerate -U
!pip install wonderwords

Collecting accelerate>=0.21.0 (from transformers[torch])
  Downloading accelerate-0.29.2-py3-none-any.whl (297 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.4/297.4 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->transformers[torch])
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->transformers[torch])
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->transformers[torch])
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->transformers[torch])
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->transformers[torch])
  Using cached nvidia_cublas_cu

## Libraries

In [None]:
import numpy as np
import pandas as pd
from wonderwords import RandomWord

import torch
import torch.nn as nn
import torch.optim as optim
import transformers
from transformers import BertModel, T5ForConditionalGeneration, BertTokenizer, T5Tokenizer

from torch.utils.data import DataLoader, Dataset
from torch.utils.data import random_split

In [None]:
## Load datasets
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')
# Root folder for model checkpoints; reused later when saving the GAN generator weights
model_checkpoints_path = '/content/drive/My Drive/ds266proj/model_checkpoints'

# Read data from Google Drive: the *_all_data tables are labeled 1 (real) further down,
# the *_fakes_data tables are labeled 0 (generated)
hs_all_data = pd.read_csv('/content/drive/My Drive/ds266proj/hs_cards_data_text.csv')
nr_all_data = pd.read_csv('/content/drive/My Drive/ds266proj/nr_cards_data_text.csv')
hs_fakes_data = pd.read_csv('/content/drive/My Drive/ds266proj/hs_fakes_withcols.csv')
nr_fakes_data = pd.read_csv('/content/drive/My Drive/ds266proj/nr_fakes_withcols.csv')

# Keep only the description text plus the faction/class and card-type columns of the generated cards
nr_fakes_data = nr_fakes_data[['description', 'faction', 'type']]
hs_fakes_data = hs_fakes_data[['description', 'class', 'type']]

Mounted at /content/drive


In [None]:
## Preview the first rows of the generated-card tables
# NOTE(review): this cell was titled "Add new column for IDing", but it only prints previews; no column is added here.
print(nr_fakes_data.head())
print(hs_fakes_data.head())

                                         description       faction    type
0  The card named Squida is a 3 advancement agend...  neutral-corp  agenda
1  The card named "The Legacy of the Nation" is a...  neutral-corp  agenda
2  The card named The Edge of the Community II is...  neutral-corp  agenda
3  The card named Grain is a 5 advancement agenda...  neutral-corp  agenda
4  The card named Interpolation is a 3 advancemen...  neutral-corp  agenda
                                         description   class   type
0  The card named Rift Reap is a 1 cost holy spel...  Priest  Spell
1  The card named Dreadspell is a 3 cost spell . ...  Priest  Spell
2  The card named Assault on Immortals is a 2 cos...  Priest  Spell
3  The card named Shadow Frozen is a 6-cost shado...  Priest  Spell
4  The card named Priest of the Night is a 2 cost...  Priest  Spell


In [None]:
## Build the labeled corpora: real descriptions first, generated descriptions second
all_nr_desc = [*nr_all_data['description'], *nr_fakes_data['description']]
all_hs_desc = [*hs_all_data['description'], *hs_fakes_data['description']]

## Labels follow the same ordering: 1 for every real card, then 0 for every generated card
nr_labels = [1 for _ in range(len(nr_all_data))] + [0 for _ in range(len(nr_fakes_data))]
hs_labels = [1 for _ in range(len(hs_all_data))] + [0 for _ in range(len(hs_fakes_data))]

## NetRunner


### BERT Classifier

In [None]:
## Prepare the BERT model to be used for classification
# This tokenizer is used for every classifier input in the notebook, including the
# generated strings that are re-encoded inside the GAN loops.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
## Tokenize the data
def _encode_descriptions(descriptions):
    """Encode each description with the BERT tokenizer (special tokens added, truncated to 512 ids)."""
    encoded = []
    for text in descriptions:
        encoded.append(tokenizer.encode(text, add_special_tokens=True, max_length=512, truncation=True))
    return encoded

nr_tokenized = _encode_descriptions(all_nr_desc)
hs_tokenized = _encode_descriptions(all_hs_desc)

# Right-pad every sequence with 0 up to the longest sequence of its own corpus
# (the GAN loops later build attention masks with `!= 0`, i.e. 0 is treated as padding)
max_seq_length_nr = max(map(len, nr_tokenized))
padded_nr_tokenized = [ids + [0] * (max_seq_length_nr - len(ids)) for ids in nr_tokenized]
max_seq_length_hs = max(map(len, hs_tokenized))
padded_hs_tokenized = [ids + [0] * (max_seq_length_hs - len(ids)) for ids in hs_tokenized]


In [None]:
## Create the dataloader and data to use with the loader
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
class DescriptionData(Dataset):
    """Map-style dataset pairing pre-tokenized descriptions with their labels.

    Both inputs are stored as given; tensors are only built when an item is requested.
    """

    def __init__(self, tokenized_descriptions, labels):
        self.labels = labels
        self.tokenized_descriptions = tokenized_descriptions

    def __len__(self):
        # The dataset length is driven by the descriptions, not by the labels.
        n_items = len(self.tokenized_descriptions)
        return n_items

    def __getitem__(self, idx):
        token_ids = torch.tensor(self.tokenized_descriptions[idx])
        label = torch.tensor(self.labels[idx])
        return token_ids, label

# Wrap the padded token ids and their labels for each game
nr_dataset, hs_dataset = (
    DescriptionData(padded_nr_tokenized, nr_labels),
    DescriptionData(padded_hs_tokenized, hs_labels),
)

## Prepare the training and test datasets
#+++++++++++++++++++++++++++++++++++++++++++++++++
# 70% of each dataset goes to training; whatever remains is used for validation
total_size_nr = len(nr_dataset)
train_size_nr = int(total_size_nr * 0.7)
val_size_nr = total_size_nr - train_size_nr

total_size_hs = len(hs_dataset)
train_size_hs = int(total_size_hs * 0.7)
val_size_hs = total_size_hs - train_size_hs

# Random (unseeded) partition into training and validation subsets
train_nr_dataset, val_nr_dataset = random_split(nr_dataset, lengths=[train_size_nr, val_size_nr])
train_hs_dataset, val_hs_dataset = random_split(hs_dataset, lengths=[train_size_hs, val_size_hs])

# Training loaders reshuffle; validation loaders keep a fixed order
train_nr_dataloader = DataLoader(train_nr_dataset, shuffle=True, batch_size=32)
val_nr_dataloader = DataLoader(val_nr_dataset, shuffle=False, batch_size=32)

train_hs_dataloader = DataLoader(train_hs_dataset, shuffle=True, batch_size=32)
val_hs_dataloader = DataLoader(val_hs_dataset, shuffle=False, batch_size=32)


In [None]:
## Create the classifier model
class CardDetectiveBERT(nn.Module):
    """BERT encoder followed by a single linear layer that scores `num_classes` classes.

    Args:
        bert_model: encoder module exposing `config.hidden_size`; it is called with
            `input_ids` and `attention_mask` keyword arguments.
        num_classes: number of output logits (defaults to 2).
    """

    def __init__(self, bert_model, num_classes = 2):
        super().__init__()
        hidden_dim = bert_model.config.hidden_size
        # Attribute names `bert` and `fc` determine the state-dict keys used by saved checkpoints.
        self.bert = bert_model
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, input_ids, attention_mask):
        """Return raw (unnormalized) class logits for a batch of token ids."""
        encoder_out = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Element 1 of the encoder output is used as the pooled sequence representation.
        return self.fc(encoder_out[1])

### GAN Model

### NetRunner

In [None]:
## GAN MODEL SETUP
#++++++++++++++++++++++++++++++++++++++++++++++
# The model consists of a discriminator (BERT classifier) and a generator (T5 model)

# Resolve the device first so checkpoints can be mapped onto it while loading
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# First we load in the pre-trained models for each

# T5 model
model_name = 't5-base'
dir_path = '/content/drive/My Drive/ds266proj/model_checkpoints/'
file_path = dir_path + 't5base-finetuned-nr-generation'
modelsave = file_path + "/pt_model"
t5_nr_model = T5ForConditionalGeneration.from_pretrained(modelsave)
t5_nr_model.to(device)

# BERT Discriminator
bert_model = BertModel.from_pretrained('bert-base-uncased')
nr_classifier = CardDetectiveBERT(bert_model, num_classes=2)
nr_class_path = dir_path + "GAN/NetRunner/CardDetectiveBERT.pth"
# map_location lets a checkpoint saved on GPU load when only the CPU fallback is available
nr_classifier.load_state_dict(torch.load(nr_class_path, map_location=device))
# Set the model to evaluation mode for use in GAN
nr_classifier.eval()
nr_classifier.to(device)

## Train only the last three layers of the generator
# Substring match: a parameter is trainable only if its name contains one of these block
# prefixes (presumably matching both the encoder and decoder stacks -- confirm if only one is intended).
unfreeze_layers = ['block.11', 'block.10', 'block.9']
for name, param in t5_nr_model.named_parameters():
    param.requires_grad = any(unfreeze_layer in name for unfreeze_layer in unfreeze_layers)

# NOTE(review): optimizer_discriminator is created but never stepped in the generator-only loop below.
optimizer_discriminator = optim.Adam(nr_classifier.parameters(), lr=0.00002)
optimizer_generator = optim.Adam(filter(lambda p: p.requires_grad, t5_nr_model.parameters()), lr=0.00002)

t5_tokenizer = T5Tokenizer.from_pretrained(model_name)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
## Parameters for the gan
num_epochs = 20
# Seed PyTorch as well as NumPy: generation uses do_sample=True, which draws from PyTorch's RNG,
# so seeding NumPy alone does not make the sampled cards repeatable.
np.random.seed(2319)
torch.manual_seed(2319)
# NOTE(review): RandomWord may use its own random source that neither seed above controls -- confirm
# if the seed words themselves must be reproducible.
seedword = RandomWord()
# CrossEntropyLoss applies log-softmax internally, so it must be fed raw logits
criterion = nn.CrossEntropyLoss()

In [None]:
## Create a separate data loader for real data only
real_nr_desc = list(nr_all_data['description'])
real_nr_tokenized = [tokenizer.encode(description, add_special_tokens=True, max_length=512, truncation=True) for description in real_nr_desc]

# Ensure all tokenized sequences have the same length by padding shorter sequences
# NOTE: this rebinds the notebook-level max_seq_length_nr to the longest *real* description.
max_seq_length_nr = max(len(seq) for seq in real_nr_tokenized)
padded_real_nr_tokenized = [seq + [0]*(max_seq_length_nr - len(seq)) for seq in real_nr_tokenized]

# Every example here is a real card, so build labels of matching length rather than reusing
# nr_labels, which also contains the 0-labels of the generated cards and is therefore longer.
real_nr_labels = [1] * len(real_nr_desc)
real_nr_dataset = DescriptionData(padded_real_nr_tokenized, real_nr_labels)
real_nr_dataloader = DataLoader(real_nr_dataset, batch_size=32, shuffle=True)

In [None]:
## HELPER FUNCTIONS FOR GAN
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
def generate_fake_strings(t5_model, t5_tokenizer):
  """Sample one NetRunner card description per prompt from the given T5 generator.

  Prompts are built for every (faction, card type) pair of the corp and runner lists below,
  each with a random adjective/noun seed drawn from the notebook-level `seedword`.

  Args:
    t5_model: the generator; it is the model actually used for sampling (the original
      code ignored this argument and always used the global `t5_nr_model`).
    t5_tokenizer: tokenizer used to encode prompts and decode the sampled ids.

  Returns:
    A list of decoded strings, one per prompt, in prompt order.
  """
  ## Prompt Creation -- smaller batches since this must be done many times
  corpclasses = ['neutral-corp', 'jinteki', 'weyland-consortium', 'nbn', 'haas-bioroid']
  corpcardtypes = ['agenda'] * 2 + ['asset'] * 5 + ['upgrade'] * 2 + ['operation'] * 5 + ['ice'] * 2 + ['identity'] * 1
  runnerclasses = ['shaper', 'neutral-runner', 'criminal', 'anarch']
  runnercardtypes = ['event'] * 4 + ['program'] * 4 + ['resource'] * 3 + ['hardware'] * 3 + ['identity'] * 1

  ## Generate corp prompts first, then runner prompts (same order as before)
  nr_gen_prompts = []
  for side_classes, side_cardtypes in ((corpclasses, corpcardtypes), (runnerclasses, runnercardtypes)):
    for thisclass in side_classes:
      for thistype in side_cardtypes:
        adjective = seedword.word(include_parts_of_speech=["adjectives"])
        noun = seedword.word(include_parts_of_speech=["nouns"])
        nr_gen_prompts.append(f'generate: A {thisclass}, {thistype} card using seed {adjective} {noun}.')

  ## Encode prompts for generation
  # Send inputs to wherever the model lives instead of assuming CUDA
  model_device = next(t5_model.parameters()).device
  # Sample with dropout disabled, then restore the caller's train/eval mode
  was_training = t5_model.training
  t5_model.eval()
  nr_fake_gens = []
  try:
    for prompt in nr_gen_prompts:
      inputs = t5_tokenizer([prompt], return_tensors='pt')
      outputs = t5_model.generate(inputs['input_ids'].to(model_device),
                                  max_length=100,
                                  do_sample=True,
                                  top_k=40,
                                  temperature=1)
      for out_ids in outputs:
        candidate = t5_tokenizer.decode(out_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
        nr_fake_gens.append(candidate)
  finally:
    t5_model.train(was_training)

  return nr_fake_gens

In [None]:
## Create the training loop for the generator-only training GAN
for epoch in range(num_epochs):
  ## Based on https://www.run.ai/guides/deep-learning-for-computer-vision/pytorch-gan#3

  ## Only the generator is trained here
  t5_nr_model.train()
  optimizer_generator.zero_grad()
  ## The discriminator is never stepped; clear its gradients so they do not pile up across epochs
  nr_classifier.zero_grad()

  ## Generate a batch of fake strings using the T5 generator
  fake_strings = generate_fake_strings(t5_nr_model, t5_tokenizer)
  fake_strings_tokenized = [tokenizer.encode(string, add_special_tokens=True, max_length=512, truncation=True) for string in fake_strings]
  max_seq_length = max(len(seq) for seq in fake_strings_tokenized)
  padded_fake_strings_tokenized = [seq + [0]*(max_seq_length - len(seq)) for seq in fake_strings_tokenized]

  ## Pretend that these are real outputs; desired label ("target" label is 1, i.e. real)
  labels = torch.ones(len(fake_strings), dtype=torch.long).to(device)
  fake_inputs = torch.tensor(padded_fake_strings_tokenized).to(device)

  ## Compute the predicted labels (raw logits; 0 ids are padding and are masked out)
  classifier_outputs = nr_classifier(fake_inputs, attention_mask=(fake_inputs != 0).long())

  ## Print some metrics
  _, predicted = torch.max(classifier_outputs, 1)
  print(f"Fooled the classifier {torch.sum(predicted).item()} times.")
  indices = list(torch.nonzero(predicted).flatten().cpu().numpy())
  foolers = [fake_strings[i] for i in indices]
  if len(foolers) > 0:
    print("Here's an example:")
    print(foolers[0])

  ## Compute BERT classifier loss for spoofed strings
  ## CrossEntropyLoss expects unnormalized logits, so no sigmoid is applied beforehand
  loss = criterion(classifier_outputs, labels)
  print(f"Loss: {loss}")
  loss.backward()

  ## fake_inputs is rebuilt from decoded text, so it has no autograd link back to the generator.
  ## Make that visible instead of letting optimizer_generator.step() silently do nothing.
  generator_got_grad = any(p.grad is not None for p in t5_nr_model.parameters() if p.requires_grad)
  if not generator_got_grad:
    print("WARNING: no gradient reached the generator; this step leaves its weights unchanged.")
  optimizer_generator.step()

  # Save the T5 generator model after each epoch if needed
  print(f"Epoch {epoch+1}/{num_epochs} completed!")
  print("-"*100)

Fooled the classifier 14 times.
Here's an example:
The card named Float Hosk XE is a 0 cost 0 strength ice with the subtypes decoder, . It has an influence requirement of 2. The card text says If you loose 10 tags on this ice, then gain 6 points.
Loss: 1.1524910926818848
Epoch 1/20 completed!
----------------------------------------------------------------------------------------------------
Fooled the classifier 22 times.
Here's an example:
The card named Cogs is a 1 cost asset with the subtypes biotechnology. It has an influence requirement of 0. The card text says When your turn begins, you may spend 1 credit, rez one. Access data: The card text says Gain +1 credit when the Runner is sacked.
Loss: 1.1068764925003052
Epoch 2/20 completed!
----------------------------------------------------------------------------------------------------
Fooled the classifier 19 times.
Here's an example:
The card named Investiest is a 4 advancement agenda worth 3 points with the subtypes . It has an 

In [None]:
# Persist the NetRunner generator weights (state dict only); the evaluation section
# reloads this same Latest.pth file into a fresh t5-base model.
nr_gan_path = model_checkpoints_path + "/GAN/NetRunner/Latest.pth"
torch.save(t5_nr_model.state_dict(), nr_gan_path)

### Hearthstone

In [None]:
## GAN MODEL SETUP
#++++++++++++++++++++++++++++++++++++++++++++++
# The model consists of a discriminator (BERT classifier) and a generator (T5 model)

# Resolve the device first so checkpoints can be mapped onto it while loading
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# First we load in the pre-trained models for each

# T5 model
model_name = 't5-base'
dir_path = '/content/drive/My Drive/ds266proj/model_checkpoints/'
file_path = dir_path + 't5base-finetuned-hs-generation'
modelsave = file_path + "/pt_model"
t5_hs_model = T5ForConditionalGeneration.from_pretrained(modelsave)
t5_hs_model.to(device)

# BERT Discriminator
bert_model = BertModel.from_pretrained('bert-base-uncased')
hs_classifier = CardDetectiveBERT(bert_model, num_classes=2)
hs_class_path = dir_path + "GAN/Hearthstone/CardDetectiveBERT.pth"
# map_location lets a checkpoint saved on GPU load when only the CPU fallback is available
hs_classifier.load_state_dict(torch.load(hs_class_path, map_location=device))
# Set the model to evaluation mode for use in GAN
hs_classifier.eval()
hs_classifier.to(device)

## Train only the last three layers of the generator
# Substring match: a parameter is trainable only if its name contains one of these block
# prefixes (presumably matching both the encoder and decoder stacks -- confirm if only one is intended).
unfreeze_layers = ['block.11', 'block.10', 'block.9']
for name, param in t5_hs_model.named_parameters():
    param.requires_grad = any(unfreeze_layer in name for unfreeze_layer in unfreeze_layers)

# NOTE(review): optimizer_discriminator is created but never stepped in the generator-only loop below.
# Both optimizer names are rebound here, replacing the NetRunner optimizers defined earlier.
optimizer_discriminator = optim.Adam(hs_classifier.parameters(), lr=0.00002)
optimizer_generator = optim.Adam(filter(lambda p: p.requires_grad, t5_hs_model.parameters()), lr=0.00002)

t5_tokenizer = T5Tokenizer.from_pretrained(model_name)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
## Parameters for the gan
num_epochs = 20
# Seed PyTorch as well as NumPy: generation uses do_sample=True, which draws from PyTorch's RNG,
# so seeding NumPy alone does not make the sampled cards repeatable.
np.random.seed(2319)
torch.manual_seed(2319)
# NOTE(review): RandomWord may use its own random source that neither seed above controls -- confirm
# if the seed words themselves must be reproducible.
seedword = RandomWord()
# CrossEntropyLoss applies log-softmax internally, so it must be fed raw logits
criterion = nn.CrossEntropyLoss()

In [None]:
## HELPER FUNCTIONS FOR GAN
#++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
def generate_fake_strings_hs(t5_model, t5_tokenizer):
  """Sample one Hearthstone card description per prompt from the given T5 generator.

  Prompts are built for every (class, card type) pair of the lists below, each with a
  random adjective/noun seed drawn from the notebook-level `seedword`.

  Args:
    t5_model: the generator; it is the model actually used for sampling (the original
      code ignored this argument and always used the global `t5_hs_model`).
    t5_tokenizer: tokenizer used to encode prompts and decode the sampled ids.

  Returns:
    A list of decoded strings, one per prompt, in prompt order.
  """
  ## Prompt Creation -- smaller batches since this must be done many times
  classes = ['Priest', 'Hunter', 'Rogue', 'Paladin', 'Warlock', 'Neutral', 'Shaman', 'Druid', 'Mage', 'Warrior', 'Deathknight']
  cardtypes = ['Spell'] * 10 + ['Minion'] * 10 + ['Hero'] * 2 + ['Weapon'] * 3

  ## Generate card prompts
  hs_gen_prompts = []
  for thisclass in classes:
    for thistype in cardtypes:
      adjective = seedword.word(include_parts_of_speech=["adjectives"])
      noun = seedword.word(include_parts_of_speech=["nouns"])
      hs_gen_prompts.append(f'generate: A {thisclass}, {thistype} card using seed {adjective} {noun}.')

  ## Encode prompts for generation
  # Send inputs to wherever the model lives instead of assuming CUDA
  model_device = next(t5_model.parameters()).device
  # Sample with dropout disabled, then restore the caller's train/eval mode
  was_training = t5_model.training
  t5_model.eval()
  hs_fake_gens = []
  try:
    for prompt in hs_gen_prompts:
      inputs = t5_tokenizer([prompt], return_tensors='pt')
      outputs = t5_model.generate(inputs['input_ids'].to(model_device),
                                  max_length=64,
                                  do_sample=True,
                                  top_k=50,
                                  temperature=1)
      for out_ids in outputs:
        candidate = t5_tokenizer.decode(out_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
        hs_fake_gens.append(candidate)
  finally:
    t5_model.train(was_training)

  return hs_fake_gens

In [None]:
## Create the training loop for the generator-only training GAN
for epoch in range(num_epochs):
  ## Based on https://www.run.ai/guides/deep-learning-for-computer-vision/pytorch-gan#3

  ## Only the generator is trained here
  t5_hs_model.train()
  optimizer_generator.zero_grad()
  ## The discriminator is never stepped; clear its gradients so they do not pile up across epochs
  hs_classifier.zero_grad()

  ## Generate a batch of fake strings using the T5 generator
  fake_strings = generate_fake_strings_hs(t5_hs_model, t5_tokenizer)
  fake_strings_tokenized = [tokenizer.encode(string, add_special_tokens=True, max_length=512, truncation=True) for string in fake_strings]
  max_seq_length = max(len(seq) for seq in fake_strings_tokenized)
  padded_fake_strings_tokenized = [seq + [0]*(max_seq_length - len(seq)) for seq in fake_strings_tokenized]

  ## Pretend that these are real outputs; desired label ("target" label is 1, i.e. real)
  labels = torch.ones(len(fake_strings), dtype=torch.long).to(device)
  fake_inputs = torch.tensor(padded_fake_strings_tokenized).to(device)

  ## Compute the predicted labels (raw logits; 0 ids are padding and are masked out)
  classifier_outputs = hs_classifier(fake_inputs, attention_mask=(fake_inputs != 0).long())

  ## Print some metrics
  _, predicted = torch.max(classifier_outputs, 1)
  print(f"Fooled the classifier {torch.sum(predicted).item()} times.")
  indices = list(torch.nonzero(predicted).flatten().cpu().numpy())
  foolers = [fake_strings[i] for i in indices]
  if len(foolers) > 0:
    print("Here's an example:")
    print(foolers[0])

  ## Compute BERT classifier loss for spoofed strings
  ## CrossEntropyLoss expects unnormalized logits, so no sigmoid is applied beforehand
  loss = criterion(classifier_outputs, labels)
  print(f"Loss: {loss}")
  loss.backward()

  ## fake_inputs is rebuilt from decoded text, so it has no autograd link back to the generator.
  ## Make that visible instead of letting optimizer_generator.step() silently do nothing.
  generator_got_grad = any(p.grad is not None for p in t5_hs_model.parameters() if p.requires_grad)
  if not generator_got_grad:
    print("WARNING: no gradient reached the generator; this step leaves its weights unchanged.")
  optimizer_generator.step()

  # Save the T5 generator model after each epoch if needed
  print(f"Epoch {epoch+1}/{num_epochs} completed!")
  print("-"*100)

Fooled the classifier 95 times.
Here's an example:
The card named Rewindling is a 9 cost shadow spell . The card text says: Draw a spell. If it's a shadow spell, drop it into your hand. (Click the play button to reveal the card that’s now being played.)
Loss: 0.9026047587394714
Epoch 1/20 completed!
----------------------------------------------------------------------------------------------------
Fooled the classifier 85 times.
Here's an example:
The card named Spirit of Mercy is a 4 cost holy spell . The card text says: Summon another 1 dash 1 copies of the same to give your minions plus 1 dash plus 1 (gives them 3 health as well).
Loss: 0.9358685612678528
Epoch 2/20 completed!
----------------------------------------------------------------------------------------------------
Fooled the classifier 104 times.
Here's an example:
The card named Divine Mirror is a 5 cost shadow spell . The card text says: Give a random friendly minion plus 3 Health for each other character. (Reward: To

In [None]:
# Persist the Hearthstone generator weights (state dict only); the evaluation section
# reloads this same Latest.pth file into a fresh t5-base model.
hs_gan_path = model_checkpoints_path + "/GAN/Hearthstone/Latest.pth"
torch.save(t5_hs_model.state_dict(), hs_gan_path)

## Generate new cards for eval

### Hearthstone

In [None]:
# Load the model from the checkpoint
model_name = 't5-base'
dir_path = '/content/drive/My Drive/ds266proj/model_checkpoints/'
file_path = dir_path + 'GAN/Hearthstone'
modelsave = file_path + "/Latest.pth"
# Resolve the device before loading so the checkpoint can be mapped onto it
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
t5_tokenizer = T5Tokenizer.from_pretrained(model_name)
# Start from the stock t5-base architecture, then overwrite its weights with the saved state dict
t5_model = T5ForConditionalGeneration.from_pretrained(model_name)

# Load the state dict (map_location keeps this working on the CPU fallback)
state_dict = torch.load(modelsave, map_location=device)
t5_model.load_state_dict(state_dict)
# Trailing semicolon suppresses the very long module repr in the cell output
t5_model.to(device);

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

T5ForConditionalGeneration(
  (shared): Embedding(32128, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=768, out_features=3072, bias=False)
              (wo): Linear(in_features=3072, out_features=768, bias=False)
              (dropout): Dro

In [None]:
## Generate seeded prompts
# NOTE(review): np.random.seed controls the pandas sampling below; whether it also controls
# RandomWord's word choice is not visible here -- confirm.
np.random.seed(2319)
seedword = RandomWord()

hs_gen_prompts = []
hs_gen_references = []
classes = ['Priest', 'Hunter', 'Rogue', 'Paladin', 'Warlock', 'Neutral', 'Shaman', 'Druid', 'Mage', 'Warrior', 'Deathknight', 'Demonhunter']
cardtypes = ['Spell'] * 100 + ['Minion'] * 125 + ['Hero'] * 5 + ['Weapon'] * 20
## Fill in prompts and references
for thisclass in classes:
  # The matching real cards depend only on (class, type), so filter once per type
  # instead of once per prompt.
  ref_pools = {}
  for thistype in cardtypes:
    newprompt = f'generate: A {thisclass}, {thistype} card using seed {seedword.word(include_parts_of_speech=["adjectives"])} {seedword.word(include_parts_of_speech=["nouns"])}.'
    hs_gen_prompts.append(newprompt)
    if thistype not in ref_pools:
      ref_pools[thistype] = hs_all_data.loc[(hs_all_data['classes'] == thisclass.upper()) & (hs_all_data[thistype] == 1)]
    ref_data = ref_pools[thistype]
    # Draw 5 random reference descriptions (with replacement); use a placeholder when
    # no real card matches this class/type combination.
    if len(ref_data) > 0:
      candidates = list(ref_data['description'].sample(n=5, replace=True))
    else:
      candidates = ["NONE"]
    hs_gen_references.append(candidates)

# Show a short preview instead of dumping every prompt and reference list
print(f"{len(hs_gen_prompts)} prompts, e.g. {hs_gen_prompts[:3]}")
print(f"{len(hs_gen_references)} reference sets, e.g. {hs_gen_references[:1]}")



In [None]:
## Generate examples from this model
transformers.logging.set_verbosity_error()
hs_gen_candidates = []
for i, test_input_text in enumerate(hs_gen_prompts):
    test_inputs = t5_tokenizer([test_input_text], return_tensors='pt')
    # Use the notebook's `device` (which falls back to CPU) rather than assuming CUDA
    test_output_ids = t5_model.generate(test_inputs['input_ids'].to(device),
                                        max_length=64,
                                        do_sample=True,
                                        top_k=50,
                                        temperature=1)
    for out_ids in test_output_ids:
        candidate = t5_tokenizer.decode(out_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
        hs_gen_candidates.append(candidate)
    # Print every tenth generation as a progress indicator
    if i % 10 == 0:
        print(f"Example #{i}: {hs_gen_candidates[-1]}")

# Collect the generated fake cards into a one-column frame; it is written to disk in the next cell
candidates_df = pd.DataFrame(hs_gen_candidates, columns = ['description'])

Example #0: The card named Bloodless Blood is a 5 cost holy spell . The card text says: Give all minions in your hand plus 3 dash plus 3.
Example #10: The card named Bloodstrike is a 4 cost holy spell . The card text says: Give your minions Rush. Summon 3 1 dash 3 Reborns with Rush.
Example #20: The card named Invent the Unholy is a 6 cost shadow spell . The card text says: Draw a card. Each turn, give it plus 3 dash plus 2.
Example #30: The card named Ogre of Light is a 1 cost holy spell , and includes the effects deal damage, battlecry. The card text says: Battlecry: Deal 3 damage. Reward: At the end of your turn, deal 2 damage.
Example #40: The card named Healing Eyes is a 1 cost holy spell , and includes the effects deal damage, trigger visual. The card text says: Give a minion plus 2 dash plus 2. After your hero attacks, deal 2 damage to all minions.
Example #50: The card named Rakuten'ar'a is a 1 cost arcane spell . The card text says: Add a random minion to your hand. Give it pl

In [None]:
# Write the GAN-generated Hearthstone descriptions to Drive.
# to_csv is called with its defaults, so the row index is written as an extra first column.
hs_fakes_file = '/content/drive/My Drive/ds266proj/hs_fakes_orig_GAN.csv'
candidates_df.to_csv(hs_fakes_file)

### NetRunner

In [None]:
# Load the model from the checkpoint
model_name = 't5-base'
dir_path = '/content/drive/My Drive/ds266proj/model_checkpoints/'
file_path = dir_path + 'GAN/NetRunner'
modelsave = file_path + "/Latest.pth"
# Resolve the device before loading so the checkpoint can be mapped onto it
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
t5_tokenizer = T5Tokenizer.from_pretrained(model_name)
# Start from the stock t5-base architecture, then overwrite its weights with the saved state dict
t5_model_nr = T5ForConditionalGeneration.from_pretrained(model_name)

# Load the state dict (map_location keeps this working on the CPU fallback)
state_dict = torch.load(modelsave, map_location=device)
t5_model_nr.load_state_dict(state_dict)
# Trailing semicolon suppresses the very long module repr in the cell output
t5_model_nr.to(device);

T5ForConditionalGeneration(
  (shared): Embedding(32128, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=768, out_features=3072, bias=False)
              (wo): Linear(in_features=3072, out_features=768, bias=False)
              (dropout): Dro

In [None]:
## Generate seeded prompts
# NOTE(review): np.random.seed controls the pandas sampling below; whether it also controls
# RandomWord's word choice is not visible here -- confirm.
np.random.seed(2319)
seedword = RandomWord()

nr_gen_prompts = []
nr_gen_references = []
corpclasses = ['neutral-corp', 'jinteki', 'weyland-consortium', 'nbn', 'haas-bioroid']
corpcardtypes = ['agenda'] * 15 + ['asset'] * 25 + ['upgrade'] * 15 + ['operation'] * 25 + ['ice'] * 25 + ['identity'] * 10
runnerclasses = ['shaper', 'neutral-runner', 'criminal', 'anarch']
runnercardtypes = ['event'] * 25 + ['program'] * 25 + ['resource'] * 25 + ['hardware'] * 25 + ['identity'] * 10

## Fill in prompts and references: corp factions first, then runner factions
for side_classes, side_cardtypes in ((corpclasses, corpcardtypes), (runnerclasses, runnercardtypes)):
  for thisclass in side_classes:
    # The matching real cards depend only on (faction, type), so filter once per type
    # instead of once per prompt.
    ref_pools = {}
    for thistype in side_cardtypes:
      newprompt = f'generate: A {thisclass}, {thistype} card using seed {seedword.word(include_parts_of_speech=["adjectives"])} {seedword.word(include_parts_of_speech=["nouns"])}.'
      nr_gen_prompts.append(newprompt)
      if thistype not in ref_pools:
        ref_pools[thistype] = nr_all_data.loc[(nr_all_data['faction_code'] == thisclass) & (nr_all_data[thistype] == 1)]
      ref_data = ref_pools[thistype]
      # Draw up to 5 distinct random references. Sampling without replacement raises when
      # asked for more rows than exist, so cap n and use a placeholder for an empty pool.
      if len(ref_data) == 0:
        candidates = ["NONE"]
      else:
        candidates = list(ref_data['description'].sample(n=min(5, len(ref_data))))
      nr_gen_references.append(candidates)

# Show a short preview instead of dumping every prompt and reference list
print(f"{len(nr_gen_prompts)} prompts, e.g. {nr_gen_prompts[:3]}")
print(f"{len(nr_gen_references)} reference sets, e.g. {nr_gen_references[:1]}")

['generate: A neutral-corp, agenda card using seed sincere fugato.', 'generate: A neutral-corp, agenda card using seed curious objection.', 'generate: A neutral-corp, agenda card using seed charming spirit.', 'generate: A neutral-corp, agenda card using seed earsplitting greatness.', 'generate: A neutral-corp, agenda card using seed tricky congressman.', 'generate: A neutral-corp, agenda card using seed narrow fingernail.', 'generate: A neutral-corp, agenda card using seed rough cherry.', 'generate: A neutral-corp, agenda card using seed irate picturesque.', 'generate: A neutral-corp, agenda card using seed pointless harvest.', 'generate: A neutral-corp, agenda card using seed expensive pavilion.', 'generate: A neutral-corp, agenda card using seed teeny-tiny bagpipe.', 'generate: A neutral-corp, agenda card using seed psychotic square.', 'generate: A neutral-corp, agenda card using seed wild merchandise.', 'generate: A neutral-corp, agenda card using seed substantial luck.', 'generate:

In [None]:
## Generate examples from the NetRunner model
transformers.logging.set_verbosity_error()
nr_gen_candidates = []
for i, test_input_text in enumerate(nr_gen_prompts):
    test_inputs = t5_tokenizer([test_input_text], return_tensors='pt')
    # Use the notebook's `device` (which falls back to CPU) rather than assuming CUDA
    test_output_ids = t5_model_nr.generate(test_inputs['input_ids'].to(device),
                                           max_length=100,
                                           do_sample=True,
                                           top_k=40,
                                           temperature=1)
    for out_ids in test_output_ids:
        candidate = t5_tokenizer.decode(out_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
        nr_gen_candidates.append(candidate)
    # Print every tenth generation as a progress indicator
    if i % 10 == 0:
        print(f"Example #{i}: {nr_gen_candidates[-1]}")

# Collect the generated fake cards into a one-column frame; it is written to disk in the next cell
candidates_df_nr = pd.DataFrame(nr_gen_candidates, columns = ['description'])

Example #0: The card named Rebuilding the New Orleans Police Department is a 3 advancement agenda worth 2 points with the subtypes initiative. It has an influence requirement of 0. The card text says When you install Rebuilding the New Orleans Police Department, spend 4 credits. You may draw 2 cards from headquarters.
Example #10: The card named Transform is a 3 advancement agenda worth 2 points with the subtypes . It has an influence requirement of 0. The card text says When you score this agenda, if you have already defeated at least one key antagonist and have no more than 1 piece of ice installed, spend click.
Example #20: The card named Trash is a 2 cost asset with the subtypes connection. It has an influence requirement of 0. It has a trash cost of 2. The card text says Interface ability: 1 credit: trash: Remove 1 tag.
Example #30: The card named “Non-Avoidance” is a 3 cost asset with the subtypes command. It has an influence requirement of 0. It has a trash cost of 2. The card t

In [None]:
# Write the GAN-generated NetRunner descriptions to Drive.
# to_csv is called with its defaults, so the row index is written as an extra first column.
nr_fakes_file = '/content/drive/My Drive/ds266proj/nr_fakes_orig_GAN.csv'
candidates_df_nr.to_csv(nr_fakes_file)