In [None]:
!pip install transformers[torch]
!pip install numba
!pip install accelerate -U
!pip install wonderwords
!pip install evaluate
!pip install rouge_score

Collecting accelerate>=0.21.0 (from transformers[torch])
  Downloading accelerate-0.29.2-py3-none-any.whl (297 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.4/297.4 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->transformers[torch])
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->transformers[torch])
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->transformers[torch])
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->transformers[torch])
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->transformers[torch])
  Using cached nvidia_cublas_cu

In [None]:
import os
import re
import numpy as np
import pandas as pd
import wonderwords

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from wonderwords import RandomWord

import evaluate
from evaluate import load

import torch
import transformers
from transformers import T5Tokenizer, T5ForConditionalGeneration, TFT5ForConditionalGeneration
from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer

## Load data

In [None]:
## Load datasets
from google.colab import drive

# Mount Google Drive so the project folder is reachable under /content/drive
drive.mount('/content/drive')

# Single project root on Drive; every path below is derived from it so the
# location only has to be changed in one place
DATA_DIR = '/content/drive/My Drive/ds266proj'
model_checkpoints_path = f'{DATA_DIR}/model_checkpoints'

# Read the card data sets from Google Drive (hs_* and nr_* prefixes)
hs_all_data = pd.read_csv(f'{DATA_DIR}/hs_cards_data_text.csv')
nr_all_data = pd.read_csv(f'{DATA_DIR}/nr_cards_data_text.csv')

# Read in the generated data; the *_GAN files are the ones scored in the
# "GAN T5" section, the others in the "Regular T5" section
hs_fakes_data = pd.read_csv(f'{DATA_DIR}/hs_fakes_orig.csv')
nr_fakes_data = pd.read_csv(f'{DATA_DIR}/nr_fakes_orig.csv')
hs_fakes_data_GAN = pd.read_csv(f'{DATA_DIR}/hs_fakes_orig_GAN.csv')
nr_fakes_data_GAN = pd.read_csv(f'{DATA_DIR}/nr_fakes_orig_GAN.csv')

# Generated descriptions as plain lists: these are the predictions passed to the metrics
hs_gen_candidates = list(hs_fakes_data['description'])
nr_gen_candidates = list(nr_fakes_data['description'])
hs_gen_candidates_GAN = list(hs_fakes_data_GAN['description'])
nr_gen_candidates_GAN = list(nr_fakes_data_GAN['description'])

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Preview the first five rows of the generated nr_* descriptions
nr_fakes_data.head(n=5)

Unnamed: 0.1,Unnamed: 0,description
0,0,The card named Squida is a 3 advancement agend...
1,1,"The card named ""The Legacy of the Nation"" is a..."
2,2,The card named The Edge of the Community II is...
3,3,The card named Grain is a 5 advancement agenda...
4,4,The card named Interpolation is a 3 advancemen...


In [None]:
# Fetch the ROUGE and BLEU scorers through the `evaluate` library (ROUGE first, then BLEU)
rouge, bleu = (evaluate.load(metric_name) for metric_name in ('rouge', 'bleu'))

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

In [None]:
## IMPORTANT -- prompts are built in the same order that was used for generation,
## so the scoring references must be produced in that same order
## NOTE(review): only NumPy's global RNG is seeded here; confirm whether RandomWord draws are reproducible too
np.random.seed(2319)
seedword = RandomWord()

classes = ['Priest', 'Hunter', 'Rogue', 'Paladin', 'Warlock', 'Neutral', 'Shaman', 'Druid', 'Mage', 'Warrior', 'Deathknight', 'Demonhunter']
cardtypes = ['Spell'] * 100 + ['Minion'] * 125 + ['Hero'] * 5 + ['Weapon'] * 20
hs_gen_prompts = []
hs_gen_references = []

## Build one prompt and one list of references per (class, card type) pair
for thisclass in classes:
  for thistype in cardtypes:
    newprompt = f'generate: A {thisclass}, {thistype} card using seed {seedword.word(include_parts_of_speech=["adjectives"])} {seedword.word(include_parts_of_speech=["nouns"])}.'
    hs_gen_prompts.append(newprompt)

    # Reference pool: rows whose class matches (upper-cased) and whose card-type column equals 1
    ref_data = hs_all_data.loc[(hs_all_data['classes'] == thisclass.upper()) & (hs_all_data[thistype] == 1)]

    # Draw 5 descriptions at random, with replacement; use a placeholder when no row matches
    candidates = list(ref_data['description'].sample(n=5, replace=True)) if len(ref_data) > 0 else ["NONE"]
    hs_gen_references.append(candidates)

print(hs_gen_prompts, hs_gen_references, sep='\n')



In [None]:
## Generate references
## IMPORTANT -- this is the same ordering of prompts used for generation so the scoring references must be generated in the same order
## NOTE(review): only NumPy's global RNG is seeded here; confirm whether RandomWord draws are reproducible too
np.random.seed(2319)
seedword = RandomWord()

nr_gen_prompts = []
nr_gen_references = []
corpclasses = ['neutral-corp', 'jinteki', 'weyland-consortium', 'nbn', 'haas-bioroid']
corpcardtypes = ['agenda'] * 15 + ['asset'] * 25 + ['upgrade'] * 15 + ['operation'] * 25 + ['ice'] * 25 + ['identity'] * 10
runnerclasses = ['shaper', 'neutral-runner', 'criminal', 'anarch']
runnercardtypes = ['event'] * 25 + ['program'] * 25 + ['resource'] * 25 + ['hardware'] * 25 + ['identity'] * 10


def _sample_references(ref_data, n_refs=5):
  """Return `n_refs` randomly sampled descriptions from `ref_data`.

  Sampling is done without replacement whenever `ref_data` has at least
  `n_refs` rows, which is exactly what the previous `sample(n=5)` call did.
  When fewer rows exist, sampling falls back to replacement instead of
  raising ValueError; when there are no rows at all, the ["NONE"]
  placeholder is returned (previously unreachable because `sample` raised first).
  """
  if len(ref_data) == 0:
    return ["NONE"]
  return list(ref_data['description'].sample(n=n_refs, replace=len(ref_data) < n_refs))


## Fill in prompts and references: every corp (faction, card type) pair first, then every runner pair
for factions, factioncardtypes in ((corpclasses, corpcardtypes), (runnerclasses, runnercardtypes)):
  for thisclass in factions:
    for thistype in factioncardtypes:
      newprompt = f'generate: A {thisclass}, {thistype} card using seed {seedword.word(include_parts_of_speech=["adjectives"])} {seedword.word(include_parts_of_speech=["nouns"])}.'
      nr_gen_prompts.append(newprompt)
      # Reference pool: rows with this faction code whose card-type column equals 1
      ref_data = nr_all_data.loc[(nr_all_data['faction_code'] == thisclass) & (nr_all_data[thistype] == 1)]
      candidates = _sample_references(ref_data)
      nr_gen_references.append(candidates)

print(nr_gen_prompts)
print(nr_gen_references)

['generate: A neutral-corp, agenda card using seed uneven selection.', 'generate: A neutral-corp, agenda card using seed cooing schema.', 'generate: A neutral-corp, agenda card using seed scientific venue.', 'generate: A neutral-corp, agenda card using seed spiffy characteristic.', 'generate: A neutral-corp, agenda card using seed shaky judgment.', 'generate: A neutral-corp, agenda card using seed dull climate.', 'generate: A neutral-corp, agenda card using seed dapper velvet.', 'generate: A neutral-corp, agenda card using seed obtainable monasticism.', 'generate: A neutral-corp, agenda card using seed shallow margin.', 'generate: A neutral-corp, agenda card using seed flagrant closing.', 'generate: A neutral-corp, agenda card using seed bizarre cygnet.', 'generate: A neutral-corp, agenda card using seed average supernatural.', 'generate: A neutral-corp, agenda card using seed imperfect exam.', 'generate: A neutral-corp, agenda card using seed questionable brace.', 'generate: A neutral

## Scoring -- Regular T5

### Hearthstone

In [None]:
# Score the hs_* generations from the regular-T5 file against the sampled references
rouge_hs = rouge.compute(
    predictions=hs_gen_candidates,
    references=hs_gen_references,
)
bleu_hs = bleu.compute(
    predictions=hs_gen_candidates,
    references=hs_gen_references,
)
print(rouge_hs, bleu_hs, sep='\n')

{'rouge1': 0.6088048667736912, 'rouge2': 0.4283225470290236, 'rougeL': 0.5867039451162434, 'rougeLsum': 0.5867840216731501}
{'bleu': 0.4645642538842775, 'precisions': [0.7248706671384999, 0.5134847777604896, 0.39843458814759597, 0.3140783691628831], 'brevity_penalty': 1.0, 'length_ratio': 1.226095041322314, 'translation_length': 118686, 'reference_length': 96800}


### NetRunner

In [None]:
# Score the nr_* generations from the regular-T5 file against the sampled references
rouge_nr = rouge.compute(
    predictions=nr_gen_candidates,
    references=nr_gen_references,
)
bleu_nr = bleu.compute(
    predictions=nr_gen_candidates,
    references=nr_gen_references,
)
print(rouge_nr, bleu_nr, sep='\n')

{'rouge1': 0.6099187739014813, 'rouge2': 0.4517114627396276, 'rougeL': 0.5749597068197209, 'rougeLsum': 0.5748285901265329}
{'bleu': 0.4912866590663603, 'precisions': [0.7511989951278928, 0.5240544159599448, 0.4212891475630518, 0.3512596214064931], 'brevity_penalty': 1.0, 'length_ratio': 1.1210582462129295, 'translation_length': 52544, 'reference_length': 46870}


## Scoring -- GAN T5

### Hearthstone

In [None]:
# Score the hs_* generations from the GAN file against the same references as the regular-T5 run
# NOTE(review): this rebinds rouge_hs / bleu_hs, replacing the regular-T5 scores computed earlier
rouge_hs = rouge.compute(
    predictions=hs_gen_candidates_GAN,
    references=hs_gen_references,
)
bleu_hs = bleu.compute(
    predictions=hs_gen_candidates_GAN,
    references=hs_gen_references,
)
print(rouge_hs, bleu_hs, sep='\n')

{'rouge1': 0.6098952352333902, 'rouge2': 0.431693880032225, 'rougeL': 0.5877899106121068, 'rougeLsum': 0.5877140315540368}
{'bleu': 0.4688746478590499, 'precisions': [0.7269329103136365, 0.5171192852752504, 0.4030734226267187, 0.3189758293535927], 'brevity_penalty': 1.0, 'length_ratio': 1.2243078512396695, 'translation_length': 118513, 'reference_length': 96800}


### NetRunner

In [None]:
# Score the nr_* generations from the GAN file against the same references as the regular-T5 run
# NOTE(review): this rebinds rouge_nr / bleu_nr, replacing the regular-T5 scores computed earlier
rouge_nr = rouge.compute(
    predictions=nr_gen_candidates_GAN,
    references=nr_gen_references,
)
bleu_nr = bleu.compute(
    predictions=nr_gen_candidates_GAN,
    references=nr_gen_references,
)
print(rouge_nr, bleu_nr, sep='\n')

{'rouge1': 0.6119253130141566, 'rouge2': 0.4546632090946921, 'rougeL': 0.577604180445872, 'rougeLsum': 0.5775113944635801}
{'bleu': 0.496598358723059, 'precisions': [0.7553440534213455, 0.5300690815867238, 0.4265633735324655, 0.3560904033096252], 'brevity_penalty': 1.0, 'length_ratio': 1.1118839342863238, 'translation_length': 52114, 'reference_length': 46870}
