In [None]:
pip install wonderwords

Collecting wonderwords
  Downloading wonderwords-2.2.0-py3-none-any.whl (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.0/45.0 kB[0m [31m992.1 kB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: wonderwords
Successfully installed wonderwords-2.2.0


## Setup and Libraries

In [None]:
## Library imports
import requests
import json
import gzip
import io
import pandas as pd
import warnings
import math
import numpy as np

import matplotlib.pyplot as plt
import wonderwords

from google.colab import files

# Other options
warnings.filterwarnings("ignore", category=DeprecationWarning)

## Data Imports

In [None]:
# Both card sets are downloaded as JSON. A failed download raises right here
# instead of only printing, so later cells never run with `nr_cards` or
# `hs_cards` undefined (or left over from an earlier kernel run).
REQUEST_TIMEOUT_SECONDS = 60

# NetRunner Cards
nr_response = requests.get("https://netrunnerdb.com/api/2.0/public/cards",
                           timeout=REQUEST_TIMEOUT_SECONDS)
nr_response.raise_for_status()
nr_cards = nr_response.json()

# Hearthstone Cards
hs_response = requests.get("https://static.firestoneapp.com/data/cards/cards_enUS.gz.json",
                           timeout=REQUEST_TIMEOUT_SECONDS)
hs_response.raise_for_status()
hs_cards = hs_response.json()

In [None]:
# Survey which card types, factions and packs occur in the NetRunner data.
# Cards that lack one of these fields are simply left out of that list.
nr_cards_data = nr_cards['data']
unique_types = [card['type_code'] for card in nr_cards_data if 'type_code' in card]
unique_classes = [card['faction_code'] for card in nr_cards_data if 'faction_code' in card]
unique_sets = [card['pack_code'] for card in nr_cards_data if 'pack_code' in card]

for observed_values in (unique_types, unique_classes, unique_sets):
  print(set(observed_values))

{'event', 'ice', 'identity', 'operation', 'program', 'resource', 'agenda', 'asset', 'hardware', 'upgrade'}
{'nbn', 'sunny-lebeau', 'haas-bioroid', 'anarch', 'apex', 'criminal', 'weyland-consortium', 'shaper', 'adam', 'neutral-runner', 'jinteki', 'neutral-corp'}
{'so', 'dc', 'baw', 'cac', 'bf', 'fm', 'sm', 'core', 'atr', 'cd', 'fal', 'rwr', 'es', 'si', 'ftm', 'su21', 'draft', 'uao', 'td', 'rar', 'wla', 'dag', 'tsb', 'dt', 'bb', 'mor', 'oh', 'ml', 'tdc', 'tc', 'fc', 'ur', 'sc19', 'tlm', 'mt', 'kg', 'bm', 'ss', 'mo', 'napd', 'ms', 'fp', 'cc', 'hap', 'ts', 'sg', 'msbp', 'dtwn', 'ta', 'oac', 'tdatd', 'df', 'qu', 'win', 'uot', '23s', 'core2', 'tai', 'urbp', 'ka', 'dad', 'ph', 'hs', 'up', 'uw', 'cotc', 'val', 'st', 'ce', 'asis', 'om', 'eas', 'in'}


In [None]:
# Survey which card types and classes occur in the Hearthstone data.
# A card may list several classes, so the class list is flattened.
unique_types = [card['type'] for card in hs_cards if 'type' in card]
unique_classes = [card_class
                  for card in hs_cards if 'classes' in card
                  for card_class in card['classes']]

print(set(unique_types))
print(set(unique_classes))

{'Enchantment', 'Lettuce_ability', 'Hero', 'Weapon', 'Game_mode_button', 'Battleground_anomaly', 'Battleground_spell', 'Spell', 'Hero_power', 'Move_minion_hover_target', 'Battleground_quest_reward', 'Location', 'Battleground_hero_buddy', 'Minion'}
{'PALADIN', 'HUNTER', 'PRIEST', 'SHAMAN', 'DRUID', 'MAGE', 'ROGUE', 'DEATHKNIGHT', 'DEMONHUNTER', 'WARRIOR', 'WHIZBANG', 'WARLOCK', 'NEUTRAL', 'DREAM'}


In [None]:
# Tally the NetRunner cards by type
card_types = {}
for card in nr_cards_data:
  card_type = card['type_code']
  card_types[card_type] = card_types.get(card_type, 0) + 1

print(card_types)
# Based on these counts it seems potentially safe to throw out some of the weird ones

# Inventory of every field name seen on any card, in first-seen order
card_keys = list(dict.fromkeys(key for card in nr_cards_data for key in card))
print(card_keys)


{'identity': 161, 'event': 264, 'hardware': 163, 'program': 294, 'resource': 257, 'agenda': 204, 'asset': 235, 'operation': 240, 'ice': 364, 'upgrade': 130}
['code', 'deck_limit', 'faction_code', 'faction_cost', 'flavor', 'illustrator', 'influence_limit', 'keywords', 'minimum_deck_size', 'pack_code', 'position', 'quantity', 'side_code', 'stripped_text', 'stripped_title', 'text', 'title', 'type_code', 'uniqueness', 'base_link', 'cost', 'memory_cost', 'strength', 'advancement_cost', 'agenda_points', 'trash_cost']


In [None]:
# Keep only the cards that carry a 'collectible' field
hs_collectible = [card for card in hs_cards if 'collectible' in card]
print(f"Number of cards total: {len(hs_cards)}")
print(f"Number of collectible cards total: {len(hs_collectible)}")

# Tally the collectible cards by type
card_types = {}
for card in hs_collectible:
  card_type = card['type']
  card_types[card_type] = card_types.get(card_type, 0) + 1

print(card_types)
# Based on these counts it seems potentially safe to throw out some of the weird ones

# Inventory of every field name seen on any collectible card, in first-seen order
card_keys = list(dict.fromkeys(key for card in hs_collectible for key in card))
print(card_keys)

Number of cards total: 29254
Number of collectible cards total: 6203
{'Hero': 465, 'Spell': 1770, 'Minion': 3745, 'Weapon': 198, 'Location': 25}
['id', 'dbfId', 'name', 'set', 'cardClass', 'playerClass', 'classes', 'type', 'health', 'collectible', 'faction', 'rarity', 'artist', 'audio2', 'text', 'cost', 'spellSchool', 'flavor', 'attack', 'mechanics', 'race', 'races', 'referencedTags', 'availableAsSignature', 'relatedCardDbfIds', 'durability', 'availableAsDiamond', 'armor', 'questRewardDbfId', 'hideStats', 'enchantmentDbfId', 'deckDuplicateDbfId', 'additionalCosts', 'techLevel']


## Card Processing

### NetRunner cards

In [None]:
## CODE TO TURN NR CARDS INTO A DATASET
##===========================================================

# Ignore all warnings
warnings.filterwarnings('ignore')

## Choosing which cards to include
##-----------------------------------------------------------
print(f"All cards: {len(nr_cards_data)}")

nr_cards_all = pd.DataFrame(nr_cards_data)
print(f'Collectible only: {nr_cards_all.shape[0]}')
# print(max(nr_cards_all['code'].value_counts())) # non duplicativeness check 1

# Cards sharing a title are collapsed to the one with the highest 'code'
nr_cards_unique = pd.DataFrame(nr_cards_all)
nr_cards_unique = nr_cards_unique.sort_values(by='code', ascending=False)
nr_cards_unique = nr_cards_unique.drop_duplicates(subset='title', keep='first')
print(f'Unique cards: {nr_cards_unique.shape[0]}')

# Create the type delineations (one 0/1 column per type_code, named after the type)
typecols = pd.get_dummies(nr_cards_unique['type_code'], prefix='type')
typecols = typecols.fillna(0)
typecols.columns = typecols.columns.str.replace('type_', '')
nr_cards_unique = pd.concat([nr_cards_unique, typecols], axis=1)

# Keywords need to be split out into their constituent components.
# The keyword string is split on '-', so keywords that themselves contain a
# hyphen are first rewritten without it, and all spaces are removed.
nr_cards_step1 = nr_cards_unique.copy()
keyword_text = nr_cards_step1['keywords']
for original_form, split_safe_form in [('G-mod', 'Gmod'),
                                       ('Off-site', 'Offsite'),
                                       ('Consumer-grade', 'Consumergrade'),
                                       ('Caïssa', 'Caissa'),
                                       (' ', '')]:
  keyword_text = keyword_text.str.replace(original_form, split_safe_form, regex=False)
nr_cards_step1['keywords'] = keyword_text.fillna('')
split_text = nr_cards_step1['keywords'].str.split('-', expand=True)
# Padding cells and the empty string (cards without keywords) are not keywords
unique_keywords = {keyword for keyword in split_text.values.ravel()
                   if isinstance(keyword, str) and keyword}
for keyword in sorted(unique_keywords):
  # Exact match against the split keywords: a substring test would also flag
  # e.g. 'SecurityProtocol' cards as 'Security'
  nr_cards_step1[keyword.lower()] = (split_text == keyword).any(axis=1).astype(int)

# Several things in the stripped text need to be adjusted:
# ability arrows, trace amounts, the names of HQ and R&D, +/- signs
nr_cards_step2 = nr_cards_step1.copy()
card_text = nr_cards_step2['stripped_text'].fillna('')
for symbol, spoken_form in [('->', 'ability:'),
                            ('Trace[', 'Trace ['),
                            ('[', ' '),
                            (']', ' '),
                            ('HQ', 'headquarters'),
                            ('R&D', 'research')]:
  card_text = card_text.str.replace(symbol, spoken_form, regex=False)
# Only a minus sign directly in front of a number is spelled out; hyphens
# inside words are left alone
card_text = card_text.str.replace(r' -(\d)', r' minus \1', regex=True)
card_text = card_text.str.replace('+', 'plus ', regex=False)
# Quoted abilities are rare but a nuisance. Remove the quotes for now
card_text = card_text.str.replace('"', '', regex=False)
# Expand the memory-unit abbreviation only when it is not part of a longer
# word, so text such as "must" or "minimum" is not rewritten
card_text = card_text.str.replace(r'(?<![A-Za-z])(?:MU|mu)(?![A-Za-z])', 'memory units', regex=True)
nr_cards_step2['stripped_text'] = card_text

# Fill missing values with -1 for cards that have a value of NaN
nr_cards_step3 = nr_cards_step2.copy()
numeric_fill_cols = ['base_link', 'cost', 'memory_cost', 'strength', 'advancement_cost',
                     'agenda_points', 'trash_cost', 'influence_limit', 'minimum_deck_size',
                     'deck_limit']
nr_cards_step3[numeric_fill_cols] = nr_cards_step3[numeric_fill_cols].fillna(-1)

# other minor adjustments
nr_cards_step4 = nr_cards_step3.copy()
nr_cards_step4['unique'] = (nr_cards_step4['uniqueness'] == True).astype(int)
nr_cards_step4['runner'] = (nr_cards_step4['side_code'] != "corp").astype(int)
nr_cards_step4['corp'] = (nr_cards_step4['side_code'] == "corp").astype(int)
nr_cards_step4['influence'] = nr_cards_step4['faction_cost']
nr_cards_step4['name'] = nr_cards_step4['title']
# Take the type list from this frame: the notebook-level `unique_types` has been
# overwritten with Hearthstone types by the time this cell runs
types = sorted(nr_cards_step4['type_code'].dropna().unique())
for thistype in types:
  nr_cards_step4[thistype] = (nr_cards_step4['type_code'] == thistype).astype(int)

# Final column selection (copied so the dtype conversion below does not write into a slice)
nr_cards_clean = nr_cards_step4[['faction_code',
                                 # Text fields
                                 'name', 'stripped_text',
                                 # Key info
                                 'runner', 'corp', 'ice', 'upgrade', 'hardware', 'resource', 'identity',
                                 'operation', 'agenda', 'program', 'asset', 'event',
                                 # Card descriptors
                                 'influence', 'unique', 'cost', 'trash_cost', 'memory_cost', 'strength',
                                 'advancement_cost', 'agenda_points', 'base_link',
                                 'influence_limit', 'minimum_deck_size', 'deck_limit',
                                 # Keywords
                                 'morph', 'job', 'location', 'deepnet', 'ambush', 'blackops',
                                 'advertisement', 'lockdown', 'codegate', 'enforcer', 'genetics',
                                 'sabotage', 'chip', 'source', 'grayops', 'consumergrade', 'division',
                                 'priority', 'killer', 'link', 'directive', 'sensie', 'caissa',
                                 'trap', 'remote', 'companion', 'deflector', 'hostile', 'vehicle', 'seedy',
                                 'bioroid', 'terminal', 'harmonic', 'deva', 'political', 'decoder', 'beanstalk',
                                 'psi', 'megacorp', 'cast', 'orgcrime', 'clone', 'reprisal', 'console', 'mythic',
                                 'corporation', 'clan', 'grail', 'ai', 'barrier', 'research', 'character', 'trojan',
                                 'stealth', 'triple', 'icebreaker', 'academic', 'securityprotocol', 'cyborg',
                                 'digital', 'connection', 'transaction', 'ritzy', 'fracter', 'region', 'expendable',
                                 'virus', 'expansion', 'offsite', 'industrial', 'daemon', 'virtual', 'observer',
                                 'facility', 'current', 'alliance', 'cloud', 'executive', 'condition', 'policedepartment',
                                 'weapon', 'destroyer', 'tracer', 'unorthodox', 'natural', 'sentry', 'subsidiary',
                                 'security', 'gear', 'sysop', 'next', 'run', 'double', 'gmod', 'ap',
                                 'cybernetic', 'initiative', 'illicit', 'mod', 'government', 'public']].copy()

# Convert float64 columns to int64
float64_columns = nr_cards_clean.select_dtypes(include='float64').columns
nr_cards_clean[float64_columns] = nr_cards_clean[float64_columns].astype('int64')

## Saving the dataset to look at elsewhere
##------------------------------------------------------------
# nr_cards_clean.to_csv('nr_cards_clean.csv', index=False)
# files.download('nr_cards_clean.csv')

# Undo the blanket "ignore". resetwarnings() clears every filter, so the
# DeprecationWarning filter from the setup cell is put back afterwards.
warnings.resetwarnings()
warnings.filterwarnings("ignore", category=DeprecationWarning)


All cards: 2312
Collectible only: 2312
Unique cards: 1906


### Hearthstone cards

In [None]:
## CODE TO TURN HS CARDS INTO A DATASET
##===========================================================

# Ignore all warnings
warnings.filterwarnings('ignore')

## Choosing which cards to include
##-----------------------------------------------------------
hs_collectible = [card for card in hs_cards if 'collectible' in card.keys()]
print(f"All cards: {len(hs_cards)}")

hs_cards_all = pd.DataFrame(hs_collectible)
print(f'Collectible only: {hs_cards_all.shape[0]}')

## Remove cards with a non-blank value for deckDuplicateDbfId -- lose ~1000 cards
hs_cards_unique = hs_cards_all[hs_cards_all['deckDuplicateDbfId'].isnull()]
print(f'After removing reprints: {hs_cards_unique.shape[0]}')

## Remove cards from the hero skins set
hs_cards_step1 = hs_cards_unique[hs_cards_unique['set'] != 'Hero_skins']
print(f'After removing hero skins: {hs_cards_step1.shape[0]}')

## Let each multi-class card have multiple copies (one row per class)
hs_cards_step2 = hs_cards_step1.explode('classes')
print(f'After expanding multi-class cards: {hs_cards_step2.shape[0]}')

## Fixing columns and retaining relevant info
##-----------------------------------------------------------
# Copied so the text clean-up below assigns into its own frame, not a slice
hs_cards_step3 = hs_cards_step2[['name', 'classes', 'type', 'health', 'cost', 'attack', 'spellSchool', 'races', 'durability', 'armor', 'mechanics', 'text', 'flavor']].copy()

## Clean up text fields
html_pattern = r'<.*?>'
card_text = hs_cards_step3['text'].str.replace(html_pattern, '', regex=True)
for symbol, spoken_form in [('[x]', ''),
                            ('#', ''),
                            ('$', ''),
                            # These three deal with things like 2/2, or +3/+3 to mean health and attack
                            ('/', ' dash '),
                            ('+', ' plus '),
                            ('-', ' minus '),
                            ('%', ' percent ')]:
  card_text = card_text.str.replace(symbol, spoken_form, regex=False)
# The substitutions above can leave runs of spaces; collapse each run to one
hs_cards_step3['text'] = card_text.str.replace(r' {2,}', ' ', regex=True)
hs_cards_step3['flavor'] = hs_cards_step3['flavor'].str.replace(html_pattern, '', regex=True)
hs_cards_step3['classes'] = hs_cards_step3['classes'].str.replace("['", '', regex=False)
hs_cards_step3['classes'] = hs_cards_step3['classes'].str.replace("']", '', regex=False)

## Generate Dummy variables
##------------------------------------------------------------
hs_cards_step4 = hs_cards_step3.copy()

# Create extra dummies for the single classification types
type_dummies = pd.get_dummies(hs_cards_step4['type'])
spell_dummies = pd.get_dummies(hs_cards_step4['spellSchool'])

hs_cards_step4 = pd.concat([hs_cards_step4, type_dummies], axis = 1)
hs_cards_step4 = pd.concat([hs_cards_step4, spell_dummies], axis = 1)

# Fill missing values with -1 for cards that have a value of NaN
for stat_col in ['health', 'cost', 'attack', 'durability', 'armor']:
  hs_cards_step4[stat_col] = hs_cards_step4[stat_col].fillna(-1)

# Special handling for tribe and other key tags
# Extract unique race categories
def expandMultiCat(in_data, col_name):
  """Add one 0/1 indicator column per category found in a multi-valued column.

  Parameters:
    in_data: DataFrame to expand. It is copied, never modified.
    col_name: name of a column whose non-missing entries are collections of
      category labels.

  Returns:
    A copy of `in_data` with one new column per distinct label (1 when the
    row's collection contains that label, otherwise 0). Rows whose entry was
    missing get 0 in every indicator column and the placeholder 'NONE' in
    `col_name`.
  """
  out_data = in_data.copy()
  missing_rows = pd.isna(out_data[col_name])

  # Find all the unique classes within the non-missing entries of the data
  unique_categories = set()
  for item in out_data.loc[~missing_rows, col_name]:
    unique_categories.update(item)

  # Build the dummies from the real collections only. Doing this before the
  # placeholder is written keeps the check a true membership test rather
  # than a substring test against the placeholder text.
  # Sorted so the new columns always appear in the same order.
  for unique_cat in sorted(unique_categories):
    out_data[unique_cat] = [
        0 if is_missing else int(unique_cat in item)
        for item, is_missing in zip(out_data[col_name], missing_rows)
    ]

  # Set the type of all others to "none"
  out_data.loc[missing_rows, col_name] = 'NONE'
  return out_data

# Apply special handling to the multi-type sections
hs_cards_step5 = expandMultiCat(hs_cards_step4, 'races')
hs_cards_step6 = expandMultiCat(hs_cards_step5, 'mechanics')

# Column groups for the modeling exercise. Each group is written once and
# reused for both the column selection and the integer conversion below.
# Categorical & numeric features
hs_numeric_cols = ['health', 'cost', 'attack', 'durability', 'armor']
# Card types
hs_type_cols = ['Hero', 'Location', 'Minion', 'Spell', 'Weapon']
# Text features
hs_text_cols = ['name', 'text', 'flavor']
# Card tribes
hs_tribe_cols = ['MECH', 'QUILBOAR', 'DEMON', 'PIRATE', 'TOTEM', 'NAGA', 'ELEMENTAL', 'ALL', 'BEAST', 'MURLOC', 'DRAGON', 'UNDEAD']
# Spell Schools
hs_spell_school_cols = ['ARCANE', 'FEL', 'FIRE', 'FROST', 'HOLY', 'NATURE', 'SHADOW']
# There are a LOT of mechanics, some appearing on only a few cards
# Model will explore the usefulness of adding these
hs_mechanic_cols = ['QUICKDRAW', 'COLLECTIONMANAGER_FILTER_MANA_ODD', 'ADJACENT_BUFF', 'GEARS',
                    'DEAL_DAMAGE', 'FORGETFUL', 'MODULAR', 'SECRET', 'ECHO',
                    'MULTIPLY_BUFF_VALUE', 'CHOOSE_ONE', 'FORGE', 'WHELP', 'HONORABLE_KILL',
                    'TWINSPELL', 'QUEST', 'FRENZY', 'OVERHEAL', 'CHARGE', 'FREEZE',
                    'OVERKILL', 'SPELLPOWER', 'DIVINE_SHIELD', 'AFFECTED_BY_SPELL_POWER',
                    'ENRAGED', 'COMBO', 'DEATHRATTLE', 'REBORN', 'CORRUPT', 'HIDE_STATS',
                    'SILENCE', 'TOPDECK', 'POISONOUS', 'WINDFURY', 'TAUNT', 'KABAL',
                    'COLOSSAL', 'GRIMY_GOONS', 'TRADEABLE', 'OVERLOAD', 'INSPIRE',
                    'NON_KEYWORD_ECHO', 'SUMMON', 'EXCAVATE', 'AURA', 'BATTLECRY',
                    'OUTCAST', 'MANATHIRST', 'RUSH', 'HEROPOWER_DAMAGE',
                    'RECEIVES_DOUBLE_SPELLDAMAGE_BONUS', 'JADE_LOTUS', 'FINALE',
                    'DEATH_KNIGHT', 'DREDGE', 'INFUSE',
                    'COLLECTIONMANAGER_FILTER_MANA_EVEN', 'TRIGGER_VISUAL', 'LIFESTEAL',
                    'START_OF_GAME_KEYWORD', 'IMP', 'RESTORE_HEALTH', 'JADE_GOLEM',
                    'DISCOVER', 'STEALTH', 'FINISH_ATTACK_SPELL_ON_DAMAGE']

# Keep only a subset of the relevant columns for the modeling exercise.
# 'classes' is the label of interest. Copied so the dtype conversion below
# does not write into a slice of hs_cards_step6.
hs_cards_features = hs_cards_step6[['classes'] + hs_numeric_cols + hs_type_cols + hs_text_cols
                                   + hs_tribe_cols + hs_spell_school_cols + hs_mechanic_cols].copy()

## Make sure that dummies have the supported int type for random forests
int_cols = hs_numeric_cols + hs_type_cols + hs_tribe_cols + hs_spell_school_cols + hs_mechanic_cols
hs_cards_features[int_cols] = hs_cards_features[int_cols].astype('int32')

## Saving the dataset to look at elsewhere
##------------------------------------------------------------
# hs_cards_features.to_csv('hs_cards_features.csv', index=False)
# files.download('hs_cards_features.csv')

# Undo the blanket "ignore". resetwarnings() clears every filter, so the
# DeprecationWarning filter from the setup cell is put back afterwards.
warnings.resetwarnings()
warnings.filterwarnings("ignore", category=DeprecationWarning)

  and should_run_async(code)


All cards: 29254
Collectible only: 6203
After removing reprints: 5092
After removing hero skins: 4669
After expanding multi-class cards: 4781


## Data Augmentation and adjustments

### Netrunner cards

In [None]:
## Further data adjustments
## Faction sizes are uneven: the two neutral factions have fewer cards than the
## main factions, and the three mini-factions have only a handful each.
faction_codes = nr_cards_clean['faction_code']
value_counts = faction_codes.value_counts()
print(value_counts)

## The mini-factions have so few cards that including them is probably not worth it,
## so they are dropped. Neutral cards are kept and given one extra copy each.
mini_factions = ['apex', 'adam', 'sunny-lebeau']
neutral_corp = nr_cards_clean.loc[faction_codes == 'neutral-corp']
neutral_runner = nr_cards_clean.loc[faction_codes == 'neutral-runner']
nr_cards_nomini = nr_cards_clean.loc[~faction_codes.isin(mini_factions)]

## nr_cards_nomini still contains the neutrals, so prepending them doubles their count
augmentation_parts = [neutral_corp, neutral_runner, nr_cards_nomini]
nr_cards_data_augment = pd.concat(augmentation_parts, axis=0)

## Re check the counts after data augmentation
value_counts_2 = nr_cards_data_augment['faction_code'].value_counts()
print(value_counts_2)

shaper                243
criminal              242
weyland-consortium    234
anarch                234
haas-bioroid          228
nbn                   220
jinteki               216
neutral-corp          139
neutral-runner        114
sunny-lebeau           12
apex                   12
adam                   12
Name: faction_code, dtype: int64
neutral-corp          278
shaper                243
criminal              242
weyland-consortium    234
anarch                234
neutral-runner        228
haas-bioroid          228
nbn                   220
jinteki               216
Name: faction_code, dtype: int64


  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  common = np.find_common_type([values.dtype, comps_array.dtype], [])


### Hearthstone cards

In [None]:
## Further data adjustments
## The data is highly imbalanced in favor of the neutral cards
card_classes = hs_cards_features['classes']
value_counts = card_classes.value_counts()
print(value_counts)

## To get better class balance, every class-specific card is duplicated and a
## random half of the neutral cards is discarded
is_neutral = card_classes == 'NEUTRAL'
non_neutrals = hs_cards_features.loc[card_classes != 'NEUTRAL']
neutrals = hs_cards_features.loc[is_neutral]

## Shuffle the neutrals with a fixed seed, then keep the first half
shuffled_neutrals = neutrals.sample(frac=1, random_state=2319)
num_rows_to_keep = len(shuffled_neutrals) // 2
half_dropped_neutrals = shuffled_neutrals.head(num_rows_to_keep)

## Stick the datasets back together: two copies of the non neutrals plus the kept neutrals
augmentation_parts = [non_neutrals, non_neutrals, half_dropped_neutrals]
hs_cards_data_augment = pd.concat(augmentation_parts, axis=0)

## Re check the counts after data augmentation
value_counts_2 = hs_cards_data_augment['classes'].value_counts()
print(value_counts_2)

NEUTRAL        1358
PRIEST          350
WARLOCK         343
PALADIN         343
ROGUE           342
WARRIOR         341
MAGE            341
SHAMAN          340
DRUID           340
HUNTER          340
DEMONHUNTER     217
DEATHKNIGHT     126
Name: classes, dtype: int64
PRIEST         700
WARLOCK        686
PALADIN        686
ROGUE          684
WARRIOR        682
MAGE           682
SHAMAN         680
DRUID          680
HUNTER         680
NEUTRAL        679
DEMONHUNTER    434
DEATHKNIGHT    252
Name: classes, dtype: int64


  and should_run_async(code)


## Language model prep

In [None]:
# Try out the random "adjective noun" seed phrase generator.
# `seedword` is reused further down when building generation prompts.
from wonderwords import RandomWord
seedword = RandomWord()
seed_adjective = seedword.word(include_parts_of_speech=["adjectives"])
seed_noun = seedword.word(include_parts_of_speech=["nouns"])
print(seed_adjective + " " + seed_noun)


callous interferometer


  and should_run_async(code)


### Netrunner cards

In [None]:
## LANGUAGE DATA PREP
#===============================================================
# The various pieces of the card need to be stuck together to create something sensible

# Constants
# Names of the 0/1 subtype columns read from each card row by the description
# builders below. The same names appear in the keyword section of the final
# column selection for nr_cards_clean.
SUBTYPES = ['morph', 'job', 'location', 'deepnet', 'ambush', 'blackops',
            'advertisement', 'lockdown', 'codegate', 'enforcer', 'genetics',
            'sabotage', 'chip', 'source', 'grayops', 'consumergrade', 'division',
            'priority', 'killer', 'link', 'directive', 'sensie', 'caissa',
            'trap', 'remote', 'companion', 'deflector', 'hostile', 'vehicle', 'seedy',
            'bioroid', 'terminal', 'harmonic', 'deva', 'political', 'decoder', 'beanstalk',
            'psi', 'megacorp', 'cast', 'orgcrime', 'clone', 'reprisal', 'console', 'mythic',
            'corporation', 'clan', 'grail', 'ai', 'barrier', 'research', 'character', 'trojan',
            'stealth', 'triple', 'icebreaker', 'academic', 'securityprotocol', 'cyborg',
            'digital', 'connection', 'transaction', 'ritzy', 'fracter', 'region', 'expendable',
            'virus', 'expansion', 'offsite', 'industrial', 'daemon', 'virtual', 'observer',
            'facility', 'current', 'alliance', 'cloud', 'executive', 'condition', 'policedepartment',
            'weapon', 'destroyer', 'tracer', 'unorthodox', 'natural', 'sentry', 'subsidiary',
            'security', 'gear', 'sysop', 'next', 'run', 'double', 'gmod', 'ap',
            'cybernetic', 'initiative', 'illicit', 'mod', 'government', 'public']

def extendDescription(desc, tag):
  """Append a tag to a comma-separated description.

  The tag is lower-cased and its underscores become spaces. An empty `desc`
  yields just the tag; otherwise the tag is added after ", ".
  """
  readable_tag = tag.lower().replace("_", " ")
  return readable_tag if desc == "" else desc + ", " + readable_tag

def _describeNRSubtypes(row):
  """Join every SUBTYPES flag equal to 1 on `row` into a comma-separated string."""
  type_modifier = ""
  for subtype in SUBTYPES:
    if row[subtype] == 1:
      type_modifier = extendDescription(type_modifier, subtype)
  return type_modifier


def _describeNRTypeStats(row):
  """Build the stats-and-type phrase for the first card type flagged on `row`.

  Type flags are checked in a fixed order and the first one equal to 1 wins.
  Returns None when no type flag is set.
  """
  for card_type in ('ice', 'program', 'event', 'operation', 'hardware',
                    'resource', 'agenda', 'asset', 'upgrade', 'identity'):
    if row[card_type] != 1:
      continue
    if card_type == 'ice':
      return f"{int(row['cost'])} cost {int(row['strength'])} strength ice "
    if card_type == 'program':
      # Strength and memory are only mentioned when their value is above -1
      stren_str = f"{int(row['strength'])} strength" if row['strength'] > -1 else ""
      phrase = f"{int(row['cost'])} cost {stren_str} program "
      if row['memory_cost'] > -1:
        phrase = phrase + f"that requires {int(row['memory_cost'])} memory "
      return phrase
    if card_type == 'agenda':
      return f"{int(row['advancement_cost'])} advancement agenda worth {int(row['agenda_points'])} points "
    if card_type == 'identity':
      return f" {int(row['minimum_deck_size'])} deck size identity "
    # event, operation, hardware, resource, asset and upgrade share one wording
    return f"{int(row['cost'])} cost {card_type} "
  return None


def generateDescriptionNRData(row):
  """Build a one-paragraph English description of a NetRunner card row.

  Parameters:
    row: mapping-like card record (for example a DataFrame row) providing
      'name', 'stripped_text', 'influence', 'trash_cost', the card-type
      flags, the numeric stats used by the card's type, and one flag per
      entry of SUBTYPES.

  Returns:
    A string naming the card, then its stats, type and subtypes, then its
    influence, its trash cost (only when above -1) and its card text.
  """
  desc = f"The card named {row['name']} is a "

  ## Stats, type and subtypes (skipped entirely when no type flag is set)
  type_stats = _describeNRTypeStats(row)
  if type_stats is not None:
    desc = desc + type_stats + f"with the subtypes {_describeNRSubtypes(row)}. "

  ## Influence
  if row['identity'] == 1:
    desc = desc + f"It has an influence allotment of {int(row['influence'])}. "
  else:
    desc = desc + f"It has an influence requirement of {int(row['influence'])}. "

  ## Trash costs and game text
  if row['trash_cost'] > -1:
    desc = desc + f"It has a trash cost of {int(row['trash_cost'])}. The card text says {row['stripped_text']}."
  else:
    desc = desc + f"The card text says {row['stripped_text']}."

  ## Fix spacing and punctuation
  desc = desc.replace("  ", " ")
  desc = desc.replace("..", ".")

  return desc

## t5 pieces -----------------------------------------------------------
## Code for the t5 name generation
def generateGenerateNRData(row):
  """Build the T5 "generate:" prompt for one Netrunner card row.

  The prompt names the row's lower-cased faction code and its card type, then
  appends one adjective and one noun requested from the module-level
  ``seedword`` object.

  Args:
    row: Mapping-like card record with a flag (1 when set) for every card-type
      column and a string ``faction_code``.

  Returns:
    The prompt string.
  """
  card_types = ('ice', 'upgrade', 'hardware', 'resource', 'identity',
                'operation', 'agenda', 'program', 'asset', 'event')
  # When several type flags are set the last one listed wins; none set -> ''.
  flagged = [card_type for card_type in card_types if row[card_type] == 1]
  thistype = flagged[-1] if flagged else ''
  faction = row['faction_code'].lower()
  # Adjective is requested before the noun, matching the order they appear in.
  seed_adjective = seedword.word(include_parts_of_speech=['adjectives'])
  seed_noun = seedword.word(include_parts_of_speech=['nouns'])
  return f"generate: A {faction}, {thistype} card using seed {seed_adjective} {seed_noun}."

def generateQuestionNRData(row):
  """Build the T5 "describe card:" prompt for one Netrunner card row.

  Args:
    row: Mapping-like card record with a flag (1 when set) for every card-type
      column, a string ``faction_code`` and a ``name``.

  Returns:
    The prompt string naming the faction, card type and card name.
  """
  card_types = ('ice', 'upgrade', 'hardware', 'resource', 'identity',
                'operation', 'agenda', 'program', 'asset', 'event')
  # When several type flags are set the last one listed wins; none set -> ''.
  flagged = [card_type for card_type in card_types if row[card_type] == 1]
  thistype = flagged[-1] if flagged else ''
  faction = row['faction_code'].lower()
  return f"describe card: The {faction}, {thistype} card called {row['name']}."

def generateAnswerNRData(row):
  """Compose the T5 answer text describing one Netrunner card row.

  The sentence opens with the stats of the first card-type column equal to 1
  (checked in the order ice, program, event, operation, hardware, resource,
  agenda, asset, upgrade, identity), followed by the flagged SUBTYPES columns,
  the influence value, the trash cost when it is above -1, and the card text.
  Before returning, each doubled space and doubled full stop is replaced by a
  single one.

  Args:
    row: Mapping-like card record holding the type flags, the SUBTYPES flags
      and the numeric/text fields referenced below.

  Returns:
    The description string.
  """
  def cost_opening(label):
    # Opening shared by every type that only reports its cost.
    return lambda: f"{int(row['cost'])} cost {label} "

  def ice_opening():
    return f"{int(row['cost'])} cost {int(row['strength'])} strength ice "

  def program_opening():
    # Strength and memory are only mentioned when the stored value is above -1.
    strength_text = f"{int(row['strength'])} strength" if row['strength'] > -1 else f""
    opening = f"{int(row['cost'])} cost {strength_text} program "
    if row['memory_cost'] > -1:
      opening = opening + f"that requires {int(row['memory_cost'])} memory "
    return opening

  def agenda_opening():
    return f"{int(row['advancement_cost'])} advancement agenda worth {int(row['agenda_points'])} points "

  def identity_opening():
    return f" {int(row['minimum_deck_size'])} deck size identity "

  def subtype_clause():
    # Comma-separated list of every SUBTYPES column flagged on this row.
    listed = ""
    for subtype in SUBTYPES:
      if row[subtype] == 1:
        listed = extendDescription(listed, subtype)
    return f"with the subtypes {listed}. "

  # Order matters: the first flagged type decides the opening.
  openings = (
      ('ice', ice_opening),
      ('program', program_opening),
      ('event', cost_opening('event')),
      ('operation', cost_opening('operation')),
      ('hardware', cost_opening('hardware')),
      ('resource', cost_opening('resource')),
      ('agenda', agenda_opening),
      ('asset', cost_opening('asset')),
      ('upgrade', cost_opening('upgrade')),
      ('identity', identity_opening),
  )

  desc = f"This card is a "
  for type_column, opening in openings:
    if row[type_column] == 1:
      desc = desc + opening() + subtype_clause()
      break

  ## Influence
  influence_kind = "allotment" if row['identity'] == 1 else "requirement"
  desc = desc + f"It has an influence {influence_kind} of {int(row['influence'])}. "

  ## Trash costs and game text
  if row['trash_cost'] > -1:
    desc = desc + f"It has a trash cost of {int(row['trash_cost'])}. "
  desc = desc + f"The card text says {row['stripped_text']}."

  ## Fix spacing and punctuation
  return desc.replace("  ", " ").replace("..", ".")

# Build the text/prompt columns on a copy so the augmented frame is left untouched.
nr_cards_data_text = nr_cards_data_augment.copy()
# Column name -> row-wise builder; applied in this order, one row at a time.
nr_text_builders = {
    'description': generateDescriptionNRData,
    't5generate': generateGenerateNRData,
    't5prompt': generateQuestionNRData,
    't5answer': generateAnswerNRData,
}
for nr_text_column, nr_text_builder in nr_text_builders.items():
  nr_cards_data_text[nr_text_column] = nr_cards_data_text.apply(nr_text_builder, axis=1)
# Move the existing index into a regular column and renumber the rows.
nr_cards_data_text = nr_cards_data_text.reset_index()

See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


### Hearthstone cards

In [None]:
## HEARTHSTONE LANGUAGE DATA PREP
#===============================================================
# The various pieces of the card need to be stuck together to create something sensible

# Constants
# Each name below is read as a column of the card row; a value of 1 means the
# card has that trait and the name is added (lower-cased) to its description.

# Tribe flag columns, checked only for minion rows.
TRIBES = ['MECH', 'QUILBOAR', 'DEMON', 'PIRATE', 'TOTEM', 'NAGA', 'ELEMENTAL', 'ALL', 'BEAST', 'MURLOC', 'DRAGON', 'UNDEAD']
# Spell-school flag columns, checked only for spell rows.
SPELLSCHOOLS = ['ARCANE', 'FEL', 'FIRE', 'FROST', 'HOLY', 'NATURE', 'SHADOW']
# Mechanic flag columns, checked for every row and listed under "includes the effects".
MECHANICS = ['QUICKDRAW', 'COLLECTIONMANAGER_FILTER_MANA_ODD', 'ADJACENT_BUFF', 'GEARS',
            'DEAL_DAMAGE', 'FORGETFUL', 'MODULAR', 'SECRET', 'ECHO',
            'MULTIPLY_BUFF_VALUE', 'CHOOSE_ONE', 'FORGE', 'WHELP', 'HONORABLE_KILL',
            'TWINSPELL', 'QUEST', 'FRENZY', 'OVERHEAL', 'CHARGE', 'FREEZE',
            'OVERKILL', 'SPELLPOWER', 'DIVINE_SHIELD', 'AFFECTED_BY_SPELL_POWER',
            'ENRAGED', 'COMBO', 'DEATHRATTLE', 'REBORN', 'CORRUPT', 'HIDE_STATS',
            'SILENCE', 'TOPDECK', 'POISONOUS', 'WINDFURY', 'TAUNT', 'KABAL',
            'COLOSSAL', 'GRIMY_GOONS', 'TRADEABLE', 'OVERLOAD', 'INSPIRE',
            'NON_KEYWORD_ECHO', 'SUMMON', 'EXCAVATE', 'AURA', 'BATTLECRY',
            'OUTCAST', 'MANATHIRST', 'RUSH', 'HEROPOWER_DAMAGE',
            'RECEIVES_DOUBLE_SPELLDAMAGE_BONUS', 'JADE_LOTUS', 'FINALE',
            'DEATH_KNIGHT', 'DREDGE', 'INFUSE',
            'COLLECTIONMANAGER_FILTER_MANA_EVEN', 'TRIGGER_VISUAL', 'LIFESTEAL',
            'START_OF_GAME_KEYWORD', 'IMP', 'RESTORE_HEALTH', 'JADE_GOLEM',
            'DISCOVER', 'STEALTH', 'FINISH_ATTACK_SPELL_ON_DAMAGE']

def extendDescription(desc, tag):
  """Append a readable form of ``tag`` to a comma-separated description.

  The tag is lower-cased and its underscores become spaces. It is joined to
  ``desc`` with ", " unless ``desc`` is still empty.

  Args:
    desc: Description text accumulated so far ("" when nothing is listed yet).
    tag: Tag name to add, e.g. a column name such as "DIVINE_SHIELD".

  Returns:
    The extended description string.
  """
  label = tag.lower().replace("_", " ")
  separator = "" if desc == "" else ", "
  return desc + separator + label

def generateDescriptionHSData(row):
  """Compose the full natural-language description of one Hearthstone card row.

  The sentence names the card, its cost, its tribe or spell-school tags, its
  card type and its stats, then lists every flagged MECHANICS column and ends
  with the card text (newlines replaced by spaces).

  Only the pieces that are actually present are joined, so a card with no
  tribe/school or with no stats produces neither a doubled space
  ("2 cost  minion") nor a space before punctuation ("fire spell . The ...").

  Args:
    row: Mapping-like card record with the type flags ('Minion', 'Spell',
      'Weapon', 'Hero', 'Location'), the TRIBES / SPELLSCHOOLS / MECHANICS
      flag columns, and the 'name', 'cost', 'text' and stat fields used below.

  Returns:
    The description string.
  """
  ## Get together the subtype for minions/spell schools, or otherwise
  type_tags = ""
  type_name = ""
  stats_modifier = ""
  if row['Minion'] == 1:
    for tribe in TRIBES:
      if row[tribe] == 1:
        type_tags = extendDescription(type_tags, tribe)
    type_name = "minion"
    stats_modifier = f"with {row['health']} health and {row['attack']} attack"
    if row['armor'] > 0:
      stats_modifier = stats_modifier + f" and {row['armor']} armor"
  elif row['Spell'] == 1:
    for spellschool in SPELLSCHOOLS:
      if row[spellschool] == 1:
        type_tags = extendDescription(type_tags, spellschool)
    type_name = "spell"
  elif row['Weapon'] == 1:
    type_name = "weapon"
    stats_modifier = f"with {row['attack']} attack and {row['durability']} durability"
  elif row['Hero'] == 1:
    type_name = "hero"
    stats_modifier = f"with {row['armor']} armor"
  elif row['Location'] == 1:
    type_name = "location"
    stats_modifier = f"with {row['health']} health"

  ## Add info about various special mechanics
  special_effects = ""
  for mechanic in MECHANICS:
    if row[mechanic] == 1:
      special_effects = extendDescription(special_effects, mechanic)

  ## Pull together the pieces of the description
  # Skip empty pieces so exactly one space separates the ones that remain.
  pieces = [f"The card named {row['name']} is a {row['cost']} cost",
            type_tags, type_name, stats_modifier]
  finaldesc = " ".join(piece for piece in pieces if piece)
  if special_effects:
    finaldesc = finaldesc + ", and includes the effects " + special_effects
  cleansed_text = str(row['text']).replace('\n', ' ')
  finaldesc = finaldesc + f". The card text says: {cleansed_text}"

  return finaldesc

## ---------------------------------------------------------
## Code for the t5 name generation
def generateGenerateHSData(row):
  """Build the T5 "generate:" prompt for one Hearthstone card row.

  The prompt names the row's lower-cased class and its card type, then appends
  one adjective and one noun requested from the module-level ``seedword``
  object.

  Args:
    row: Mapping-like card record with a flag (1 when set) for every card-type
      column and a string ``classes``.

  Returns:
    The prompt string.
  """
  card_types = ('Hero', 'Location', 'Minion', 'Spell', 'Weapon')
  # When several type flags are set the last one listed wins; none set -> ''.
  flagged = [card_type for card_type in card_types if row[card_type] == 1]
  thistype = flagged[-1] if flagged else ''
  card_class = row['classes'].lower()
  # Adjective is requested before the noun, matching the order they appear in.
  seed_adjective = seedword.word(include_parts_of_speech=['adjectives'])
  seed_noun = seedword.word(include_parts_of_speech=['nouns'])
  return f"generate: A {card_class}, {thistype} card using seed {seed_adjective} {seed_noun}."

## Code for the t5 generation prompt
def generateQuestionHSData(row):
  """Build the T5 "describe card:" prompt for one Hearthstone card row.

  Args:
    row: Mapping-like card record with a flag (1 when set) for every card-type
      column, a string ``classes`` and a ``name``.

  Returns:
    The prompt string naming the class, card type and card name.
  """
  card_types = ('Hero', 'Location', 'Minion', 'Spell', 'Weapon')
  # When several type flags are set the last one listed wins; none set -> ''.
  flagged = [card_type for card_type in card_types if row[card_type] == 1]
  thistype = flagged[-1] if flagged else ''
  card_class = row['classes'].lower()
  return f"describe card: The {card_class}, {thistype} card called {row['name']}."

def generateAnswerHSData(row):
  """Compose the T5 answer text describing one Hearthstone card row.

  The sentence gives the card's cost, its tribe or spell-school tags, its card
  type and its stats, then lists every flagged MECHANICS column and ends with
  the card text (newlines replaced by spaces). The card name is not included.

  Only the pieces that are actually present are joined, so a card with no
  tribe/school or with no stats produces neither a doubled space
  ("2 cost  minion") nor a space before punctuation ("fire spell . The ...").

  Args:
    row: Mapping-like card record with the type flags ('Minion', 'Spell',
      'Weapon', 'Hero', 'Location'), the TRIBES / SPELLSCHOOLS / MECHANICS
      flag columns, and the 'cost', 'text' and stat fields used below.

  Returns:
    The answer string.
  """
  ## Pull together the pieces of the description
  type_tags = ""
  type_name = ""
  stats_modifier = ""
  if row['Minion'] == 1:
    for tribe in TRIBES:
      if row[tribe] == 1:
        type_tags = extendDescription(type_tags, tribe)
    type_name = "minion"
    stats_modifier = f"with {row['health']} health and {row['attack']} attack"
    if row['armor'] > 0:
      stats_modifier = stats_modifier + f" and {row['armor']} armor"
  elif row['Spell'] == 1:
    for spellschool in SPELLSCHOOLS:
      if row[spellschool] == 1:
        type_tags = extendDescription(type_tags, spellschool)
    type_name = "spell"
  elif row['Weapon'] == 1:
    type_name = "weapon"
    stats_modifier = f"with {row['attack']} attack and {row['durability']} durability"
  elif row['Hero'] == 1:
    type_name = "hero"
    stats_modifier = f"with {row['armor']} armor"
  elif row['Location'] == 1:
    type_name = "location"
    stats_modifier = f"with {row['health']} health"

  ## Add info about various special mechanics
  special_effects = ""
  for mechanic in MECHANICS:
    if row[mechanic] == 1:
      special_effects = extendDescription(special_effects, mechanic)

  ## Pull together the pieces of the description
  # Skip empty pieces so exactly one space separates the ones that remain.
  pieces = [f"This card is a {row['cost']} cost", type_tags, type_name, stats_modifier]
  finaldesc = " ".join(piece for piece in pieces if piece)
  if special_effects:
    finaldesc = finaldesc + ", and includes the effects " + special_effects
  cleansed_text = str(row['text']).replace('\n', ' ')
  finaldesc = finaldesc + f". The card text says: {cleansed_text}"

  return finaldesc


# Build the text/prompt columns on a copy so the augmented frame is left untouched.
hs_cards_data_text = hs_cards_data_augment.copy()
# Column name -> row-wise builder; applied in this order, one row at a time.
hs_text_builders = {
    'description': generateDescriptionHSData,
    't5generate': generateGenerateHSData,
    't5prompt': generateQuestionHSData,
    't5answer': generateAnswerHSData,
}
for hs_text_column, hs_text_builder in hs_text_builders.items():
  hs_cards_data_text[hs_text_column] = hs_cards_data_text.apply(hs_text_builder, axis=1)
# Move the existing index into a regular column and renumber the rows.
hs_cards_data_text = hs_cards_data_text.reset_index()

  and should_run_async(code)
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])
See https://numpy.org/devdocs/release/1.25.0-notes.html and the docs for more information.  (Deprecated NumPy 1.25)
  return np.find_common_type(types, [])


In [None]:
## Specify exact columns to include
# Every label is listed exactly once. 'corp' used to appear in both the key-info
# group and the keyword group; selecting with a repeated label makes pandas
# return that column twice, leaving duplicate 'corp' columns in the frame and
# in the CSV written from it.
# NOTE(review): if the augmented frame is meant to hold a separate "corp"
# keyword flag, it needs its own distinct column name upstream.
nr_cards_data_text = nr_cards_data_text[['faction_code',
                                         'description',
                                         't5generate',
                                         't5prompt',
                                         't5answer',
                                         'name',
                                         # Key info
                                         'runner', 'corp', 'ice', 'upgrade', 'hardware', 'resource', 'identity',
                                         'operation', 'agenda', 'program', 'asset', 'event',
                                         # Card descriptors
                                         'influence', 'unique', 'cost', 'trash_cost', 'memory_cost', 'strength',
                                         'advancement_cost', 'agenda_points', 'base_link',
                                         'influence_limit', 'minimum_deck_size', 'deck_limit',
                                         # keywords
                                         'morph', 'job', 'location', 'deepnet', 'ambush', 'blackops',
                                         'advertisement', 'lockdown', 'codegate', 'enforcer', 'genetics',
                                         'sabotage', 'chip', 'source', 'grayops', 'consumergrade', 'division',
                                         'priority', 'killer', 'link', 'directive', 'sensie', 'caissa',
                                         'trap', 'remote', 'companion', 'deflector', 'hostile', 'vehicle', 'seedy',
                                         'bioroid', 'terminal', 'harmonic', 'deva', 'political', 'decoder', 'beanstalk',
                                         'psi', 'megacorp', 'cast', 'orgcrime', 'clone', 'reprisal', 'console', 'mythic',
                                         'corporation', 'clan', 'grail', 'ai', 'barrier', 'research', 'character', 'trojan',
                                         'stealth', 'triple', 'icebreaker', 'academic', 'securityprotocol', 'cyborg',
                                         'digital', 'connection', 'transaction', 'ritzy', 'fracter', 'region', 'expendable',
                                         'virus', 'expansion', 'offsite', 'industrial', 'daemon', 'virtual', 'observer',
                                         'facility', 'current', 'alliance', 'cloud', 'executive', 'condition', 'policedepartment',
                                         'weapon', 'destroyer', 'tracer', 'unorthodox', 'natural', 'sentry', 'subsidiary',
                                         'security', 'gear', 'sysop', 'next', 'run', 'double', 'gmod', 'ap',
                                         'cybernetic', 'initiative', 'illicit', 'mod', 'government', 'public']].copy()



  and should_run_async(code)


## Save Data

In [None]:
## Save datasets
# Colab helper used to attach Google Drive to this runtime.
from google.colab import drive

# Mount Google Drive at /content/drive; the CSV exports that follow are
# written beneath this mount point.
drive.mount('/content/drive')

  and should_run_async(code)


Mounted at /content/drive


In [None]:
# Save sets to Google Drive
# (frame, destination) pairs, written in this order without the index column.
csv_exports = [
    (nr_cards_data_augment, '/content/drive/My Drive/ds266proj/nr_cards_data_augment.csv'),
    (nr_cards_data_text, '/content/drive/My Drive/ds266proj/nr_cards_data_text.csv'),
    (hs_cards_data_augment, '/content/drive/My Drive/ds266proj/hs_cards_data_augment.csv'),
    (hs_cards_data_text, '/content/drive/My Drive/ds266proj/hs_cards_data_text.csv'),
]
for export_frame, export_path in csv_exports:
  export_frame.to_csv(export_path, index=False)



In [None]:
# Examples
# Optional previews of the generated text columns. Each commented block prints
# a few rows with column-width truncation disabled; uncomment one to inspect it.

# with pd.option_context('display.max_colwidth', None):
#   print(nr_cards_data_text['description'].head(1))

# with pd.option_context('display.max_colwidth', None):
#   print(hs_cards_data_text['t5answer'].head(1))

# with pd.option_context('display.max_colwidth', None):
#   print(nr_cards_data_text['t5prompt'][455:487])

# with pd.option_context('display.max_colwidth', None):
#   print(hs_cards_data_text['t5prompt'].head(20))

# with pd.option_context('display.max_colwidth', None):
#   print(nr_cards_data_text['t5generate'].head(2))



# Active preview: the first two Netrunner "generate" prompts at full width.
with pd.option_context('display.max_colwidth', None):
  print(nr_cards_data_text['t5generate'].head(2))

0     generate: A neutral-corp, asset card using seed angry slice.
1    generate: A neutral-corp, asset card using seed gigantic min.
Name: t5generate, dtype: object
