# Compile the Input Data

In [31]:
import json
import random

## Create Dictionaries from JSON files

Each Pokédex entry is compiled into an input string of the form:

#### < type=typename classname > * entry

- `<` is the meta-start token
- `>` is the meta-end token
- `*` is the text-start token

In [32]:
def _load_json(path):
    """Read and parse the JSON file at ``path``.

    The file is opened in a ``with`` block so the handle is closed even if
    parsing raises, and it is decoded explicitly as UTF-8 (the same encoding
    this notebook uses when it writes its dataset files) instead of relying
    on the platform's locale default.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON document.
    """
    with open(path, encoding='utf-8') as f:
        return json.load(f)


pokedex_entries = _load_json('../data_files/pokedex_entries.json')
pokemon_types = _load_json('../data_files/pokemon_types.json')
pokemon_class = _load_json('../data_files/pokemon_class.json')

In [33]:
def compile_input(entry, p_type, p_class):
    """Build one model input string from an entry and its metadata.

    The result has the form ``< type=<p_type> <p_class> > * <entry>``:
    a metadata header delimited by ``<`` and ``>``, followed by ``*`` and
    the entry text.

    Args:
        entry: The entry text placed after the ``*`` token.
        p_type: The type name placed after ``type=`` in the header.
        p_class: The class name placed after the type in the header.

    Returns:
        The assembled input string.
    """
    meta_header = " ".join(("<", "type=" + p_type, p_class, ">"))
    return meta_header + " * " + entry

In [34]:
# Accumulates one compiled input string per pokedex entry.
# NOTE(review): this name shadows the builtin `input`; it is kept because
# later cells refer to it.
input = []

# Walk the three dictionaries in parallel. zip() pairs their keys by
# position, so this presumably relies on all three files listing the
# pokemon in the same order -- TODO confirm.
for p_entry, p_type, p_class in zip(pokedex_entries, pokemon_types, pokemon_class):

    # Every entry of this pokemon becomes its own input, tagged with the
    # first listed type and the class of the pokemon.
    input.extend(
        compile_input(entry, pokemon_types[p_type][0], pokemon_class[p_class])
        for entry in pokedex_entries[p_entry]
    )

### Create a Test-Train Split

In [35]:
# Fraction of the compiled inputs that goes into the training split
TRAIN_FRACTION = 0.8

# Fixed seed so that re-running the notebook reproduces the same split
# instead of writing a different train/test partition to disk every time
SPLIT_SEED = 42

# Shuffle the data in place with a dedicated, seeded generator; this leaves
# the state of the global `random` module untouched
random.Random(SPLIT_SEED).shuffle(input)

# Find the index to split the data at
split_index = round(len(input) * TRAIN_FRACTION)

# Split the input into training and testing splits
train_data = input[:split_index]
test_data = input[split_index:]

In [36]:
# Save each split (a list of input strings) as an indented JSON file
for out_path, split in (
    ("../data_files/train_dataset.json", train_data),
    ("../data_files/test_dataset.json", test_data),
):
    with open(out_path, "w", encoding='utf-8') as f:
        json.dump(split, f, indent=4, sort_keys=True)