In [2]:
import json
import re

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

import CONSTANTS as const

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Device that the model weights and the tokenized prompts are moved onto.
device = "cuda"

# Load the Mistral instruct checkpoint with half-precision (float16) weights,
# then the tokenizer that belongs to the same checkpoint.
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.1",
    torch_dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.1",
)

Loading checkpoint shards: 100%|██████████| 2/2 [00:28<00:00, 14.15s/it]


In [4]:
# Parse the pre-formatted character sheets numbered 01 through 08 into a list,
# one parsed JSON document per file, in file order.
characters = []

for i in range(1, 9):
    with open(f"characters/formatted_characters/0{i}_formatted.json", encoding="utf8") as fh:
        # json.load reads and parses the open file in one step.
        characters.append(json.load(fh))

# Bare last expression so the notebook displays the loaded sheets.
characters

[{'character_name': 'Geoffry Carde',
  'class': 'Rogue',
  'subclass': 'null',
  'level': 1,
  'background': 'Entertainer',
  'race': 'Forest Gnome',
  'alignment': None,
  'experience_points': 0,
  'max_hit_points': 12,
  'abilities': {'strength': 11,
   'dexterity': 18,
   'constitution': 12,
   'intelligence': 10,
   'wisdom': 14,
   'charisma': 11},
  'proficiency_bonus': 2,
  'expertise': ['Sleight of Hand', 'Perception'],
  'proficiencies': ['Dexterity Saving Throws',
   'Performance',
   'Sleight of Hand',
   'Rapier',
   'Longsword',
   'Simple Weapons',
   "Thieves' Tools",
   'Perception',
   'Insight',
   'Acrobatics',
   'Disguise Kit',
   'Intelligence Saving Throws',
   'Tantan',
   'Light Armor',
   'Shortsword',
   'Investigation',
   'Crossbow, Hand'],
  'languages': ['Gnomish', 'Common', "Thieves' Cant"],
  'equipment': ['Oil (flask)',
   'Waterskin',
   'Crowbar',
   'Rope, Hempen (50 feet)',
   'Candle',
   'Shortbow',
   'String',
   'Hammer',
   'Dagger',
   'Pito

In [9]:
# Direct-lookup questions about a character sheet.
# Each entry pairs a "question" callable (character -> question text) with an
# "answer" callable (character -> expected value read from the sheet).
# The entries are generated from the field names instead of being written out
# one by one, so adding a new lookup question only means adding a field name.
# `field=field` freezes the current field for each lambda; without it every
# lambda would look up the comprehension variable late and see only the last field.
basics = [
    {
        "question": lambda c, field=field: f"What is {c['character_name']}'s {field}?",
        "answer": lambda c, field=field: c[field],
    }
    for field in ("race", "class", "background")
]

In [19]:
def evaluate(character=None, question=None):
    """Ask the model a single question about a single character sheet.

    Args:
        character: JSON-serializable character sheet. It is embedded in the
            prompt via ``json.dumps`` and passed to the question callables.
        question: dict with two callables, ``"question"`` (character -> prompt
            text) and ``"answer"`` (character -> expected answer).

    Returns:
        dict with the keys ``"question"`` (the rendered question text),
        ``"prediction"`` (the model's reply, stripped of surrounding
        whitespace) and ``"answer"`` (the expected answer).

    Warns:
        UserWarning: if generation ended without an end-of-sequence token,
            i.e. the reply was most likely cut off by ``max_new_tokens``.
    """
    import warnings

    # Render the question once; it is used both in the prompt and in the result.
    question_text = question["question"](character)

    encodeds = tokenizer.apply_chat_template([{
        "role": "user",
        # mistral does not support a "system" prompt. We prepend it to the user prompt so that we have some relatability between GPT-* and mistral
        "content": f"{const.SYSTEM_MESSAGE}\n\nHere is the character sheet: {json.dumps(character)}\n\n{question_text}",
    }], return_tensors="pt")
    model_inputs = encodeds.to(device)
    # Cheap after the first call: the weights already live on `device` by then.
    model.to(device)

    # NOTE: do_sample=True makes the prediction non-deterministic between runs.
    generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True, pad_token_id=tokenizer.eos_token_id)

    # generate() returns the prompt tokens followed by the continuation. Slicing the
    # prompt off is more robust than pattern-matching "[/INST] ... </s>" in the decoded
    # text: it still works when the reply hits max_new_tokens before emitting "</s>",
    # and it cannot be confused by template markers occurring inside the sheet itself.
    new_token_ids = generated_ids[0, model_inputs.shape[-1]:]
    if new_token_ids.numel() == 0 or new_token_ids[-1].item() != tokenizer.eos_token_id:
        warnings.warn(
            "Generation stopped without an end-of-sequence token; the response may be truncated.",
            stacklevel=2,
        )
    response_content = tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()

    # We omit the system prompt, so that at judging time the conversation can be more
    #  comparable to a ChatGPT conversation (which also omits the system prompt)
    # The intuition here is that at judging time we want to evaluate the quality of the system prompt itself,
    #  not simply assess how well the (possibly mediocre) system prompt was respected
    return {
        "question": question_text,
        "prediction": response_content,
        "answer": question["answer"](character)
    }

In [20]:
# Run every basic question against every character, character by character.
# Results are appended one at a time, so whatever was collected so far stays
# available in `responses` if a later generation fails.
responses = []
for c in characters:
    for q in basics:
        responses.append(evaluate(character=c, question=q))

# Bare last expression so the notebook displays the collected results.
responses

[{'question': "What is Geoffry Carde's race?",
  'prediction': 'Geoffrey Carde is a Forest Gnome.',
  'answer': 'Forest Gnome'},
 {'question': "What is Geoffry Carde's class?",
  'prediction': "Geoffry Carde's class is Rogue.",
  'answer': 'Rogue'},
 {'question': "What is Geoffry Carde's background?",
  'prediction': "Geoffry Carde's background is Entertainer.",
  'answer': 'Entertainer'},
 {'question': "What is Vortex's race?",
  'prediction': "Vortex's race is Air Genasi.",
  'answer': 'Air Genasi'},
 {'question': "What is Vortex's class?",
  'prediction': "Vortex's class is Sorcerer.",
  'answer': 'Sorcerer'},
 {'question': "What is Vortex's background?",
  'prediction': 'Vortex\'s background is "Noble".',
  'answer': 'Noble'},
 {'question': "What is Lozildilk's race?",
  'prediction': "Lozildilk's race is Kalashtar.",
  'answer': 'Kalashtar'},
 {'question': "What is Lozildilk's class?",
  'prediction': 'Lozildilk\'s class is Warlock, and they have chosen The Fathomless as their sub

In [None]:
# Combat-flow prompts. Every entry has a "question" string and a "should_roll"
# flag; some entries also carry a "history" list of earlier message strings.
# NOTE(review): "should_roll" presumably records whether the model is expected
#   to roll dice in its reply, and "history" presumably holds the messages that
#   precede the question in the conversation — confirm both, including which
#   chat roles the history items map to.
# NOTE(review): unlike `basics`, "question" is a plain string here and there is
#   no "answer" key, so these entries cannot be passed to `evaluate` as written
#   (it calls question["question"](character) and question["answer"](character)).
questions = [
    {
        "question": "Initiative",
        "should_roll": True,
    },
    {
        "question": "Roll for Initiative",
        "should_roll": True,
    },
    {
        "question": "What is your Initiative modifier?",
        "should_roll": False,
    },
    {
        "history": [
            "Roll Initiative",
            # Both bullet lists use "- " with no leading space so the rendered
            # prompt is formatted consistently.
            "Initiative Order:\n- Player: 24\n- You: 6\n\nCurrent State:\n- Player: 7 HP\n- You: 12 HP",
        ],
        "question": "What would you like to do?",
        "should_roll": False,
    },
    {
        "question": "What is your AC?",
        "should_roll": False,
    },
    {
        "question": "What is your Armor Class?",
        "should_roll": False,
    },
    {
        "question": "What are your current hit points?",
        "should_roll": False,
    },
    {
        "history": [
            "Roll for initiative!",
        ],
        "question": "The fighter swings a longsword at you. He rolled a 15. With his +4 attack roll, the total to hit is 15 + 4 = 19.",
        "should_roll": False,
    },
    {
        "history": [
            "Roll for initiative!",
        ],
        "question": "You take 11 points of slashing damage from the player's longsword. Please update your current hit points.",
        "should_roll": False,
    },
    {
        "history": [
            "Roll for initiative!",
            "You take 11 points of slashing damage from the player's longsword. Please update your current hit points.",
        ],
        "question": "What are your current hit points?",
        "should_roll": False,
    },
]