In [1]:
def get_spelling(word):
    """Return the spelled-out form of ``word``: upper-cased characters joined by '-'.

    Example: ``'able'`` -> ``'A-B-L-E'``. An empty input yields an empty string.
    """
    # Upper-case character by character (not the whole string at once) so each
    # original character stays a single '-'-separated element.
    letters = map(str.upper, word)
    return '-'.join(letters)

# Get the words and their correct spelling by grade from 1-5.
# Read the raw file first, dropping newlines and '*' markers from every line.
with open("GradeSpellingEval.txt", "r") as file:
    lines = [raw.replace('\n', '').replace('*', '') for raw in file.readlines()]

# Positions of the "GRADE" header lines (the final line is never treated as a
# header), followed by an end-of-file sentinel.
grade_indices = [i for i in range(len(lines)-1) if lines[i].startswith("GRADE")]
grade_indices.append(len(lines))

# Each consecutive pair of indices delimits one grade's section. The first two
# lines of a section (the header and the line after it) and the last line
# before the next boundary are skipped.
# NOTE(review): these offsets assume a specific file layout — confirm against the data file.
words_by_grade = {}
for grade, (start, end) in enumerate(zip(grade_indices, grade_indices[1:]), start=1):
    words_by_grade[grade] = [
        (lines[row].strip(), get_spelling(lines[row].strip()))
        for row in range(start + 2, end - 1)
    ]

words_by_grade[3][:10]

[('able', 'A-B-L-E'),
 ('above', 'A-B-O-V-E'),
 ('afraid', 'A-F-R-A-I-D'),
 ('afternoon', 'A-F-T-E-R-N-O-O-N'),
 ('again', 'A-G-A-I-N'),
 ('age', 'A-G-E'),
 ('air', 'A-I-R'),
 ('airplane', 'A-I-R-P-L-A-N-E'),
 ('almost', 'A-L-M-O-S-T'),
 ('alone', 'A-L-O-N-E')]

In [None]:
import random

def create_few_shot_prompt(word, word_list, num_shots):
    """Build a few-shot spelling prompt whose final question asks for ``word``.

    Args:
        word: The word (a string) the model is asked to spell.
        word_list: Sequence of ``(word, spelling)`` pairs to draw examples from.
            It is not modified.
        num_shots: Number of solved examples to place before the final question.

    Returns:
        The prompt string. It ends with the first letter of the answer followed
        by '-' (e.g. ``"... A: A-"``) so the model continues the spelling.

    Raises:
        ValueError: If ``num_shots`` exceeds the number of usable examples
            (raised by ``random.sample``).
    """
    # Ensure we don't sample the word we want spelled, which would give the
    # model the answer. Entries are (word, spelling) pairs, so compare against
    # the pair's first element; a bare `word in word_list` check never matches.
    # Filtering into a new list also leaves the caller's list untouched.
    candidates = [entry for entry in word_list if entry != word and entry[0] != word]

    prompt = ''
    if num_shots > 0:
        samples = random.sample(candidates, num_shots)
        for sample in samples:
            prompt += f"Q: How do you spell '{sample[0]}'? A: {sample[1]}\n\n"
    prompt += f"Q: How do you spell '{word}'? A: {word[0].upper()}-"
    return prompt

# Preview a 2-shot prompt built from the grade-3 list.
create_few_shot_prompt(word='able', word_list=words_by_grade[3], num_shots=2)

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Single source of truth for the checkpoint so tokenizer and model cannot drift apart.
MODEL_NAME = "EleutherAI/gpt-j-6B"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)

In [None]:
import torch

# Run on the first GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
model.to(device)

# Prevent lots of info messages telling us it's doing this every prompt.
model.config.pad_token_id = tokenizer.eos_token_id

# Smoke test: generate a short continuation to confirm the model runs on `device`.
prompt = "Once upon a time"
input_ids = tokenizer.encode(prompt, return_tensors='pt')
input_ids = input_ids.to(device)
output = model.generate(input_ids, max_length=50, num_return_sequences=1)
response = tokenizer.decode(output[0], skip_special_tokens=True)
print(response)

In [None]:
import tqdm.notebook as tqdm

def assess_model_on_words(model, word_list, num_shots):
    """Ask the model to spell every word of every grade and collect its answers.

    Each word is posed as a few-shot problem whose last line already contains
    the first letter of the answer (without it, the model just answers ???? or
    ____). The last line looks like: "Q: How do you spell 'example'? A: E-"

    Uses the notebook-level ``tokenizer`` and ``device`` globals.

    Args:
        model: Causal language model exposing ``generate``.
        word_list: Mapping of grade -> list of ``(word, spelling)`` pairs.
        num_shots: Number of solved examples placed before each question.

    Returns:
        Mapping of grade -> list of ``((word, spelling), response)`` pairs, where
        ``response`` is the text after "A:" in the final question, stripped.
    """
    data = {grade: [] for grade in word_list.keys()}
    for grade in word_list:
        print(f"Assessing Grade {grade}")

        # Iterate over a snapshot so the loop stays correct even if prompt
        # construction were to touch the underlying list.
        for word in tqdm.tqdm(list(word_list[grade])):
            prompt = create_few_shot_prompt(word[0], word_list[grade], num_shots)
            # Calling the tokenizer directly returns tokens and attention mask.
            inputs = tokenizer(prompt, return_tensors='pt').to(device)
            # Budget roughly one letter plus one dash per character of the word.
            output = model.generate(**inputs, max_new_tokens=2*len(word[0]), num_return_sequences=1)
            response = tokenizer.decode(output[0], skip_special_tokens=True)
            # Blocks are separated by blank lines; index `num_shots` is the
            # final question, after the `num_shots` solved examples.
            response = response.split('\n\n')[num_shots]
            response = response.split('A:')[1].strip()
            data[grade].append((word, response))

    return data

# Evaluate every grade with 10 solved examples per prompt.
data = assess_model_on_words(model, word_list=words_by_grade, num_shots=10)

In [None]:
# Check how closely model's spelling ability matches grade.
print(data[1][:10])

# Start every known grade at 0, then fill in the fraction of exact matches
# between the model's response and the reference spelling.
successes = dict.fromkeys(words_by_grade, 0)
for grade, results in data.items():
    num_correct = sum(response == entry[1] for entry, response in results)
    successes[grade] = round(num_correct / len(results), 3)
successes

In [None]:
import plotly.graph_objects as go
import plotly.io as pio

# Select Plotly's "notebook" renderer for figure display.
pio.renderers.default = "notebook"

# Hard-coded per-grade success rates; this overrides any `successes` computed earlier.
# NOTE(review): presumably copied from a previous evaluation run so the plot can be
# drawn without re-running the model — confirm the values are still current.
successes = {
    1: 0.978,
    2: 0.875,
    3: 0.668,
    4: 0.579,
    5: 0.595,
}

def basic_bar_graph(data):
    """Show a bar chart of a mapping: keys on the x-axis, values as bar heights.

    The y-axis is fixed to the range [0, 1].
    """
    categories = list(data.keys())
    heights = list(data.values())
    fig = go.Figure(go.Bar(x=categories, y=heights))
    fig.update_layout(yaxis={'range': [0, 1]})
    fig.show()

# Plot success rate per grade.
basic_bar_graph(data=successes)

In [None]:
# Check how closely model's spelling ability matches length of the word.
# Buckets for lengths 1-12 are created up front so they all appear in the chart,
# even when no word of that length exists.
lengths = {i: {'success': 0, 'total': 0} for i in range(1, 13)}
for grade in data.keys():
    for word in data[grade]:
        # Each entry is ((word, spelling), response).
        length = len(word[0][0])
        # setdefault gives words outside the pre-created range their own bucket
        # instead of raising KeyError.
        bucket = lengths.setdefault(length, {'success': 0, 'total': 0})
        bucket['total'] += 1
        if word[1] == word[0][1]:
            bucket['success'] += 1

def success_rate_bar_graph(data):
    """Show a bar chart of success rate per bucket, labelled with sample sizes.

    Args:
        data: Mapping of bucket key -> ``{'success': int, 'total': int}``.
            Buckets with a total of 0 are drawn with a rate of 0.
    """
    bucket_keys = list(data.keys())
    rates = []
    labels = []
    for stats in data.values():
        total = stats['total']
        rates.append(stats['success'] / total if total > 0 else 0)
        labels.append(f"n={total}")  # Sample sizes as text

    bars = go.Bar(
        x=bucket_keys,
        y=rates,
        text=labels,
        textposition='auto',  # Let Plotly choose where to place the text
    )
    fig = go.Figure(bars)
    fig.update_layout(yaxis={'range': [0, 1]})
    fig.show()

# Plot success rate per word length, then dump the raw counts behind the chart.
success_rate_bar_graph(data=lengths)
print(lengths)