In [1]:
import json

from pprint import pprint
import torch
import transformers

from bible import Bible
from embeddings import load_embeddings
from models import ModelWrapper

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Shared setup: the Bible object built from the NRSV XML and the chapter index
# map, the torch device handed to the model wrappers, and the `top_50` data
# that is later passed to `get_top_n_acc`.
bible = Bible("data/nrsv_bible.xml", "data/chapter_index_map.json")
device = torch.device("cpu")

# Pass the encoding explicitly so reading the JSON file does not depend on the
# platform's default locale encoding.
with open("data/top_50.json", encoding="utf-8") as f:
    top_50 = json.load(f)

In [3]:
# Load the pretrained GPT-2 XL model and tokenizer plus the two precomputed
# embedding files. Placement uses the `device` object defined in the setup cell
# rather than repeating the "cpu" literal, so the model, the embeddings and the
# wrappers always agree on where tensors live.
gpt2_xl = transformers.GPT2Model.from_pretrained("gpt2-xl").to(device)
gpt2_xl_tokenizer = transformers.GPT2Tokenizer.from_pretrained("gpt2-xl")

# NOTE(review): torch.load unpickles its input; only load embedding files that
# come from a trusted source.
whole_embedding = torch.load("embeddings/gpt2-xl_whole.pt", map_location=device)
summary_embedding = torch.load("embeddings/gpt2_xl.pt", map_location=device)

In [4]:
# Two wrappers around the same GPT-2 XL model and tokenizer; they differ only
# in the embedding passed in (`whole_embedding` vs `summary_embedding`) and in
# their name.
gpt2_xl_whole = ModelWrapper(
    model=gpt2_xl,
    tokenizer=gpt2_xl_tokenizer,
    bible=bible,
    embedding=whole_embedding,
    name="gpt2_xl_whole",
    device=device,
)

# The summary wrapper gets its own name; it previously reused "gpt2_xl_whole",
# which made the two wrappers indistinguishable by name.
# NOTE(review): assumes `name` is only a label inside ModelWrapper — confirm.
gpt2_xl_summary = ModelWrapper(
    model=gpt2_xl,
    tokenizer=gpt2_xl_tokenizer,
    bible=bible,
    embedding=summary_embedding,
    name="gpt2_xl_summary",
    device=device,
)

In [6]:
# Short plain-language descriptions of biblical events, bound to sent1..sent7
# in order; `sent4` is the query used by the chapter lookups in this notebook.
(
    sent1,
    sent2,
    sent3,
    sent4,
    sent5,
    sent6,
    sent7,
) = (
    "Moses escapes from Egypt with his people crossing the red sea",
    "God created the world in 6 days",
    "Jesus is born in Bethlehem",
    "Jesus is crucified on the cross",
    "Jesus is resurrected from the dead",
    "David defeats Goliath with a slingshot",
    "God tests Abraham's faith by asking him to sacrifice his son",
)

In [19]:
# Ask the whole-text wrapper for 5 chapters related to `sent4`, without the
# chapter text, and pretty-print the result.
whole_related_chapters = gpt2_xl_whole.get_related_n_chapters(sent4, 5, with_text=False)
pprint(whole_related_chapters)

['John 19', 'John 18', 'John 11', 'John 1', 'John 21']


In [23]:
# Score the whole-text wrapper on `top_50` with n=5 and show the value as the
# cell result (presumably a top-5 accuracy — confirm against ModelWrapper).
whole_top_n_acc = gpt2_xl_whole.get_top_n_acc(top_50, 5)
whole_top_n_acc

0.14

In [20]:
# Ask the summary wrapper for 5 chapters related to `sent4`, without the
# chapter text, and pretty-print the result.
summary_related_chapters = gpt2_xl_summary.get_related_n_chapters(sent4, 5, with_text=False)
pprint(summary_related_chapters)

['Matthew 27', 'Mark 15', 'John 19', 'Luke 23', 'John 5']


In [25]:
# Score the summary wrapper on `top_50` with n=5 and show the value as the
# cell result (presumably a top-5 accuracy — confirm against ModelWrapper).
summary_top_n_acc = gpt2_xl_summary.get_top_n_acc(top_50, 5)
summary_top_n_acc

0.52