In [1]:
%load_ext autoreload
%autoreload 2

# T5

In [2]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
import datasets
import evaluate
from tqdm import tqdm
from src import SCRATCH_CACHE_DIR

CNNDM = "cnn_dailymail"
MODEL_NAME = "google-t5/t5-small"

cnn_dailymail = datasets.load_dataset(CNNDM, "3.0.0", cache_dir=SCRATCH_CACHE_DIR)

tokenizer = T5Tokenizer.from_pretrained(
    "t5-small", cache_dir=SCRATCH_CACHE_DIR, legacy=False
)
model = T5ForConditionalGeneration.from_pretrained(
    "t5-small", cache_dir=SCRATCH_CACHE_DIR
)
# Test:
test_source = cnn_dailymail["test"]["article"]
test_reference = cnn_dailymail["test"]["highlights"]
tokenizer_source = tokenizer(
    ["summarize: " + s for s in test_source],
    padding=True,
    truncation=True,
    return_tensors="pt",
)
source_input_ids = tokenizer_source.input_ids


  from .autonotebook import tqdm as notebook_tqdm
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [13]:
import torch
input_ids = tokenizer("The man ate a cake and ", return_tensors="pt").input_ids
labels1 = tokenizer("he was very happy", return_tensors="pt").input_ids
labels2 = tokenizer("he was very sad", return_tensors="pt").input_ids
outputsmerged = model(input_ids=torch.cat([input_ids, input_ids], dim=0), labels=torch.cat([labels1, labels2], dim=0))
outputsmerged

Seq2SeqLMOutput(loss=tensor(3.6858, grad_fn=<NllLossBackward0>), logits=tensor([[[-11.4534,  -5.8476, -10.4851,  ..., -40.0559, -40.1034, -40.0408],
         [-13.8111,  -6.6404,  -2.7956,  ..., -33.6582, -33.6690, -33.6350],
         [-13.9721,  -0.9406,  -7.8447,  ..., -32.3863, -32.4703, -32.4304],
         [-14.8747,  -1.1544,  -7.9899,  ..., -30.9319, -31.0203, -31.0223],
         [-16.0176,  -2.2045,  -8.2139,  ..., -31.3451, -31.3628, -31.3930],
         [-17.3993,  -1.8474,  -7.9838,  ..., -36.6698, -36.6658, -36.6861]],

        [[-11.4534,  -5.8476, -10.4851,  ..., -40.0559, -40.1034, -40.0408],
         [-13.8111,  -6.6404,  -2.7956,  ..., -33.6582, -33.6690, -33.6350],
         [-13.9721,  -0.9406,  -7.8447,  ..., -32.3863, -32.4703, -32.4304],
         [-14.8747,  -1.1544,  -7.9899,  ..., -30.9319, -31.0203, -31.0223],
         [-16.0176,  -2.2045,  -8.2139,  ..., -31.3451, -31.3628, -31.3930],
         [-16.7442,  -1.1632,  -6.6106,  ..., -34.5479, -34.5422, -34.5391]]],


In [11]:
# Define input sequences and labels
input_texts = [
    "The <extra_id_0> walks in <extra_id_1> park",
    "Another <extra_id_0> sentence <extra_id_1> here",
    # Add more input sequences as needed
]

label_texts = [
    "<extra_id_0> cute dog <extra_id_1> the <extra_id_2>",
    "<extra_id_0> more labels <extra_id_1> here <extra_id_2>",
    # Add more labels as needed
]

# Tokenize input sequences and labels
inputs = tokenizer(input_texts, return_tensors="pt", padding=True, truncation=True)
labels = tokenizer(label_texts, return_tensors="pt", padding=True, truncation=True)

# Forward pass through the model
# [5.9605e-07, 1.0089e+01, 4.7889e+00, 1.0667e+00, 1.2452e-01, 1.4478e-01, 1.0272e+01] 3.7837
# [5.9605e-07, 1.0089e+01, 4.7889e+00, 1.0667e+00, 1.2452e-01, 1.4478e-01,
#  1.0272e+01, 7.0121e+00, 5.0519e+00, 1.5421e+01, 1.3456e+00, 2.0305e+00,
#  1.0104e+00, 8.2953e+00] 4.7609

outputs = model(input_ids=inputs["input_ids"][1:2, :], labels=labels["input_ids"][1:2, :])
inputs["input_ids"].shape, outputs.loss

(torch.Size([2, 7]), tensor(5.7381, grad_fn=<NllLossBackward0>))

In [14]:
import torch
a = torch.tensor([5.9605e-07, 1.0089e+01, 4.7889e+00, 1.0667e+00, 1.2452e-01, 1.4478e-01,
 1.0272e+01, 7.0121e+00, 5.0519e+00, 1.5421e+01, 1.3456e+00, 2.0305e+00,
 1.0104e+00, 8.2953e+00])
a.reshape(2, -1).mean(axis=1)

tensor(3.7837)

In [None]:
outputs1 = model(input_ids=input_ids, labels=labels1)
outputs2 = model(input_ids=input_ids, labels=labels2)
outputsmerged = model(input_ids=torch.cat, labels=[labels1, labels2])
outputs1.loss, outputs2.loss, outputsmerged.loss

In [4]:
output_beam = model.generate(
    source_input_ids[0:16],
    max_length=150,
    num_beams=4,
    num_return_sequences=4,
    early_stopping=True,
    output_scores=True,
    return_dict_in_generate=True,
)

In [5]:
predictions = tokenizer.batch_decode(output_beam["sequences"], skip_special_tokens=True)

In [9]:
output_beam["sequences_scores"]

tensor([-0.3166, -0.3170, -0.3212, -0.3273, -0.3188, -0.3355, -0.3517, -0.3543,
        -0.2504, -0.2543, -0.2629, -0.2650, -0.2873, -0.2912, -0.2966, -0.3059,
        -0.2323, -0.2349, -0.2691, -0.2712, -0.3990, -0.4024, -0.4041, -0.4620,
        -0.2384, -0.2414, -0.2497, -0.2525, -0.2510, -0.2703, -0.2887, -0.2894,
        -0.3178, -0.3243, -0.3324, -0.3325, -0.2032, -0.2107, -0.2286, -0.2342,
        -0.3042, -0.3123, -0.3174, -0.3290, -0.3191, -0.3212, -0.3248, -0.3294,
        -0.2452, -0.2528, -0.2666, -0.2703, -0.2449, -0.2531, -0.2532, -0.2595,
        -0.3635, -0.3878, -0.4050, -0.4063, -0.3695, -0.3699, -0.3801, -0.4081])

In [6]:
output_beam.__dict__

{'sequences': tensor([[    0,     8,  7692,  ...,     0,     0,     0],
         [    0,     8,  7692,  ...,     0,     0,     0],
         [    0,     8,  7692,  ...,     0,     0,     0],
         ...,
         [    0, 20723,    31,  ...,     0,     0,     0],
         [    0, 20723,    31,  ...,     0,     0,     0],
         [    0, 20723,    31,  ...,     0,     0,     0]]),
 'sequences_scores': tensor([-0.3166, -0.3170, -0.3212, -0.3273, -0.3188, -0.3355, -0.3517, -0.3543,
         -0.2504, -0.2543, -0.2629, -0.2650, -0.2873, -0.2912, -0.2966, -0.3059,
         -0.2323, -0.2349, -0.2691, -0.2712, -0.3990, -0.4024, -0.4041, -0.4620,
         -0.2384, -0.2414, -0.2497, -0.2525, -0.2510, -0.2703, -0.2887, -0.2894,
         -0.3178, -0.3243, -0.3324, -0.3325, -0.2032, -0.2107, -0.2286, -0.2342,
         -0.3042, -0.3123, -0.3174, -0.3290, -0.3191, -0.3212, -0.3248, -0.3294,
         -0.2452, -0.2528, -0.2666, -0.2703, -0.2449, -0.2531, -0.2532, -0.2595,
         -0.3635, -0.3878, -0.

In [35]:
rouge = evaluate.load("rouge")
rouge.compute(predictions=predictions, references=[ref for ref in test_reference[0:16] for _ in range(4)], use_aggregator=False)

{'rouge1': [0.3661971830985916,
  0.33333333333333337,
  0.35616438356164387,
  0.3661971830985916,
  0.4444444444444444,
  0.4444444444444444,
  0.3950617283950617,
  0.3950617283950617,
  0.07228915662650602,
  0.07228915662650602,
  0.075,
  0.075,
  0.2222222222222222,
  0.25641025641025644,
  0.2368421052631579,
  0.21333333333333335,
  0.4705882352941176,
  0.5,
  0.4705882352941176,
  0.5,
  0.30769230769230765,
  0.30303030303030304,
  0.30303030303030304,
  0.39344262295081966,
  0.6538461538461539,
  0.45714285714285713,
  0.5833333333333334,
  0.3917525773195876,
  0.3404255319148936,
  0.3404255319148936,
  0.41758241758241754,
  0.3218390804597701,
  0.29999999999999993,
  0.32653061224489793,
  0.32,
  0.3103448275862069,
  0.3492063492063492,
  0.34375,
  0.3492063492063492,
  0.34375,
  0.4210526315789474,
  0.36363636363636365,
  0.3928571428571428,
  0.47457627118644075,
  0.0923076923076923,
  0.08695652173913043,
  0.0909090909090909,
  0.09375000000000001,
  0.0563

In [9]:
output_sampling = model.generate(
    source_input_ids[0:1],
    max_length=150,
    do_sample=True,
    num_beams=5,
    early_stopping=True,
    num_return_sequences=5,
    output_scores=True,
    return_dict_in_generate=True,
)

In [11]:
output_sampling["sequences_scores"]

tensor([-0.3506, -0.3527, -0.3625, -0.3645, -0.3848])

In [23]:
tokenizer.batch_decode(output_sampling[0], skip_special_tokens=True)

['court given the 123rd member of the ICC to alleged crimes. the deal opens a preliminary examination into alleged crimes in occupied palestinians. the international criminal court will be held in the mid-tomorrday evening (dursday) the ICC opened the first international inquiry into the situation in the occupied territories.',
 'the 123rd international member of the international Criminal court was announced on Wednesday. the ICC already agreed on its founding Rome Statute in January. "the world is also a step closer," the ICC official says.',
 'ipcc has become the 123rd member of the international criminal court. the legal agreement gives the international court jurisdiction on alleged crimes in their territories. a preliminary examination into the situation in other palestinians paved the way for possible war crimes. a human rights activist said the request was a move toward greater justice.',
 "the ICC became 123rd member of the international criminal court on the rights. the Pales

In [None]:
# Batch
batch_size = 8
num_batches = source_input_ids.shape[0] // batch_size
outputs = []
for i in tqdm(range(1)):
    outputs.append(
        model.generate(
            source_input_ids[i * batch_size : (i + 1) * batch_size],
            max_new_tokens=50,
            num_beams=5,
            num_return_sequences=5,
            early_stopping=True,
            output_scores=True,
        )
    )

# LLama3

In [2]:
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
)
from src import HF_TOKEN, SCRATCH_CACHE_DIR

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
tokenizer_name = model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(
            tokenizer_name,
            padding_size="left",
            token=HF_TOKEN,
            cache_dir=SCRATCH_CACHE_DIR,
        )
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_8bit=True,
    device_map="auto",
    token=HF_TOKEN,
    cache_dir=SCRATCH_CACHE_DIR,
)
model.eval()

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Loading checkpoint shards: 100%|██████████| 4/4 [00:20<00:00,  5.06s/it]


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear8bitLt(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear8bitLt(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear8bitLt(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear8bitLt(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear8bitLt(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear8bitLt(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )


In [56]:
input_text = """Summarize this excerpt in one sentence. The summary should also answer the question: What is the main takeway from the excerpt? Story: Noork squinted. So the Misty Ones were not entirely invisible. Pain\nwas growing in his numbed arm now, but as it came so came strength. He\nclimbed further out on the great branch to where sticky and overripe\nfruit hung heavy. With a grin he locked his legs upon the forking of\nthe great limb and filled his arms with fruit.\n \n  A barrage of the juicy fruit blanketed the misty shapes. Stains spread\nand grew. Patchy outlines took on a new color and sharpness. Noork\nfound that he was pelting a half-dozen hooded and robed creatures whose\narms and legs numbered the same as his own, and the last remnant of\nsuperstitious fear instilled in his bruised brain by the shaggy Vasads\nvanished.\n \n  These Misty Ones were living breathing creatures like himself! They\nwere not gods, or demons, or even the ghostly servants of demons. He\nstrung his bow quickly, the short powerful bow that Gurn had given him,\nand rained arrows down upon the cowering robed creatures.\n \n  And the monsters fled. They fled down the trail or faded away into the\njungle. All but one of them. The arrow had pierced a vital portion of\nthis Misty One's body. He fell and moved no more.\n \n  A moment later Noork was ripping the stained cloak and hood from the\nfallen creature, curious to learn what ghastly brute-thing hid beneath\nthem. His lip curled at what he saw.\n \n  The Misty One was almost like himself. His skin was not so golden as\nthat of the other men of Zuran, and his forehead was low and retreating\nin a bestial fashion. Upon his body there was more hair, and his face\nwas made hideous with swollen colored scars that formed an irregular\ndesign. He wore a sleeveless tunic of light green and his only weapons\nwere two long knives and a club.\n \n  \"So,\" said Noork, \"the men of the island prey upon their own kind. And\nthe Temple of Uzdon in the lake is guarded by cowardly warriors like\nthis.\"\n \n
Summary:"""
inputs = tokenizer(input_text, return_tensors='pt')
max_new_tokens = 50
sample_outputs = model.generate(
    inputs['input_ids'].to("cuda"),
    num_beams=1,
    do_sample=True,
    temperature=1,
    max_new_tokens=50,
    num_return_sequences=10,
    early_stopping=True,
    eos_token_id=tokenizer.eos_token_id,
    output_logits=True,
    output_scores=True,
    return_dict_in_generate=True,
    )

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


In [57]:
len(sample_outputs["logits"]), sample_outputs["logits"][0].shape

(50, torch.Size([10, 128256]))

In [60]:
tokenizer.decode(sample_outputs["sequences"][0], skip_special_tokens=True,).split("Summary:")[-1]

' Noork, who was initially fearful of the Misty Ones, discovers that they are human-like creatures and not gods or demons, and as a result, his fear turns to anger and he attacks them, eventually killing one and discovering that they are similar'

In [65]:
from textwrap import wrap
for i in range(10):
    print(sample_outputs["sequences"][i][-50:])
    print("\n".join(wrap(tokenizer.decode(sample_outputs["sequences"][i], skip_special_tokens=True,).split("Summary:")[-1], 175)))
    print("="*60)

tensor([ 2360,   672,    11,   889,   574, 15453, 63757,   315,   279, 35198,
           88, 74933,    11, 52114,   430,   814,   527,  3823, 12970, 20566,
          323,   539, 29913,   477, 45075,    11,   323,   439,   264,  1121,
           11,   813,  8850, 10800,   311, 19788,   323,   568,  8951,  1124,
           11,  9778, 13419,   832,   323, 42687,   430,   814,   527,  4528],
       device='cuda:0')
 Noork, who was initially fearful of the Misty Ones, discovers that they are human-like creatures and not gods or demons, and as a result, his fear turns to anger and he
attacks them, eventually killing one and discovering that they are similar
tensor([ 2360,   672, 52114,   430,   279, 35198,    88, 74933,    11,  8767,
         3463,   311,   387, 50058, 20566,    11,   527,  3604, 93209, 23837,
          449,  7106, 17910,  4528,   311,  1884,   315,  3026,    13,   578,
         1925, 89474,   505,   420, 50565,   374,   430,  8850,   323, 94705,
          684,   649,  3629,

In [None]:
# temperature = 2
"""
 The story is about Noork, a character who discovers that the Misty Ones are not gods or demons, but just another kind of human. With this realization, Noork attacks them,
ripping off their hood and robes to see what's beneath.
============================================================
 In the story, Noork spots misty human shapes and attacks them, finding them to be breathing creatures like himself who run away after being defeated in combat. After killing
one of them, he rips off its clothes to reveal that they look similar
============================================================
 In this excerpt, Noork confronts the "Misty Ones", the creatures he had previously revered and feared as invisible or demonic beings. He uses fruit to strip away the misty
aura and discovers that they are human, like himself, who live
============================================================
 Noork discovers that the "Misty Ones", beings he previously thought were ghostly, are actually mortal humans who are engaged in tribal violence against each other. He attacks
them, causing them to flee, but finds one critically injured and investigates. The
============================================================
 The excerpt describes Noork's encounter with a mysterious group of figures known as the Misty Ones. As he approaches and attacks them with fruit and later an arrow, Noork
discovers that they are mortals similar to himself and not deities or monstrous
============================================================
 Noork's attack on the Misty Ones reveals that they are mortal creatures, not supernatural beings. With the realization that they are not gods, his fear fades and he attacks
them with ease. After defeating the attackers, Noork discovers that they have
============================================================
 According to the excerpt, Noork discovered that the Misty Ones, long considered demons or gods, were actually living breathing creatures much like himself. He attacked them
with arrows, and one of them fell after being pierced in the vital organs, giving No
============================================================
 This excerpt tells the story of Noork, a character who discovers the Misty Ones, creatures he previously thought to be magical. He finds out they are living, breathing
creatures, and that they are almost human. However, instead of being beautiful,
============================================================
 Noork climbs a great branch to attack "the Misty Ones" with overripe fruit. He pelts them until they flee or dissolve, except for one who is fatally injured. Upon killing
him, Noork is shocked to discover that the "
============================================================
  In this excerpt from Noork's story, Noork attacks a group of  Misty Ones, half-dozen creatures wearing robed garb, and discovers that  they are human like him. With his bow
and arrow, he chases
============================================================

"""

In [None]:
# default everything
"""
 Noork, a young man, discovers that the Misty Ones, creatures he had feared as supernatural beings, are actually living beings like himself. He attacks them with his bow and
arrow, and they flee, except for one who is mortally wounded
============================================================
 Noork, a warrior, discovers that the "Misty Ones" he has been fighting are not supernatural beings, but rather living, breathing creatures like himself. He kills one of them
and finds that it is almost identical to a human, but with
============================================================
 Noork, a character from the story, discovers that the "Misty Ones" he has been fearing are actually living, breathing creatures like himself. He attacks them with his bow and
arrow, and they flee, except for one who is killed.
============================================================
 Noork, a warrior, discovers that the "Misty Ones" he has been fighting are actually living, breathing creatures like himself. He kills one of them and removes its cloak and
hood to reveal a humanoid figure with a bestial appearance. The
============================================================
 Noork, a young man, discovers that the Misty Ones, creatures he had previously feared, are actually living beings like himself. He attacks them with arrows and kills one,
then discovers that they are similar to humans, but with physical differences.
============================================================
 Noork, a young warrior, discovers that the Misty Ones, previously thought to be supernatural beings, are actually living creatures like himself. He attacks and kills one of
them, revealing that they are similar to humans but with physical differences. The main
============================================================
 Noork, a character from the story, discovers that the Misty Ones, which he had previously believed to be supernatural beings, are actually living, breathing creatures like
himself. He attacks them with his bow and arrow, and one of them falls,
============================================================
 Noork, a young man, discovers that the Misty Ones, creatures he had feared and revered, are actually living beings like himself. He attacks them with his bow and arrows, and
they flee, except for one who is killed. Noork
============================================================
 Noork, a character from the story, discovers that the Misty Ones, creatures he had previously feared, are actually living beings like himself. He attacks them with his bow
and arrows, and they flee, except for one who is fatally wounded.
============================================================
 Noork, a warrior from Zuran, discovers that the Misty Ones, creatures he had previously feared, are actually living, breathing beings like himself. He attacks them with his
bow and arrow, killing most of them, but one is left wounded
============================================================
"""

In [None]:
# top_k = 640
"""
 Noork, a brave warrior, discovers that the Misty Ones, which he had previously feared, are actually living, breathing creatures like himself. He kills one of them, and finds
that it is almost identical to him, with some physical differences.
============================================================
 Noork, a character in the story, discovers that the Misty Ones, which he had previously feared, are actually living breathing creatures like himself. He attacks them with his
bow and arrow, and they flee except for one who is mortally wounded
============================================================
 The excerpt describes Noork's encounter with the Misty Ones, a group of hooded and robed creatures that he initially fears as supernatural beings. However, after pelting them
with fruit, he discovers that they are living, breathing creatures like himself
============================================================
 Noork, a young man from Zuran, discovers that the Misty Ones, creatures he had previously feared, are actually living beings like himself. He attacks them with his bow and
arrow, and one of them falls, revealing a humanoid creature with
============================================================
 Noork, a warrior from Zuran, encounters the Misty Ones, a group of hooded and robed creatures he had previously feared as supernatural beings. He discovers that they are
actually living, breathing creatures like himself, and after a brief battle
============================================================
 Noork, a warrior, discovers that the "Misty Ones" he had been fighting are not supernatural beings, but rather living, breathing creatures who are similar to himself. He
kills one of them and finds that it is almost identical to himself,
============================================================
 Noork, a young man from the island of Zuran, discovers that the Misty Ones, which he had previously thought were gods or demons, are actually living breathing creatures like
himself. He attacks them with his bow and arrows, and they flee
============================================================
 The excerpt describes Noork, a protagonist, who is climbing a tree to gather fruit when he spots the Misty Ones, creatures he had previously feared as gods or demons. As he
pelts them with fruit, he realizes they are living, breathing
============================================================
 Noork, a brave warrior, discovers that the Misty Ones are not supernatural beings, but rather living, breathing creatures like himself. He kills one of them and finds that it
is almost identical to himself, with a similar appearance and even similar scars
============================================================
 Noork, a young man, discovers that the "Misty Ones" he has been fearing are actually living, breathing creatures that are similar to himself. He kills one of them and
discovers that they are not gods or demons, but rather ordinary humans
============================================================
"""

In [None]:
# top_p = 0.95
"""
 Noork, a warrior from Zuran, encounters the Misty Ones, creatures he had previously believed to be gods or demons. He learns that they are actually living, breathing
creatures like himself, and they flee when he attacks them with his bow and
============================================================
 Noork, a young man from Zuran, is on a journey to the island of the Misty Ones, where he encounters the creatures and discovers they are living, breathing beings like
himself. He attacks them with his bow and arrow, and only
============================================================
 Noork, a young man from Zuran, is surprised to discover that the "Misty Ones" are not supernatural beings, but rather living, breathing creatures like himself. He attacks
them with his bow and arrow, and one of them falls,
============================================================
 Noork, a character from the story, discovers that the Misty Ones, creatures he had previously feared, are actually living beings like himself. He attacks them with his bow
and arrow, and one of them falls, revealing that they are similar to
============================================================
 Noork, who has been fearing the Misty Ones, discovers that they are not gods or demons, but rather living, breathing creatures like himself. He attacks them with his bow and
arrow, and they flee, except for one who is mortally
============================================================
 Noork, a character in the story, discovers that the Misty Ones, previously thought to be supernatural beings, are actually living, breathing creatures. He attacks and kills
one of them, and is shocked to find that they are almost identical to himself
============================================================
 The excerpt describes Noork's encounter with the Misty Ones, a group of hooded and robed creatures that he initially fears as gods or demons. However, after pelting them with
fruit and realizing they are living, breathing creatures like himself,
============================================================
 Noork, a man from the island of Zuran, discovers that the Misty Ones, mysterious and feared creatures, are actually living beings like himself. He attacks them with his bow
and arrow, and they flee, except for one who is killed
============================================================
 The excerpt describes Noork, a young man from Zuran, who discovers that the Misty Ones, mysterious creatures he had previously feared, are actually living beings like
himself. He attacks them with an arrow and then removes their cloak and hood to reveal
============================================================
 Noork, a character from the story, discovers that the Misty Ones are not supernatural beings, but rather living creatures who are similar to himself. He attacks them with his
bow and arrow, and they flee, except for one who is mortally
============================================================
"""

In [13]:
# top_p = 0.95, temperature = 1.5

Summarize WW2. Summary: World War II (WWII) was a global conflict that lasted from 1939 to 1945. In this summary of WW2, we will explore the main events and causes of the war.
Caesium and radium, but their levels. This essay introduces some of the causes of World War II, such as Nazi Party ideology, German nationalism, and militarism, as well as the
consequences of the war, including the devastating loss of civilian
Summarize WW2. Summary: World War II (1939-1945)... WW2 in 15 words is: Germany invaded Poland and Japan attacked US, escalating into global conflict, millions killed. Summary
of World War II: Causes and Key Events Summary of World War II: Causes and Key Events... The invasion was followed by the Battle of Moscow, in which Russian soldiers won a
bloody victory against the Nazis, starting a decline in the German military.... WW2 also
Summarize WW2. Summary: World War II (WWII) was the deadliest conflict in human history, lasting from 1939 to 1945. Six major powers participat

In [11]:
# temp = 1.5
"""
Summarize WW2. Summary: The 20th Century World War 2, also referred to as the second global conflict, World War 2 lasted for four years, from September 1, 1939 to September 2,
1945. With the signing of the Treaty of Versailles in 1919, Germany, the loser of World War 1, was given a huge amount of penalties and reparations which added to national
resentment. How did the Soviet Union become
============================================================
Summarize WW2. Summary: World War II (1939-1945) was a global conflict that arose after the failure of the League of Nations and began with the invasion of Poland by Germany
and the annexation of Czechoslovakia. Essay on World War 2. Hitler, Italy’s Mussolini and Germany’s FDR wanted to create empires and destroy opponents. World War 2 Summary
World War II began in 1939, after Germany, led by Hitler,
============================================================
Summarize WW2. Summary: World War II, the global conflict that lasted from 1939-1945, was the most destructive war in human history. Germany, under Adolf Hitler and the Nazi
Party, sought to dominate Europe and invade the Soviet Union, leading to a war with the United States and other Allied nations. The conflict resulted in approximately 50...
World War II - Wikipedia, the free encyclopedia Summary Of World War 2 Essay. The Second World War was the
============================================================
Summarize WW2. Summary: The 1941 attack by Japan on the United States naval base at Pearl Harbor draws the United States into World War II. As a matter of fact, World War II
marked the peak of human atrocities. The war in Europe begins with the invasion of Poland by Nazi Germany, led by Adolf Hitler, in 1939. World War II ended on September 2,
1945, when Japan formally surrendered to the United States, signaling the end of
============================================================
Summarize WW2. Summary: World War 2 Summary of World War 2 Date: September 1939 to September 1945 Causes: The war was sparked by the aggression of Germany under Adolf Hitler
who sought to annex Austria and invade Poland. Other countries like Italy, under Benito Mussolini, joined the war in 1940. The war was fought between the Axis powers (Germany,
Italy, and Japan) and the Allies (Great Britain, France
============================================================
Summarize WW2. Summary: The second world war, which lasted from 1939 to 1945, was a global conflict that pitted the axis powers, consisting of germany, italy, and j... | |
Read the full text The second world war, which lasted from 1939 to 1945, was a global conflict that pitted the axis powers, consisting of Germany, Italy, and Japan, against
the allied powers, consisting of Great Britain, France
============================================================
Summarize WW2. Summary: World War II (1939-1945) was a global conflict that arose from the unresolved issues and alliances created by World War I. Here is a concise summary of
the war:  **Causes:** Nazi Germany, led by Adolf Hitler, sought to dominate Europe and spread their fascist ideology. After the war ended, there were severe territorial losses
and reparations that Germany was forced to pay, leading to widespread resentment and financial hardship. Germany annexed Austria
============================================================
Summarize WW2. Summary: World War II was a global war that lasted from 1939 to 1945 and was the deadliest conflict in human history, involving more than 30 countries and
claiming the lives of millions of people. It is considered the largest and most significant war of the 20th century. The causes of the war are widely debated, but some of the
key factors include: Nationalism: the rise of nations seeking to expand their borders, such as Nazi Germany
============================================================
Summarize WW2. Summary: World War II was a global conflict that lasted from 1939 to 1945. It was fought between two main alliances: the Allies (which consisted of countries
such as the United States, the United Kingdom, and the Soviet Union) and the Axis (which consisted of countries such as Germany, Italy, and Japan). The war was marked by
devastating battles on multiple fronts, including Europe, Africa, and Asia, and resulted in the loss of millions
============================================================
Summarize WW2. Summary: World War II was a global war that lasted from 1939 to 1945. It was the largest and deadliest conflict in human history, resulting in the loss of
millions of lives. The war was fought between two main alliances: the Allies (including the United States, United Kingdom, France, Soviet Union, and China) and the Axis powers
(led by Germany, Italy, and Japan). The war began in Europe in September 1939 when
============================================================
"""

Summarize WW2. Summary: The 20th Century World War 2, also referred to as the second global conflict, World War 2 lasted for four years, from September 1, 1939 to September 2,
1945. With the signing of the Treaty of Versailles in 1919, Germany, the loser of World War 1, was given a huge amount of penalties and reparations which added to national
resentment. How did the Soviet Union become
Summarize WW2. Summary: World War II (1939-1945) was a global conflict that arose after the failure of the League of Nations and began with the invasion of Poland by Germany
and the annexation of Czechoslovakia. Essay on World War 2. Hitler, Italy’s Mussolini and Germany’s FDR wanted to create empires and destroy opponents. World War 2 Summary
World War II began in 1939, after Germany, led by Hitler,
Summarize WW2. Summary: World War II, the global conflict that lasted from 1939-1945, was the most destructive war in human history. Germany, under Adolf Hitler and the Nazi
Party, sought to dominate Euro

In [7]:
# temp = 1.25
"""
Summarize WW2. Summary: World War II was a global conflict that lasted from 1939 to 1945 and was the deadliest war in history. It was fought between the Allies, which included the United States, Great
Britain, France, and the Soviet Union, and the Axis powers, which included Germany, Italy, and Japan. The war began in September 1939, when Germany, under the leadership of Adolf Hitler, invaded
Poland. This act of aggression prompted the United
============================================================
Summarize WW2. Summary: World War II was a global conflict that lasted from 1939 to 1945. It was fought between the Allied Powers, which included the United States, the United Kingdom, and the Soviet
Union, and the Axis Powers, which included Germany, Italy, and Japan. The war was sparked by the aggressive actions of Nazi Germany, led by Adolf Hitler, who sought to conquer and dominate Europe and
the world. The war was marked by many key events
============================================================
Summarize WW2. Summary: World War 2 (1939-1945) was a global conflict that lasted for six years and involved almost every country in the world. The war was fought between two main alliances, the Axis
powers (Germany, Italy, and Japan) and the Allied powers (the United States, the Soviet Union, Great Britain, and France). The war began when Germany, led by Adolf Hitler, invaded Poland in September
1939. The Allies responded by
============================================================
Summarize WW2. Summary: World War II was a global conflict that lasted from 1939 to 1945, involving many of the world's major nations. It was the deadliest war in human history, resulting in over 50
million military and civilian casualties. The war was fought between the Allies (principally the United States, Great Britain, and the Soviet Union) and the Axis powers (principally Germany, Italy,
and Japan). The war began in Europe in
============================================================
Summarize WW2. Summary: World War 2, also known as the Second World War, was a global conflict that lasted from 1939 to 1945. The war involved a vast majority of the world's nations and led to
significant global repercussions. Essay on world war ii summary World War II Summary The Second World War was a global conflict that lasted from 1939 to 1945. It was the deadliest war in history,
killing millions of people, including soldiers
============================================================
"""

Summarize WW2. Summary: World War II was a global conflict that lasted from 1939 to 1945 and was the deadliest war in history. It was fought between the Allies, which included the United States, Great
Britain, France, and the Soviet Union, and the Axis powers, which included Germany, Italy, and Japan. The war began in September 1939, when Germany, under the leadership of Adolf Hitler, invaded
Poland. This act of aggression prompted the United
Summarize WW2. Summary: World War II was a global conflict that lasted from 1939 to 1945. It was fought between the Allied Powers, which included the United States, the United Kingdom, and the Soviet
Union, and the Axis Powers, which included Germany, Italy, and Japan. The war was sparked by the aggressive actions of Nazi Germany, led by Adolf Hitler, who sought to conquer and dominate Europe and
the world. The war was marked by many key events
Summarize WW2. Summary: World War 2 (1939-1945) was a global conflict that lasted for six years and invo

In [None]:
# top_p = 0.95, temp = 1.25
"""
Summarize WW2. Summary: World War II, the deadliest military conflict in history, lasted from 1939 to 1945. Here is a brief summary of the war's main events: Background: Germany, under. Apr 22, 2020 ·
Summary of World War II In 1939, Germany invaded Poland, which led to the United Kingdom and France declaring war on Germany. This marked the beginning of World War II. In 1941, Germany and the
============================================================
Summarize WW2. Summary: The... Summarize WW2. Summary: The Second World War (1939-1945) was a global conflict that involved most of the world's nations. The war was fought primarily between two main
alliances: the Axis powers, consisting of Germany, Italy, and Japan, and the Allied powers, consisting of the United States, the United Kingdom, and the Soviet Union. Here is a summary of the war in
five key points: I.
============================================================
Summarize WW2. Summary: World War II (WWII) was a global conflict that lasted from 1939 to 1945. It involved most of the world's nations, including all of the great powers, organised into two main
alliances: the Allies, comprising countries such as the United States, the United Kingdom, France, the Soviet Union, and China, and the Axis, comprising countries such as Germany, Italy, and Japan.
The war was sparked by the. Summary
============================================================
Summarize WW2. Summary: World War II (WW2) was a global conflict that lasted from 1939 to 1945, involving many countries and regions. It was the deadliest conflict in history, resulting in millions of
deaths and widespread destruction. Here is a summary of World War II: Pre-War Years: In the 1930s, Germany, led by Adolf Hitler, began to rearm and expand its military. Hitler's aggressive foreign
policy, including the annex
============================================================
Summarize WW2. Summary: World War II was a global conflict that lasted from 1939 to 1945. The war began when Nazi Germany, led by Adolf Hitler, invaded Poland in September 1939. This act of aggression
prompted the United Kingdom and France to declare war on Germany, which marked the beginning of the war in Europe. In the years that followed, Germany expanded its territorial claims and launched
attacks on several other countries, including Denmark, Norway, Belgium, the
============================================================
"""

# BART

In [2]:
from transformers import BartTokenizer, BartForConditionalGeneration
import torch
from src import SCRATCH_CACHE_DIR


# Load the tokenizer and model
tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn', cache_dir=SCRATCH_CACHE_DIR,)
model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn', cache_dir=SCRATCH_CACHE_DIR,)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
model.to("cuda")


BartForConditionalGeneration(
  (model): BartModel(
    (shared): Embedding(50264, 1024, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): BartScaledWordEmbedding(50264, 1024, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 1024)
      (layers): ModuleList(
        (0-11): 12 x BartEncoderLayer(
          (self_attn): BartSdpaAttention(
            (k_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (v_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (q_proj): Linear(in_features=1024, out_features=1024, bias=True)
            (out_proj): Linear(in_features=1024, out_features=1024, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=1024, out_features=4096, bias=True)
          (fc2): Linear(in_features=4096, out_features=1024, bias=True)
          (final_l

In [4]:
input_text = """Summarize this excerpt in one sentence. The summary should also answer the question: What is the main takeway from the excerpt? Story: Noork squinted. So the Misty Ones were not entirely invisible. Pain\nwas growing in his numbed arm now, but as it came so came strength. He\nclimbed further out on the great branch to where sticky and overripe\nfruit hung heavy. With a grin he locked his legs upon the forking of\nthe great limb and filled his arms with fruit.\n \n  A barrage of the juicy fruit blanketed the misty shapes. Stains spread\nand grew. Patchy outlines took on a new color and sharpness. Noork\nfound that he was pelting a half-dozen hooded and robed creatures whose\narms and legs numbered the same as his own, and the last remnant of\nsuperstitious fear instilled in his bruised brain by the shaggy Vasads\nvanished.\n \n  These Misty Ones were living breathing creatures like himself! They\nwere not gods, or demons, or even the ghostly servants of demons. He\nstrung his bow quickly, the short powerful bow that Gurn had given him,\nand rained arrows down upon the cowering robed creatures.\n \n  And the monsters fled. They fled down the trail or faded away into the\njungle. All but one of them. The arrow had pierced a vital portion of\nthis Misty One's body. He fell and moved no more.\n \n  A moment later Noork was ripping the stained cloak and hood from the\nfallen creature, curious to learn what ghastly brute-thing hid beneath\nthem. His lip curled at what he saw.\n \n  The Misty One was almost like himself. His skin was not so golden as\nthat of the other men of Zuran, and his forehead was low and retreating\nin a bestial fashion. Upon his body there was more hair, and his face\nwas made hideous with swollen colored scars that formed an irregular\ndesign. He wore a sleeveless tunic of light green and his only weapons\nwere two long knives and a club.\n \n  \"So,\" said Noork, \"the men of the island prey upon their own kind. And\nthe Temple of Uzdon in the lake is guarded by cowardly warriors like\nthis.\"\n \n
Summary:"""
inputs = tokenizer(input_text, return_tensors='pt')
max_new_tokens = 50
sample_outputs = model.generate(
    inputs['input_ids'].to("cuda"),
    num_beams=1,
    do_sample=True,
    temperature=1,
    max_new_tokens=50,
    num_return_sequences=10,
    early_stopping=True,
    eos_token_id=tokenizer.eos_token_id,
    output_logits=True,
    output_scores=True,
    return_dict_in_generate=True,
    )



In [5]:
len(sample_outputs["logits"]), sample_outputs["logits"][0].shape, sample_outputs["sequences"].shape

(50, torch.Size([10, 50264]), torch.Size([10, 51]))

In [6]:
from textwrap import wrap
for i in range(10):
    print(sample_outputs["sequences"][i][-50:])
    print("\n".join(wrap(tokenizer.decode(sample_outputs["sequences"][i],), 175)))
    print("="*60)

tensor([    0,     0,     0,   250, 20397,    19,   909, 26172,  4631,     4,
           39,   652,     8,  1275, 14638,    21,   818,   101,  1003,     8,
          648,    21,   156, 42396,    19, 27722, 20585, 26172,     4,   440,
         9657,    21,   910,  7153,   159,    15,    19, 36486,     4,   870,
            5,    86,    37,  2312,     7,   800,     5, 20397,     6,     2],
       device='cuda:0')
</s><s><s><s>A creature with black scars covering. his face and red lips was almost like himself and yet was made hideous with swollen colored scars. Noork was rained down on
with arrows. By the time he managed to release the creature,</s>
tensor([    0,     0,     0, 43952,     5,   527,     6,  1137,     5,   527,
            9,   440,  9657,     6,     5,   664,  2143,    54,  4951,    19,
          103,     9,     5, 12241,   219, 44350, 15916,  3817,    10,  6693,
          196,     6,  4727,  2946,     4, 12192,     5, 10746,    10,  4315,
        32933,  1258,     9,     

In [7]:
torch.stack(sample_outputs["logits"], dim=1)[4][5]

tensor([-0.9271,  0.2838,  2.8761,  ...,  0.6331,  0.6093,  0.6054],
       device='cuda:0')

In [8]:
sample_outputs["logits"][5][4]

tensor([-0.9271,  0.2838,  2.8761,  ...,  0.6331,  0.6093,  0.6054],
       device='cuda:0')

In [9]:
output_ids.shape[0]

NameError: name 'output_ids' is not defined

In [10]:
output_ids = sample_outputs["sequences"]
logits = sample_outputs["logits"]
output_ids = output_ids[:, -len(logits):] # logits is a tuple of length = max_new_tokens
assert output_ids.shape[1] == len(logits) and output_ids.shape[0] == logits[0].shape[0]
stacked_logits = torch.stack(logits, dim=1) # bz x seq_len x vocab_len
log_likelihoods = []
for logits_2d, output_ids_1d in zip(
    stacked_logits, output_ids
):
    log_likelihoods += [
        torch.gather(
            input=torch.log_softmax(logits_2d, dim=1),
            dim=1,
            # Double-checked, this is what you want
            index=output_ids_1d[:, None],
        ).sum().item()
    ]
torch.tensor(log_likelihoods)

tensor([-110.4096, -118.9912, -113.9986, -112.7521,  -63.7804,  -61.7757,
        -112.6064,  -89.7589, -106.5427, -107.9353])

In [107]:
sum([torch.log_softmax(logits_2d, dim=1)[i, elt] for i, elt in enumerate(output_ids_1d)])

-98.49181365966797