# Workshop 1 - Summarization 

In [2]:
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM, GenerationConfig

## T5 Models

The <code>flan-t5</code> model is a Text-To-Text Transfer Transformer (T5) that is capable of performing zero-shot NLP tasks such as summarization, simple reasoning, question answering, etc.

Some T5 models from Huggingface
- [<code>google/flan-t5-base</code>](https://huggingface.co/google/flan-t5-base)
- [<code>google/flan-t5-small</code>](https://huggingface.co/google/flan-t5-small)
- [<code>google/flan-t5-xl</code>](https://huggingface.co/google/flan-t5-xl)
- [<code>google/flan-t5-xxl</code>](https://huggingface.co/google/flan-t5-xxl) - full model

Complete list of [T5 models](https://huggingface.co/models?search=google/flan) on Huggingface.

In [3]:
# Hugging Face Hub id of the checkpoint that the tokenizer and model are loaded from.
model_name = "google/flan-t5-base"

In [4]:
# Load the tokenizer and the sequence-to-sequence model for the checkpoint named by `model_name`.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

In [5]:
# Print the model's module tree to inspect its architecture (embeddings, encoder/decoder stacks).
print(model)

T5ForConditionalGeneration(
  (shared): Embedding(32128, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=768, out_features=2048, bias=False)
              (wi_1): Linear(in_features=768, out_features=2048, bias=False)
              (wo):

In [None]:
# Sample input: a poem. `text` is reassigned by the cells that follow, so when the
# notebook is run top to bottom the last assignment is the one the prompt uses.
text = """ 
Two roads diverged in a yellow wood,
And sorry I could not travel both
And be one traveler, long I stood
And looked down one as far as I could
To where it bent in the undergrowth;

Then took the other, as just as fair,
And having perhaps the better claim,
Because it was grassy and wanted wear;
Though as for that the passing there
Had worn them really about the same,

And both that morning equally lay
In leaves no step had trodden black.
Oh, I kept the first for another day!
Yet knowing how way leads on to way,
I doubted if I should ever come back.

I shall be telling this with a sigh
Somewhere ages and ages hence:
Two roads diverged in a wood, and I—
I took the one less traveled by,
And that has made all the difference.
"""

In [6]:
# Sample input: a descriptive prose passage. `text` is reassigned again in the next
# cell, so when run in order that later value is the one used to build the prompt.
text = """ 
When a traveler in north central Massachusetts takes the wrong fork
at the junction of the Aylesbury pike just beyond Dean's Corners he
comes upon a lonely and curious country. The ground gets higher, and
the brier-bordered stone walls press closer and closer against the ruts
of the dusty, curving road. The trees of the frequent forest belts
seem too large, and the wild weeds, brambles, and grasses attain a
luxuriance not often found in settled regions. At the same time the
planted fields appear singularly few and barren; while the sparsely
scattered houses wear a surprizing uniform aspect of age, squalor, and
dilapidation. Without knowing why, one hesitates to ask directions
from the gnarled, solitary figures spied now and then on crumbling
doorsteps or in the sloping, rock-strewn meadows. Those figures are
so silent and furtive that one feels somehow confronted by forbidden
things, with which it would be better to have nothing to do. When a
rise in the road brings the mountains in view above the deep woods,
the feeling of strange uneasiness is increased. The summits are too
rounded and symmetrical to give a sense of comfort and naturalness, and
sometimes the sky silhouettes with especial clearness the queer circles
of tall stone pillars with which most of them are crowned.
"""

In [12]:
# Sample input: a short technical paragraph. This is the last `text` assignment before the prompt is built.
text = "You can use gpt-oss-120b and gpt-oss-20b with Transformers. If you use the Transformers chat template, it will automatically apply the harmony response format. If you use model.generate directly, you need to apply the harmony format manually using the chat template or use our openai-harmony package."

In [13]:
# Build the prompt: the input text, a newline, then the summarization instruction.
prompt = f"{text}\nSummarize the aforementioned text in a single phrase."
print(prompt)


You can use gpt-oss-120b and gpt-oss-20b with Transformers. If you use the Transformers chat template, it will automatically apply the harmony response format. If you use model.generate directly, you need to apply the harmony format manually using the chat template or use our openai-harmony package.
Summarize the aforementioned text in a single phrase.


In [14]:
# Tokenize the prompt and keep only the input ids, returned as a PyTorch tensor.
enc_text = tokenizer(prompt, return_tensors='pt').input_ids

print(enc_text)

tensor([[  148,    54,   169,     3,   122,   102,    17,    18,    32,     7,
             7,    18, 15518,   115,    11,     3,   122,   102,    17,    18,
            32,     7,     7,  7988,   115,    28, 31220,     7,     5,   156,
            25,   169,     8, 31220,     7,  3582,  3847,     6,    34,    56,
          3269,  1581,     8, 18362,  1773,  1910,     5,   156,    25,   169,
           825,     5,   729,    49,   342,  1461,     6,    25,   174,    12,
          1581,     8, 18362,  1910, 12616,   338,     8,  3582,  3847,    42,
           169,    69,   539,     9,    23,    18,  3272, 21208,  2642,     5,
         12198,  1635,  1737,     8,     3,     9, 22835,  1499,    16,     3,
             9,   712,  9261,     5,     1]])


In [None]:
# TODO: Decode the token


In [15]:
# Generate summary token ids from the encoded prompt (no generation config is passed here).
enc_summary = model.generate(enc_text)
print(enc_summary)

tensor([[    0,  2048,     8, 22113,    63,  1910,     5,     1]])


In [16]:
# Decode the first (only) generated sequence back to text, dropping special tokens.
summary = tokenizer.decode(enc_summary[0], skip_special_tokens=True)
print(summary)

Use the Harmony format.


## Manually perform one decoding step

In [17]:
# Pull the decoder stack and the language-modeling head out of the loaded model
# so a single decoding step can be run by hand.
decoder = model.decoder
lm_head = model.lm_head

In [20]:
# Feed the encoded prompt directly to the decoder, bypassing the encoder.
# NOTE(review): no encoder hidden states are passed, so the decoder runs without
# encoder context here — confirm this is intended for the demonstration.
decoder_output = decoder(enc_text)
print(decoder_output)
# Element 0 of the output is `last_hidden_state`; print its shape.
print(decoder_output[0].shape)


BaseModelOutputWithPastAndCrossAttentions(last_hidden_state=tensor([[[-0.0711, -0.0300, -0.0542,  ...,  0.1540, -0.0347, -0.1538],
         [-0.0263, -0.1119,  0.0380,  ...,  0.0950, -0.0942, -0.1246],
         [-0.0805, -0.0711,  0.0472,  ..., -0.0879, -0.0577, -0.0262],
         ...,
         [-0.0857, -0.1256, -0.0341,  ...,  0.1918,  0.1070,  0.0098],
         [ 0.0996, -0.0983,  0.0235,  ...,  0.2491,  0.0173, -0.0008],
         [-0.1110, -0.1467, -0.0806,  ...,  0.2892,  0.0279,  0.0953]]],
       grad_fn=<MulBackward0>), past_key_values=EncoderDecoderCache(layers=[<transformers.cache_utils.DynamicLayer object at 0x71ec7f3ea720>]), hidden_states=None, attentions=None, cross_attentions=None)
torch.Size([1, 95, 768])


In [22]:
# Feed the decoder's last hidden state into the lm_head to get one score per
# vocabulary entry at every position, then print the scores and the shape of
# the first sequence in the batch.
lm_output = lm_head(decoder_output[0])
print(lm_output)
print(lm_output[0].shape)

tensor([[[-50.5213,  -6.9690, -10.8567,  ..., -50.5461, -50.4077, -50.5863],
         [-54.0293,  -8.5675, -11.5029,  ..., -54.0879, -53.9373, -54.0907],
         [-50.6281,  -6.9449, -12.0460,  ..., -50.6677, -50.4971, -50.7353],
         ...,
         [-51.8635,  -7.5650,  -9.3714,  ..., -51.8891, -51.9981, -52.0953],
         [-47.8012,  -0.8571, -10.0058,  ..., -47.7816, -47.8446, -47.9007],
         [-66.8487,  -9.1937, -12.9279,  ..., -66.7421, -66.8590, -67.1442]]],
       grad_fn=<UnsafeViewBackward0>)
torch.Size([95, 32128])


In [24]:
# Get the next predicted token greedily: the highest-scoring vocabulary id at
# the last position of the first sequence.
# Then decode that id back to text.
next_token = lm_output[0, -1].argmax(axis=-1)
print(next_token)
print(tokenizer.decode(next_token))

tensor(51)
m


In [25]:
# Get the greedy (argmax) token id at every position and decode them all as a single string.
all_ids = lm_output[0].argmax(-1)
print(tokenizer.decode(all_ids))

. give youra s </s> or or-o-g-gtt-oss-b. s . you are  Transformers, on on you' be be to Transformer to to to</s> you use the it,ator,, it will to modify the  to to. the  code. the the command-s commandin--.</s>,t the meaning...rith  and thea word word.</s>m


In [26]:
# Decode and print each predicted token on its own line.
for token_id in all_ids:
    print(tokenizer.decode(token_id))

.
give
your
a

s

</s>
or
or
-
o
-
g
-
g
t
t
-
o
s
s
-
b
.

s

.
you
are

Transformer
s
,
on
on
you
'
be
be
to
Transformer
to
to
to
</s>
you
use
the

it
,
ator
,
,
it
will
to
modify
the

to
to
.
the

code
.
the
the
command
-
s
command
in
-
-
.
</s>
,
t
the
meaning
...
rith

and
the
a
word
word
.
</s>
m


In [None]:
# TODO: Get all the predicted next token


In [None]:
# TODO: Print each token individually


## T5 Models

The <code>flan-t5</code> model is a Text-To-Text Transfer Transformer (T5) that is capable of performing zero-shot NLP tasks such as summarization, simple reasoning, question answering, etc.

Some T5 models from Huggingface
- [<code>google/flan-t5-base</code>](https://huggingface.co/google/flan-t5-base)
- [<code>google/flan-t5-small</code>](https://huggingface.co/google/flan-t5-small)
- [<code>google/flan-t5-xl</code>](https://huggingface.co/google/flan-t5-xl)
- [<code>google/flan-t5-xxl</code>](https://huggingface.co/google/flan-t5-xxl) - full model

Complete list of [T5 models](https://huggingface.co/models?search=google/flan) on Huggingface.

In [27]:
# Input passage for the sampling-based summarization example that follows.
text = """ 
When a traveler in north central Massachusetts takes the wrong fork
at the junction of the Aylesbury pike just beyond Dean's Corners he
comes upon a lonely and curious country. The ground gets higher, and
the brier-bordered stone walls press closer and closer against the ruts
of the dusty, curving road. The trees of the frequent forest belts
seem too large, and the wild weeds, brambles, and grasses attain a
luxuriance not often found in settled regions. At the same time the
planted fields appear singularly few and barren; while the sparsely
scattered houses wear a surprizing uniform aspect of age, squalor, and
dilapidation. Without knowing why, one hesitates to ask directions
from the gnarled, solitary figures spied now and then on crumbling
doorsteps or in the sloping, rock-strewn meadows. Those figures are
so silent and furtive that one feels somehow confronted by forbidden
things, with which it would be better to have nothing to do. When a
rise in the road brings the mountains in view above the deep woods,
the feeling of strange uneasiness is increased. The summits are too
rounded and symmetrical to give a sense of comfort and naturalness, and
sometimes the sky silhouettes with especial clearness the queer circles
of tall stone pillars with which most of them are crowned.
"""

In [28]:
# Build the summarization prompt: the instruction followed by the article text.
# The triple-quoted f-string keeps the newline before and after the instruction line.
prompt = f'''
Write a short summary for this article: {text}
'''

In [30]:
# Summarize `prompt` with the google/flan-t5-base model, using a generation
# config to control how tokens are sampled from the model's output logits.
# `model` and `tokenizer` are the objects loaded earlier in the notebook; this
# assignment only records which checkpoint they came from.
model_name = "google/flan-t5-base"

config = GenerationConfig(
    do_sample=True,    # sample from the distribution instead of greedy decoding
    temperature=2.0,   # values above 1 flatten the distribution (more random output)
    top_p=0.8,         # nucleus sampling: restrict to the top 80% of probability mass
    # top_k=10,        # alternative: restrict sampling to the 10 most likely tokens
)

# Encode this section's prompt. `enc_text` still holds the ids of the prompt
# from the earlier section, so passing it here would summarize the wrong text.
enc_prompt = tokenizer(prompt, return_tensors='pt').input_ids

summary_enc = model.generate(enc_prompt, generation_config=config)

# Decode the sampled ids so the result is readable; sampling is random, so the
# text differs from run to run unless a seed is set.
print(tokenizer.decode(summary_enc[0], skip_special_tokens=True))
