In [2]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
from datasets import load_dataset
import torch
from nltk.translate.meteor_score import meteor_score

In [3]:
# Load the DialogSum dataset by its repository id.
# NOTE(review): `data` is not referenced by any other cell visible in this section.
data = load_dataset(path='knkarthick/dialogsum')

In [4]:
# Directory holding the saved checkpoint; the tokenizer and the seq2seq model
# are both restored from this same location (tokenizer first, then model).
model_path = '../models/BART'
tokenizer, model = (
    loader.from_pretrained(model_path)
    for loader in (AutoTokenizer, AutoModelForSeq2SeqLM)
)

In [5]:
# Pick the best available accelerator: Apple-Silicon MPS first (the original
# target), then CUDA, and finally plain CPU so the notebook still runs on
# machines without a GPU backend.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Move the model's parameters to the chosen device. The call is the cell's last
# expression, so the notebook also displays the module structure.
model.to(device)

BartForConditionalGeneration(
  (model): BartModel(
    (shared): Embedding(50265, 768, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): Embedding(50265, 768, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 768)
      (layers): ModuleList(
        (0-5): 6 x BartEncoderLayer(
          (self_attn): BartAttention(
            (k_proj): Linear(in_features=768, out_features=768, bias=True)
            (v_proj): Linear(in_features=768, out_features=768, bias=True)
            (q_proj): Linear(in_features=768, out_features=768, bias=True)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (final_layer_norm): LayerNorm((768,), eps=

In [6]:
# Build the summarization pipeline on the same device the model was moved to.
# Passing the `device` object selected earlier (instead of a hard-coded 'mps')
# keeps this cell runnable on hosts where the MPS backend is unavailable.
pipe = pipeline(task='summarization', model=model, tokenizer=tokenizer, device=device)

In [7]:
# Sample dialogue used as pipeline input: one speaker turn per line, each turn
# prefixed with a #PersonN# tag. Adjacent literals are concatenated by Python,
# so the value is a single newline-separated string.
text = (
    "#Person1#: Excuse me, how can I get special discount coupons?\n"
    "#Person2#: Buy more and get more special discount coupons.\n"
    "#Person1#: Can I get a discount coupon if I buy these goods?\n"
    "#Person2#: Of course You get a coupon for every 3 bags of sugar.\n"
    "#Person1#: But how much discount can I get if I use it to buy goods next time?\n"
    "#Person2#: 10 pence off if you use this coupon.\n"
    "#Person1#: Can I buy everything in the supermarket by it? .\n"
    "#Person2#: Yeah, you need to take advantage of it within its expiry date.\n"
    "#Person1#: How long can I keep it?\n"
    "#Person2#: The coupon can be used at least one year.\n"
    "#Person1#: I see. I will take 9 bags of sugar so that I can get 3 coupons.\n"
    "#Person2#: All right. I will get them for you."
)

In [8]:
# Generation settings; the summarization cells unpack this dict into each
# `pipe(...)` call as keyword arguments.
gen_kwargs = dict(
    length_penalty=0.7,
    num_beams=8,
    max_length=120,
    min_length=30,
)

In [9]:
# Summarise the sample dialogue. The pipeline returns a list of result dicts;
# take the first one and keep its text as whitespace-separated tokens, because
# the METEOR cell below passes `summary` as the (tokenised) hypothesis.
first_result = pipe(text, **gen_kwargs)[0]
summary = first_result['summary_text'].split()

# Show the tokens re-joined with single spaces.
print(*summary)

#Person2# tells #Person1# how to get special discount coupons and how to use it to buy goods next time. They will take 9 bags of sugar.


In [10]:
# Reference summary for the sample dialogue, tokenised on whitespace and
# wrapped in a list so it has the list-of-references shape passed to
# `meteor_score` below.
reference_text = "#Person2# answers #Person1#'s questions about getting special discount coupons and how to use them."
refer = [reference_text.split()]

In [11]:
# METEOR between the tokenised reference(s) and the tokenised generated summary.
# NOTE(review): NLTK's METEOR relies on the WordNet corpus being available
# locally; confirm it is downloaded in the environment running this notebook.
score = meteor_score(references=refer, hypothesis=summary)
print(score)

0.5661143599740092


In [12]:
# Show the input dialogue followed by its whitespace-token count, one per line.
print(text, len(text.split()), sep='\n')

#Person1#: Excuse me, how can I get special discount coupons?
#Person2#: Buy more and get more special discount coupons.
#Person1#: Can I get a discount coupon if I buy these goods?
#Person2#: Of course You get a coupon for every 3 bags of sugar.
#Person1#: But how much discount can I get if I use it to buy goods next time?
#Person2#: 10 pence off if you use this coupon.
#Person1#: Can I buy everything in the supermarket by it? .
#Person2#: Yeah, you need to take advantage of it within its expiry date.
#Person1#: How long can I keep it?
#Person2#: The coupon can be used at least one year.
#Person1#: I see. I will take 9 bags of sugar so that I can get 3 coupons.
#Person2#: All right. I will get them for you.
137


In [12]:
# Second test dialogue: all lower-case, with every turn on one line separated
# by spaces (speaker tags are `#person1#` / `#person2#`). The value starts and
# ends with a newline, exactly as a triple-quoted block would.
biased = (
    "\n"
    "#person1#: what seems to be the problem? "
    "#person2#: my stomach hurts, doctor. "
    "#person1#: has this been a problem before? "
    "#person2#: yes. "
    "#person1#: for how long? "
    "#person2#: i have had it on and off for the past three years. it's just gotten much worse these past two weeks. "
    "#person1#: do you only feel this way when you stomach is empty? "
    "#person2#: after i have eaten, it goes away for a while. "
    "#person1#: do you feel nauseous? "
    "#person2#: yes, occasionally. "
    "#person1#: do you have regular bowel movements? "
    "#person2#: i think so. "
    "#person1#: let me take a look at your abdomen. lie down on your back and bend your knees up. ok. relax... alright, it seems like you have a duodenal ulcer, but we will have to run some tests before i can be certain. you should get a good rest first and try not to strain your stomach too much. "
    "#person2#: is it serious? "
    "#person1#: not too serious, but it will take you some time to recover, so you will need to be patient"
    "\n"
)

In [13]:
# Show the lower-cased dialogue followed by its whitespace-token count.
print(biased, len(biased.split()), sep='\n')


#person1#: what seems to be the problem? #person2#: my stomach hurts, doctor. #person1#: has this been a problem before? #person2#: yes. #person1#: for how long? #person2#: i have had it on and off for the past three years. it's just gotten much worse these past two weeks. #person1#: do you only feel this way when you stomach is empty? #person2#: after i have eaten, it goes away for a while. #person1#: do you feel nauseous? #person2#: yes, occasionally. #person1#: do you have regular bowel movements? #person2#: i think so. #person1#: let me take a look at your abdomen. lie down on your back and bend your knees up. ok. relax... alright, it seems like you have a duodenal ulcer, but we will have to run some tests before i can be certain. you should get a good rest first and try not to strain your stomach too much. #person2#: is it serious? #person1#: not too serious, but it will take you some time to recover, so you will need to be patient

172


In [14]:
# Summarise the lower-cased dialogue with the same generation settings, then
# print the summary text and its whitespace-token count on separate lines.
# NOTE(review): this rebinds `summary` to a plain string, whereas the earlier
# summarization cell stored a token list under the same name; re-running the
# METEOR cell after this one would therefore see a different type.
summary = pipe(biased, **gen_kwargs)[0]['summary_text']
print(summary, len(summary.split()), sep='\n')

#person2#'s stomach hurts and #person1# tells #person3# it's because of a duodenal ulcer.
13
