In [1]:
!pip install transformers==3.5.0
!pip install torch==1.7.0
!pip install rouge-score

from transformers import PegasusTokenizer,PegasusForConditionalGeneration
from rouge_score import rouge_scorer
import torch
import os
from tqdm import tqdm
import pandas as pd
import nltk
nltk.download('punkt')



In [45]:

check_path = "/content/AMICorpusXML/data/ami-summary/abstractive/ES2004a.abssumm.txt"
from pathlib import Path

my_file = Path(check_path)
if not my_file.is_file():
    if not Path("/content/AMICorpusXML/data").is_dir():
        ! git clone https://github.com/gcunhase/AMICorpusXML
    ! python /content/AMICorpusXML/main_obtain_meeting2summary_data.py --summary_type abstractive
else:
    print("Data already prepared... Importing\n")

Data already prepared... Importing



In [2]:
# taking same split to make easy comparision
# data split knowledge: http://groups.inf.ed.ac.uk/ami/corpus/datasets.shtml
# 5 x 4 = 20 samples 
test = "ES2004, ES2014, IS1009, TS3003, TS3007".split(',')

story = []
story_directory = r'/content/AMICorpusXML/data/ami-transcripts-stories/abstractive'
for filename in os.listdir(story_directory):
    if filename.endswith(".story"):
        for each in test:
            if each.strip() in str(filename):
                story.append(filename)
     
summary = []
sum_directory = r'/content/AMICorpusXML/data/ami-summary/abstractive'
for filename in os.listdir(sum_directory):
    if filename.endswith(".txt"):
        for each in test:
            if each.strip() in str(filename):
                summary.append(filename)

def prepare_model(model_name = 'google/pegasus-xsum'):
    print("----------------- Preparing Model -----------------")
    
    torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
    tokenizer = PegasusTokenizer.from_pretrained(model_name)
    model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)
    print("----------------- Model Prepared -----------------")
    return tokenizer, model, torch_device

def test_model(src_text, tokenizer, model):
    print("\nNew Testing started...")
    batch = tokenizer.prepare_seq2seq_batch(src_text, truncation=True, padding='longest').to(torch_device)
    translated = model.generate(**batch)
    tgt_text = tokenizer.batch_decode(translated, skip_special_tokens=True)
    return tgt_text

def data_from_file(f1,f2):
    with open(f1, 'r') as file:
        data1 = file.read().replace('\n', '')
    with open(f2, 'r') as file:
        data2 = file.read().replace('\n', '')
    return data1, data2

df = pd.DataFrame()
st, sums_pred, sums_ann = [], [], []

# remove later
# flag = 0

tokenizer, model, torch_device = prepare_model()
for each in tqdm(story):
    a = each.split('.')[0]
    story_file = story_directory+'/'+each
    summ_file = sum_directory+'/'+a+".abssumm.txt"
    story_file, summ_file = data_from_file(story_file, summ_file)
    st.append(story_file)
    src_text = [story_file.replace('\n',' ')]
    result = test_model(src_text, tokenizer, model)
    sums_pred.append(result[0])
    sums_ann.append(summ_file)

    # flag+=1
    # if flag >=2: break

df['stories'] = st
df['summaries'] = sums_pred
df['Summary annotated'] = sums_ann
df.to_csv('summary_df.csv')
display(df.head())

----------------- Preparing Model -----------------


  0%|          | 0/20 [00:00<?, ?it/s]

----------------- Model Prepared -----------------

New Testing started...


  5%|▌         | 1/20 [00:22<07:05, 22.40s/it]


New Testing started...


 10%|█         | 2/20 [00:44<06:40, 22.26s/it]


New Testing started...


 15%|█▌        | 3/20 [01:04<06:09, 21.71s/it]


New Testing started...


 20%|██        | 4/20 [01:47<07:29, 28.08s/it]


New Testing started...


 25%|██▌       | 5/20 [02:09<06:34, 26.29s/it]


New Testing started...


 30%|███       | 6/20 [02:37<06:12, 26.64s/it]


New Testing started...


 35%|███▌      | 7/20 [02:58<05:24, 24.93s/it]


New Testing started...


 40%|████      | 8/20 [03:20<04:49, 24.10s/it]


New Testing started...


 45%|████▌     | 9/20 [03:58<05:12, 28.37s/it]


New Testing started...


 50%|█████     | 10/20 [04:31<04:57, 29.72s/it]


New Testing started...


 55%|█████▌    | 11/20 [04:58<04:19, 28.86s/it]


New Testing started...


 60%|██████    | 12/20 [05:26<03:48, 28.56s/it]


New Testing started...


 65%|██████▌   | 13/20 [05:52<03:14, 27.72s/it]


New Testing started...


 70%|███████   | 14/20 [06:12<02:34, 25.68s/it]


New Testing started...


 75%|███████▌  | 15/20 [06:35<02:03, 24.74s/it]


New Testing started...


 80%|████████  | 16/20 [07:05<01:44, 26.21s/it]


New Testing started...


 85%|████████▌ | 17/20 [07:42<01:29, 29.68s/it]


New Testing started...


 90%|█████████ | 18/20 [08:02<00:53, 26.73s/it]


New Testing started...


 95%|█████████▌| 19/20 [08:28<00:26, 26.53s/it]


New Testing started...


100%|██████████| 20/20 [08:57<00:00, 26.87s/it]


Unnamed: 0,stories,summaries,Summary annotated
0,Hello. Hi. Yeah. It's too beautiful. A minute ...,"Janus, welcome back to the functional design m...",When this functional design meeting opens the ...
1,Well hi everyone again. Um like before we uh I...,This is the meeting where we discuss the desig...,The project manager opens the meeting by going...
2,"Wouldn't wanna be Project Manager. Uh, what we...",Let's start with the presentation.,The project manager opened the meeting and wen...
3,"Okay. Yeah, my name is Francina. And I'm uh an...",In our series of letters from Australian stude...,The meeting opens with the group doing introdu...
4,"Okay, is everybody ready? Mm-hmm. Um I take it...",Mm-hmm.,The meeting begins and the marketing expert st...


In [23]:
# R1 and R2 for a sample summary 
a = df["stories"][0]
b = df["summaries"][0]
c = df["Summary annotated"][0]

def rogue_sc(b, a):
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2'], use_stemmer=True)
    scores = scorer.score(b,a)
    return scores

scores = rogue_sc(b,a)
print("Word Capture: ", scores['rouge1'][1])
scores = rogue_sc(b,c)
print("ROUGE-1 Score: ", scores['rouge1'][1])
print("ROUGE-2 Score: ", scores['rouge2'][1])



Word Capture:  1.0
ROUGE-1 Score:  0.625
ROUGE-2 Score:  0.2857142857142857


In [39]:
rouge1 = []
rouge2 = []
word_capt = []
for index, row in df.iterrows():
    rouge1.append(rogue_sc(row['summaries'], row['Summary annotated'])['rouge1'][1])
    rouge2.append(rogue_sc(row['summaries'], row['Summary annotated'])['rouge2'][1])
    word_capt.append(rogue_sc(row['summaries'], row['Summary annotated'])['rouge1'][1])
print("Average Rouge1 Score: ", 100*np.average(np.array(rouge1)))
print("Average Rouge2 Score: ", 100*np.average(np.array(rouge2)))

Average Rouge1 Score:  35.14517243762526
Average Rouge2 Score:  9.775370970492922


0.3514517243762526

0.006346627874198489

HUGGING FACE - Implementation 1 - Parameter Tuning

In [None]:
# download model
mname = "google/pegasus-xsum"
model = PegasusForConditionalGeneration.from_pretrained(mname)# download tokenizer
tok = PegasusTokenizer.from_pretrained(mname)

text = '''
Hi, I'm David and I'm supposed to be an industrial designer. Um, I just got the project announcement about what the project is. Designing a remote control. That's about it, didn't get anything else. Did you get the same thing? Cool. There's too much gear. Okay. Can't draw. Um. Yeah. Um, well anyway, I don't know, it's just the first animal I can think off the top of my head. Um. Yes. Big reason is 'cause I'm allergic to most animals. Allergic to animal fur, so um fish was a natural choice. Um, yeah, and I kind of like whales. They come in and go eat everything in sight. And they're quite harmless and mild and interesting. Tail's a bit big, I think. It's an after dinner dog then. Hmm. It does make sense from maybe the design point of view 'cause you have more complicated characters like European languages, then you need more buttons. So, possibly. Hmm. Yeah. And you keep losing them. Finding them is really a pain, you know. I mean it's usually quite small, or when you want it right, it slipped behind the couch or it's kicked under the table. You know. Yep. Mm-hmm. I think one factor would be production cost. Because there's a cap there, so um depends on how much you can cram into that price. Um. I think that that's the main factor. Cool.
Okay. Right. Um well this is the kick-off meeting for our our project. Um and um this is just what we're gonna be doing over the next twenty five minutes. Um so first of all, just to kind of make sure that we all know each other, I'm Laura and I'm the project manager. Do you want to introduce yourself again? Okay. Great. Okay. Um so we're designing a new remote control and um Oh I have to record who's here actually. So that's David, Andrew and Craig, isn't it? And you all arrived on time. Um yeah so des uh design a new remote control. Um, as you can see it's supposed to be original, trendy and user friendly. Um so that's kind of our our brief, as it were. Um and so there are three different stages to the design. Um I'm not really sure what what you guys have already received um in your emails. What did you get? Mm-hmm. Is that what everybody got? Okay. Um. So we're gonna have like individual work and then a meeting about it. And repeat that process three times. Um and at this point we get try out the whiteboard over there. Um. So uh you get to draw your favourite animal and sum up your favourite characteristics of it. So who would like to go first? Very good. Mm-hmm. Yeah. Yeah. Right. Lovely. Right. You can take as long over this as you like, because we haven't got an awful lot to discuss. Ok oh we do we do. Don't feel like you're in a rush, anyway. Ach why not We might have to get you up again then. I don't know what mine is. I'm gonna have to think on the spot now. Is that a whale? Ah. Okay. God, I still don't know what I'm gonna write about. Um. I was gonna choose a dog as well. But I'll just draw a different kind of dog. M my favourite animal is my own dog at home. Um That doesn't really look like him, actually. He looks more like a pig, actually. Ah well. Do you? Oh that's very good of you. Uh. Um he's a mixture of uh various things. Um and what do I like about him, um That's just to suggest that his tail wags. Um he's very friendly and cheery and always pleased to see you, and very kind of affectionate and um uh and he's quite quite wee as well so you know he can doesn't take up too much space. Um and uh And he does a funny thing where he chases his tail as well, which is quite amusing, so It is. I think it is. He only does it after he's had his dinner and um he'll just all of a sudden just get up and start chasing his tail 'round the living room. Yeah, so uh Yeah, maybe. Maybe. Right, um where did you find this? Just down here? Yeah. Okay. Um what are we doing next? Uh um. Okay, uh we now need to discuss the project finance. Um so according to the brief um we're gonna be selling this remote control for twenty five Euro, um and we're aiming to make fifty million Euro. Um so we're gonna be selling this on an international scale. And uh we don't want it to cost any more than uh twelve fifty Euros, so fifty percent of the selling price. Sure. All together. Um I dunno. I imagine That's a good question. I imagine it probably is our sale actually because it's probably up to the the um the retailer to uh sell it for whatever price they want. Um. But I I don't know, I mean do you think the fact that it's going to be sold internationally will have a bearing on how we design it at all? Think it will? Um. Hmm. Oh yeah, regions and stuff, yeah. Yeah. Okay. Yeah. Well for a remote control, do you think that will be I suppose it's depends on how complicated our remote control is. Yeah, yeah. Okay. What, just like in terms of like the wealth of the country? Like how much money people have to spend on things like? Aye, I see what you mean, yeah. Marketing. Good marketing thoughts. Oh gosh, I should be writing all this down. Um. Mm. Yeah. Yeah, yeah. Like how much does, you know, a remote control cost. Well twenty five Euro, I mean that's um that's about like eighteen pounds or something, isn't it? Or no, is it as much as that? Sixteen seventeen eighteen pounds. Um, I dunno, I've never bought a remote control, so I don't know how how good a remote control that would get you. Um. But yeah, I suppose it has to look kind of cool and gimmicky. Um right, okay. Let me just scoot on ahead here. Okay. Um well d Does anybody have anything to add to uh to the finance issue at all? Thin No, actually. That would be useful, though, wouldn't it, if you knew like what your money would get you now. Mm-hmm. Yeah, yeah. Oh. Five minutes to end of meeting. Oh, okay. We're a bit behind. Yeah. Right, so do you think that should be like a main design aim of our remote control d you know, do your your satellite and your regular telly and your V_C_R_ and everything? Mm-hmm. Yeah. Or even like, you know, notes about um what you wanna watch. Like you might put in there oh I want to watch such and such and look a Oh that's a good idea. So extra functionalities. Mm-hmm. Hmm. Um okay, uh I'd wel we're gonna have to wrap up pretty quickly in the next couple of minutes. Um I'll just check we've nothing else. Okay. Um so anything else anybody wants to add about what they don't like about remote controls they've used, what they would really like to be part of this new one at all? You keep losing them. Okay. Yeah. W You get those ones where you can, if you like, whistle or make a really high pitched noise they beep. There I mean is that something we'd want to include, do you think? Dunno. Okay maybe. My goodness. Still feels quite primitive. Maybe like a touch screen or something? Okay. Uh-huh, okay. Well I guess that's up to our industrial designer. It looks better. Yeah. Okay. Okay. Right, well um so just to wrap up, the next meeting's gonna be in thirty minutes. So that's about um about ten to twelve by my watch. Um so inbetween now and then, um as the industrial designer, you're gonna be working on you know the actual working design of it so y you know what you're doing there. Um for user interface, technical functions, I guess that's you know like what we've been talking about, what it'll actually do. Um and uh marketing executive, you'll be just thinking about what it actually what, you know, what requirements it has to has to fulfil and you'll all get instructions emailed to you, I guess. Um. Yeah, so it's th the functional design stage is next, I guess. And uh and that's the end of the meeting. So I got that little message a lot sooner than I thought I would, so Mm-hmm. Uh-huh, yeah. Th Okay, well just very quickly 'cause this we're supposed to finish now. Um I guess that's up to us, I mean you probably want some kind of unique selling point of it, so um, you know Yeah. Mm-hmm. Yeah. Okay. Right, okay, we'll that's that's the end of the meeting, then. Um. So, uh thank you all for coming.
Um I'm Craig and I'm User Interface. Yeah. Well, my favourite animal would be a monkey. Then they're small cute and furry, and uh when planet of the apes becomes real, I'm gonna be up there with them. Yeah. I know um My parents went out and bought um remote controls because um they got fed up of having four or five different remote controls for each things the house. So um for them it was just how many devices control. Uh.
Mm-hmm. Great. And I'm Andrew and I'm uh our marketing expert. Mm-hmm. Mm-hmm. Yeah, that's that's it. Yeah. I will go. That's fine. Alright. So This one here, right? Okay. Very nice. Alright. My favourite animal is like A beagle. Um charac favourite characteristics of it? Is that right? Uh, right, well basically um high priority for any animal for me is that they be willing to take a lot of physical affection from their family. And, yeah that they have lots of personality and uh be fit and in robust good health. So this is blue. Blue beagle. My family's beagle. I coulda told you a whole lot more about beagles. Boy, let me tell you. Impressionist. Alright. Mm. Superb sketch, by the way. Yep. I see a dog in there. Yep. Now I see a rooster. What kind is it? Is he aware that th it's his own cha tail he's chasing? Hmm. Probably when he was little he got lots of attention for doing it and has forever been conditioned. 'Kay. Um, can we just go over that again? Uh, so bas at twel Alright, yeah. Okay. So cost like production cost is twelve fifty, but selling price is is that wholesale or retail? Like on the shelf. Our sale our sale anyway. Yeah, okay okay. Okay. Mm-hmm. Alright. Yes. Mm-hmm. Mm-hmm. Well right away I'm wondering if there's um th th uh, like with D_V_D_ players, if there are zones. Um f frequencies or something um as well as uh characters, um different uh keypad styles and s symbols. Um. I don't know. Yeah. Yeah. Yeah. And then a and then al the other thing international is on top of the price. I'm thinking the price might might appeal to a certain market in one region, whereas in another it'll be different, so Just a chara just a characteristic of the Just Or just like, basic product podi positioning, the twenty five Euro remote control might be a big hit in London, might not be such a big hit in Greece, who knows, something like that, yeah. Yep. Right away I'm making some kind of assumptions about what what information we're given here, thinking, 'kay trendy probably means something other than just basic, something other than just standard. Um so I'm wondering right away, is selling twenty five Euros, is that sort of the thi is this gonna to be like the premium product kinda thing or Uh-huh. Mm-hmm. Yep. Yeah, I'd say so, yeah. No. Yeah, yeah. Mm-hmm. Do we have any other background information on like how that compares to other other Yeah. Mm-hmm. Yeah, interesting thing about discussing um production of a remote control for me is that l as you point out, I just don't think of remote controls as somethin something people consciously assess in their purchasing habits. It's just like getting shoelaces with shoes or something. It just comes along. Do you know what I mean? Like so sort of like how do you I I mean one one way of looking at it would be, well the people producing television sets, maybe they have to buy remote controls. Or another way is maybe people who have T_V_ sets are really fed up with their remote control and they really want a better one or something. But Right. Right. Okay so Right, so in function one of the priorities might be to combine as many uses I think so. Yeah, yeah. Yeah. Well like um, maybe what we could use is a sort of like a example of a successful other piece technology is palm palm pilots. They're gone from being just like little sort of scribble boards to cameras, M_P_ three players, telephones, everything, agenda. So, like, I wonder if we might add something new to the to the remote control market, such as the lighting in your house, or um Yeah, yeah. An Yeah. Like, p personally for me, at home I've I've combined the um the audio video of my television set and my D_V_D_ player and my C_D_ player. So they w all work actually function together but I have different remote controls for each of them. So it's sort of ironic that that then they're in there um you know, the sound and everything it's just one system. But each one's got its own little part. Mm. Mm. Mm. Mm-hmm. Mm-hmm. Yeah. Yeah. That's just really good id Yep. Uh, sure. I remember when the first remote control my my family had was on a cable. Actually had a cable between it and the T_V_ and big like buttons that sort of like, like on a blender or something. And um, you know, when I think about what they are now, it's better, but actually it's still kind of, I dunno, like a massive junky thing on the table. Maybe we could think about how, could be more, you know, streamlined. S Something like that, yeah. Or whatever would be technologically reasonable. 'Cause it could b it could it could be that f it could be that functionally that doesn't make it any better, but that just the appeal of of not having You know, these days there's a r pe things in people's homes are becoming more and more like chic, you know. Um, nicer materials and might be be worth exploring anyway. Okay. Um. Before we wrap up, just to make sure we're all on the same page here, um, do we We were given sort of an example of a coffee machine or something, right? Well, um are we at ma right now on the assumption that our television remote control may have features which go beyond the television? Or are we keeping sort of like a a design commitment to television features? I I don't know. Yep. Yeah, sure. Okay. Okay, yeah. Okay. Okay. Okay. Alright.
'''

batch = tok.prepare_seq2seq_batch(src_texts=[text])
# Hyperparameter Tuning
gen = model.generate(**batch,max_length = 200, # max length of summary
min_length = 100, # min length of summary
do_sample = True, 
temperature = 3.0,
top_k =30,
top_p=0.70,
repetition_penalty = 1.2,
length_penalty = 5, # if more than 1 encourage model to generate #larger sequences
num_return_sequences=1) # no of summary you want to generate)
# for forward pass: model(**batch)
summary = tok.batch_decode(gen, skip_special_tokens=True)
print(summary[0])