In [1]:
# Mount Google Drive at /content/drive; the generation and reference files
# read further down live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Install the third-party packages used by this notebook.
# NOTE(review): only `datasets` is version-pinned; the other installs are unpinned,
# so a fresh run may resolve different versions than the recorded output below.
!pip install datasets==1.6.0
!pip install transformers
!pip install rouge_score
!pip install sacrebleu
!pip install sentencepiece

Collecting datasets==1.6.0
[?25l  Downloading https://files.pythonhosted.org/packages/da/d6/a3d2c55b940a7c556e88f5598b401990805fc0f0a28b2fc9870cf0b8c761/datasets-1.6.0-py3-none-any.whl (202kB)
[K     |████████████████████████████████| 204kB 6.5MB/s 
[?25hCollecting fsspec
[?25l  Downloading https://files.pythonhosted.org/packages/e9/91/2ef649137816850fa4f4c97c6f2eabb1a79bf0aa2c8ed198e387e373455e/fsspec-2021.4.0-py3-none-any.whl (108kB)
[K     |████████████████████████████████| 112kB 9.3MB/s 
[?25hCollecting xxhash
[?25l  Downloading https://files.pythonhosted.org/packages/7d/4f/0a862cad26aa2ed7a7cd87178cbbfa824fc1383e472d63596a0d018374e7/xxhash-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl (243kB)
[K     |████████████████████████████████| 245kB 9.4MB/s 
Collecting huggingface-hub<0.1.0
  Downloading https://files.pythonhosted.org/packages/a1/88/7b1e45720ecf59c6c6737ff332f41c955963090a18e72acbcbeac6b25e86/huggingface_hub-0.0.8-py3-none-any.whl
Installing collected packages: fsspec, 

In [3]:
import datasets
import transformers
import pandas as pd
import numpy as np

In [4]:
# T5 tokenizer: used to truncate the reference sections associated with T5 generations
from transformers import T5Tokenizer
T5_tokenizer = T5Tokenizer.from_pretrained('t5-base')

# BART tokenizer: used to truncate the reference sections associated with BART generations
from transformers import BartTokenizer
BART_tokenizer = BartTokenizer.from_pretrained('facebook/bart-base')

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=791656.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1389353.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=898823.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=456318.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1355863.0, style=ProgressStyle(descript…




## Define functions to calculate metrics

In [46]:
def truncate_reference_sections(references, model_tokenizer, max_length=512):
  """Clip every section to at most `max_length` tokenizer ids.

  Each section is tokenized with `model_tokenizer`. When the resulting
  'input_ids' list (counted exactly as the tokenizer returns it, i.e.
  including any special tokens it adds) is longer than `max_length`, only the
  first `max_length` ids are kept and decoded back to text. Shorter sections
  are returned untouched.

  Args:
    references: iterable of section strings.
    model_tokenizer: callable that maps a string to a mapping holding an
      'input_ids' list and that exposes `decode(ids, skip_special_tokens=...)`
      (the Hugging Face tokenizer interface).
    max_length: maximum number of token ids to keep per section. Defaults to
      512, the limit this function previously hard-coded.

  Returns:
    A new list with one string per input section, in the same order.

  Side effects:
    Prints how many sections were truncated.
  """
  truncated_references = []
  num_truncated = 0

  for ref_section in references:
    input_ids = model_tokenizer(ref_section)['input_ids']

    if len(input_ids) > max_length:
      num_truncated += 1

      # Tokenizers that prepend a start marker (e.g. BART's <s>) keep it inside
      # the first `max_length` ids; skip special tokens so the marker does not
      # end up as literal text in the reference that is scored later.
      # NOTE(review): decoded text is whatever the tokenizer reconstructs and
      # may not be a byte-exact prefix of the original section.
      truncated_references.append(
          model_tokenizer.decode(input_ids[:max_length], skip_special_tokens=True))
    else:
      # short enough: keep the original string as is
      truncated_references.append(ref_section)

  print("Num. of sections truncated: ", num_truncated)

  return truncated_references

In [47]:
def map_to_length(x):
  """Annotate a record with the sizes of its generation and reference.

  Adds three keys to `x` (modified in place and also returned):
    - "generations_len": len() of x["generations"]
    - "references_len":  len() of x["references"]
    - "len_differece":   references_len minus generations_len
  """
  n_generated = len(x["generations"])
  x["generations_len"] = n_generated

  n_reference = len(x["references"])
  x["references_len"] = n_reference

  # positive when the reference is longer than the generation
  x["len_differece"] = n_reference - n_generated

  return x

In [48]:
# Use sacrebleu metric instead of bleu
def calc_sacrebleu(gold_references, model_generations, truncation=False, model_tokenizer=None):
  """Compute the `datasets` 'sacrebleu' metric for generation/reference pairs.

  Args:
    gold_references: list of reference strings, one per generation.
    model_generations: list of generated strings, aligned with the references.
    truncation: when True, the references are first passed through
      `truncate_reference_sections` using `model_tokenizer`.
    model_tokenizer: tokenizer forwarded to `truncate_reference_sections`;
      only used when `truncation` is True.

  Returns:
    Whatever `compute()` of the loaded sacrebleu metric returns.

  Raises:
    AssertionError: if the two inputs differ in length, or the metric did not
      register one entry per reference.
  """
  # in case we want to truncate
  if truncation:
    gold_references = truncate_reference_sections(references=gold_references,
                                                  model_tokenizer=model_tokenizer)

  # every generation needs exactly one reference
  assert len(model_generations) == len(gold_references)

  # load the metric
  sacrebleu_metric = datasets.load_metric('sacrebleu')

  # The metric expects a list of references per prediction. Build a NEW list of
  # single-item lists instead of rewriting the caller's list element by element.
  wrapped_references = [[ref_section] for ref_section in gold_references]

  # add ref-gen pairs to metric
  sacrebleu_metric.add_batch(predictions=model_generations, references=wrapped_references)

  # check that every pair was registered
  assert len(sacrebleu_metric) == len(wrapped_references)

  # compute the metric scores
  return sacrebleu_metric.compute()

In [49]:
# Use original bleu

def calc_bleu(gold_references, model_generations, truncation=False, model_tokenizer=None):
  """Compute the `datasets` 'bleu' metric on whitespace-split text.

  Args:
    gold_references: list of reference strings, one per generation.
    model_generations: list of generated strings, aligned with the references.
    truncation: when True, the references are first passed through
      `truncate_reference_sections` using `model_tokenizer`.
    model_tokenizer: tokenizer forwarded to `truncate_reference_sections`;
      only used when `truncation` is True.

  Returns:
    Whatever `compute()` of the loaded bleu metric returns.

  Raises:
    AssertionError: if the two inputs differ in length, or the metric did not
      register one entry per reference.
  """
  # in case we want to truncate
  if truncation:
    gold_references = truncate_reference_sections(references=gold_references,
                                                  model_tokenizer=model_tokenizer)

  # every generation needs exactly one reference
  assert len(model_generations) == len(gold_references)

  # load the metric
  bleu_metric = datasets.load_metric("bleu")

  # Predictions are token lists; each prediction gets a list holding its single
  # tokenized reference. Fresh lists are built so the caller's inputs are never
  # modified.
  tokenized_generations = [gen_section.split() for gen_section in model_generations]
  tokenized_references = [[ref_section.split()] for ref_section in gold_references]

  # add ref-gen pairs to metric
  bleu_metric.add_batch(predictions=tokenized_generations, references=tokenized_references)

  # check that every pair was registered
  assert len(bleu_metric) == len(tokenized_references)

  # compute the metric scores
  return bleu_metric.compute()

# Content Planner

In [29]:
# Load the Content Planner generations for each of the six sections.
# Every file contributes one whitespace-stripped entry per line.

# section 1
PATH_TEST_GENERATED_1 = '/content/drive/MyDrive/bayer-intern/content_planner_generations/test_generations_section1.txt'
with open(PATH_TEST_GENERATED_1) as fh:
    content_planner_gen_section_1 = list(map(str.strip, fh))

# section 2
PATH_TEST_GENERATED_2 = '/content/drive/MyDrive/bayer-intern/content_planner_generations/test_generations_section2.txt'
with open(PATH_TEST_GENERATED_2) as fh:
    content_planner_gen_section_2 = list(map(str.strip, fh))

# section 3
PATH_TEST_GENERATED_3 = '/content/drive/MyDrive/bayer-intern/content_planner_generations/test_generations_section3.txt'
with open(PATH_TEST_GENERATED_3) as fh:
    content_planner_gen_section_3 = list(map(str.strip, fh))

# section 4
PATH_TEST_GENERATED_4 = '/content/drive/MyDrive/bayer-intern/content_planner_generations/test_generations_section4.txt'
with open(PATH_TEST_GENERATED_4) as fh:
    content_planner_gen_section_4 = list(map(str.strip, fh))

# section 5
PATH_TEST_GENERATED_5 = '/content/drive/MyDrive/bayer-intern/content_planner_generations/test_generations_section5.txt'
with open(PATH_TEST_GENERATED_5) as fh:
    content_planner_gen_section_5 = list(map(str.strip, fh))

# section 6
PATH_TEST_GENERATED_6 = '/content/drive/MyDrive/bayer-intern/content_planner_generations/test_generations_section6.txt'
with open(PATH_TEST_GENERATED_6) as fh:
    content_planner_gen_section_6 = list(map(str.strip, fh))

In [30]:
# Load the reference (target) text for each of the six sections.
# Every file contributes one whitespace-stripped entry per line.

# section 1
PATH_TEST_REF_1 = '/content/drive/MyDrive/bayer-intern/content_planner_generations/section1_tgt_test.txt'
with open(PATH_TEST_REF_1) as fh:
    content_planner_ref_1 = list(map(str.strip, fh))

# section 2
PATH_TEST_REF_2 = '/content/drive/MyDrive/bayer-intern/content_planner_generations/section2_tgt_test.txt'
with open(PATH_TEST_REF_2) as fh:
    content_planner_ref_2 = list(map(str.strip, fh))

# section 3
PATH_TEST_REF_3 = '/content/drive/MyDrive/bayer-intern/content_planner_generations/section3_tgt_test.txt'
with open(PATH_TEST_REF_3) as fh:
    content_planner_ref_3 = list(map(str.strip, fh))

# section 4
PATH_TEST_REF_4 = '/content/drive/MyDrive/bayer-intern/content_planner_generations/section4_tgt_test.txt'
with open(PATH_TEST_REF_4) as fh:
    content_planner_ref_4 = list(map(str.strip, fh))

# section 5
PATH_TEST_REF_5 = '/content/drive/MyDrive/bayer-intern/content_planner_generations/section5_tgt_test.txt'
with open(PATH_TEST_REF_5) as fh:
    content_planner_ref_5 = list(map(str.strip, fh))

# section 6
PATH_TEST_REF_6 = '/content/drive/MyDrive/bayer-intern/content_planner_generations/section6_tgt_test.txt'
with open(PATH_TEST_REF_6) as fh:
    content_planner_ref_6 = list(map(str.strip, fh))

## Create 2 Content Planner files - **all generations combined, all references combined**

In [31]:
# Pool the generations of all six sections into one list (section order preserved).
all_content_planner_generations = (
    content_planner_gen_section_1
    + content_planner_gen_section_2
    + content_planner_gen_section_3
    + content_planner_gen_section_4
    + content_planner_gen_section_5
    + content_planner_gen_section_6
)

In [32]:
# Pool the references of all six sections into one list (section order preserved).
all_content_planner_references = (
    content_planner_ref_1
    + content_planner_ref_2
    + content_planner_ref_3
    + content_planner_ref_4
    + content_planner_ref_5
    + content_planner_ref_6
)

In [33]:
# Sanity check: the pooled references and generations must have the same number of entries.
assert len(all_content_planner_references) == len(all_content_planner_generations)

In [34]:
# Output file that receives the pooled Content Planner generations, one per line.
TGT_CONTENT_PLANNER_ALL_GENERATIONS = '/content/drive/MyDrive/bayer-intern/content_planner_generations/all_generations_Content_Planner.txt'

In [35]:
# Write the pooled generations to a file, one entry per line.
# The `with` block closes the handle even if a write fails part-way through.
with open(TGT_CONTENT_PLANNER_ALL_GENERATIONS, 'w') as output_file:
    for sec_gen in all_content_planner_generations:
        output_file.write(sec_gen + "\n")

In [37]:
# Output file that receives the pooled Content Planner references, one per line.
TGT_CONTENT_PLANNER_ALL_REFERENCES = '/content/drive/MyDrive/bayer-intern/content_planner_generations/all_references_Content_Planner.txt'

In [38]:
# Write the pooled references to a file, one entry per line.
# The `with` block closes the handle even if a write fails part-way through.
with open(TGT_CONTENT_PLANNER_ALL_REFERENCES, 'w') as output_file:
    for ref_gen in all_content_planner_references:
        output_file.write(ref_gen + "\n")

In [39]:
# Read the pooled Content Planner generations back from disk (one stripped entry per line)
with open(TGT_CONTENT_PLANNER_ALL_GENERATIONS) as fh:
    CP_generations = list(map(str.strip, fh))


# Read the pooled Content Planner references back from disk (one stripped entry per line)
with open(TGT_CONTENT_PLANNER_ALL_REFERENCES) as fh:
    CP_references = list(map(str.strip, fh))

In [40]:
# Sanity check: the files read back must contain the same number of generations and references.
assert len(CP_generations) == len(CP_references)

### How to decide on the threshold for length of content planner sections?


My solution - use T5_tokenizer to tokenize both the Content Planner generated sections and the reference sections, and truncate each one to its first 512 tokens.

In [42]:
def truncate_content_planner_reference(section_references):
  """Truncate Content Planner reference sections with the notebook's T5 tokenizer.

  Thin wrapper that forwards to `truncate_reference_sections` with the
  module-level `T5_tokenizer`, so the truncation logic lives in one place
  instead of being copied here.

  Args:
    section_references: list of reference section strings.

  Returns:
    The list returned by `truncate_reference_sections` (one string per input
    section). That helper also prints the number of truncated sections.
  """
  return truncate_reference_sections(references=section_references,
                                     model_tokenizer=T5_tokenizer)

In [43]:
def truncate_content_planner_generations(section_generations):
  """Truncate Content Planner generated sections with the notebook's T5 tokenizer.

  Thin wrapper that forwards to `truncate_reference_sections` with the
  module-level `T5_tokenizer`; the helper treats its input as plain strings,
  so it applies to generations exactly as it does to references.

  Args:
    section_generations: list of generated section strings.

  Returns:
    The list returned by `truncate_reference_sections` (one string per input
    section). That helper also prints the number of truncated sections.
  """
  return truncate_reference_sections(references=section_generations,
                                     model_tokenizer=T5_tokenizer)

In [44]:
# Truncate the pooled references and generations with the T5 tokenizer
# (sections longer than 512 token ids are cut; each call prints its truncation count).
new_CP_references = truncate_content_planner_reference(section_references=CP_references)
new_CP_generations = truncate_content_planner_generations(section_generations=CP_generations)

Token indices sequence length is longer than the specified maximum sequence length for this model (552 > 512). Running this sequence through the model will result in indexing errors


Num. of sections truncated:  312
Num. of sections truncated:  289


In [52]:
# SacreBLEU over the pooled, T5-truncated sections.
# Inputs are already truncated, so calc_sacrebleu is told not to truncate again.
CP_trun_sacrebleu_result_final = calc_sacrebleu(
    gold_references=list(new_CP_references),
    model_generations=list(new_CP_generations),
    truncation=False,
    model_tokenizer=None,
)

print(CP_trun_sacrebleu_result_final)

That's 100 lines that end in a tokenized period ('.')
It looks like you forgot to detokenize your test data, which may hurt your score.
If you insist your data is detokenized, or don't care, you can suppress this message with '--force'.


{'score': 33.19727927936324, 'counts': [108052, 67865, 53737, 45904], 'totals': [197563, 196821, 196079, 195337], 'precisions': [54.69242722574571, 34.480568638509105, 27.405790523207482, 23.49990017252236], 'bp': 1.0, 'sys_len': 197563, 'ref_len': 196229}


In [53]:
# SacreBLEU over the pooled, untruncated (original) sections.
CP_sacrebleu_result_final = calc_sacrebleu(
    gold_references=list(CP_references),
    model_generations=list(CP_generations),
    truncation=False,
    model_tokenizer=None,
)

print(CP_sacrebleu_result_final)

That's 100 lines that end in a tokenized period ('.')
It looks like you forgot to detokenize your test data, which may hurt your score.
If you insist your data is detokenized, or don't care, you can suppress this message with '--force'.


{'score': 27.787614623926533, 'counts': [151844, 91069, 70074, 59092], 'totals': [314163, 313421, 312679, 311937], 'precisions': [48.332871789485075, 29.056444845750605, 22.410843069090024, 18.943568733430148], 'bp': 1.0, 'sys_len': 314163, 'ref_len': 313587}


In [54]:
# BLEU over the pooled, T5-truncated sections.
# Inputs are already truncated, so calc_bleu is told not to truncate again.
CP_trun_bleu_result_final = calc_bleu(
    gold_references=list(new_CP_references),
    model_generations=list(new_CP_generations),
    truncation=False,
    model_tokenizer=None,
)

print(CP_trun_bleu_result_final)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=2488.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1554.0, style=ProgressStyle(description…


{'bleu': 0.3115594423336759, 'precisions': [0.5188168090656124, 0.32787853272606987, 0.2600697208421399, 0.22189148517471186], 'brevity_penalty': 0.9898093578205754, 'length_ratio': 0.9898609314359638, 'translation_length': 183639, 'reference_length': 185520}


In [55]:
# BLEU over the pooled, untruncated (original) sections.
CP_bleu_result_final = calc_bleu(
    gold_references=list(CP_references),
    model_generations=list(CP_generations),
    truncation=False,
    model_tokenizer=None,
)

print(CP_bleu_result_final)

{'bleu': 0.2742287000989337, 'precisions': [0.4788702739122548, 0.2876156768516414, 0.22281497780150247, 0.1882957133441658], 'brevity_penalty': 0.9946243636378755, 'length_ratio': 0.9946387607289223, 'translation_length': 308712, 'reference_length': 310376}


## Calculating metrics for each section separately

In [56]:
# Truncate references and generations section by section with the T5 tokenizer.
# Each call prints its own truncation count, so the printed numbers alternate
# reference / generation for sections 1 through 6.

# section 1
new_content_planner_ref_1 = truncate_content_planner_reference(section_references=content_planner_ref_1)
new_content_planner_gen_section_1 = truncate_content_planner_generations(section_generations=content_planner_gen_section_1)

# section 2
new_content_planner_ref_2 = truncate_content_planner_reference(section_references=content_planner_ref_2)
new_content_planner_gen_section_2 = truncate_content_planner_generations(section_generations=content_planner_gen_section_2)

# section 3
new_content_planner_ref_3 = truncate_content_planner_reference(section_references=content_planner_ref_3)
new_content_planner_gen_section_3 = truncate_content_planner_generations(section_generations=content_planner_gen_section_3)

# section 4
new_content_planner_ref_4 = truncate_content_planner_reference(section_references=content_planner_ref_4)
new_content_planner_gen_section_4 = truncate_content_planner_generations(section_generations=content_planner_gen_section_4)

# section 5
new_content_planner_ref_5 = truncate_content_planner_reference(section_references=content_planner_ref_5)
new_content_planner_gen_section_5 = truncate_content_planner_generations(section_generations=content_planner_gen_section_5)

# section 6
new_content_planner_ref_6 = truncate_content_planner_reference(section_references=content_planner_ref_6)
new_content_planner_gen_section_6 = truncate_content_planner_generations(section_generations=content_planner_gen_section_6)

Num. of sections truncated:  14
Num. of sections truncated:  3
Num. of sections truncated:  123
Num. of sections truncated:  123
Num. of sections truncated:  69
Num. of sections truncated:  46
Num. of sections truncated:  89
Num. of sections truncated:  90
Num. of sections truncated:  0
Num. of sections truncated:  0
Num. of sections truncated:  17
Num. of sections truncated:  27


# Calculate metrics

### Generations and references Unchanged

In [None]:
# Per-section SacreBLEU results (untruncated text); every slot starts as None
# until the section has been scored.
CP_sacrebleu_results = dict.fromkeys((
    'section 1',
    'section 2',
    'section 3',
    'section 4',
    'section 5',
    'section 6',
))

In [None]:
# Score each section's untruncated references/generations on its own, in
# section order. Copies are passed so the stored section lists are never
# touched by the metric helper.
(CP_sacrebleu_result_1,
 CP_sacrebleu_result_2,
 CP_sacrebleu_result_3,
 CP_sacrebleu_result_4,
 CP_sacrebleu_result_5,
 CP_sacrebleu_result_6) = [
    calc_sacrebleu(gold_references=section_refs.copy(),
                   model_generations=section_gens.copy(),
                   truncation=False,
                   model_tokenizer=None)
    for section_refs, section_gens in (
        (content_planner_ref_1, content_planner_gen_section_1),
        (content_planner_ref_2, content_planner_gen_section_2),
        (content_planner_ref_3, content_planner_gen_section_3),
        (content_planner_ref_4, content_planner_gen_section_4),
        (content_planner_ref_5, content_planner_gen_section_5),
        (content_planner_ref_6, content_planner_gen_section_6),
    )
]

# File every result under its section label.
CP_sacrebleu_results.update({
    'section 1': CP_sacrebleu_result_1,
    'section 2': CP_sacrebleu_result_2,
    'section 3': CP_sacrebleu_result_3,
    'section 4': CP_sacrebleu_result_4,
    'section 5': CP_sacrebleu_result_5,
    'section 6': CP_sacrebleu_result_6,
})

That's 100 lines that end in a tokenized period ('.')
It looks like you forgot to detokenize your test data, which may hurt your score.
If you insist your data is detokenized, or don't care, you can suppress this message with '--force'.
That's 100 lines that end in a tokenized period ('.')
It looks like you forgot to detokenize your test data, which may hurt your score.
If you insist your data is detokenized, or don't care, you can suppress this message with '--force'.
That's 100 lines that end in a tokenized period ('.')
It looks like you forgot to detokenize your test data, which may hurt your score.
If you insist your data is detokenized, or don't care, you can suppress this message with '--force'.


In [None]:
# Print each section's SacreBLEU score. A plain loop is used so the cell does
# not build (and echo) a throwaway list of None values.
for section_result in CP_sacrebleu_results.values():
    print(section_result['score'])

33.93171335697557
24.283563136152395
25.656890826985226
26.186036561192484
49.912002663064875
22.136802152070857


[None, None, None, None, None, None]

In [None]:
# Collect the per-section SacreBLEU scores in section order.
CP_sacrebleu_results_list = [section_result['score'] for section_result in CP_sacrebleu_results.values()]

In [None]:
# Unweighted mean and spread of the six per-section SacreBLEU scores
# (np.std is called with its defaults, i.e. ddof=0 / population standard deviation).
print("Average - Content_Planner_sacrebleu_results: ", np.mean(CP_sacrebleu_results_list))
print("Std - Content_Planner_sacrebleu_results: ", np.std(CP_sacrebleu_results_list))

Average - Content_Planner_sacrebleu_results:  30.351168116073566
Std - Content_Planner_sacrebleu_results:  9.479124000632142


### Truncated with T5 tokenizer

In [None]:
# Per-section SacreBLEU results (T5-truncated text); every slot starts as None
# until the section has been scored.
CP_trun_sacrebleu_results = dict.fromkeys((
    'section 1',
    'section 2',
    'section 3',
    'section 4',
    'section 5',
    'section 6',
))

In [None]:
# Score each section's T5-truncated references/generations on its own, in
# section order. Copies are passed so the stored section lists are never
# touched by the metric helper.
(CP_trun_sacrebleu_result_1,
 CP_trun_sacrebleu_result_2,
 CP_trun_sacrebleu_result_3,
 CP_trun_sacrebleu_result_4,
 CP_trun_sacrebleu_result_5,
 CP_trun_sacrebleu_result_6) = [
    calc_sacrebleu(gold_references=section_refs.copy(),
                   model_generations=section_gens.copy(),
                   truncation=False,
                   model_tokenizer=None)
    for section_refs, section_gens in (
        (new_content_planner_ref_1, new_content_planner_gen_section_1),
        (new_content_planner_ref_2, new_content_planner_gen_section_2),
        (new_content_planner_ref_3, new_content_planner_gen_section_3),
        (new_content_planner_ref_4, new_content_planner_gen_section_4),
        (new_content_planner_ref_5, new_content_planner_gen_section_5),
        (new_content_planner_ref_6, new_content_planner_gen_section_6),
    )
]

# File every result under its section label.
CP_trun_sacrebleu_results.update({
    'section 1': CP_trun_sacrebleu_result_1,
    'section 2': CP_trun_sacrebleu_result_2,
    'section 3': CP_trun_sacrebleu_result_3,
    'section 4': CP_trun_sacrebleu_result_4,
    'section 5': CP_trun_sacrebleu_result_5,
    'section 6': CP_trun_sacrebleu_result_6,
})

That's 100 lines that end in a tokenized period ('.')
It looks like you forgot to detokenize your test data, which may hurt your score.
If you insist your data is detokenized, or don't care, you can suppress this message with '--force'.
That's 100 lines that end in a tokenized period ('.')
It looks like you forgot to detokenize your test data, which may hurt your score.
If you insist your data is detokenized, or don't care, you can suppress this message with '--force'.


In [None]:
# Print each section's truncated-text SacreBLEU score. A plain loop is used so
# the cell does not build (and echo) a throwaway list of None values.
for section_result in CP_trun_sacrebleu_results.values():
    print(section_result['score'])

33.08692682330122
31.025031767978952
27.827920465001228
33.777062223425986
49.912002663064875
29.084590087025642


[None, None, None, None, None, None]

In [None]:
# Collect the per-section truncated-text SacreBLEU scores in section order.
CP_trun_sacrebleu_results_list = [section_result['score'] for section_result in CP_trun_sacrebleu_results.values()]

In [None]:
# Unweighted mean and spread of the six per-section truncated-text SacreBLEU scores
# (np.std is called with its defaults, i.e. ddof=0 / population standard deviation).
print("Average - Content_Planner_trun_sacrebleu_results: ", np.mean(CP_trun_sacrebleu_results_list))
print("Std - Content_Planner_trun_sacrebleu_results: ", np.std(CP_trun_sacrebleu_results_list))

Average - Content_Planner_trun_sacrebleu_results:  34.11892233829965
Std - Content_Planner_trun_sacrebleu_results:  7.360894803767474


## BLEU scores

In [58]:
# Per-section BLEU results (untruncated text); every slot starts as None
# until the section has been scored.
CP_bleu_results = dict.fromkeys((
    'section 1',
    'section 2',
    'section 3',
    'section 4',
    'section 5',
    'section 6',
))

In [59]:
# Score each section's untruncated references/generations on its own with
# BLEU, in section order. Copies are passed so the stored section lists are
# never touched by the metric helper.
(CP_bleu_result_1,
 CP_bleu_result_2,
 CP_bleu_result_3,
 CP_bleu_result_4,
 CP_bleu_result_5,
 CP_bleu_result_6) = [
    calc_bleu(gold_references=section_refs.copy(),
              model_generations=section_gens.copy(),
              truncation=False,
              model_tokenizer=None)
    for section_refs, section_gens in (
        (content_planner_ref_1, content_planner_gen_section_1),
        (content_planner_ref_2, content_planner_gen_section_2),
        (content_planner_ref_3, content_planner_gen_section_3),
        (content_planner_ref_4, content_planner_gen_section_4),
        (content_planner_ref_5, content_planner_gen_section_5),
        (content_planner_ref_6, content_planner_gen_section_6),
    )
]

# File every result under its section label.
CP_bleu_results.update({
    'section 1': CP_bleu_result_1,
    'section 2': CP_bleu_result_2,
    'section 3': CP_bleu_result_3,
    'section 4': CP_bleu_result_4,
    'section 5': CP_bleu_result_5,
    'section 6': CP_bleu_result_6,
})

In [60]:
# Print each section's BLEU score. A plain loop is used so the cell does not
# build (and echo) a throwaway list of None values.
for section_result in CP_bleu_results.values():
    print(section_result['bleu'])

0.34042072035461646
0.23840965231186811
0.25533202628951335
0.2580059556571986
0.4971176027529728
0.21598187899648028


[None, None, None, None, None, None]

In [62]:
# Collect the per-section BLEU scores in section order.
CP_bleu_results_list = [section_result['bleu'] for section_result in CP_bleu_results.values()]

In [65]:
# Unweighted mean of the six per-section BLEU scores (untruncated text).
print("Average - Content_Planner_bleu_results: ", np.mean(CP_bleu_results_list))

Average - Content_Planner_bleu_results:  0.30087797272710826


### BLEU Truncated

In [74]:
# Per-section BLEU results (T5-truncated text); every slot starts as None
# until the section has been scored.
CP_trun_bleu_results = dict.fromkeys((
    'section 1',
    'section 2',
    'section 3',
    'section 4',
    'section 5',
    'section 6',
))

In [75]:
# Score each section's T5-truncated references/generations on its own with
# BLEU, in section order. Copies are passed so the stored section lists are
# never touched by the metric helper.
(CP_trun_bleu_result_1,
 CP_trun_bleu_result_2,
 CP_trun_bleu_result_3,
 CP_trun_bleu_result_4,
 CP_trun_bleu_result_5,
 CP_trun_bleu_result_6) = [
    calc_bleu(gold_references=section_refs.copy(),
              model_generations=section_gens.copy(),
              truncation=False,
              model_tokenizer=None)
    for section_refs, section_gens in (
        (new_content_planner_ref_1, new_content_planner_gen_section_1),
        (new_content_planner_ref_2, new_content_planner_gen_section_2),
        (new_content_planner_ref_3, new_content_planner_gen_section_3),
        (new_content_planner_ref_4, new_content_planner_gen_section_4),
        (new_content_planner_ref_5, new_content_planner_gen_section_5),
        (new_content_planner_ref_6, new_content_planner_gen_section_6),
    )
]

# File every result under its section label.
CP_trun_bleu_results.update({
    'section 1': CP_trun_bleu_result_1,
    'section 2': CP_trun_bleu_result_2,
    'section 3': CP_trun_bleu_result_3,
    'section 4': CP_trun_bleu_result_4,
    'section 5': CP_trun_bleu_result_5,
    'section 6': CP_trun_bleu_result_6,
})

In [76]:
# Print each section's truncated-text BLEU score. A plain loop is used so the
# cell does not build (and echo) a throwaway list of None values.
for section_result in CP_trun_bleu_results.values():
    print(section_result['bleu'])

0.3180841515970607
0.28875685763392
0.25855276281275813
0.29951294727398386
0.4971176027529728
0.282126122374654


[None, None, None, None, None, None]

In [77]:
# Collect the per-section truncated-text BLEU scores in section order.
CP_trun_bleu_results_list = [section_result['bleu'] for section_result in CP_trun_bleu_results.values()]

In [78]:
# Unweighted mean of the six per-section truncated-text BLEU scores.
# The label names the truncated results so this line cannot be mistaken for
# the untruncated BLEU average printed earlier.
print("Average - Content_Planner_trun_bleu_results: ", np.mean(CP_trun_bleu_results_list))

Average - Content_Planner_bleu_results:  0.32402507407422493
