# CNN Daily Mail

## Import libraries

In [1]:
!pip install transformers[torch]
!pip install datasets
!pip install rouge_score



In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, Trainer, TrainingArguments, Seq2SeqTrainingArguments, Seq2SeqTrainer
from datasets import load_dataset
import numpy as np

## Load the dataset

In [3]:
dataset = load_dataset("cnn_dailymail", '3.0.0')
train_dataset = dataset['train']
val_dataset = dataset['validation']

train_dataset_size = len(train_dataset)
val_dataset_size = len(val_dataset)

print(f"Train samples: {train_dataset_size}")
print(f"Validation samples: {val_dataset_size}")

title_key = 'article'
sum_key = 'highlights'

Train samples: 287113
Validation samples: 13368


## Visualize the dataset

In [4]:
train_sample_index = 2
train_sample = train_dataset[train_sample_index]
print("CNN dataset: ")
print(f"# Article:\n{train_sample[title_key]}\n")
print(f"# Highlights:\n{train_sample[sum_key]}\n")

CNN dataset: 
# Article:
MINNEAPOLIS, Minnesota (CNN) -- Drivers who were on the Minneapolis bridge when it collapsed told harrowing tales of survival. "The whole bridge from one side of the Mississippi to the other just completely gave way, fell all the way down," survivor Gary Babineau told CNN. "I probably had a 30-, 35-foot free fall. And there's cars in the water, there's cars on fire. The whole bridge is down." He said his back was injured but he determined he could move around. "I realized there was a school bus right next to me, and me and a couple of other guys went over and started lifting the kids off the bridge. They were yelling, screaming, bleeding. I think there were some broken bones."  Watch a driver describe his narrow escape » . At home when he heard about the disaster, Dr. John Hink, an emergency room physician, jumped into his car and rushed to the scene in 15 minutes. He arrived at the south side of the bridge, stood on the riverbank and saw dozens of people lying

# Transfer learning with another summarization dataset

https://huggingface.co/datasets/ccdv/arxiv-summarization/viewer/document/train

In [5]:

# WARN: This code cell override CNN dataset variables!!!

disable_train = False
enable_papers_dataset = False

if enable_papers_dataset:
  print("WARN: This code cell override CNN dataset variables!!!\n")
  dataset = load_dataset("ccdv/arxiv-summarization", download_mode="force_redownload")
  train_dataset = dataset['train']
  val_dataset = dataset['validation']

  train_dataset_size = len(train_dataset)
  val_dataset_size = len(val_dataset)

  print(f"Papers Train samples: {train_dataset_size}")
  print(f"Papers Validation samples: {val_dataset_size}")

  title_key = 'article'
  sum_key = 'abstract'

  papers_test_sample_index = 7
  len_of_art = 4000 # Cut article because it's too long!
  papers_test_sample = train_dataset[papers_test_sample_index]
  print("Paper dataset:")
  print(f"# Article: [chars length = {len(papers_test_sample[title_key])}]\n")
  print(f" {papers_test_sample[title_key][:len_of_art]} ...\n")
  print(f"# Abstract: [chars length = {len(papers_test_sample[sum_key])}]\n ")
  print(f" {papers_test_sample[sum_key]}\n")
else:
  print("DEBUG: This code cell was skiped \n")

DEBUG: This code cell was skiped 



## Load pre-trained model

In [6]:
is_distil = True
model_max_length = 1024
print(f"Is_distil: {is_distil}")
model_name = 'sshleifer/distilbart-cnn-12-6' if is_distil else 'facebook/bart-base'
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length=model_max_length)

Is_distil: True


Downloading (…)lve/main/config.json:   0%|          | 0.00/1.80k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

## Convert dataset into tokenized format

In [7]:
token_title_len = model_max_length #512 (corto y malo) | 1024 (bueno)
token_sum_len = 128 # (128 max_length of sum)
def tokenize(batch):
    max_length_title = token_title_len
    max_length_sum = token_sum_len
    inputs = tokenizer(batch[title_key], padding="max_length", truncation=True, max_length=max_length_title)
    outputs = tokenizer(batch[sum_key], padding="max_length", truncation=True, max_length=max_length_sum)
    batch["input_ids"] = inputs.input_ids
    batch["attention_mask"] = inputs.attention_mask
    batch["labels"] = outputs.input_ids
    return batch

train_sample_size = min(10000, train_dataset_size) # 10000
val_sample_size = min(5000, val_dataset_size) #5000

sampled_train_dataset = train_dataset.select(range(train_sample_size))
sampled_val_dataset = val_dataset.select(range(val_sample_size))

batch_size = 5000 # 5000

tokenized_train_dataset = sampled_train_dataset.map(tokenize, batched=True, batch_size=batch_size)
tokenized_val_dataset = sampled_val_dataset.map(tokenize, batched=True, batch_size=batch_size)

tokenized_train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
tokenized_val_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

## Fine-tune the model

In [8]:
"""
 print(list_metrics())

# Rouge metric
#   rouge types: {'rouge1', 'rouge2', 'rougeL', 'rougeLsum'}
metric_test = load_metric('rouge')
metric_test_predictions = ["hello there", "genxral kenobe"]
metric_test_references = ["hello there", "general kenobi"]
metric_test_results = metric_test.compute(predictions=metric_test_predictions, references=metric_test_references, rouge_types=['rouge1'], use_aggregator=True)

print(metric_test_results)
"""

In [9]:
from datasets import load_metric, list_metrics

training_args = Seq2SeqTrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    eval_steps=1000, #1000
    predict_with_generate=True,
    evaluation_strategy="steps",
    remove_unused_columns=False,
)

""" Old logic
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    eval_steps=30,
    evaluation_strategy="steps",
    remove_unused_columns=False,
)
"""

metric = load_metric('rouge')

def compute_metrics(eval_preds):
  predictions, labels = eval_preds
  if isinstance(predictions, tuple):
      predictions = predictions[0]
  #print(f"predictions: {predictions}")
  #print(f"type of predictions: {type(predictions)}")

  predictions_decoded = [ tokenizer.decode(pred, skip_special_tokens=True) for pred in predictions ]
  references_decoded = [{"summary": label} for label in sampled_val_dataset[sum_key]]

  scores = metric.compute(predictions=predictions_decoded, references=references_decoded, rouge_types=["rouge2","rougeLsum"])
  print(f"Scores {scores}")
  score_sum = scores["rouge2"].mid
  return {
    "rouge2_precision": score_sum.precision,
    "rouge2_recall": score_sum.recall,
    "rouge2_fmeasure": score_sum.fmeasure,
  }

# Use if gpu blow up (Do not leak memory)
#def preprocess_logits_for_metrics(logits, labels):
#  return logits.??

""" Old logic
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_val_dataset,
    compute_metrics=compute_metrics,
    #preprocess_logits_for_metrics=preprocess_logits_for_metrics,
)
"""

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_val_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)


train_enable = not is_distil or disable_train

if train_enable:
  trainer.train()

  metric = load_metric('rouge')


## Perform an evaluation

In [10]:
# Compute metrics

scores=trainer.evaluate()
print(scores)

print(f"Model Name: {model_name}\n")

print(f"Title token length: {token_title_len}")
print(f"Sum token length: {token_sum_len}\n")

print(f"Scores:")
for k, v in scores.items():
  print(f"- {k}: {v}")


You're using a BartTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Scores {'rouge2': AggregateScore(low=Score(precision=0.15804034367389377, recall=0.21168221868567605, fmeasure=0.1764605071668387), mid=Score(precision=0.16120423445273893, recall=0.21561135712654128, fmeasure=0.1798326727209022), high=Score(precision=0.16402693459257345, recall=0.21917607945417777, fmeasure=0.18268285313385296)), 'rougeLsum': AggregateScore(low=Score(precision=0.23755432838754056, recall=0.3206821377468737, fmeasure=0.26648763304043194), mid=Score(precision=0.24071940342655035, recall=0.3244017145688574, fmeasure=0.2696405687366908), high=Score(precision=0.24399399994366885, recall=0.32838803266764244, fmeasure=0.27281859616902615))}
{'eval_loss': 8.794404983520508, 'eval_rouge2_precision': 0.16120423445273893, 'eval_rouge2_recall': 0.21561135712654128, 'eval_rouge2_fmeasure': 0.1798326727209022, 'eval_runtime': 2844.5044, 'eval_samples_per_second': 1.758, 'eval_steps_per_second': 0.22}
Model Name: sshleifer/distilbart-cnn-12-6

Title token length: 1024
Sum token leng

## Visualize some results

In [11]:
# Check if CUDA is available and set the device to GPU if it is.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def model_generate_sample(index):

  # Get a sample from the validation dataset.
  val_sample_index = index
  val_sample = sampled_val_dataset[val_sample_index]
  tokenized_sample = tokenized_val_dataset[val_sample_index]

  # Create inputs to the model.
  model_input = {
      key: tokenized_sample[key].unsqueeze(0).to(device)
      for key in ['input_ids', 'attention_mask']
  }

  # Use the model for generation.
  model_output = model.generate(**model_input, max_new_tokens=200)

  # Detokenize the output tokens.
  detokenized_output = tokenizer.decode(model_output.squeeze(), skip_special_tokens=True)

  print(f"\n# Article:\n{val_sample[title_key][:4000]}\n")
  if enable_papers_dataset:
    print(f"# Abstract:\n{val_sample[sum_key]}\n")
  else:
    print(f"# Highlights:\n{val_sample[sum_key]}\n")

  print(f"# Output:\n{detokenized_output}")

samples_indexes = [3, 9, 13, 50]
for i, index in enumerate(samples_indexes):
  print(f"[{i+1}]")
  model_generate_sample(index)
  print("--------------------------")


[1]

# Article:
(CNN)It was an act of frustration perhaps more commonly associated with golf's fictional anti-hero Happy Gilmore than the world's reigning No 1. player. But when Rory McIlroy pulled his second shot on the eighth hole of the WGC Cadillac Championship into a lake Friday, he might as well have been channeling the much loved Adam Sandler character. Before continuing his round with a dropped ball, the four-time major winner launched the 3-iron used to play the offending shot into the water as well. "(It) felt good at the time," a rueful McIlroy later said of the incident in comments carried by the PGA Tour website. "I just let frustration get the better of me. It was heat of the moment, and I mean, if it had of been any other club I probably wouldn't have but I didn't need a 3‑iron for the rest of the round so I thought, why not." The club "must have went a good 60, 70 yards," he joked. McIlroy composed himself to finish with a second round of 70, leaving him one-under for t

# TODOs:

1. [X] Compute ROUGE metric for the evaluation (i.e., use Trainer compute_metrics and dataset load_metric).
2. [X] Check transfer learning to a different summarization dataset (e.g., wiki_summary, arxiv_dataset).
3. [X] Compare results with a distilled model (e.g., 'sshleifer/distilbart-cnn-12-6').



# CNN facebook/bart-base

- Model Name: `facebook/bart-base`

- Dataset: `cnn_dailymail`

- Title token length: `1024`
- Sum token length: `128`

- Dataset train length: `10000`
- Dataset val length: `5000`

- Output length: `200`

### Score
```
Scores {'rouge2': AggregateScore(low=Score(precision=0.20484624966700093, recall=0.05886312858279843, fmeasure=0.08935206997561668), mid=Score(precision=0.21051666971917082, recall=0.06055604875839533, fmeasure=0.09186945669654559), high=Score(precision=0.21620524508130914, recall=0.062432376627603446, fmeasure=0.09457113364927719)), 'rougeLsum': AggregateScore(low=Score(precision=0.38614275409925275, recall=0.1181198035291461, fmeasure=0.1768211281844371), mid=Score(precision=0.39157945123504206, recall=0.11991702761234932, fmeasure=0.1793705964461127), high=Score(precision=0.39705687516976584, recall=0.12202017583925315, fmeasure=0.18228278016093952))}
{'eval_loss': 0.9789249897003174, 'eval_rouge2_precision': 0.21051666971917082, 'eval_rouge2_recall': 0.06055604875839533, 'eval_rouge2_fmeasure': 0.09186945669654559, 'eval_runtime': 772.3522, 'eval_samples_per_second': 6.474, 'eval_steps_per_second': 0.809, 'epoch': 1.0}

Scores:
- eval_loss: 0.9789249897003174
- eval_rouge2_precision: 0.21051666971917082
- eval_rouge2_recall: 0.06055604875839533
- eval_rouge2_fmeasure: 0.09186945669654559
- eval_runtime: 772.3522
- eval_samples_per_second: 6.474
- eval_steps_per_second: 0.809
- epoch: 1.0
```

### Results
```
[1]

# Article:
(CNN)It was an act of frustration perhaps more commonly associated with golf's fictional anti-hero Happy Gilmore than the world's reigning No 1. player. But when Rory McIlroy pulled his second shot on the eighth hole of the WGC Cadillac Championship into a lake Friday, he might as well have been channeling the much loved Adam Sandler character. Before continuing his round with a dropped ball, the four-time major winner launched the 3-iron used to play the offending shot into the water as well. "(It) felt good at the time," a rueful McIlroy later said of the incident in comments carried by the PGA Tour website. "I just let frustration get the better of me. It was heat of the moment, and I mean, if it had of been any other club I probably wouldn't have but I didn't need a 3‑iron for the rest of the round so I thought, why not." The club "must have went a good 60, 70 yards," he joked. McIlroy composed himself to finish with a second round of 70, leaving him one-under for the tournament and eight shots off the pace set by leader JB Holmes. While an improvement on last weeks performance at the Honda Classic event, where he failed to make the cut, the Northern Irishman's frustration with elements of his game was still clear. "I think every golfer feels it because I don't hit shots like the one I hit on 8 on the range," he said. "That's what really bothers me, the fact that I get out on the course and I hit shots that I'm not seeing when I'm in a more relaxed environment. "So it's a little bit of mental, a little bit of physical. It's just everything is not quite matching up." Elsewhere on the course, Ryan Holmes scored a two-under-par 71 to remain in second position overall, two shots behind Holmes. Former world No 1., Adam Scott carded an impressive 68 to finish the day three shots off the pace at six-under while Bubba Watson and Henrik Stenson are tied for fourth on four-under.

# Highlights:
Rory McIlroy throws club into water at WGC Cadillac Championship .
Northern Irishman frustrated after pulling shot into water hazard .

# Output:
Rory McIlroy pulls second shot on the eighth hole of the WGC Cadillac Championship into a lake.
The four-time major winner launched the 3-iron used to play the offending shot into the water.
McIlroy's second round of 70 leaves him one-under for the tournament and eight shots off the pace.
--------------------------
[2]

# Article:
(CNN)Manchester United defender Jonny Evans and Newcastle United striker Papiss Cisse have been charged by the Football Association for allegedly spitting during an altercation in Wednesday night's Premier League game at St James' Park. In a statement, English football's governing body said the players had been charged "in relation to an alleged breach of FA Rule E1[a] in that in or around the 38th minute of the game the two players spat at each other. "The incidents were not seen by the match officials but [were] caught on video." The players have until 6pm GMT on Friday to respond to the charge, and could face six-game bans if found guilty. Both Evans and Cisse released statements the day after the incident, with Evans saying: "I would like to make it clear that I did not spit at Papiss Cisse." Cisse's statement said: "I reacted to something I found very unpleasant. Sometimes it is hard not to react, particularly in the heat of the moment. I have always tried hard to be positive a role model, especially for our young fans, and yesterday I let you down." Spitting at another player is considered beyond the pale by professional footballers, and former Liverpool midfielder Dietmar Hamann, now a TV pundit, told the BBC's Match of the Day programme the incident had been "disgusting." "This is not acceptable," he said. "There are kids watching. Something has to be done. The behaviour towards each other and the referee is deteriorating on a weekly basis." Ex-Manchester United midfielder Paul Scholes said he did not believe Evans had deliberately spat at Cisse, telling BT Sport: "Look, it's not very nice. I think Jonny is spitting on the floor. "I know Jonny -- he's not that type of person. If he wants to do that then it's not hard to miss, is it? He's only stood a yard away from him. What Cisse does afterwards is unforgivable." And former Liverpool player Steve McManaman told the channel: "Cisse stands up and spits right at Jonny Evans' neck from about six inches. It's absolutely disgusting. "Two wrongs do not make a right. If Jonny Evans has spat at him then it's wrong, but for Papiss Cisse to get up and react like that is absolutely disgusting. We talk about bad tackles, but that is worse."

# Highlights:
Alleged incident happened in match at St James' Park .
Players face six-match ban if found guilty .
Evans denied spitting in statement .
Cisse statement says: "I let you down"

# Output:
Manchester United defender Jonny Evans and Newcastle United striker Papiss Cisse have been charged.
The two players have until 6pm GMT on Friday to respond to the charge.
Both Evans and Cisse released statements the day after the incident.
--------------------------
[3]

# Article:
(CNN)The search for a comic book artist missing in the Cayman Islands since Thursday is now being called a recovery mission. Norman Lee, an artist for DC and Marvel comics, went missing while snorkeling with his wife off the eastern coast of Grand Cayman, CNN affiliate WCVB reported. Strong currents hindered the search, which lasted until Friday evening, Cayman 27 reported. "It is unlikely that we will make any recovery at this stage," Chief Inspector Brad Ebanks told Cayman 27. Lee, 47, of Weymouth, Massachusetts, was known and for his work on "Wolverine Annual," "Supergirl," "Starman" and other comic book titles. Tributes flooded his Facebook page and Twitter from friends, fans and colleagues who knew him from art school and comic conventions. "I cannot express how shaken I am that I will never get the chance to see that smile again, and it saddens me that this world has lost a wonderful man in Norman Lee. To his wife Jan, and his family and all his friends and fans that loved him, my sincerest condolences," friend and fellow graphic artist Chris Kinniery said on Facebook. "I'm so sorry to hear about Norman Lee's disappearance. My condolences go out to his family. ... He was an amazing talent in the industry and it was always a pleasure to work with him," freelance artist .

# Highlights:
Comic book artist Norman Lee went missing in the Cayman Islands on Thursday .
Authorities called off search on Friday evening .

# Output:
Norman Lee, 47, went missing while snorkeling with his wife off the eastern coast of Grand Cayman.
Strong currents hindered the search, which lasted until Friday evening.
Lee was known for his work on "Wolverine Annual," "Supergirl," "Starman"
--------------------------
[4]

# Article:
New Delhi (CNN)Thankfully, no one was wounded after crude bombs were hurled at a Tamil news station in India on Thursday. But the loud explosions injured a vital part of the world's largest democracy: free speech. Last week, when India's government and a British documentarian faced off over a film featuring a man imprisoned for a 2012 gang rape in South Delhi, a little-known channel hundreds of miles away in southern India was waging its own battle. Hardline Hindu groups were angry with broadcaster Puthiya Thalaimurai for filming a show about the relevance of a traditional necklace -- called mangalsutra in Hindi and thaali in Tamil -- worn by married Indian women. For them, the contents, as shown in the promos, were offensive to Hindu culture. The station planned to release the program Sunday, International Women's Day. But it canceled the telecast after demonstrations took place outside its office. Protesters allegedly attacked one of its cameramen. Four days later, the channel came under fire again, when four men on two motorbikes threw bombs into its compound in a predawn attack, authorities say. Six people involved in the bombing have been arrested, said S. George, the commissioner of the southern Indian city of Chennai. Their leader turned himself in separately, claiming responsibility for the attack, police said. "The show wanted to give women a platform. We welcome all opinions and thoughts. But you cannot strangle freedom of free expression by violent means and threats," said Shyam Kumar, the CEO of New Generation Media Corp., which runs Puthiya Thalaimurai. "We condemn the attack in the strongest possible terms," he told CNN. 'India's Daughter,' the film banned by India: What did it show? But India is no stranger to censorship imposed legally or forced by rowdy protesters. The country's constitution guarantees freedom of expression, but not without restrictions. Communities or people claiming their religious sentiments were hurt by anyone else's opinion can file a lawsuit. Authorities can seek restraining orders from local courts -- as they did to ban the recent BBC documentary "India's Daughter" -- by citing potential disorder. Earlier last year, Penguin India withdrew "The Hindus: An Alternative History," a book by American academic Wendy Doniger, after a local advocacy group accused the writer of denigrating Hinduism. In December, a Bollywood movie, "PK," came under attack over similar accusations when mobs tore apart its posters in parts of India. A satire on religious rituals, "PK" became a roaring success by being one of the country's highest-grossing movies. But India, home to one of the world's largest film industries, has blocked several movies from screening. At least two films were not allowed last year. One of them featured the lives of the Sikh assassins of Prime Minister Indira Gandhi, and the other centered on the violence in Sri Lanka in the closing months of its civil war. Hounded by protests over his novel, Perumal Murugan, a Tamil author, announced quitting writing in a dramatic post on Facebook in January. "Perumal Murugan, the writer is dead. As he is no God, he is not going to resurrect himself. He has no faith in rebirth. As an ordinary teacher, he will live as P Murugan. Leave him alone," he  said on Facebook two months ago. Religious and caste-based organizations had slammed his novel "Madhorubhagan," which depicted a childless wife taking part in an ancient festival allowing consensual sex between strangers. Just last week, India blocked the BBC from airing "India's Daughter" because it included comments from one of the men convicted of raping a young student in a moving bus in New Delhi in 2012. The reason: The inmate's views could create unrest. "There's a growing intolerance towards different shades of opinion. It's a medieval mindset. What India needs is a concerted effort to move beyond it and embrace free expression in totality," said Kumar, the New Generation Media chief executive.

# Highlights:
Indian broadcaster Puthiya Thalaimurai drew protests for a show about traditional necklaces worn by married women .
This comes after India banned a film featuring a man imprisoned for a gang rape in South Delhi .
"There's a growing intolerance towards different shades of opinion," a broadcasting company CEO says .

# Output:
Tens of miles away in southern India, a little-known news station was waging its own battle.
Hardline Hindu groups were angry with broadcaster Puthiya Thalaimurai for filming a show about the relevance of a traditional necklace.
The contents, as shown in the promos, were offensive to Hindu culture.
--------------------------
```

# Transfer Learning with arxiv dataset

>*Note: Results of CNN in previous section.*

## Papers facebook/bart-base - Case 1: Defaults

- Model Name: `facebook/bart-base`

- Dataset: `ccdv/arxiv-summarization`

- Title token length: `1024`
- Sum token length: `128`

- Dataset train length: `10000`
- Dataset val length: `5000`

- Output length: `500`

### Score
```
Model Name: facebook/bart-base
Dataset: ccdv/arxiv-summarization

Scores Score(precision=0.28974797091797166, recall=0.026405668051173004, fmeasure=0.04761799254528512)
{'eval_loss': 2.591139554977417, 'eval_rouge2_precision': 0.28974797091797166, 'eval_rouge2_recall': 0.026405668051173004, 'eval_rouge2_fmeasure': 0.04761799254528512, 'eval_runtime': 823.2408, 'eval_samples_per_second': 6.074, 'eval_steps_per_second': 0.759, 'epoch': 1.0}
Model Name: facebook/bart-base

Scores:
- eval_loss: 2.591139554977417
- eval_rouge2_precision: 0.28974797091797166
- eval_rouge2_recall: 0.026405668051173004
- eval_rouge2_fmeasure: 0.04761799254528512
- eval_runtime: 823.2408
- eval_samples_per_second: 6.074
- eval_steps_per_second: 0.759
- epoch: 1.0

```

### Results

```
[1]

# Article:
methanol masers are often found in star - forming regions .
 there are two sets of transitions seen to produce methanol masers .
 class  i methanol masers ( most importantly the 36 and 44  ghz transitions ) are believed to be collisionally excited , while class  ii masers ( including the 6.7 and 12  ghz transitions ) are radiatively excited @xcite .
 class  i and class  ii methanol masers are sometimes both found in association with the same source ( e.g. , * ? ? ? * ) , but the two classes of masers are very rarely seen at the same velocity or in close ( subarcsecond ) spatial overlap .
 class  i methanol masers , in which shocks dominate over infrared radiation , have often been assumed to be tracing an earlier evolutionary state of star formation than class  ii methanol , water , or oh masers ( e.g. , * ? ? ?
 * ; * ? ? ?
 subcategorization of class  i masers by physical conditions may be possible @xcite , leading some authors to speculate that line intensity ratios among the class  i masers may be a proxy for evolutionary stage @xcite .
 however , class  i maser studies have traditionally been biased towards regions hosting other tracers of star formation , and the cluster environments in which class  i masers are found are usually quite complex , calling into question traditional models of the evolutionary timeline of class  i masers ( section 4.4 of * ? ? ? * and references therein ) .
 furthermore , class  i masers have typically been observed with single - dish telescopes , which can identify whether or not a particular class  i transition produces masers in a region ( and how bright they are ) but do not have the resolution to determine their location relative to masers in other transitions .
 given the complex environments associated with clustered star formation , high angular resolution is required to identify the relations between masers and excitation sources ( e.g. , * ? ? ? * ) and between multiple transitions of methanol @xcite .
 higher angular resolution is also necessary to understand the physical conditions that produce masers in each of the class  i transitions , which may not be identical ( e.g. , * ? ? ?
 * ; * ? ? ?
 * ; * ? ? ?
 these concerns motivated @xcite to do an unbiased single - dish search for class  i methanol masers in nearby molecular clouds , resulting in the detection of new class  i maser features . several sites within these clouds
 host previously known 44  ghz methanol masers , many of which have been mapped interferometrically ( e.g. , * ? ? ?
 * ; * ? ? ?
 * ; * ? ? ?
 imaging the 36  ghz masers , the other bright transition seen in numerous sources @xcite , has heretofore not been possible due to the lack of interferometers operating at this frequency .
 however , recent upgrades to the australia telescope compact array and the expanded very large array ( evla ) are allowing the first arcsecond - resolution images of 36  ghz masers to be produced @xcite . in this letter
 , we report on the first evla maps of the 36  ghz masers in the dr21 star - forming complex .
 the evla was used to observe the 36.169  ghz @xmath0 line of methanol in dr21(oh ) , dr21w , and dr21n on 2010 may 26 . the array consisted of the 20 telescopes outfitted with ka - band receivers .
 the evla was in its most compact ( d ) configuration , providing a synthesized beamwidth of approximately @xmath1 .
 all three sources were observed in dual circular polarization centered on a fixed sky frequency of 36.1731  ghz and correlated with the new widar correlator .
 the 4  mhz observing bandwidth was divided into 256 spectral channels , giving a velocity coverage of 33  kms@xmath2 with a channel spacing of 0.13  kms@xmath2 .
 conversion from sky frequency to lsr velocity was performed with the assistance of the evla online dopset tool .
 total on - source observing time was @xmath3  min per source .
 typical single - channel noise levels were @xmath4  mjybeam@xmath2 near the center of the f

# Abstract:
class  i methanol masers are believed to be produced in the shock - excited environment around star - forming regions .
 many authors have argued that the appearance of various subsets of class  i masers may be indicative of specific evolutionary stages of star formation or excitation conditions . until recently , however , no major interferometer was capable of imaging the important 36  ghz transition .
 we report on expanded very large array observations of the 36  ghz methanol masers and submillimeter array observations of the 229  ghz methanol masers in dr21(oh ) , dr21n , and dr21w .
 the distribution of 36  ghz masers in the outflow of dr21(oh ) is similar to that of the other class  i methanol transitions , with numerous multitransition spatial overlaps .
 at the site of the main continuum source in dr21(oh ) , class  i masers at 36 and 229  ghz are found in virtual overlap with class  ii 6.7  ghz masers . to the south of the outflow
 , the 36  ghz masers are scattered over a large region but usually do not appear coincident with 44  ghz masers .
 in dr21w we detect an `` s - curve '' signature in stokes v that implies a large value of the magnetic field strength if interpreted as due to zeeman splitting , suggesting either that class  i masers may exist at higher densities than previously believed or that the direct zeeman interpretation of s - curve stokes v profiles in class  i masers may be incorrect .
 we find a diverse variety of different maser phenomena in these sources , suggestive of differing physical conditions among them .

# Output:
we report on the first arcsecond - resolution images of the 36.169  ghz @xmath0 line of methanol in dr21(oh ), dr21w, and dr21n on 2010 may 26.
 the images were obtained using the australia telescope compact array and the expanded very large array ( evla ). the evla was in its most compact ( d ) configuration, providing a synthesized beamwidth of approximately @xxmath1.
 all three sources were observed in dual circular polarization centered on a fixed sky frequency of approximately 0.5 mhz. the data
--------------------------
[2]

# Article:
coupled - cluster ( cc ) method  @xcite is a powerful and ubiquitous technique for solving quantum many - body problem .
 let us briefly recapitulate general features of the cc method , so we can motivate our further discussion
 . at the heart of the cc method lies the exponential ansatz for the exact many - body wavefunction @xmath5 here @xmath6 is the cluster operator involving amplitudes @xmath7 of @xmath8-fold particle - hole excitations from the reference slater determinant @xmath9 . the parametrization   is derived from rigorous re - summation of many - body perturbation theory ( mbpt ) series . from solving the eigenvalue equation one determines the cluster amplitudes and the associated energies .
 while the ansatz  ( [ eq : ccparam ] ) contains an _ infinite _ number of terms due to expansion of the exponent , the resulting equations for cluster amplitudes @xmath7 contain a _
 finite _ number of terms .
 this simplifying property is unfortunately lost when the resulting wavefunctions are used in calculations of matrix elements : upon expansion of exponents the number of terms becomes infinite .
 indeed , consider matrix elements of an operator @xmath10 , e.g. , transition amplitude between two states @xmath11 with normalization @xmath12 .
 it is clear that both the numerator and denominator have infinite numbers of terms , e.g. , @xmath13 in this paper we address a question of partially summing the terms of the above expansion for matrix elements , so that the result subsumes an infinite number of terms .
 more specifically we are interested in transitions between states of univalent atoms , such as alkali - metal atoms .
 there has been a number of relativistic coupled - cluster calculations for these systems @xcite . in particular , calculations
 @xcite ignore the non - linear terms ( @xmath14 and @xmath15 ) in the expansion  ( [ eq : zmelseries ] ) ; we will designate this approximation as linearized coupled - cluster ( lcc ) method .
 at the same time , it is well established that for the univalent atoms an important chain of many - body diagrams for matrix elements comes from so - called random - phase approximation ( rpa ) .
 a direct comparison of the rpa series and the truncated lcc expansion in ref .
 @xcite leads to a conclusion that a fraction of the rpa chain is missed due to the omitted non - linear terms .
 one of the methods to correct for the missing rpa diagrams has been investigated in ref .  @xcite .
 these authors replaced the bare matrix elements with the dressed matrix elements as prescribed by the rpa method .
 such a direct rpa dressing involved a partial subset of diagrams already included in the cc method , i.e. , it leads to a double - counting of diagrams . to partially rectify this shortcoming , the authors of ref .  @xcite
 have manually removed certain leading - order diagrams , higher - order terms being doubly counted .
 here we present an alternative infinite - summation scheme for rpa chain that avoids the double counting and thus a manual removal of the `` extra '' diagrams .
 in addition to the rpa - like dressing of the coupled - cluster diagrams for matrix elements , we consider another subset of diagrams that leads to a dressing of particle and hole lines in the cc diagrams .
 the leading order corrections due to the dressing scheme presented here arise in the fourth order of mbpt , and in this paper we present a detailed comparison with the relevant fourth - order diagrams . finally , we illustrate our approach with relativistic computation of hyperfine - structure constants and dipole matrix elements for cs atom .
 in addition to dressing corrections we incorporate certain classes of diagrams from the direct fourth - order mbpt calculation ( as in ref .
 @xcite ) , so that the result is complete through the fourth order . to the best of our knowledge ,
 the reported calculations are the first calculations for cs complete through the fourth order of mbpt .

# Abstract:
we consider evaluation of matrix elements with the coupled - cluster method .
 such calculations formally involve infinite number of terms and we devise a method of partial summation ( dressing ) of the resulting series .
 our formalism is built upon an expansion of the product @xmath0 of cluster amplitudes @xmath1 into a sum of @xmath2-body insertions .
 we consider two types of insertions : particle / hole line insertion and two - particle / two - hole random - phase - approximation - like insertion .
 we demonstrate how to `` dress '' these insertions and formulate iterative equations .
 we illustrate the dressing equations in the case when the cluster operator is truncated at single and double excitations . using univalent systems as an example , we upgrade coupled - cluster diagrams for matrix elements with the dressed insertions and highlight a relation to pertinent fourth - order diagrams .
 we illustrate our formalism with relativistic calculations of hyperfine constant @xmath3 and @xmath4 electric - dipole transition amplitude for cs atom . finally , we augment the truncated coupled - cluster calculations with otherwise omitted fourth - order diagrams .
 the resulting analysis for cs is complete through the fourth - order of many - body perturbation theory and reveals an important role of triple and disconnected quadruple excitations .

# Output:
we present an alternative infinite - summation scheme for many - body perturbation theory ( mbpt ) for matrix elements.
 this scheme avoids the double counting and thus a manual removal of the `` extra '' diagrams. in addition to the rpa - like dressing of the coupled - cluster diagrams, we consider another subset of diagrams that leads to a dressing of particle and hole lines, and in this paper we present a detailed comparison with the relevant fourth - order diagrams.
--------------------------
[3]

# Article:
the influence of space charges in proportional counters has been studied theoretically and experimentally and is described in various articles @xcite . there , the main emphasis was put on the drop of the gas gain which is important for measuring charges .
 + for drift chambers in which no charge measurement is foreseen , the gain drop is of secondary importance and the drift time is the relevant information .
 variations in the drift time due to a disturbed electric field lead to a loss in the spatial resolution .
 + this work was done in the context of the development of the atlas muon spectrometer where high background rates coming from neutrons and photons are expected .
 the detector should still work at background rates of 500hz/@xmath0 ( 5 times the expected background)@xcite  with a spatial resolution for a single tube better than 100@xmath1 m .
 + the atlas muon spectrometer will be built from 3 cm diameter drift tubes with a 50@xmath1 m wire in the middle .
 the gas pressure is raised to 3bars absolute in order to reach the desired spatial resolution at the nominal gas gain of @xmath2 .
 if not stated otherwise , all the measurements and calculations described here refer to these operating conditions .
 + important criteria for the choice of the gases are given by the need of a non - flammable gas with a maximum drift time well below 1@xmath1s .
 up to an accumulated charge deposition of 0.6c per cm wire , ageing effects should be excluded . +
 the outline of this article is as follows : in section2 a calculation of the space charge effects is shown , section3 gives a description of the experimental setup in the test beam .
 section 4 shows how the data readout and analysis was done .
 section5 gives results of the gas gain reduction , followed by the treatment of the changes in the drift time in section6 .
 the event - to - event fluctuations which are responsible for the irreducible loss of the spatial resolution are described in section7 .
 the electric field inside a drift tube with radius @xmath3 and wire radius @xmath4 held at potential @xmath5 is given by : @xmath6 this formula is only correct if one can neglect the positive ions that are produced in the avalanche processes .
 they drift towards the cathode and disturb the electric field .
 they screen the positive potential at the wire and lead to a reduction of the electric field near the wire .
 thus one expects a lower gas gain .
 since the total voltage between wire and tube is kept constant , the electric field at large radii is increased if the field near the wire is decreased .
 one expects a change in the drift time because the drift velocity for electrons depends on the electric field .
 + the density of the space charges coming from positive ions can be calculated assuming a homogeneous irradiation within one tube , thus neglecting all effects of event - to - event fluctuations .
 these fluctuations will not change the mean value of the charge density but affect the resolution as will be shown in the last section . +
 the drift velocity @xmath7 of the positive ions is proportional to the reduced electric field @xmath8 where @xmath9 is the gas pressure , @xmath1 the mobility of the gas and the electric field is given by ( [ eq : ef ] ) .
 the maximum ion drift time @xmath10 ( which is the drift time for almost all ions because nearly all of them are produced at the wire ) is obtained by integrating ( [ eq : mue ] ) , @xmath11 note that for the calculation of the above formula the electric field ( [ eq : ef ] ) of the undisturbed tube is used . for high rates , the electric field will change and the maximum ion drift time has to be corrected , as will be shown below .
 + from @xcite ] one can see that the density of the ions @xmath12 is independent of the radial distance @xmath13 from the tube centre in case of a cylindric tube geometry for a homogeneous irradiation .
 since @xmath14 the density of the ions is : @xmath15 with

# Abstract:
this article describes calculations and measurements of space charge effects due to high rate irradiation in high resolution drift tubes .
 two main items are studied : the reduction of the gas gain and changes of the drift time .
 whereas the gain reduction is similar for all gases and unavoidable , the drift time changes depend on the kind of gas that is used .
 the loss in resolution due to high particle rate can be minimized with a suitable gas .
 this behaviour is calculable , allowing predictions for new gas mixtures .

# Output:
the effect of space charges on the spatial resolution of the atlas muon spectrometer is studied theoretically and experimentally and is described in various articles @xcite.
 the main emphasis was put on the drop of the gas gain which is important for measuring charges.
 this work was done in the context of the development of the @xmath0 detector where high background rates coming from neutrons and photons are expected.
--------------------------
[4]

# Article:
one of the most intriguing problems in quantum chromodynamics ( qcd ) is the growth of the cross sections for hadronic interactions with energy .
 as well - known , the increase of energy causes a fast growth of the gluon density and consequently of the cross sections . at very high energies , this growth should not continue indefinitely and at some point , one has to deal with gluon recombination and multiple scattering in order to restore unitarity .
 this interaction between overlapping partons is called saturation and has deserved active studies over the last thirty years @xcite .
 more generally , the large amount of work devoted to the description and understanding of perturbative qcd in the high - energy limit covers the description of saturation on the theoretical side as well as its applications to phenomenology .
 the theoretical contribution comes mainly from the development of non - linear qcd equations describing the evolution of scattering amplitudes towards this limit , together with the search of the solutions to those equations .
 the simplest of such equations is the balitsky - kovchegov ( bk ) equation @xcite , which corresponds to the balitsky - fadin - kuraev - lipatov ( bfkl ) @xcite linear evolution equation with the addition of a non - linear term responsible for the saturation of the growth of gluon density .
 it has been shown @xcite that the bk equation is in the equivalence class of the fisher - kolmogorov - petrovsky - piscounov ( fkpp ) nonlinear partial differential equation @xcite , which admits travelling - wave solutions , translating , in terms of qcd variables , into _ geometric scaling _ as we shall explain below .    from the phenomenological side ,
 the geometric scaling has been observed at the desy @xmath0 collider hera , in the measurements on inclusive @xmath1 scattering @xcite .
 this phenomenological feature of high - energy deep inelastic scattering ( dis ) is expressed as a scaling property of the virtual photon - proton cross section @xmath2 that is , the cross section depends on the scaling variable @xmath3 instead of @xmath4 and @xmath5 separately . here
 @xmath4 is the virtuality of the photon , @xmath6 is the total rapidity , @xmath7 is the bjorken-@xmath7 , related to the centre - of - mass energy through @xmath8 and @xmath9 is an increasing function of @xmath10 called the _
 saturation scale_. the geometric scaling is actually equivalent to the formation of travelling - wave solutions for the bk equation .
 this is thus a remarkable consequence of saturation , which extends arbitrarily far beyond the fully saturated domain , _
 i.e. _ in the dilute regime where saturation effects may seem negligible .    in this paper
 , we use the dipole model @xcite to relate the @xmath1 cross - section to the dipole - proton forward scattering .
 this approach has already been proven successful _
 e.g. _ in @xcite .
 our approach here is to parametrise the dipole - proton amplitude in momentum space , where the travelling - waves have been originally derived .
 we shall discuss the advantages of our method and compare it with previous results in the literature later on .
 the plan of this paper is as follows . in section [ sec : dipole ] , we relate the @xmath1 cross section to the dipole - proton scattering amplitude within the dipole framework .
 we then discuss , in section [ sec : qcd ] how one can describe the dipole scattering amplitude from the properties of the bk equation . in section [ sec : model ]
 we gather all information to build the complete model for the proton structure function . the fitting procedure used to compare our model with the experimental measurements
 is explained in section [ sec : fit ] and the results of the fit are presented in section [ sec : res ] .
 we discuss the link with previous approaches in the literature and possible situations in which our work can find interesting applications in section [ sec : ccl ] .
 picture represe

# Abstract:
we reproduce the dis measurements of the proton structure function at high energy from the dipole model in momentum space .
 to model the dipole - proton forward scattering amplitude , we use the knowledge of asymptotic solutions of the balitsky - kovchegov equation , describing high - energy qcd in the presence of saturation effects .
 we compare our results with the previous analysis in coordinate space and discuss possible extensions of our approach .

# Output:
we study the dipole - proton scattering amplitude in momentum space, where the travelling - waves have been originally derived.
 we compare it with previous results in the literature.
--------------------------
```

## Papers facebook/bart-base - Case 2

- Model Name: `facebook/bart-base`

- Dataset:` ccdv/arxiv-summarization`

- Title token length: `1024`
- Sum token length: `256`

- Dataset train length: `5000`
- Dataset val length: `2500`

- Output length: `500`

### Score
```
Model Name: facebook/bart-base
Dataset: ccdv/arxiv-summarization


Scores Score(precision=0.2798490268065265, recall=0.025413400871826343, fmeasure=0.0458198253265496)
{'eval_loss': 2.142030715942383, 'eval_rouge2_precision': 0.2798490268065265, 'eval_rouge2_recall': 0.025413400871826343, 'eval_rouge2_fmeasure': 0.0458198253265496, 'eval_runtime': 437.0109, 'eval_samples_per_second': 5.721, 'eval_steps_per_second': 0.716, 'epoch': 1.0}
Model Name: facebook/bart-base

Scores:
- eval_loss: 2.142030715942383
- eval_rouge2_precision: 0.2798490268065265
- eval_rouge2_recall: 0.025413400871826343
- eval_rouge2_fmeasure: 0.0458198253265496
- eval_runtime: 437.0109
- eval_samples_per_second: 5.721
- eval_steps_per_second: 0.716
- epoch: 1.0
```

### Results

```
[1]

# Article:
methanol masers are often found in star - forming regions .
 there are two sets of transitions seen to produce methanol masers .
 class  i methanol masers ( most importantly the 36 and 44  ghz transitions ) are believed to be collisionally excited , while class  ii masers ( including the 6.7 and 12  ghz transitions ) are radiatively excited @xcite .
 class  i and class  ii methanol masers are sometimes both found in association with the same source ( e.g. , * ? ? ? * ) , but the two classes of masers are very rarely seen at the same velocity or in close ( subarcsecond ) spatial overlap .
 class  i methanol masers , in which shocks dominate over infrared radiation , have often been assumed to be tracing an earlier evolutionary state of star formation than class  ii methanol , water , or oh masers ( e.g. , * ? ? ?
 * ; * ? ? ?
 subcategorization of class  i masers by physical conditions may be possible @xcite , leading some authors to speculate that line intensity ratios among the class  i masers may be a proxy for evolutionary stage @xcite .
 however , class  i maser studies have traditionally been biased towards regions hosting other tracers of star formation , and the cluster environments in which class  i masers are found are usually quite complex , calling into question traditional models of the evolutionary timeline of class  i masers ( section 4.4 of * ? ? ? * and references therein ) .
 furthermore , class  i masers have typically been observed with single - dish telescopes , which can identify whether or not a particular class  i transition produces masers in a region ( and how bright they are ) but do not have the resolution to determine their location relative to masers in other transitions .
 given the complex environments associated with clustered star formation , high angular resolution is required to identify the relations between masers and excitation sources ( e.g. , * ? ? ? * ) and between multiple transitions of methanol @xcite .
 higher angular resolution is also necessary to understand the physical conditions that produce masers in each of the class  i transitions , which may not be identical ( e.g. , * ? ? ?
 * ; * ? ? ?
 * ; * ? ? ?
 these concerns motivated @xcite to do an unbiased single - dish search for class  i methanol masers in nearby molecular clouds , resulting in the detection of new class  i maser features . several sites within these clouds
 host previously known 44  ghz methanol masers , many of which have been mapped interferometrically ( e.g. , * ? ? ?
 * ; * ? ? ?
 * ; * ? ? ?
 imaging the 36  ghz masers , the other bright transition seen in numerous sources @xcite , has heretofore not been possible due to the lack of interferometers operating at this frequency .
 however , recent upgrades to the australia telescope compact array and the expanded very large array ( evla ) are allowing the first arcsecond - resolution images of 36  ghz masers to be produced @xcite . in this letter
 , we report on the first evla maps of the 36  ghz masers in the dr21 star - forming complex .
 the evla was used to observe the 36.169  ghz @xmath0 line of methanol in dr21(oh ) , dr21w , and dr21n on 2010 may 26 . the array consisted of the 20 telescopes outfitted with ka - band receivers .
 the evla was in its most compact ( d ) configuration , providing a synthesized beamwidth of approximately @xmath1 .
 all three sources were observed in dual circular polarization centered on a fixed sky frequency of 36.1731  ghz and correlated with the new widar correlator .
 the 4  mhz observing bandwidth was divided into 256 spectral channels , giving a velocity coverage of 33  kms@xmath2 with a channel spacing of 0.13  kms@xmath2 .
 conversion from sky frequency to lsr velocity was performed with the assistance of the evla online dopset tool .
 total on - source observing time was @xmath3  min per source .
 typical single - channel noise levels were @xmath4  mjybeam@xmath2 near the center of the f

# Abstract:
class  i methanol masers are believed to be produced in the shock - excited environment around star - forming regions .
 many authors have argued that the appearance of various subsets of class  i masers may be indicative of specific evolutionary stages of star formation or excitation conditions . until recently , however , no major interferometer was capable of imaging the important 36  ghz transition .
 we report on expanded very large array observations of the 36  ghz methanol masers and submillimeter array observations of the 229  ghz methanol masers in dr21(oh ) , dr21n , and dr21w .
 the distribution of 36  ghz masers in the outflow of dr21(oh ) is similar to that of the other class  i methanol transitions , with numerous multitransition spatial overlaps .
 at the site of the main continuum source in dr21(oh ) , class  i masers at 36 and 229  ghz are found in virtual overlap with class  ii 6.7  ghz masers . to the south of the outflow
 , the 36  ghz masers are scattered over a large region but usually do not appear coincident with 44  ghz masers .
 in dr21w we detect an `` s - curve '' signature in stokes v that implies a large value of the magnetic field strength if interpreted as due to zeeman splitting , suggesting either that class  i masers may exist at higher densities than previously believed or that the direct zeeman interpretation of s - curve stokes v profiles in class  i masers may be incorrect .
 we find a diverse variety of different maser phenomena in these sources , suggestive of differing physical conditions among them .

# Output:
we report on the first arcsecond - resolution images of the 36.169  ghz @xmath0 line of methanol in dr21(oh ), dr21w, and dr21n on 2010 may 26.
 we present a single - dish search for class  i masers in the dr21 star - forming complex using the australia telescope compact array and the expanded very large array ( evla ). in this paper
, we present the first high - angular resolution ( arcsecond ) images of class  ii masers seen in nearby molecular clouds.
--------------------------
[2]

# Article:
coupled - cluster ( cc ) method  @xcite is a powerful and ubiquitous technique for solving quantum many - body problem .
 let us briefly recapitulate general features of the cc method , so we can motivate our further discussion
 . at the heart of the cc method lies the exponential ansatz for the exact many - body wavefunction @xmath5 here @xmath6 is the cluster operator involving amplitudes @xmath7 of @xmath8-fold particle - hole excitations from the reference slater determinant @xmath9 . the parametrization   is derived from rigorous re - summation of many - body perturbation theory ( mbpt ) series . from solving the eigenvalue equation one determines the cluster amplitudes and the associated energies .
 while the ansatz  ( [ eq : ccparam ] ) contains an _ infinite _ number of terms due to expansion of the exponent , the resulting equations for cluster amplitudes @xmath7 contain a _
 finite _ number of terms .
 this simplifying property is unfortunately lost when the resulting wavefunctions are used in calculations of matrix elements : upon expansion of exponents the number of terms becomes infinite .
 indeed , consider matrix elements of an operator @xmath10 , e.g. , transition amplitude between two states @xmath11 with normalization @xmath12 .
 it is clear that both the numerator and denominator have infinite numbers of terms , e.g. , @xmath13 in this paper we address a question of partially summing the terms of the above expansion for matrix elements , so that the result subsumes an infinite number of terms .
 more specifically we are interested in transitions between states of univalent atoms , such as alkali - metal atoms .
 there has been a number of relativistic coupled - cluster calculations for these systems @xcite . in particular , calculations
 @xcite ignore the non - linear terms ( @xmath14 and @xmath15 ) in the expansion  ( [ eq : zmelseries ] ) ; we will designate this approximation as linearized coupled - cluster ( lcc ) method .
 at the same time , it is well established that for the univalent atoms an important chain of many - body diagrams for matrix elements comes from so - called random - phase approximation ( rpa ) .
 a direct comparison of the rpa series and the truncated lcc expansion in ref .
 @xcite leads to a conclusion that a fraction of the rpa chain is missed due to the omitted non - linear terms .
 one of the methods to correct for the missing rpa diagrams has been investigated in ref .  @xcite .
 these authors replaced the bare matrix elements with the dressed matrix elements as prescribed by the rpa method .
 such a direct rpa dressing involved a partial subset of diagrams already included in the cc method , i.e. , it leads to a double - counting of diagrams . to partially rectify this shortcoming , the authors of ref .  @xcite
 have manually removed certain leading - order diagrams , higher - order terms being doubly counted .
 here we present an alternative infinite - summation scheme for rpa chain that avoids the double counting and thus a manual removal of the `` extra '' diagrams .
 in addition to the rpa - like dressing of the coupled - cluster diagrams for matrix elements , we consider another subset of diagrams that leads to a dressing of particle and hole lines in the cc diagrams .
 the leading order corrections due to the dressing scheme presented here arise in the fourth order of mbpt , and in this paper we present a detailed comparison with the relevant fourth - order diagrams . finally , we illustrate our approach with relativistic computation of hyperfine - structure constants and dipole matrix elements for cs atom .
 in addition to dressing corrections we incorporate certain classes of diagrams from the direct fourth - order mbpt calculation ( as in ref .
 @xcite ) , so that the result is complete through the fourth order . to the best of our knowledge ,
 the reported calculations are the first calculations for cs complete through the fourth order of mbpt .

# Abstract:
we consider evaluation of matrix elements with the coupled - cluster method .
 such calculations formally involve infinite number of terms and we devise a method of partial summation ( dressing ) of the resulting series .
 our formalism is built upon an expansion of the product @xmath0 of cluster amplitudes @xmath1 into a sum of @xmath2-body insertions .
 we consider two types of insertions : particle / hole line insertion and two - particle / two - hole random - phase - approximation - like insertion .
 we demonstrate how to `` dress '' these insertions and formulate iterative equations .
 we illustrate the dressing equations in the case when the cluster operator is truncated at single and double excitations . using univalent systems as an example , we upgrade coupled - cluster diagrams for matrix elements with the dressed insertions and highlight a relation to pertinent fourth - order diagrams .
 we illustrate our formalism with relativistic calculations of hyperfine constant @xmath3 and @xmath4 electric - dipole transition amplitude for cs atom . finally , we augment the truncated coupled - cluster calculations with otherwise omitted fourth - order diagrams .
 the resulting analysis for cs is complete through the fourth - order of many - body perturbation theory and reveals an important role of triple and disconnected quadruple excitations .

# Output:
we present an alternative infinite - summation scheme for matrix elements that avoids the double counting and thus a manual removal of the `` extra '' diagrams.
 in addition to the rpa - like dressing of the coupled - cluster diagrams, we consider another subset of diagrams that leads to a dressing of particle and hole lines in the cc diagrams, and in this paper we present a detailed comparison with the relevant fourth - order diagrams.
--------------------------
[3]

# Article:
the influence of space charges in proportional counters has been studied theoretically and experimentally and is described in various articles @xcite . there , the main emphasis was put on the drop of the gas gain which is important for measuring charges .
 + for drift chambers in which no charge measurement is foreseen , the gain drop is of secondary importance and the drift time is the relevant information .
 variations in the drift time due to a disturbed electric field lead to a loss in the spatial resolution .
 + this work was done in the context of the development of the atlas muon spectrometer where high background rates coming from neutrons and photons are expected .
 the detector should still work at background rates of 500hz/@xmath0 ( 5 times the expected background)@xcite  with a spatial resolution for a single tube better than 100@xmath1 m .
 + the atlas muon spectrometer will be built from 3 cm diameter drift tubes with a 50@xmath1 m wire in the middle .
 the gas pressure is raised to 3bars absolute in order to reach the desired spatial resolution at the nominal gas gain of @xmath2 .
 if not stated otherwise , all the measurements and calculations described here refer to these operating conditions .
 + important criteria for the choice of the gases are given by the need of a non - flammable gas with a maximum drift time well below 1@xmath1s .
 up to an accumulated charge deposition of 0.6c per cm wire , ageing effects should be excluded . +
 the outline of this article is as follows : in section2 a calculation of the space charge effects is shown , section3 gives a description of the experimental setup in the test beam .
 section 4 shows how the data readout and analysis was done .
 section5 gives results of the gas gain reduction , followed by the treatment of the changes in the drift time in section6 .
 the event - to - event fluctuations which are responsible for the irreducible loss of the spatial resolution are described in section7 .
 the electric field inside a drift tube with radius @xmath3 and wire radius @xmath4 held at potential @xmath5 is given by : @xmath6 this formula is only correct if one can neglect the positive ions that are produced in the avalanche processes .
 they drift towards the cathode and disturb the electric field .
 they screen the positive potential at the wire and lead to a reduction of the electric field near the wire .
 thus one expects a lower gas gain .
 since the total voltage between wire and tube is kept constant , the electric field at large radii is increased if the field near the wire is decreased .
 one expects a change in the drift time because the drift velocity for electrons depends on the electric field .
 + the density of the space charges coming from positive ions can be calculated assuming a homogeneous irradiation within one tube , thus neglecting all effects of event - to - event fluctuations .
 these fluctuations will not change the mean value of the charge density but affect the resolution as will be shown in the last section . +
 the drift velocity @xmath7 of the positive ions is proportional to the reduced electric field @xmath8 where @xmath9 is the gas pressure , @xmath1 the mobility of the gas and the electric field is given by ( [ eq : ef ] ) .
 the maximum ion drift time @xmath10 ( which is the drift time for almost all ions because nearly all of them are produced at the wire ) is obtained by integrating ( [ eq : mue ] ) , @xmath11 note that for the calculation of the above formula the electric field ( [ eq : ef ] ) of the undisturbed tube is used . for high rates , the electric field will change and the maximum ion drift time has to be corrected , as will be shown below .
 + from @xcite ] one can see that the density of the ions @xmath12 is independent of the radial distance @xmath13 from the tube centre in case of a cylindric tube geometry for a homogeneous irradiation .
 since @xmath14 the density of the ions is : @xmath15 with

# Abstract:
this article describes calculations and measurements of space charge effects due to high rate irradiation in high resolution drift tubes .
 two main items are studied : the reduction of the gas gain and changes of the drift time .
 whereas the gain reduction is similar for all gases and unavoidable , the drift time changes depend on the kind of gas that is used .
 the loss in resolution due to high particle rate can be minimized with a suitable gas .
 this behaviour is calculable , allowing predictions for new gas mixtures .

# Output:
the influence of space charges in proportional counters has been studied theoretically and experimentally and is described in various articles @xcite.
 the main emphasis was put on the drop of the gas gain which is important for measuring charges. in this article
 we study the effect of space charge fluctuations on the spatial resolution of the atlas muon spectrometer.
--------------------------
[4]

# Article:
one of the most intriguing problems in quantum chromodynamics ( qcd ) is the growth of the cross sections for hadronic interactions with energy .
 as well - known , the increase of energy causes a fast growth of the gluon density and consequently of the cross sections . at very high energies , this growth should not continue indefinitely and at some point , one has to deal with gluon recombination and multiple scattering in order to restore unitarity .
 this interaction between overlapping partons is called saturation and has deserved active studies over the last thirty years @xcite .
 more generally , the large amount of work devoted to the description and understanding of perturbative qcd in the high - energy limit covers the description of saturation on the theoretical side as well as its applications to phenomenology .
 the theoretical contribution comes mainly from the development of non - linear qcd equations describing the evolution of scattering amplitudes towards this limit , together with the search of the solutions to those equations .
 the simplest of such equations is the balitsky - kovchegov ( bk ) equation @xcite , which corresponds to the balitsky - fadin - kuraev - lipatov ( bfkl ) @xcite linear evolution equation with the addition of a non - linear term responsible for the saturation of the growth of gluon density .
 it has been shown @xcite that the bk equation is in the equivalence class of the fisher - kolmogorov - petrovsky - piscounov ( fkpp ) nonlinear partial differential equation @xcite , which admits travelling - wave solutions , translating , in terms of qcd variables , into _ geometric scaling _ as we shall explain below .    from the phenomenological side ,
 the geometric scaling has been observed at the desy @xmath0 collider hera , in the measurements on inclusive @xmath1 scattering @xcite .
 this phenomenological feature of high - energy deep inelastic scattering ( dis ) is expressed as a scaling property of the virtual photon - proton cross section @xmath2 that is , the cross section depends on the scaling variable @xmath3 instead of @xmath4 and @xmath5 separately . here
 @xmath4 is the virtuality of the photon , @xmath6 is the total rapidity , @xmath7 is the bjorken-@xmath7 , related to the centre - of - mass energy through @xmath8 and @xmath9 is an increasing function of @xmath10 called the _
 saturation scale_. the geometric scaling is actually equivalent to the formation of travelling - wave solutions for the bk equation .
 this is thus a remarkable consequence of saturation , which extends arbitrarily far beyond the fully saturated domain , _
 i.e. _ in the dilute regime where saturation effects may seem negligible .    in this paper
 , we use the dipole model @xcite to relate the @xmath1 cross - section to the dipole - proton forward scattering .
 this approach has already been proven successful _
 e.g. _ in @xcite .
 our approach here is to parametrise the dipole - proton amplitude in momentum space , where the travelling - waves have been originally derived .
 we shall discuss the advantages of our method and compare it with previous results in the literature later on .
 the plan of this paper is as follows . in section [ sec : dipole ] , we relate the @xmath1 cross section to the dipole - proton scattering amplitude within the dipole framework .
 we then discuss , in section [ sec : qcd ] how one can describe the dipole scattering amplitude from the properties of the bk equation . in section [ sec : model ]
 we gather all information to build the complete model for the proton structure function . the fitting procedure used to compare our model with the experimental measurements
 is explained in section [ sec : fit ] and the results of the fit are presented in section [ sec : res ] .
 we discuss the link with previous approaches in the literature and possible situations in which our work can find interesting applications in section [ sec : ccl ] .
 picture represe

# Abstract:
we reproduce the dis measurements of the proton structure function at high energy from the dipole model in momentum space .
 to model the dipole - proton forward scattering amplitude , we use the knowledge of asymptotic solutions of the balitsky - kovchegov equation , describing high - energy qcd in the presence of saturation effects .
 we compare our results with the previous analysis in coordinate space and discuss possible extensions of our approach .

# Output:
we study the dipole - proton scattering amplitude in momentum space, where the travelling - waves have been originally derived.
 we compare our results with previous results in the literature on inclusive @xmath1 scattering.
--------------------------

```

# Compare results with a distilled model

>*Note: Results of CNN `facebook/bart-base` in previous sections.*

Model Name: `sshleifer/distilbart-cnn-12-6`

Dataset: `cnn_dailymail`

Output Length: `200`

## Score
```
Scores {'rouge2': AggregateScore(low=Score(precision=0.15804034367389377, recall=0.21168221868567605, fmeasure=0.1764605071668387), mid=Score(precision=0.16120423445273893, recall=0.21561135712654128, fmeasure=0.1798326727209022), high=Score(precision=0.16402693459257345, recall=0.21917607945417777, fmeasure=0.18268285313385296)), 'rougeLsum': AggregateScore(low=Score(precision=0.23755432838754056, recall=0.3206821377468737, fmeasure=0.26648763304043194), mid=Score(precision=0.24071940342655035, recall=0.3244017145688574, fmeasure=0.2696405687366908), high=Score(precision=0.24399399994366885, recall=0.32838803266764244, fmeasure=0.27281859616902615))}
{'eval_loss': 8.794404983520508, 'eval_rouge2_precision': 0.16120423445273893, 'eval_rouge2_recall': 0.21561135712654128, 'eval_rouge2_fmeasure': 0.1798326727209022, 'eval_runtime': 2844.5044, 'eval_samples_per_second': 1.758, 'eval_steps_per_second': 0.22}


Scores:
- eval_loss: 8.794404983520508
- eval_rouge2_precision: 0.16120423445273893
- eval_rouge2_recall: 0.21561135712654128
- eval_rouge2_fmeasure: 0.1798326727209022
- eval_runtime: 2844.5044
- eval_samples_per_second: 1.758
- eval_steps_per_second: 0.22
```


## Results
```
[1]

# Article:
(CNN)It was an act of frustration perhaps more commonly associated with golf's fictional anti-hero Happy Gilmore than the world's reigning No 1. player. But when Rory McIlroy pulled his second shot on the eighth hole of the WGC Cadillac Championship into a lake Friday, he might as well have been channeling the much loved Adam Sandler character. Before continuing his round with a dropped ball, the four-time major winner launched the 3-iron used to play the offending shot into the water as well. "(It) felt good at the time," a rueful McIlroy later said of the incident in comments carried by the PGA Tour website. "I just let frustration get the better of me. It was heat of the moment, and I mean, if it had of been any other club I probably wouldn't have but I didn't need a 3‑iron for the rest of the round so I thought, why not." The club "must have went a good 60, 70 yards," he joked. McIlroy composed himself to finish with a second round of 70, leaving him one-under for the tournament and eight shots off the pace set by leader JB Holmes. While an improvement on last weeks performance at the Honda Classic event, where he failed to make the cut, the Northern Irishman's frustration with elements of his game was still clear. "I think every golfer feels it because I don't hit shots like the one I hit on 8 on the range," he said. "That's what really bothers me, the fact that I get out on the course and I hit shots that I'm not seeing when I'm in a more relaxed environment. "So it's a little bit of mental, a little bit of physical. It's just everything is not quite matching up." Elsewhere on the course, Ryan Holmes scored a two-under-par 71 to remain in second position overall, two shots behind Holmes. Former world No 1., Adam Scott carded an impressive 68 to finish the day three shots off the pace at six-under while Bubba Watson and Henrik Stenson are tied for fourth on four-under.

# Highlights:
Rory McIlroy throws club into water at WGC Cadillac Championship .
Northern Irishman frustrated after pulling shot into water hazard .

# Output:
 Rory McIlroy pulls his second shot into a lake at the WGC Cadillac Championship. The world No. 1 is eight shots off the pace set by leader JB Holmes. The Northern Irishman says he "let frustration get the better of me" The four-time major winner also launched the 3-iron used to play the shot into the water.
--------------------------
[2]

# Article:
(CNN)Manchester United defender Jonny Evans and Newcastle United striker Papiss Cisse have been charged by the Football Association for allegedly spitting during an altercation in Wednesday night's Premier League game at St James' Park. In a statement, English football's governing body said the players had been charged "in relation to an alleged breach of FA Rule E1[a] in that in or around the 38th minute of the game the two players spat at each other. "The incidents were not seen by the match officials but [were] caught on video." The players have until 6pm GMT on Friday to respond to the charge, and could face six-game bans if found guilty. Both Evans and Cisse released statements the day after the incident, with Evans saying: "I would like to make it clear that I did not spit at Papiss Cisse." Cisse's statement said: "I reacted to something I found very unpleasant. Sometimes it is hard not to react, particularly in the heat of the moment. I have always tried hard to be positive a role model, especially for our young fans, and yesterday I let you down." Spitting at another player is considered beyond the pale by professional footballers, and former Liverpool midfielder Dietmar Hamann, now a TV pundit, told the BBC's Match of the Day programme the incident had been "disgusting." "This is not acceptable," he said. "There are kids watching. Something has to be done. The behaviour towards each other and the referee is deteriorating on a weekly basis." Ex-Manchester United midfielder Paul Scholes said he did not believe Evans had deliberately spat at Cisse, telling BT Sport: "Look, it's not very nice. I think Jonny is spitting on the floor. "I know Jonny -- he's not that type of person. If he wants to do that then it's not hard to miss, is it? He's only stood a yard away from him. What Cisse does afterwards is unforgivable." And former Liverpool player Steve McManaman told the channel: "Cisse stands up and spits right at Jonny Evans' neck from about six inches. It's absolutely disgusting. "Two wrongs do not make a right. If Jonny Evans has spat at him then it's wrong, but for Papiss Cisse to get up and react like that is absolutely disgusting. We talk about bad tackles, but that is worse."

# Highlights:
Alleged incident happened in match at St James' Park .
Players face six-match ban if found guilty .
Evans denied spitting in statement .
Cisse statement says: "I let you down"

# Output:
 Jonny Evans and Papiss Cisse have been charged by the Football Association for allegedly spitting at each other. The incident occurred during Wednesday night's Premier League game at St James' Park. The players have until 6pm GMT on Friday to respond to the charge and could face six-game bans if found guilty.
--------------------------
[3]

# Article:
(CNN)The search for a comic book artist missing in the Cayman Islands since Thursday is now being called a recovery mission. Norman Lee, an artist for DC and Marvel comics, went missing while snorkeling with his wife off the eastern coast of Grand Cayman, CNN affiliate WCVB reported. Strong currents hindered the search, which lasted until Friday evening, Cayman 27 reported. "It is unlikely that we will make any recovery at this stage," Chief Inspector Brad Ebanks told Cayman 27. Lee, 47, of Weymouth, Massachusetts, was known and for his work on "Wolverine Annual," "Supergirl," "Starman" and other comic book titles. Tributes flooded his Facebook page and Twitter from friends, fans and colleagues who knew him from art school and comic conventions. "I cannot express how shaken I am that I will never get the chance to see that smile again, and it saddens me that this world has lost a wonderful man in Norman Lee. To his wife Jan, and his family and all his friends and fans that loved him, my sincerest condolences," friend and fellow graphic artist Chris Kinniery said on Facebook. "I'm so sorry to hear about Norman Lee's disappearance. My condolences go out to his family. ... He was an amazing talent in the industry and it was always a pleasure to work with him," freelance artist .

# Highlights:
Comic book artist Norman Lee went missing in the Cayman Islands on Thursday .
Authorities called off search on Friday evening .

# Output:
 Norman Lee, an artist for DC and Marvel comics, went missing while snorkeling off the eastern coast of Grand Cayman. Strong currents hindered the search, which lasted until Friday evening. Lee, 47, of Weymouth, Massachusetts, was known for his work on "Wolverine Annual," "Supergirl," "Starman" and other titles.
--------------------------
[4]

# Article:
New Delhi (CNN)Thankfully, no one was wounded after crude bombs were hurled at a Tamil news station in India on Thursday. But the loud explosions injured a vital part of the world's largest democracy: free speech. Last week, when India's government and a British documentarian faced off over a film featuring a man imprisoned for a 2012 gang rape in South Delhi, a little-known channel hundreds of miles away in southern India was waging its own battle. Hardline Hindu groups were angry with broadcaster Puthiya Thalaimurai for filming a show about the relevance of a traditional necklace -- called mangalsutra in Hindi and thaali in Tamil -- worn by married Indian women. For them, the contents, as shown in the promos, were offensive to Hindu culture. The station planned to release the program Sunday, International Women's Day. But it canceled the telecast after demonstrations took place outside its office. Protesters allegedly attacked one of its cameramen. Four days later, the channel came under fire again, when four men on two motorbikes threw bombs into its compound in a predawn attack, authorities say. Six people involved in the bombing have been arrested, said S. George, the commissioner of the southern Indian city of Chennai. Their leader turned himself in separately, claiming responsibility for the attack, police said. "The show wanted to give women a platform. We welcome all opinions and thoughts. But you cannot strangle freedom of free expression by violent means and threats," said Shyam Kumar, the CEO of New Generation Media Corp., which runs Puthiya Thalaimurai. "We condemn the attack in the strongest possible terms," he told CNN. 'India's Daughter,' the film banned by India: What did it show? But India is no stranger to censorship imposed legally or forced by rowdy protesters. The country's constitution guarantees freedom of expression, but not without restrictions. Communities or people claiming their religious sentiments were hurt by anyone else's opinion can file a lawsuit. Authorities can seek restraining orders from local courts -- as they did to ban the recent BBC documentary "India's Daughter" -- by citing potential disorder. Earlier last year, Penguin India withdrew "The Hindus: An Alternative History," a book by American academic Wendy Doniger, after a local advocacy group accused the writer of denigrating Hinduism. In December, a Bollywood movie, "PK," came under attack over similar accusations when mobs tore apart its posters in parts of India. A satire on religious rituals, "PK" became a roaring success by being one of the country's highest-grossing movies. But India, home to one of the world's largest film industries, has blocked several movies from screening. At least two films were not allowed last year. One of them featured the lives of the Sikh assassins of Prime Minister Indira Gandhi, and the other centered on the violence in Sri Lanka in the closing months of its civil war. Hounded by protests over his novel, Perumal Murugan, a Tamil author, announced quitting writing in a dramatic post on Facebook in January. "Perumal Murugan, the writer is dead. As he is no God, he is not going to resurrect himself. He has no faith in rebirth. As an ordinary teacher, he will live as P Murugan. Leave him alone," he  said on Facebook two months ago. Religious and caste-based organizations had slammed his novel "Madhorubhagan," which depicted a childless wife taking part in an ancient festival allowing consensual sex between strangers. Just last week, India blocked the BBC from airing "India's Daughter" because it included comments from one of the men convicted of raping a young student in a moving bus in New Delhi in 2012. The reason: The inmate's views could create unrest. "There's a growing intolerance towards different shades of opinion. It's a medieval mindset. What India needs is a concerted effort to move beyond it and embrace free expression in totality," said Kumar, the New Generation Media chief executive.

# Highlights:
Indian broadcaster Puthiya Thalaimurai drew protests for a show about traditional necklaces worn by married women .
This comes after India banned a film featuring a man imprisoned for a gang rape in South Delhi .
"There's a growing intolerance towards different shades of opinion," a broadcasting company CEO says .

# Output:
 Hardline Hindu groups were angry with Puthiya Thalaimurai for filming a show about the relevance of a traditional necklace worn by married Indian women. The station planned to release the program Sunday, International Women's Day, but canceled the telecast. Four days later, four men on two motorbikes threw bombs into its compound in a predawn attack, authorities say.
--------------------------
```