In [1]:

import os
import numpy as np
import pandas as pd
# import matplotlib.pyplot as plt
from tqdm import tqdm
from datasets import load_dataset
import torch

import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, SwitchTransformersForConditionalGeneration

from langchain.prompts import PromptTemplate

from IPython.display import Markdown, display
import csv
from dataset_write_utils import SizeCappingFileWriter

In [2]:
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

### Load the dataset

In [3]:
from datasets import load_dataset
from tqdm import tqdm

dataset = load_dataset("cnn_dailymail",'1.0.0')
dataset

DatasetDict({
    train: Dataset({
        features: ['article', 'highlights', 'id'],
        num_rows: 287113
    })
    validation: Dataset({
        features: ['article', 'highlights', 'id'],
        num_rows: 13368
    })
    test: Dataset({
        features: ['article', 'highlights', 'id'],
        num_rows: 11490
    })
})

In [4]:
dataset['test'][0]

{'article': '(CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based. The Palestinians signed the ICC\'s founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the situation in Palestinian territories, paving the way for possible war crimes investigations against Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and the United States, neither of which is an ICC member, opposed the Palestinians\' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday

In [5]:
template = """Summarize: {prompt}"""

prompt = PromptTemplate(template=template, input_variables=['prompt'])
prompt

PromptTemplate(input_variables=['prompt'], template='Summarize: {prompt}')

In [6]:
sample = dataset['test'][0]
display(Markdown(prompt.format(prompt=sample['article'])))

Summarize: (CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based. The Palestinians signed the ICC's founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the situation in Palestinian territories, paving the way for possible war crimes investigations against Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and the United States, neither of which is an ICC member, opposed the Palestinians' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday's ceremony, said it was a move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the world is also a step closer to ending a long era of impunity and injustice," he said, according to an ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine acquires all the rights as well as responsibilities that come with being a State Party to the Statute. These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should immediately end their pressure, and countries that support universal acceptance of the court's treaty should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the group. "What's objectionable is the attempts to undermine international justice, not Palestine's decision to join a treaty to which over 100 countries around the world are members." In January, when the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an outrage, saying the court was overstepping its boundaries. The United States also said it "strongly" disagreed with the court's decision. "As we have said repeatedly, we do not believe that Palestine is a state and therefore we do not believe that it is eligible to join the ICC," the State Department said in a statement. It urged the warring sides to resolve their differences through direct negotiations. "We will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace," it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou Bensouda said her office would "conduct its analysis in full independence and impartiality." The war between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry will include alleged war crimes committed since June. The International Criminal Court was set up in 2002 to prosecute genocide, crimes against humanity and war crimes. CNN's Vasco Cotovio, Kareem Khadder and Faith Karimi contributed to this report.

In [7]:
def format_text(example):
    text = prompt.format(prompt=example['article'] )
    return {"text": text}

In [8]:
dataset = dataset.map(format_text)
dataset

Map:   0%|          | 0/287113 [00:00<?, ? examples/s]

Map:   0%|          | 0/13368 [00:00<?, ? examples/s]

Map:   0%|          | 0/11490 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['article', 'highlights', 'id', 'text'],
        num_rows: 287113
    })
    validation: Dataset({
        features: ['article', 'highlights', 'id', 'text'],
        num_rows: 13368
    })
    test: Dataset({
        features: ['article', 'highlights', 'id', 'text'],
        num_rows: 11490
    })
})

### Load Model

In [9]:

tokenizer = AutoTokenizer.from_pretrained("google/switch-base-32")
model = SwitchTransformersForConditionalGeneration.from_pretrained("google/switch-base-32").to(device)
model

SwitchTransformersForConditionalGeneration(
  (shared): Embedding(32128, 768)
  (encoder): SwitchTransformersStack(
    (embed_tokens): Embedding(32128, 768)
    (block): ModuleList(
      (0): SwitchTransformersBlock(
        (layer): ModuleList(
          (0): SwitchTransformersLayerSelfAttention(
            (SelfAttention): SwitchTransformersAttention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): SwitchTransformersLayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): SwitchTransformersLayerFF(
            (mlp): SwitchTransformersDenseActDense(
              (wi): Linear(in_features=768, out_features=3072

### Evaluation

In [10]:
input_ids = tokenizer(
    ("summarize: studies have shown that owning a dog is good for you"), return_tensors="pt", padding=True ).input_ids  # Batch size 1

tokenizer.decode(model.generate(input_ids, return_dict_in_generate=True, encoder_router_logits=False, decoder_router_logits=True, output_logits=True)['sequences'][0])



'<pad><extra_id_0></s>'

In [47]:
def get_summary(text, highlight):
    inputs = tokenizer(text, return_tensors='pt', padding=True)
    num_output_tokens = len(tokenizer(highlight, return_tensors='pt')[0])
    print(num_output_tokens)
    output = model.generate(input_ids=inputs['input_ids'].to(device), attention_mask=inputs['attention_mask'].to(device), 
    return_dict_in_generate=True, encoder_router_logits=False, decoder_router_logits=True, output_logits=True,max_new_tokens=num_output_tokens )

    return output

#### Test run

In [48]:
bar = tqdm(enumerate(dataset['train']), total=len(dataset['train']))
for i, data in bar:
    print(i, data)
    print("Input text:",data['text'])
    print("Expected output:",data['highlights'])
    summary = get_summary(data['text'], data['highlights'])
    # print('Expected answer:', data['answer'], ' Got ans_list:', ans_list)
    print("Model output:", tokenizer.decode(summary['sequences'][0]))
    if i == 0:
        break

  0%|          | 0/287113 [00:00<?, ?it/s]

0 {'article': 'LONDON, England (Reuters) -- Harry Potter star Daniel Radcliffe gains access to a reported £20 million ($41.1 million) fortune as he turns 18 on Monday, but he insists the money won\'t cast a spell on him. Daniel Radcliffe as Harry Potter in "Harry Potter and the Order of the Phoenix" To the disappointment of gossip columnists around the world, the young actor says he has no plans to fritter his cash away on fast cars, drink and celebrity parties. "I don\'t plan to be one of those people who, as soon as they turn 18, suddenly buy themselves a massive sports car collection or something similar," he told an Australian interviewer earlier this month. "I don\'t think I\'ll be particularly extravagant. "The things I like buying are things that cost about 10 pounds -- books and CDs and DVDs." At 18, Radcliffe will be able to gamble in a casino, buy a drink in a pub or see the horror film "Hostel: Part II," currently six places below his number one movie on the UK box office ch

  0%|          | 0/287113 [00:02<?, ?it/s]

Model output: <pad><extra_id_0>. Daniel Radcliffe as Harry Potter. Photo: Reuters. Radcliffe is a British citizen. He is a teenager. He is a teenager.<extra_id_1>. Daniel Radcliffe. Photo: Reuters. Reuters. Reuters. Reuters.





## Full run

In [50]:
def get_db_row(data, ans):
    dataset_row = [data['id']]          ## Dataset ID
    for i,tokens_logit in enumerate(ans.decoder_router_logits):
        layer_router_activation = []
        dataset_row.append(i)           ## Generated Token Number
        for j, layer_logit in enumerate(tokens_logit):
            if j%2 == 1:                ## For Switch T of odd layers are Experts. Change for other models
                layer_router_activation.append(layer_logit[0].reshape(-1))  ## Layerwise expert activation
                layer_router_activation.append(layer_logit[1].reshape(-1))  ## Layerwise expert selected.
        dataset_row.append(layer_router_activation)                        
        dataset_row.append(ans['sequences'][0][i])                     ## Generated Output token.
    return dataset_row

In [54]:
model_name = 'switch_t_base_32E'
dataset_name = 'cnn_dailymail_100' 
with SizeCappingFileWriter(f'{model_name}_{dataset_name}', 5*1024*1024) as file:
    writer = csv.writer(file)
    bar = tqdm(enumerate(dataset['train']), total=len(dataset['train']))
    for i, data in bar:
        summary = get_summary(data['text'], data['highlights'])
        writer.writerow(get_db_row(data, summary))
        if i == 1:
            break

  0%|          | 0/287113 [00:00<?, ?it/s]

57


  0%|          | 1/287113 [00:02<212:03:23,  2.66s/it]

78


  0%|          | 1/287113 [00:06<531:27:04,  6.66s/it]
