# WNC Full Dataset Evaluation

In [1]:
# Figures fed into the step-count arithmetic in the next cell
# (presumably the settings of the fine-tuning run — confirm).
batch_size = 8
epochs = 10
train_samples = 154_197  # equals the train split's num_rows shown further down

In [3]:
# (samples / batch size) x epochs, i.e. a rough count of batches over the whole
# run. The result is fractional because 154197 is not a multiple of 8.
epochs * (train_samples / batch_size)

192746.25

## Load Fine-Tuned Model

In [2]:
import os
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, DataCollatorForSeq2Seq
from datasets import load_from_disk, load_metric

# %load_ext lab_black
# %load_ext tensorboard

In [3]:
# Locations of the preprocessed dataset and of the model checkpoint.
DATASETS_PATH = "/home/cdsw/data/processed/WNC_seq2seq_full"
MODEL_PATH = "/home/cdsw/models/bart-tst-full"

wnc_datasets = load_from_disk(DATASETS_PATH)

# `preprocess_function` and `generate_text` further down read the notebook-level
# `tokenizer` and `model`, so both must be loaded here for the notebook to run
# top to bottom on a fresh kernel.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_PATH)

In [4]:
wnc_datasets

DatasetDict({
    train: Dataset({
        features: ['rev_id', 'source_text', 'target_text'],
        num_rows: 154197
    })
    test: Dataset({
        features: ['rev_id', 'source_text', 'target_text'],
        num_rows: 8577
    })
    validation: Dataset({
        features: ['rev_id', 'source_text', 'target_text'],
        num_rows: 8607
    })
})

In [5]:
wnc_datasets['train'].to_pandas()

Unnamed: 0,rev_id,source_text,target_text
0,118150338,while for long nearly only women where shown a...,"increased tolerance, more tempered censorship,..."
1,304516511,following the end of kenneth kaunda's repressi...,following the end of kenneth kaunda's presiden...
2,16973997,a brilliant quarterback with the university of...,"a quarterback with the university of illinois,..."
3,73550351,traitor to his people adam yahiye gadahn (born...,"adam yahiye gadahn (born september 1, 1978) is..."
4,284110112,a funny thing happened on the way to the moon ...,a funny thing happened on the way to the moon ...
...,...,...,...
154192,12673529,"like many canadian leaders, he married a wife ...","like many canadian leaders, he married a stron..."
154193,397111944,sri lanka won the 1996 cricket world cup by de...,sri lanka won the 1996 cricket world cup by de...
154194,221816193,regardless of how a received message is format...,regardless of how a received message is format...
154195,160860061,"in peloponnesos, at any rate, the revolution h...","in peloponnesos, at any rate, the revolution h..."


## Evaluation Walkthrough

In [3]:
wnc_datasets["test"][:2]

{'rev_id': ['582489816', '660141066'],
 'source_text': ['allegations of apocrypha by opponents of the ppaca',
  'mcgowan had an excellent rookie year and was named golf digest / rolex rookie-of-the-year in 1978.'],
 'target_text': ['allegations by opponents of the ppaca',
  'mcgowan was named golf digest / rolex rookie-of-the-year in 1978.']}

In [5]:
# Take test rows 10..29 as the batch for the walkthrough and display it.
# NOTE(review): the recorded `inputs` / `preds` / `labels` outputs further down
# start with test row 0 ("allegations of apocrypha ..."), so they appear to have
# been produced from a different slice than [10:30] — confirm which is intended.
examples = wnc_datasets["test"][10:30]
examples

{'rev_id': ['52689715',
  '448794606',
  '196857981',
  '825528498',
  '131233720',
  '146585550',
  '771218377',
  '89278271',
  '166767775',
  '186439773',
  '18098702',
  '70113581',
  '59929686',
  '204627922',
  '364872143',
  '100032595',
  '775580555',
  '695323829',
  '76214091',
  '460054252'],
 'source_text': ['according to the best current estimates, nazi germany murdered about 11 million people in the holocaust, including nearly 6 million jews .',
  'although both the balfour declaration and the terms of the league of nations british mandate of palestine called for a jewish national home in palestine, the british repudiated any linkage between palestine and the situation of european jews.',
  "it also marked the last season in quarteback 's brett favre illustrious career as a packer .",
  'rigidly arbitrary interpretations, represented as being the literal meaning of religious texts , is the greatest cause of conflict with evolutionary and cosmological investigations and co

In [7]:
examples['target_text'][2]

"it also marked the last season in quarterback 's brett favre career as a packer."

In [3]:
# Hand-written probe sentences for a quick single-example check.
FERGUSON_SENTENCE = "Sir Alex Ferguson is the greatest football manager of all time."
NEWS_PRESS_SENTENCE = "the news-press was sold by the new york times company in 2000, and is now independently owned by wendy p. mccaw, a local resident and a biased and eerily thick-headed woman ."

# One-row batch using the same column names as the dataset slices.
# "target_text" is only a placeholder: generation reads the source text alone.
# Swap in FERGUSON_SENTENCE to probe the other sentence (previously this cell
# built that dict first and then immediately overwrote it).
testing = {
    "rev_id": 123,
    "source_text": [NEWS_PRESS_SENTENCE],
    "target_text": ["yada"],
}

In [4]:
# Token-length caps used when tokenizing sources and targets; the target cap is
# also passed to `model.generate` as the maximum generation length.
max_source_length = max_target_length = 1024


def preprocess_function(examples: dict):
    """Tokenize a batch of source/target texts into padded PyTorch tensors.

    Args:
        examples: Mapping with "source_text" and "target_text" entries
            (lists of strings in the batches used in this notebook).

    Returns:
        The tokenizer output for the source texts, with the token ids of the
        target texts stored under the "labels" key.

    Relies on the notebook-level `tokenizer`, `max_source_length` and
    `max_target_length`.
    """
    source_texts = examples["source_text"]
    target_texts = examples["target_text"]

    # Both tokenizer calls share these settings; only the texts and the length
    # cap differ.
    tokenize_kwargs = dict(padding=True, truncation=True, return_tensors="pt")

    encoded = tokenizer(
        source_texts, max_length=max_source_length, **tokenize_kwargs
    )

    # Targets are encoded inside the tokenizer's target-tokenizer context.
    with tokenizer.as_target_tokenizer():
        encoded_targets = tokenizer(
            target_texts, max_length=max_target_length, **tokenize_kwargs
        )

    encoded["labels"] = encoded_targets["input_ids"]
    return encoded

In [5]:
# Tokenize the hand-written probe batch. The commented line below is the
# alternative that tokenizes the test-set slice instead.
# model_inputs = preprocess_function(examples)
model_inputs = preprocess_function(testing)

In [6]:
model_inputs["input_ids"].shape

torch.Size([1, 48])

### Generate Text

In [7]:
def generate_text(examples: dict):
    """Generate output text for a batch of examples using beam search.

    Args:
        examples: Mapping accepted by `preprocess_function` (must provide
            "source_text" and "target_text").

    Returns:
        A list of decoded strings with special tokens removed, one per
        generated sequence.

    Relies on the notebook-level `model`, `tokenizer` and `max_target_length`.
    """
    model_inputs = preprocess_function(examples)

    outputs = model.generate(
        model_inputs["input_ids"],
        # Inputs are padded to a common length, so hand over the tokenizer's
        # mask explicitly rather than relying on `generate` to infer which
        # positions are padding.
        attention_mask=model_inputs["attention_mask"],
        max_length=max_target_length,
        min_length=4,
        length_penalty=2,
        num_beams=4,
        early_stopping=True,
    )

    return tokenizer.batch_decode(outputs, skip_special_tokens=True)

In [20]:
preds = generate_text(examples)
preds

['opposition to apocrypha by opponents of the ppaca',
 'mcgowan had a good rookie year and was named golf digest / rolex rookie-of-the-year in 1978.',
 'hcis also organizes a yearly talent competition called the innofest where students in hcis get an opportunity to showcase their talents.',
 'in the north-east of aunis there is a huge forest of hardwood trees, the forest of benon, which has been protected because it is unique to the region.',
 'on march 8, 2009, the impeccable, while monitoring submarine activity 75\xa0miles south of hainan, china, was attacked by several chinese naval ships.',
 'the new term is designed to avoid the social stigma associated with the conflation of "manic" and "depression."',
 'the news-press was sold by the new york times company in 2000, and is now independently owned by wendy p. mccaw, a local resident.',
 'the area in which the palestine pound circulated was divided into several political entities: the state of israel, the hashemite kingdom of trans

In [11]:
preds = generate_text(testing)
preds

['Sir Alex Ferguson is one of the greatest football managers of all time.']

In [8]:
preds = generate_text(testing)
preds

['the news-press was sold by the new york times company in 2000, and is now independently owned by wendy p. mccaw, a local resident.']

In [21]:
labels = examples["target_text"]
labels

['allegations by opponents of the ppaca',
 'mcgowan was named golf digest / rolex rookie-of-the-year in 1978.',
 'hcis also organizes a yearly talent competition called the innofest where students in hcis get an opportunity to showcase their talents.',
 'in the north-east of aunis there is a huge forest of hardwood trees, the forest of benon, which has been protected because it is unique to the region.',
 'on march 8, 2009, the impeccable, while monitoring submarine activity 75\xa0miles south of hainan, china, was engaged by several chinese naval ships.',
 'the new term is designed to avoid the social stigma associated with the conflation of "manic" and "depression."',
 'the news-press was sold by the new york times company in 2000, and is now independently owned by wendy p. mccaw, a local resident and outspoken environmentalist .',
 'the area in which the palestine pound circulated was divided into several political entities: the state of israel, the hashemite kingdom of transjordan, 

In [25]:
inputs = examples["source_text"]
inputs

['allegations of apocrypha by opponents of the ppaca',
 'mcgowan had an excellent rookie year and was named golf digest / rolex rookie-of-the-year in 1978.',
 'hcis also organizes a yearly talent competition called the innofest where students in hcis get an opportunity to showcase their amazing talents.',
 'in the north-east of aunis there is a huge forest of hardwood trees, the forest of benon, which fortunately has been protected because it is unique to the region.',
 'on march 8, 2009, the impeccable, while monitoring submarine activity 75\xa0miles south of hainan, china, was harassed by several chinese naval ships.',
 'the new term is designed to be neutral, to avoid the social stigma associated with the conflation of "manic" and "depression."',
 'the news-press was sold by the new york times company in 2000, and is now independently owned by wendy p. mccaw, a local resident and a biased and eerily thick-headed woman .',
 'the area in which the palestine pound circulated was divide

In [28]:
# Side-by-side dump of each source sentence, its reference and the model output.
# A trailing "\n" inside each f-string stands in for the blank line that follows.
for i, (inp, pred, label) in enumerate(zip(inputs, preds, labels)):
    print(f"{i} \n")
    print(f"INPUT: {inp}\n")
    print(f"TRUTH: {label}\n")
    print(f"PREDI: {pred}\n")
    print("-" * 49)

0 

INPUT: allegations of apocrypha by opponents of the ppaca

TRUTH: allegations by opponents of the ppaca

PREDI: opposition to apocrypha by opponents of the ppaca

-------------------------------------------------
1 

INPUT: mcgowan had an excellent rookie year and was named golf digest / rolex rookie-of-the-year in 1978.

TRUTH: mcgowan was named golf digest / rolex rookie-of-the-year in 1978.

PREDI: mcgowan had a good rookie year and was named golf digest / rolex rookie-of-the-year in 1978.

-------------------------------------------------
2 

INPUT: hcis also organizes a yearly talent competition called the innofest where students in hcis get an opportunity to showcase their amazing talents.

TRUTH: hcis also organizes a yearly talent competition called the innofest where students in hcis get an opportunity to showcase their talents.

PREDI: hcis also organizes a yearly talent competition called the innofest where students in hcis get an opportunity to showcase their talents.



**Interesting Ones**: 1, 3, 6

### Open Questions:
1. Why does eval loss increase while eval accuracy/bleu also increase?
    - "In typical text generation settings, there exists a discrepancy between the training objective and evaluation criteria." 
    - [This paper](https://sailinglab.github.io/pgm-spring-2019/assets/project/final-reports/project3.pdf) introduces DEBLEU, a differentiable version of BLEU that can be used as a loss function, specifically for TST.
2. What does performance look like across cohorts (length_delta, generation length)?
3. This paper (TODO: add link) analyzes and discusses various TST evaluation metrics and how they correlate with human-level evaluation.

# Seq2seq with HF Pipeline

In [1]:
import torch
from transformers import pipeline

# %load_ext lab_black

In [5]:
# Path of the model checkpoint used in this section; it is passed to
# SubjectivityNeutralizer further down.
MODEL_PATH = "/home/cdsw/models/bart-tst-full"

# tst_generator = pipeline(task="text2text-generation", model=MODEL_PATH, device=-1, max_length=100)

In [1]:
# Inputs for the pipeline demo: a single sentence and a two-sentence batch.
example = "Sir Alex Ferguson is the greatest football manager of all time."
examples = [
    "Sir Alex Ferguson is the greatest football manager of all time.",
    "the news-press was sold by the new york times company in 2000, and is now independently owned by wendy p. mccaw, a local resident and a biased and eerily thick-headed woman .",
]
# example = "hcis also organizes a yearly talent competition called the innofest where students in hcis get an opportunity to showcase their amazing talents."

# Build the text2text pipeline and run it on the batch here, so that `out`
# (displayed in later cells) is defined when the notebook is run from a fresh
# kernel. device=-1 keeps the pipeline on the CPU.
tst_generator = pipeline(
    task="text2text-generation", model=MODEL_PATH, device=-1, max_length=100
)
out = tst_generator(examples)



In [6]:
out

[{'generated_text': 'Sir Alex Ferguson is one of the greatest football managers of all time.'},
 {'generated_text': 'the news-press was sold by the new york times company in 2000, and is now independently owned by wendy p. mccaw, a local resident.'}]

In [5]:
# Use the CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [10]:
torch.cuda.is_available()

True

In [12]:
torch.cuda.get_device_name()

'Tesla V100-PCIE-32GB'

In [13]:
torch.cuda.current_device()

0

In [None]:
# Record the index of the active CUDA device when a GPU is present; `device` is
# left untouched otherwise. (The earlier inner conditional repeated the same
# check and its unreachable fallback referenced an undefined name.)
if torch.cuda.is_available():
    device = torch.cuda.current_device()
    

In [None]:
# Integer device id: the current CUDA device index, or -1 when no GPU is present.
if torch.cuda.is_available():
    device = torch.cuda.current_device()
else:
    device = -1

In [4]:
out

[{'generated_text': 'Sir Alex Ferguson is one of the greatest football managers of all time.'},
 {'generated_text': 'the news-press was sold by the new york times company in 2000, and is now independently owned by wendy p. mccaw, a local resident.'}]

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
from src.inference import SubjectivityNeutralizer

In [6]:
# Instantiate the project's SubjectivityNeutralizer (imported from
# src.inference) with MODEL_PATH as its model identifier.
sn = SubjectivityNeutralizer(model_identifier=MODEL_PATH)

In [7]:
sn.neutralize(examples)

[{'generated_text': 'Sir Alex Ferguson is one of the greatest football managers of all time.'},
 {'generated_text': 'the news-press was sold by the new york times company in 2000, and is now independently owned by wendy p. mccaw, a local resident.'}]

In [None]:
sn.neutralize(