In [1]:
import torch
import numpy as np
import datasets
import os
import umap
import evaluate
from pathlib import Path
from itertools import product
from IPython.core.debugger import set_trace
from datasets import Dataset, DatasetDict
from torch import nn
from sentence_transformers import SentenceTransformer
from nltk import sent_tokenize
from IPython.core.debugger import Pdb
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from matplotlib import pyplot as plt
from transformers import AutoModel, AutoTokenizer
from pprint import pprint

datasets.disable_caching()

# Set this to whatever you want
seed = 10

torch.manual_seed(seed)
np.random.seed(seed)

%load_ext autoreload
%autoreload 2
%matplotlib inline

  from .autonotebook import tqdm as notebook_tqdm


# Load Dataset

In [49]:
ds = Dataset.from_parquet('./data/sofsat_lora.parquet')
print(ds)

Generating train split: 19203 examples [00:00, 70776.12 examples/s]

Dataset({
    features: ['S1', 'S2', 'Sy', 'operation', 'output_raw'],
    num_rows: 19203
})





# Clean Up Output

In [50]:
def map_fn(sample):
    idx = sample['output_raw'].index('</op2>')
    clean_output = sample['output_raw'][idx+6:]
    sample['output'] = clean_output
    return sample

ds = ds.map(map_fn)
print(ds)

Map: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 19203/19203 [00:00<00:00, 24773.18 examples/s]

Dataset({
    features: ['S1', 'S2', 'Sy', 'operation', 'output_raw', 'output'],
    num_rows: 19203
})





# Compute ROUGE and BERTscore

In [51]:
scores = {}

print('computing rouge')
scorer = evaluate.load('rouge')
scores['rouge'] = scorer.compute(
    predictions=ds['output'], 
    references=ds['Sy']
)

print('computing bertscore')
scorer = evaluate.load("bertscore")
bertscore = scorer.compute(
    predictions=ds['output'], 
    references=ds['Sy'],
    lang="en",
    batch_size=100,
    device='cuda:1',
    verbose=True,
    rescale_with_baseline=True,
)
scores['bertscore'] = np.mean(bertscore['f1'])

pprint(scores)

computing rouge
computing bertscore


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 378/378 [01:41<00:00,  3.72it/s]


computing greedy matching.


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 193/193 [00:02<00:00, 76.83it/s]


done in 1395699.05 seconds, 0.01 sentences/sec
{'bertscore': 0.3724939909395789,
 'rouge': {'rouge1': 0.4435674530914756,
           'rouge2': 0.23828892999393597,
           'rougeL': 0.3383353327730062,
           'rougeLsum': 0.34528689903851756}}


# Show an Example

In [62]:
idx = np.random.randint(len(ds))

sample = ds[idx]
print(f'sample: {idx}', end='\n\n')
print(f'**s1**: {sample["S1"]}', end='\n\n')
print(f'**s2**: {sample["S2"]}', end='\n\n')
print(f'**op**: {sample["operation"]}', end='\n\n')
print(f'**Reference**: {sample["Sy"]}', end='\n\n')
print(f'**Prediction**: {sample["output"]}')

sample: 16428

**s1**: Last night's SS15 show had star-studded guests, including Rosie Fortescue and Roxie Nafousi. Yasmin Le Bon led the finale.

**s2**: Yasmin Le Bon lead the finale at the end of the show .

**op**: right_diff

**Reference**: Star studded guests: Rosie Fortescue (left) and Roxie Nafousi (right) attended last night's SS15 show .

**Prediction**: The show was headlined by star-studded guests including Rosie Fortescue and Roxie Nafousi..</op2>Last night's


In [59]:
print(set(ds['operation']))

{'right_diff', 'intersection', 'union', 'left_diff'}


In [None]:
print(ds)