In [None]:
import nltk
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to
[nltk_data]     /storage/home/hcoda1/1/helwazzan3/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [3]:
from summac.model_summac import SummaCZS, SummaCConv

model_zs = SummaCZS(granularity="sentence", model_name="vitc", device="cpu") # If you have a GPU: switch to: device="cuda"
model_conv = SummaCConv(models=["vitc"], bins='percentile', granularity="sentence", nli_labels="e", device="cpu", start_file="default", agg="mean")

document = """Scientists are studying Mars to learn about the Red Planet and find landing sites for future missions.
One possible site, known as Arcadia Planitia, is covered instrange sinuous features.
The shapes could be signs that the area is actually made of glaciers, which are large masses of slow-moving ice.
Arcadia Planitia is in Mars' northern lowlands."""

summary1 = "There are strange shape patterns on Arcadia Planitia. The shapes could indicate the area might be made of glaciers. This makes Arcadia Planitia ideal for future missions."
score_zs1 = model_zs.score([document], [summary1])
score_conv1 = model_conv.score([document], [summary1])
print("[Summary 1] SummaCZS Score: %.3f; SummacConv score: %.3f" % (score_zs1["scores"][0], score_conv1["scores"][0])) # [Summary 1] SummaCZS Score: 0.582; SummacConv score: 0.536

summary2 = "There are strange shape patterns on Arcadia Planitia. The shapes could indicate the area might be made of glaciers."
score_zs2 = model_zs.score([document], [summary2])
score_conv2 = model_conv.score([document], [summary2])
print("[Summary 2] SummaCZS Score: %.3f; SummacConv score: %.3f" % (score_zs2["scores"][0], score_conv2["scores"][0])) # [Summary 2] SummaCZS Score: 0.877; SummacConv score: 0.709

<All keys matched successfully>


Downloading tokenizer_config.json: 100%|██████████| 217/217 [00:00<00:00, 892kB/s]
Downloading config.json: 100%|██████████| 1.09k/1.09k [00:00<00:00, 5.33MB/s]
Downloading spiece.model: 100%|██████████| 760k/760k [00:00<00:00, 17.4MB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 156/156 [00:00<00:00, 1.06MB/s]
  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
Downloading model.safetensors: 100%|██████████| 235M/235M [00:03<00:00, 76.0MB/s] 
  histograms = torch.FloatTensor(histograms).to(self.device)


[Summary 1] SummaCZS Score: 0.582; SummacConv score: 0.536
[Summary 2] SummaCZS Score: 0.877; SummacConv score: 0.709


In [None]:
import pandas as pd
import os 

data_dir = os.path.join(os.path.dirname(os.getcwd()), 'data')

val_df = pd.read_csv(os.path.join(data_dir, 'combined', 'val.csv'))
test_df = pd.read_csv(os.path.join(data_dir, 'combined', 'test.csv'))

In [None]:
len(val_df), len(test_df) 

(84152, 38224)

In [None]:
val_df.columns

Index(['id', 'grounding', 'generated_text', 'label', 'cut', 'dataset_origin'], dtype='object')

In [None]:
train_data = val_df[(val_df.cut == 'val') & (~val_df['dataset_origin'].isin(['Vitamin C', 'Fever']))]
test_data = test_df[(test_df.cut == 'test') & (~test_df['dataset_origin'].isin(['Vitamin C', 'Fever']))]


In [None]:
len(train_data), len(test_data)

(25797, 13527)

In [None]:
train_data.columns

Index(['id', 'grounding', 'generated_text', 'label', 'cut', 'dataset_origin'], dtype='object')

In [None]:
train_data.head(1)

Unnamed: 0,id,grounding,generated_text,label,cut,dataset_origin
0,qags_cnndm1,( cnn ) after more than nine years of travelin...,New horizons will shed light on a third zone o...,1,val,qags_cnndm


In [None]:
train_grounding_list = list(train_data['grounding'])
train_generated_list = list(train_data['generated_text'])

test_grounding_list = list(test_data['grounding'])
test_generated_list = list(test_data['generated_text'])

In [None]:
train_grounding_list[0]

"( cnn ) after more than nine years of traveling through the solar system, nasa's new horizons spacecraft has sent back its first color image of pluto. The initial picture released on tuesday shows a couple of orange - tinged blobs : pluto and its largest moon, charon. But the probe will soon be beaming back much sharper images and a wealth of other information about pluto's remote, unexplored corner of the solar system. ` `this is pure exploration ; we're going to turn points of light into a planet and a system of moons before your eyes,'' said alan stern, new horizons' principal investigator. Launched in 2006, new horizons is nearing the crucial point in its epic voyage of more than 3 billion miles. The probe is due to make its closest approach to pluto on july 14. ` `in an unprecedented flyby this july, our knowledge of what the pluto system is really like will expand exponentially, and i have no doubt there will be exciting discoveries,'' said john grunsfeld, an astronaut and assoc

In [None]:
train_generated_list[0]

'New horizons will shed light on a third zone of the solar system. Pluto and its largest moon, charon. The probe is due to make its closest approach to pluto on july 14.'

In [None]:
from summac.model_summac import SummaCConv
# one-shot using the summac summacconv model
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_conv = SummaCConv(models=["vitc"], bins='percentile', granularity="sentence", nli_labels="e", device=device, start_file="default", agg="mean")

scores = []
for grounding, generated in zip(train_grounding_list, train_generated_list):
    score_conv = model_conv.score([grounding], [generated])
    print("Score: %.3f" % (score_conv["scores"][0])) 
    scores.append(score_conv["scores"][0])
print("Average score: %.3f" % (sum(scores)/len(scores))) 

  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(


<All keys matched successfully>


  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
  histograms = torch.FloatTensor(histograms).to(self.device)


Score: 0.791
