## F1 score on bigrams

In [1]:
import re
import string
import json
from nltk.util import bigrams
from collections import Counter
import csv

# Character class matching any single ASCII punctuation mark from string.punctuation.
regex_punctuation = re.compile('[%s]' % re.escape(string.punctuation))


def string_to_tokens(s):
    """Lower-case ``s``, drop ASCII punctuation, split on whitespace, and add sentinels.

    Returns a list that starts with '<bos>' and ends with '<eos>', so the first
    and last words also take part in a bigram.
    """
    without_punctuation = regex_punctuation.sub('', s.lower())
    return ['<bos>', *without_punctuation.split(), '<eos>']

def calculate_f1_score(pred_bigrams, gt_bigrams):
    """Return the F1 overlap between predicted and ground-truth n-grams.

    Overlap is counted with multiplicity: an n-gram that occurs twice in the
    prediction but once in the ground truth contributes one match.

    Args:
        pred_bigrams: Iterable of hashable n-grams from the prediction. Any
            iterable works, including a one-shot generator such as the one
            returned by ``nltk.util.bigrams``.
        gt_bigrams: Iterable of hashable n-grams from the ground truth.

    Returns:
        float: F1 in [0.0, 1.0]; 0.0 when nothing is shared (which includes
        the case where either input is empty).
    """
    pred_counts = Counter(pred_bigrams)
    gt_counts = Counter(gt_bigrams)
    num_same = sum((pred_counts & gt_counts).values())

    if num_same == 0:
        # Always a float, so the result column has a single type.
        return 0.0

    # Totals come from the counters rather than len(), so inputs that can only
    # be iterated once are handled as well as lists.
    precision = num_same / sum(pred_counts.values())
    recall = num_same / sum(gt_counts.values())
    return (2 * precision * recall) / (precision + recall)

# Load data from JSON file.
# Each entry is read through the keys 'name', 'existing_text' (used as the
# ground truth) and 'generated_texts' (iterated as candidate strings).
# The encoding is explicit because open() otherwise falls back to the platform
# locale, which can mis-decode or reject non-ASCII names.
with open('RandomNamesNEW.json', 'r', encoding='utf-8') as json_file:
    data = json.load(json_file)

# Open CSV file for writing results (one row per generated text).
csv_filename = 'f1_scores.csv'
with open(csv_filename, 'w', newline='', encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(['Name', 'Generated Text', 'F1 Score'])

    for entry in data:
        name = entry['name']
        gt = entry['existing_text']
        pred_texts = entry['generated_texts']

        # Ground-truth bigrams are built once per entry and reused for every candidate.
        gt_words = string_to_tokens(gt)
        gt_bigrams = list(bigrams(gt_words))

        for pred in pred_texts:
            pred_words = string_to_tokens(pred)
            pred_bigrams = list(bigrams(pred_words))

            f1_score = calculate_f1_score(pred_bigrams, gt_bigrams)

            # Write the results to the CSV file
            csv_writer.writerow([name, pred, f1_score])

print(f"Results saved in {csv_filename}")


ValueError: Found input variables with inconsistent numbers of samples: [28, 32]

In [1]:
import re
import string
from nltk.util import bigrams
from collections import Counter

# Character class matching any single ASCII punctuation mark from string.punctuation
# (escaped so characters such as ']' and '\' are taken literally).
regex_punctuation = re.compile('[%s]' % re.escape(string.punctuation))

# Worked example for the bigram F1 below: `gt` is the ground-truth description and
# `pred` is a longer description to score against it (presumably model-generated — confirm).
gt = "Ben Shneiderman is an active and longtime research contributor with more than 400 publications  since 1972. Shneiderman's published work includes 208 journal articles  and 197 proceedings papers"
pred = "Ben Shneiderman is a highly productive scientist and computer scientist. He has published over 400 works since 1972, including 208 journal articles and 197 proceedings papers. He is well known for his pioneering work on human-computer interaction and information visualization. He is a Professor of Computer Science at the University of Maryland and founding Director of the Human-Computer Interaction Lab. He has won numerous awards and honors, including the Association for Computing Machinery's Software System Award and the ACM CHI Lifetime."

def string_to_tokens(s):
    '''
    Naive tokenizer: lower-case the text, remove ASCII punctuation, split on
    whitespace, and wrap the words in '<bos>' / '<eos>' sentinels.
    Deliberately basic; probably not useful for real life datasets.
    '''
    without_punctuation = regex_punctuation.sub('', s.lower())
    return ['<bos>', *without_punctuation.split(), '<eos>']


# Tokenise both texts (each list starts with '<bos>' and ends with '<eos>').
gt_words = string_to_tokens(gt)
pred_words = string_to_tokens(pred)
print(gt_words, pred_words)


# bigrams() is materialised with list() because f1() calls len() on its arguments.
gt_bigrams = list(bigrams(gt_words)) 
pred_bigrams = list(bigrams(pred_words)) 
print(gt_bigrams, pred_bigrams)

def f1(pred_bigrams, gt_bigrams):
    """Return the F1 overlap between predicted and ground-truth n-grams.

    Matches are counted with multiplicity (an n-gram repeated in one input
    only matches as many times as it appears in the other).

    Args:
        pred_bigrams: Iterable of hashable n-grams from the prediction; a
            one-shot generator (e.g. from ``nltk.util.bigrams``) is accepted.
        gt_bigrams: Iterable of hashable n-grams from the ground truth.

    Returns:
        float: F1 in [0.0, 1.0]; 0.0 when the inputs share no n-gram.
    """
    pred_counts = Counter(pred_bigrams)
    gt_counts = Counter(gt_bigrams)
    num_same = sum((pred_counts & gt_counts).values())

    if num_same == 0:
        # Float on every path, matching the non-zero branch.
        return 0.0

    # Sizes are taken from the counters so that generators work too.
    precision = num_same / sum(pred_counts.values())
    recall = num_same / sum(gt_counts.values())
    # Returned directly: a local named `f1` would shadow this function.
    return (2 * precision * recall) / (precision + recall)


# Score the long description against the ground truth; as the cell's last expression, the value is displayed.
f1(pred_bigrams, gt_bigrams)

In [2]:
# Example pair used by the cells below: `gt` is the ground-truth description,
# `pred` the description being scored (presumably model-generated — confirm).
gt = "Ben Shneiderman is an active and longtime research contributor with more than 400 publications  since 1972. Shneiderman's published work includes 208 journal articles  and 197 proceedings papers"
pred = "Ben Shneiderman is a highly productive scientist and computer scientist. He has published over 400 works since 1972, including 208 journal articles and 197 proceedings papers. He is well known for his pioneering work on human-computer interaction and information visualization. He is a Professor of Computer Science at the University of Maryland and founding Director of the Human-Computer Interaction Lab. He has won numerous awards and honors, including the Association for Computing Machinery's Software System Award and the ACM CHI Lifetime."

In [3]:
def string_to_tokens(s):
    '''
    Very basic tokenizer: lower-cases the text, strips ASCII punctuation,
    splits on whitespace, and brackets the words with '<bos>' and '<eos>'.
    Probably not useful for real life datasets.
    '''
    cleaned = regex_punctuation.sub('', s.lower())
    return ['<bos>', *cleaned.split(), '<eos>']

In [4]:
# Tokenise both texts; note in the output that punctuation removal joins
# "Shneiderman's" into 'shneidermans' and "human-computer" into 'humancomputer'.
gt_words = string_to_tokens(gt)
pred_words = string_to_tokens(pred)
print(gt_words, pred_words)

['<bos>', 'ben', 'shneiderman', 'is', 'an', 'active', 'and', 'longtime', 'research', 'contributor', 'with', 'more', 'than', '400', 'publications', 'since', '1972', 'shneidermans', 'published', 'work', 'includes', '208', 'journal', 'articles', 'and', '197', 'proceedings', 'papers', '<eos>'] ['<bos>', 'ben', 'shneiderman', 'is', 'a', 'highly', 'productive', 'scientist', 'and', 'computer', 'scientist', 'he', 'has', 'published', 'over', '400', 'works', 'since', '1972', 'including', '208', 'journal', 'articles', 'and', '197', 'proceedings', 'papers', 'he', 'is', 'well', 'known', 'for', 'his', 'pioneering', 'work', 'on', 'humancomputer', 'interaction', 'and', 'information', 'visualization', 'he', 'is', 'a', 'professor', 'of', 'computer', 'science', 'at', 'the', 'university', 'of', 'maryland', 'and', 'founding', 'director', 'of', 'the', 'humancomputer', 'interaction', 'lab', 'he', 'has', 'won', 'numerous', 'awards', 'and', 'honors', 'including', 'the', 'association', 'for', 'computing', 'mach

In [5]:
# Adjacent token pairs; list() materialises them because f1() calls len() on its inputs.
gt_bigrams = list(bigrams(gt_words)) 
pred_bigrams = list(bigrams(pred_words)) 
print(gt_bigrams, pred_bigrams)

[('<bos>', 'ben'), ('ben', 'shneiderman'), ('shneiderman', 'is'), ('is', 'an'), ('an', 'active'), ('active', 'and'), ('and', 'longtime'), ('longtime', 'research'), ('research', 'contributor'), ('contributor', 'with'), ('with', 'more'), ('more', 'than'), ('than', '400'), ('400', 'publications'), ('publications', 'since'), ('since', '1972'), ('1972', 'shneidermans'), ('shneidermans', 'published'), ('published', 'work'), ('work', 'includes'), ('includes', '208'), ('208', 'journal'), ('journal', 'articles'), ('articles', 'and'), ('and', '197'), ('197', 'proceedings'), ('proceedings', 'papers'), ('papers', '<eos>')] [('<bos>', 'ben'), ('ben', 'shneiderman'), ('shneiderman', 'is'), ('is', 'a'), ('a', 'highly'), ('highly', 'productive'), ('productive', 'scientist'), ('scientist', 'and'), ('and', 'computer'), ('computer', 'scientist'), ('scientist', 'he'), ('he', 'has'), ('has', 'published'), ('published', 'over'), ('over', '400'), ('400', 'works'), ('works', 'since'), ('since', '1972'), ('197

![f1 formula](https://wikimedia.org/api/rest_v1/media/math/render/svg/f5c869c51dba6f1df65a6e6630c516de161632d4)

In [6]:
def f1(pred_bigrams, gt_bigrams):
    """Compute the F1 score of the n-gram overlap between two texts.

    precision = shared / predicted n-grams, recall = shared / ground-truth
    n-grams, where "shared" is the multiset intersection of the two inputs.

    Args:
        pred_bigrams: Iterable of hashable n-grams from the prediction. Lists
            and one-shot iterators (e.g. ``nltk.util.bigrams`` output) both work.
        gt_bigrams: Iterable of hashable n-grams from the ground truth.

    Returns:
        float: F1 in [0.0, 1.0]; 0.0 when there is no shared n-gram.
    """
    pred_counts = Counter(pred_bigrams)
    gt_counts = Counter(gt_bigrams)
    num_same = sum((pred_counts & gt_counts).values())

    if num_same == 0:
        # Keep the return type a float on every path.
        return 0.0

    # Counter totals replace len() so that generators are accepted.
    precision = num_same / sum(pred_counts.values())
    recall = num_same / sum(gt_counts.values())
    # Returned directly: binding a local called `f1` would shadow the function.
    return (2 * precision * recall) / (precision + recall)

In [7]:
# Baseline: the long description scored against the ground truth.
f1(pred_bigrams, gt_bigrams)

0.18181818181818182

In [12]:
# Alternative prediction: a terse, list-like summary. Rebinds `pred_bigrams`;
# `gt_bigrams` is reused from the earlier cell.
pred_bigrams = list(bigrams(string_to_tokens('Renowned Computer Scientist (1972-present), 400 Publications, 208 Journal Articles, 197 Proceedings Papers.')))
f1(pred_bigrams, gt_bigrams)

0.2926829268292683

In [13]:
# Alternative prediction: a single sentence that reuses much of the ground-truth wording.
# Rebinds `pred_bigrams`; `gt_bigrams` is reused from the earlier cell.
pred_bigrams = list(bigrams(string_to_tokens('Ben Shneiderman is a scientist with over 400 publications since 1972, including 208 journal articles and 197 proceedings papers.')))
f1(pred_bigrams, gt_bigrams)

0.5416666666666667

## BERTScore

In [16]:
!pip install evaluate
!pip install bert_score
!pip install pyarrow==11.0.0

Collecting evaluate
  Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)
                                              0.0/81.4 kB ? eta -:--:--
     -----                                    10.2/81.4 kB ? eta -:--:--
     ---------------------------------------- 81.4/81.4 kB 1.5 MB/s eta 0:00:00
Collecting datasets>=2.0.0 (from evaluate)
  Downloading datasets-2.14.4-py3-none-any.whl (519 kB)
                                              0.0/519.3 kB ? eta -:--:--
     --                                    41.0/519.3 kB 991.0 kB/s eta 0:00:01
     --------                               112.6/519.3 kB 1.3 MB/s eta 0:00:01
     ------------                           174.1/519.3 kB 1.5 MB/s eta 0:00:01
     -----------------                      245.8/519.3 kB 1.4 MB/s eta 0:00:01
     ----------------------                 307.2/519.3 kB 1.4 MB/s eta 0:00:01
     ----------------------------           389.1/519.3 kB 1.4 MB/s eta 0:00:01
     --------------------------------       440.3


[notice] A new release of pip is available: 23.1.2 -> 23.2.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
                                              0.0/61.1 kB ? eta -:--:--
     ---------------------------------------- 61.1/61.1 kB 1.6 MB/s eta 0:00:00
Installing collected packages: bert_score
Successfully installed bert_score-0.3.13



[notice] A new release of pip is available: 23.1.2 -> 23.2.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting pyarrow==11.0.0
  Downloading pyarrow-11.0.0-cp311-cp311-win_amd64.whl (20.5 MB)
                                              0.0/20.5 MB ? eta -:--:--
                                              0.1/20.5 MB 1.9 MB/s eta 0:00:11
                                              0.2/20.5 MB 2.9 MB/s eta 0:00:08
                                              0.4/20.5 MB 3.4 MB/s eta 0:00:06
     -                                        0.7/20.5 MB 3.9 MB/s eta 0:00:06
     -                                        1.0/20.5 MB 4.5 MB/s eta 0:00:05
     --                                       1.3/20.5 MB 4.9 MB/s eta 0:00:04
     ---                                      1.6/20.5 MB 5.2 MB/s eta 0:00:04
     ---                                      1.9/20.5 MB 5.3 MB/s eta 0:00:04
     ----                                     2.2/20.5 MB 5.3 MB/s eta 0:00:04
     ----                                     2.5/20.5 MB 5.5 MB/s eta 0:00:04
     -----                                    


[notice] A new release of pip is available: 23.1.2 -> 23.2.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [17]:
 import evaluate

# Load the 'bertscore' metric through the `evaluate` library; the object's
# .compute(...) method is what the following cells call.
bertscore = evaluate.load('bertscore')

  from .autonotebook import tqdm as notebook_tqdm
Downloading builder script: 100%|██████████| 7.95k/7.95k [00:00<00:00, 7.98MB/s]


In [18]:
# NOTE(review): `predictions` holds the text the F1 section calls `gt`, and `references`
# holds the text it calls `pred`, so the two roles look swapped relative to that section.
# Presumably this exchanges the reported precision and recall — confirm intent.
bertscore.compute(
    predictions=['Ben Shneiderman is an active and longtime research contributor with more than 400 publications  since 1972. Shneidermans published work includes 208 journal articles  and 197 proceedings papers'],
    references=['Ben Shneiderman is a highly productive scientist and computer scientist. He has published over 400 works since 1972, including 208 journal articles and 197 proceedings papers. He is well known for his pioneering work on human-computer interaction and information visualization. He is a Professor of Computer Science at the University of Maryland and founding Director of the Human-Computer Interaction Lab. He has won numerous awards and honors, including the Association for Computing Machinerys Software System Award and the ACM CHI Lifetime..'],
    model_type="distilbert-base-uncased"
)

Downloading (…)okenizer_config.json: 100%|██████████| 28.0/28.0 [00:00<?, ?B/s]
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Downloading (…)lve/main/config.json: 100%|██████████| 483/483 [00:00<00:00, 453kB/s]
Downloading (…)solve/main/vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 1.13MB/s]
Downloading pytorch_model.bin: 100%|██████████| 268M/268M [01:18<00:00, 3.43MB/s] 


{'precision': [0.906581461429596],
 'recall': [0.7403594255447388],
 'f1': [0.8150821924209595],
 'hashcode': 'distilbert-base-uncased_L5_no-idf_version=0.3.12(hug_trans=4.29.0)'}

In [13]:
# Probe: prediction and reference differ in a single word (Vanilla vs. Chocolate).
bertscore.compute(
    predictions=['Vanilla is the best ice cream flavor in the world.'],
    references=['Chocolate is the best ice cream flavor in the world.'],
    model_type="distilbert-base-uncased"
)

{'precision': [0.9863595962524414],
 'recall': [0.9863595962524414],
 'f1': [0.9863595962524414],
 'hashcode': 'distilbert-base-uncased_L5_no-idf_version=0.3.12(hug_trans=4.29.2)'}

In [14]:
# Probe: the reference negates the prediction by inserting "not"; the recorded
# scores below stay close to those of the single-word substitution.
bertscore.compute(
    predictions=['Vanilla is the best ice cream flavor in the world.'],
    references=['Vanilla is not the best ice cream flavor in the world.'],
    model_type="distilbert-base-uncased"
)

{'precision': [0.9881718158721924],
 'recall': [0.9713627099990845],
 'f1': [0.979695200920105],
 'hashcode': 'distilbert-base-uncased_L5_no-idf_version=0.3.12(hug_trans=4.29.2)'}

## Perplexity

In [19]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch
from torch import nn
import numpy as np

# Code from a wonderful Kaggle Notebook by Philipp Singer
# https://www.kaggle.com/code/philippsinger/h2ogpt-perplexity-ranking

class Perplexity(nn.Module):
    """Next-token cross-entropy per sequence, optionally averaged over the batch.

    Despite the name, nothing here exponentiates the loss: the value returned
    is the cross-entropy computed by ``nn.CrossEntropyLoss`` with its default
    settings.

    Args:
        reduce: When True (default) return the mean over the batch; otherwise
            return one value per sequence.
    """

    def __init__(self, reduce: bool = True):
        super().__init__()
        self.loss_fn = nn.CrossEntropyLoss()
        self.reduce = reduce

    def forward(self, logits, labels):
        """Score ``labels`` under ``logits``.

        Assumes ``logits`` is (batch, seq, vocab) and ``labels`` is (batch, seq),
        which is how the caller in this notebook passes them — TODO confirm for
        other callers.
        """
        # The logit at position t is scored against the token at t + 1, so the
        # last logit and the first label have no partner and are dropped.
        shift_logits = logits[..., :-1, :].contiguous()
        shift_labels = labels[..., 1:].contiguous()

        per_sequence = torch.stack(
            [
                self.loss_fn(shift_logits[row], shift_labels[row])
                for row in range(labels.shape[0])
            ],
            dim=0,
        )
        return torch.mean(per_sequence) if self.reduce else per_sequence
    
# Shared scorer with the default reduce=True (batch mean); perplexity() below calls it.
perp = Perplexity()

In [20]:
def perplexity(model, prompt):
    """Score ``prompt`` under a causal language model.

    Args:
        model: Model identifier or path passed to ``from_pretrained`` for both
            the tokenizer and the model (e.g. "mosaicml/mpt-7b").
        prompt: Text to score.

    Returns:
        float: The value produced by the module-level ``perp`` scorer for the
        single prompt. That scorer returns a cross-entropy loss and does not
        exponentiate it.

    Note:
        The tokenizer and the model weights are loaded on every call.
    """
    tokenizer = AutoTokenizer.from_pretrained(model)

    # SECURITY: trust_remote_code=True executes Python code shipped in the model
    # repository; only pass identifiers of repositories you trust.
    lm = AutoModelForCausalLM.from_pretrained(
        model,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )

    with torch.no_grad():
        # device_map="auto" decides where the weights live, so the inputs follow
        # the model's device instead of assuming a CUDA device is present.
        inputs = tokenizer(
            [prompt], return_tensors="pt", add_special_tokens=False, truncation=True
        ).to(lm.device)
        logits = lm(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"]).logits
        labels = inputs["input_ids"]
        # Only one prompt is tokenised; keep the batch dimension for the scorer.
        return perp(logits[0].unsqueeze(0), labels[0].unsqueeze(0)).item()

In [21]:
# Text scored by perplexity() in the cells below.
# NOTE(review): the {name}, {publications}, {journal_articles}, {since_year} and
# {proceedings_papers} placeholders are never filled (no .format() call is visible), so
# they are scored as literal text; the answer also ends with a stray double quote —
# confirm both are intended.
prompt = """
    Q: Please describe briefly the following scientific author and consider the following information:\n\nName: {name}\nPublications: {publications}\nJournal Articles: {journal_articles}\nsince year:{since_year}\nProceedings Papers: {proceedings_papers}\n\n
    A: Ben Shneiderman is a highly productive scientist and computer scientist. He has published over 400 works since 1972, including 208 journal articles and 197 proceedings papers. He is well known for his pioneering work on human-computer interaction and information visualization. He is a Professor of Computer Science at the University of Maryland and founding Director of the Human-Computer Interaction Lab. He has won numerous awards and honors, including the Association for Computing Machinery's Software System Award and the ACM CHI Lifetime"
"""

In [23]:
# NOTE: the recorded run of this cell failed — the model's remote modeling code
# requires the `einops` package, which was not installed (see the ImportError below).
perplexity("tiiuae/falcon-7b", prompt)

ImportError: This modeling file requires the following packages that were not found in your environment: einops. Run `pip install einops`

In [19]:
# Same prompt scored with MPT-7B; the cell's value is the score returned by perplexity().
perplexity("mosaicml/mpt-7b", prompt)

The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.


Instantiating an MPTForCausalLM model from /home/radek/.cache/huggingface/modules/transformers_modules/mosaicml/mpt-7b/72e5f594ce36f9cabfa2a9fd8f58b491eb467ee7/modeling_mpt.py
You are using config.init_device='cpu', but you can also use config.init_device="meta" with Composer + FSDP for fast initialization.


Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:06<00:00,  3.07s/it]


1.77734375