In [1]:
!nvidia-smi

Fri Nov 17 01:18:47 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.125.06   Driver Version: 525.125.06   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:3B:00.0 Off |                  N/A |
| 49%   68C    P2   345W / 350W |  17707MiB / 24576MiB |    100%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce ...  On   | 00000000:AF:00.0 Off |                  N/A |
| 30%   38C    P2   130W / 350W |    817MiB / 24576MiB |      0%      Default |
|       

In [1]:
from transformers import (
    AutoTokenizer, AutoModelForCausalLM,
    BitsAndBytesConfig
)
from datasets import Dataset, load_dataset
import torch
import random
import pandas as pd
import nltk

In [3]:
model_name = "mistralai/Mistral-7B-v0.1"
device="cuda:1"

In [4]:
tokenizer = AutoTokenizer.from_pretrained(f'{model_name}')

In [5]:
model = AutoModelForCausalLM.from_pretrained(f'{model_name}', torch_dtype=torch.float16)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
model.to(device)
print("")




In [7]:
df = pd.read_csv('ngs.csv')

In [8]:
df = df[ ['sentence', 'subordinate-sentence', 'passive-sentence', 'it', 'it-r-1-null_subject', 'it-r-2-passive', 'it-r-3-subordinate', 'it-u-1-negation', 'it-u-2-invert', 'it-u-3-gender', 'jp-r-1-sov', 'jp-r-2-passive', 'jp-r-3-subordinate', 'jp-u-1-negation',    'jp-u-2-invert', 'jp-u-3-past-tense', 'ng-sentence','ng-subordinate-sentence', 'ng-passive-sentence', 'ng-it','ng-it-r-1-null_subject', 'ng-it-r-2-passive', 'ng-it-r-3-subordinate','ng-it-u-1-negation', 'ng-it-u-2-invert', 'ng-it-u-3-gender','ng-jp-r-1-sov', 'ng-jp-r-2-passive', 'ng-jp-r-3-subordinate','ng-jp-u-1-negation', 'ng-jp-u-2-invert', 'ng-jp-u-3-past-tense']]

In [9]:
def parse_answer(text):
    answer = text.split("A:")[-1].strip()
    return answer

def construct_translation_prompt(train_dataset, num_demonstrations, col):
    assert num_demonstrations > 0
    prompt = ''
    train_examples = train_dataset.shuffle().select(range(num_demonstrations))
    for exemplar_num in range(num_demonstrations):
        train_example = train_examples[exemplar_num]
        exemplar = "Transform this sentence. Q: "
        exemplar += train_example['sentence']
        exemplar += "\nA: " + train_example[col]
        exemplar += "\n\n"
        prompt += exemplar
    return prompt

def compute_accuracy(preds, golds):
    assert len(preds) == len(golds)
    total = 0
    correct = 0
    for pred, gold in zip(preds, golds):
        if pred == gold:
            correct += 1
        total += 1
    return correct / total

In [10]:
import sys
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

@torch.no_grad()
def get_aligned_words_measures(text: str, 
                               measure: str,
                               model: GPT2LMHeadModel, 
                               tokenizer: GPT2Tokenizer) -> list[str]:
    """ Returns words and their measure (prob|surp)
    Args:
        text (list[str]): list of sentences
        measure (str): Measure, either probability or surprisal
                        (options: prob|surp)
        model (GPT2LMHeadModel): Pretrained model
        tokenizer (GPT2Tokenizer): Tokenizer
    Returns:
        list[str]: List of words with their measures

    For example, 
    >>> model, tokenizer = load_pretrained_model()
    >>> get_aligned_words_measures('the student is happy', 
    ...        'surp', model, tokenizer)
    [('the', 0), ('student', 17.38616943359375), ('is', 6.385905742645264),
     ('happy', 9.564245223999023)]
    >>> get_aligned_words_measures('the cat is fluffy', 
    ...        'prob', model, tokenizer) 
    [('the', 0), ('cat', 2.5601848392398097e-06), ('is', 0.025296149775385857),
     ('fluffy', 0.00020585735910572112)]
    >>> get_aligned_words_measures('the cat are fluffy', 
    ...        'prob', model, tokenizer)
    [('the', 0), ('cat', 2.5601848392398097e-06), ('are', 0.0010310395155102015),
     ('fluffy', 0.00021902224398218095)]
    """
    if measure not in {'prob', 'surp'}:
        sys.stderr.write(f"{measure} not recognized\n")
        sys.exit(1)

    data = []

    ids = tokenizer(text, return_tensors='pt').to(device)
    input_ids = ids.input_ids.flatten().data
    target_ids = ids.input_ids[:,1:]

    # get output
    logits = model(**ids).logits
    output = torch.nn.functional.log_softmax(logits, dim=-1)
    if measure == 'surp':
        output = -(output/torch.log(torch.tensor(2.0)))
    else:
        output = torch.exp(output)

    # get by token measures 
    target_measures = output[:,:-1, :]
    # use gather to get the output for each target item in the batch
    target_measures = target_measures.gather(-1,
                             target_ids.unsqueeze(2)).flatten().tolist()
    tokens = tokenizer.convert_ids_to_tokens(input_ids)[1:]
    words = text.split(' ')
    # print(words, tokens)
    # A lil loop to force align words 
    current_word = words.pop(0)
    current_token = tokens.pop(0).replace('▁', '')
    measure = 0
    while len(data) != len(text.split(' ')) and len(target_measures) > 0:
        if current_word == current_token:
            data.append((current_word, measure))
            measure = 0
            if words:
                current_word = words.pop(0)
                current_token = tokens.pop(0).replace('▁', '')
                measure += target_measures.pop(0)
        else:
            measure += target_measures.pop(0)
            current_token += tokens.pop(0).replace('▁', '')
            data.append((current_token, measure))

    return data

In [11]:
preds = []
golds = []

f = pd.DataFrame(columns=["type", "prompt", "q", "prediction", "gold", "bleu", "surprisal", "int-grad"])

g = pd.DataFrame(columns=['accuracy', 'type'])

gCols = [col for col in df.columns if not 'ng' in col]

datasets = Dataset.from_pandas(df).train_test_split(test_size=0.2)

master_prompt = 'We will provide you a set of sentences which follow or violate a grammatical structure. \n The sentences may use subjects and objects from the following nouns - author, banana, biscuit, book, bottle, box, boy, bulb, cap, cat, chalk, chapter, cucumber, cup, dog, fish, fruit, girl, Gomu, Harry, hill, John, Leela, man, Maria, meal, mountain, mouse, newspaper, pear, pizza, poem, poet, rock, roof, Sheela, speaker, staircase, story, teacher, Tom, toy, tree, woman, writer.\nThe sentences may use any of the following verbs - brings, carries, claims, climbs, eats, holds, notices, reads, says, sees, states, takes.\n Each noun in a sentence may sometimes use a different determiner than those found in English. Here is a reference of determiners that can be used by nouns: "pear": "kar", "author": "kon", "authors": "kons", "banana": "kar", "biscuit": "kon", "book": "kon", "bottle": "kar", "box": "kar", "boy": "kon", "boys": "kons", "bulb": "kar", "cabinet": "kar", "cap": "kon", "cat": "kon", "cats": "kons", "chapter": "kon", "chalk": "kon", "cup": "kar", "cucumber": "kon", "dog": "kon", "dogs": "kons", "fish": "kon", "fruit": "kar", "girl": "kar", "girls": "kars", "hill": "kar", "man": "kon", "men": "kons", "meal": "kon", "mountain": "kar", "mouse": "kon", "newspaper": "kon", "pizza": "kar", "poet": "kon", "poets": "kons", "poem": "kar", "rock": "kon", "roof": "kon", "speaker": "kon", "speakers": "kons", "staircase": "kar", "story": "kar", "teacher": "kon", "teachers": "kons", "toy": "kon", "tree": "kar", "woman": "kar", "women": "kars", "writer": "kon", "writers": "kons". Each verb in a sentence may sometimes use the past tense of the verb if it is more appropriate. Here are a set of verbs and their past tenses - "climbs" : "climbed", "reads": "read", "carries": "carried", "eats": "ate", "holds": "held", "takes" :"took", "brings": "brought", "reads": "read", "climb" : "climbed", "read": "read", "carry": "carried", "eat": "ate", "hold": "held", "take" :"took", "bring": "brought", "read": "read"\n The sentences may sometimes use the infinitive forms of a verb. Here are a set of verbs and their infinitives - "climbs" : "to climb", "reads": "to read", "carries": "to carry", "eats": "to eat", "holds": "to hold", "takes" : "to take", "brings": "to bring", "reads": "to read", "climb" : "to climb", "read": "to read", "carry": "to carry", "eat": "to eat", "hold": "to hold", "take" : "to take", "bring": "to bring", "read": "to read". \n The sentences may sometimes use the plural form of a noun. Here are a set of nouns and their plurals - "fish": "fish", "mouse": "mice", "bottle": "bottles", "newspaper": "newspapers", "chalk": "chalks", "box": "boxes", "cap": "caps", "bulb": "bulbs", "cup": "cups", "toy": "toys", "staircase": "staircases", "rock": "rocks", "hill": "hills", "mountain": "mountains", "roof": "roofs", "tree": "trees", "biscuit": "biscuits", "banana": "bananas", "pear": "pears", "meal": "meals", "fruit": "fruits", "cucumber": "cucumbers", "pizza": "pizzas", "book": "books", "poem": "poems", "story": "stories", "chapter": "chapters". \n The sentences may sometimes use the passive form of a verb. Here are a set of verbs and their passive forms - "carries": "carried", "carry": "carried", "holds": "held", "hold": "held", "takes": "taken", "take": "taken", "brings": "brought", "bring": "brought", "climbs": "climbed", "climb": "climbed", "eats": "eaten", "eat": "eaten", "reads": "read", "read": "read"\n\n'

for NUM_DEMONSTRATIONS in range(10, 15, 5):
    train_dataset = datasets['train']
    test_dataset = datasets['test']
    for col in gCols:
        prompt = ''
        printAnswer = False
        for test_sentence in test_dataset:
            prompt = construct_translation_prompt(train_dataset, NUM_DEMONSTRATIONS, col)
            fPrompt = prompt
            # Append test example
            prompt += "Transform this sentence. Q: "
            prompt += test_sentence['sentence']
            prompt += "\nA:"
            
            fQ = "Transform this sentence. Q: " + test_sentence[col] + "\nA:"
            
            golds.append(test_sentence[col])
            fGold = test_sentence[col]

            # Get answer from model
            model_inputs = tokenizer([master_prompt + prompt], return_tensors="pt").to(device)
            answer = model.generate(**model_inputs, pad_token_id=tokenizer.eos_token_id, top_p=0.9, temperature=0.1, max_new_tokens=len(test_sentence[col].split(" ")), do_sample=True)
            answer = tokenizer.batch_decode(answer)[0]
            if printAnswer:
                print(answer)
                print("################## ", len(test_sentence[col].split(" ")))
                printAnswer = False
            preds.append(parse_answer(answer))
            fPrediction = parse_answer(answer)
            fSurprisal = get_aligned_words_measures(parse_answer(answer), "surp", model, tokenizer)
            fBleu = nltk.translate.bleu_score.sentence_bleu([test_sentence[col]], parse_answer(answer))
            f = pd.concat([f, pd.DataFrame([{'type': col, 'prompt': fPrompt, 'q' :fQ, 'prediction': fPrediction, 'gold': fGold, 'bleu': fBleu, 'surprisal': fSurprisal, 'int-grad': 0}])]).reset_index(drop=True)
            

        # Evaluate
        accuracy = compute_accuracy(preds, golds)
        print(f"{col} -- Accuracy: {accuracy:.2f}\n")
        g = pd.concat([g, pd.DataFrame([{ 'type' : col, 'accuracy': f"{accuracy:.2f}"}])])

  next_tokens.tile(eos_token_id_tensor.shape[0], 1).ne(eos_token_id_tensor.unsqueeze(1)).prod(dim=0)


sentence -- Accuracy: 0.63



KeyboardInterrupt: 

In [None]:
f.to_csv('translation-train-test-det.csv')
g.to_csv('translation-train-test-acc.csv')

In [None]:
preds = []
golds = []

filename = "translation-train-train.txt"
f = open(filename, "a")

gCols = [col for col in df.columns if not 'ng' in col]

datasets = Dataset.from_pandnum_demonstrations.train_test_split(test_size=0.2)
print("____________________TRANSLATION (TRAIN TRAIN)____________________\n")
f.write("____________________TRANSLATION (TRAIN TRAIN)____________________\n")
for NUM_DEMONSTRATIONS in range(10, 30, 5):
    print(f"____________________NUM DEMONSTRATIONS = {NUM_DEMONSTRATIONS}\n")
    f.write(f"____________________NUM DEMONSTRATIONS = {NUM_DEMONSTRATIONS}____________________\n")
    train_dataset = datasets['train']
    test_dataset = datasets['test']
    for col in gCols:
        prompt = ''
        printAnswer = False
        for test_sentence in train_dataset:
            prompt = construct_translation_prompt(train_dataset, NUM_DEMONSTRATIONS, col)
            # Append test example
            prompt += "Transform this sentence. Q: "
            prompt += test_sentence['sentence']
            prompt += "\nA:"
            golds.append(test_sentence[col])

            # Get answer from model
            model_inputs = tokenizer([prompt], return_tensors="pt").to(device)
            # answer = model.generate(prompt_tok,
            #                     top_p=0.9, temperature=0.1,
            #                     max_new_tokens=2)
            answer = model.generate(**model_inputs, pad_token_id=tokenizer.eos_token_id, max_new_tokens=len(test_sentence[col].split(" ")), do_sample=True)
            answer = tokenizer.batch_decode(answer)[0]
            if printAnswer:
                print(answer)
                print("################## ", len(test_sentence[col].split(" ")))
                printAnswer = False
            preds.append(parse_answer(answer))

        # Evaluate
        accuracy = compute_accuracy(preds, golds)
        print(f"{col} -- Accuracy: {accuracy:.2f}\n")
        f.write(f"{col} -- Accuracy: {accuracy:.2f}\n")