In [None]:
import warnings
warnings.filterwarnings('ignore')
import locale
locale.getpreferredencoding = lambda x=False: "UTF-8"
! pip install -q transformers datasets evaluate
from transformers import AutoModelForCausalLM, BloomTokenizerFast, set_seed
# device = 'cuda'
import torch
import gc
# Run on the GPU when torch can see one; otherwise fall back to the CPU so the
# notebook still executes (slowly) instead of failing on the first `.to(device)`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
from datasets import load_dataset

In [None]:
def get_model(model_name):
  """Load the causal-LM checkpoint `model_name` and return it placed on `device`."""
  loaded = AutoModelForCausalLM.from_pretrained(model_name)
  return loaded.to(device)

def tokenize(text):
   """Encode `text` with the shared tokenizer as a PyTorch tensor moved to `device`."""
   input_ids = tokenizer.encode(text, return_tensors="pt")
   return input_ids.to(device)

# Single tokenizer shared by tokenize() and get_classification() for every model.
# NOTE(review): it is loaded from "bigscience/bloom" rather than from each
# evaluated checkpoint; this assumes all of them share that vocabulary — confirm.
tokenizer = BloomTokenizerFast.from_pretrained("bigscience/bloom")


In [None]:
import numpy as np

def get_classification(model, example):
    """Generate a short continuation for one example and score it against the gold answer.

    Args:
        model: object exposing ``generate``; called with the prompt token ids.
        example: row dict with a ``text`` prompt and an ``answers`` mapping whose
            ``text`` entry is a list of gold answers (only the first is used).

    Returns:
        The same ``example`` dict, with three keys added:
        ``response`` (decoded continuation, special tokens removed),
        ``prediction`` (1 if the first gold answer occurs in ``response``, else 0)
        and ``label`` (always 1).

    Raises:
        IndexError: if the gold-answer list is empty.
    """
    # Reuse the shared helper instead of repeating the encode + .to(device) call.
    inputs = tokenize(example.get('text'))
    # The per-step scores were never read, so they are no longer requested;
    # keeping them only held extra logits in memory for every example.
    outputs = model.generate(
        inputs,
        return_dict_in_generate=True,
        min_new_tokens=1,
        max_new_tokens=10,
    )
    # generate() echoes the prompt; keep only the newly generated tokens.
    generated_tokens = outputs.sequences[0][inputs.shape[1]:]
    # Strip special tokens (e.g. the end-of-sequence marker) so their markup
    # neither pollutes the saved response nor takes part in the substring match.
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    gold_answer = example.get('answers').get('text')[0]
    # An empty gold answer is a substring of every response; never count it as a hit.
    example['prediction'] = 1 if gold_answer and gold_answer in response else 0
    example['response'] = response
    # Every row is a positive reference: "correct" means prediction == 1.
    example['label'] = 1
    return example

def classify_tweets(model, dataset):
    """Run get_classification with `model` over every example via `dataset.map`.

    Returns whatever `dataset.map` returns for the per-example callback.
    """
    def _classify(example):
        return get_classification(model, example)

    return dataset.map(_classify)

In [None]:
# Hugging Face model ids to evaluate, in evaluation order.
models = [
    "bigscience/bloomz-1b7",
    "bigscience/bloom-1b7",
    "alonzogarbanzo/Bloom-1b7-ropes-IT-baseline",
    "alonzogarbanzo/Bloom-1b7-winograd-wsc-IT-baseline",
    "alonzogarbanzo/Bloom-1b7-ropes-Cont-IT-Step2",
    "alonzogarbanzo/Bloom-1b7-glue-mrpc-Cont-IT-Step3",
    "alonzogarbanzo/Bloom-1b7-dialogsum-Cont-IT-Step4",
    "alonzogarbanzo/Bloom-1b7-creative-writing-Cont-IT-Step5",
]

In [None]:
import evaluate
def calc_f1_score(dataset):
    """Compute the macro-averaged F1 of `dataset['prediction']` against `dataset['label']`.

    Returns the result of the `evaluate` "f1" metric's `compute` call.
    NOTE(review): get_classification sets label to 1 on every row, so macro
    averaging also averages over a class with no true samples — confirm intended.
    """
    compute_kwargs = {
        "predictions": dataset['prediction'],
        "references": dataset['label'],
        "average": 'macro',
    }
    return evaluate.load("f1").compute(**compute_kwargs)

def calc_accuracy(dataset):
    """Compute the accuracy of `dataset['prediction']` against `dataset['label']`.

    Returns the result of the `evaluate` "accuracy" metric's `compute` call.
    """
    compute_kwargs = {
        "predictions": dataset['prediction'],
        "references": dataset['label'],
    }
    return evaluate.load("accuracy").compute(**compute_kwargs)

def calc_precision(dataset):
    """Compute the macro-averaged precision of `dataset['prediction']` against `dataset['label']`.

    Returns the result of the `evaluate` "precision" metric's `compute` call.
    NOTE(review): get_classification sets label to 1 on every row, so macro
    averaging also averages over a class with no true samples — confirm intended.
    """
    compute_kwargs = {
        "predictions": dataset['prediction'],
        "references": dataset['label'],
        "average": 'macro',
    }
    return evaluate.load("precision").compute(**compute_kwargs)

def calc_recall(dataset):
    """Compute the macro-averaged recall of `dataset['prediction']` against `dataset['label']`.

    Returns the result of the `evaluate` "recall" metric's `compute` call.
    NOTE(review): get_classification sets label to 1 on every row, so macro
    averaging also averages over a class with no true samples — confirm intended.
    """
    compute_kwargs = {
        "predictions": dataset['prediction'],
        "references": dataset['label'],
        "average": 'macro',
    }
    return evaluate.load("recall").compute(**compute_kwargs)


In [None]:
import json
def evaluate_models(models, dataset_name, dataset, output_dir='./ropes'):
  """Evaluate each model on `dataset` and persist its predictions and metrics.

  For every model name this loads the model with get_model, annotates the
  dataset with classify_tweets, computes f1 / accuracy / precision / recall,
  writes the annotated dataset to `<output_dir>/<repo name>_<dataset_name>.csv`
  and rewrites `<output_dir>/evaluation_metrics_<dataset_name>.json`.

  Args:
    models: sequence of model ids in `owner/name` form (iterated more than once).
    dataset_name: label used in the output file names and the progress message.
    dataset: dataset handed to classify_tweets for every model.
    output_dir: directory receiving the CSV and JSON files; created if missing.

  Returns:
    dict mapping each model name to {metric name: result of the matching
    calc_* helper}.
  """
  import os  # local import: `os` is not imported before this cell in the notebook

  # The original relied on the directory having been created by hand.
  os.makedirs(output_dir, exist_ok=True)
  metrics_path = os.path.join(output_dir, f'evaluation_metrics_{dataset_name}.json')
  metric_functions = (
    ('f1', calc_f1_score),
    ('accuracy', calc_accuracy),
    ('precision', calc_precision),
    ('recall', calc_recall),
  )

  evaluation_data = {model_name: {} for model_name in models}
  evaluation_metrics = {model_name: {} for model_name in models}
  for model_name in models:
    model = get_model(model_name)
    try:
      prediction_dataset = classify_tweets(model, dataset)
    finally:
      # Release the model even when classification fails, so the next model
      # (or a re-run of the cell) does not compete with it for GPU memory.
      del model
      gc.collect()
      torch.cuda.empty_cache()

    for metric_name, metric_function in metric_functions:
      result = metric_function(prediction_dataset)
      evaluation_data[model_name][metric_name] = result
      # Keep only the scalar under the metric's own key for the JSON summary.
      evaluation_metrics[model_name][metric_name] = result.get(metric_name)

    # Use the repo name after the owner prefix. The previous fixed slice
    # `model_name[15:]` only matched a 15-character "owner/" prefix and turned
    # 'bigscience/bloomz-1b7' into 'mz-1b7'.
    repo_name = model_name.split('/')[-1]
    prediction_dataset.to_csv(os.path.join(output_dir, f'{repo_name}_{dataset_name}.csv'))
    print(f'{model_name} {dataset_name} evaluation complete.\n {evaluation_metrics[model_name]}')

    # Rewritten after every model so partial results survive an interrupted run.
    with open(metrics_path, 'w') as f:
      json.dump(evaluation_metrics, f, indent=4)
  return evaluation_data

In [None]:
def transform_dataset(dataset, transform_function):
  """Apply `transform_function` to each example through `dataset.map`.

  Returns whatever `dataset.map` produces for the per-example callback.
  """
  def _apply(example):
    return transform_function(example)

  return dataset.map(_apply)

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

def display_cm(dataset, model_name, class_labels=None):
  """Plot the confusion matrix of `dataset['prediction']` vs `dataset['label']`.

  Args:
    dataset: mapping-like object with 'prediction' and 'label' columns.
    model_name: name shown in the plot title.
    class_labels: labels (and their order) for the matrix rows/columns.
      Defaults to [0, 1, 2], the previous hard-coded value.
      NOTE(review): get_classification in this notebook only emits 0/1
      predictions and label 1, so [0, 1] is probably the right value here.
  """
  if class_labels is None:
    class_labels = [0, 1, 2]
  predictions = dataset['prediction']
  references = dataset['label']
  cm = confusion_matrix(references, predictions, labels=class_labels)
  # Draw on axes we create ourselves. Without `ax=`, `disp.plot()` opens its own
  # figure, so the earlier `plt.figure(figsize=...)` was left blank and the
  # requested size never applied to the matrix.
  fig, ax = plt.subplots(figsize=(8, 6))
  disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_labels)
  disp.plot(ax=ax)
  ax.set_title(f'{model_name} Confusion Matrix')
  plt.show()


In [None]:
import random
def transform(example):
    """Add a `text` prompt to `example`, built from its ROPES fields.

    One of three instruction prefixes is picked with `random.choice`, followed
    by the example's `background`, `situation` and `question` values (a missing
    key is rendered as ``None``) and a trailing ``Answer: `` cue. The example is
    modified in place and returned.
    """
    prompt = random.choice([
        "Given the following background and situation, answer the question: ",
        "Based on the background information and the current situation, what is the answer to the question? ",
        "Considering the background and the described situation, provide an answer to this question: ",
    ])
    background = example.get('background')
    situation = example.get('situation')
    question = example.get('question')
    example["text"] = (
        f"{prompt}Background: {background} "
        f"Situation: {situation} "
        f"Question: {question} Answer: "
    )
    return example

In [None]:
# Used as the `data_dir` passed to load_dataset and as the dataset label handed
# to evaluate_models (where it appears in output file names and log messages).
dataset_name = "ropes"

In [None]:
# Load the "test" split of the `dataset_name` sub-directory of the
# adambjorn/UnrelatedForgettingOverhead dataset repository.
dataset = load_dataset("adambjorn/UnrelatedForgettingOverhead", data_dir=dataset_name, split="test")


In [None]:
# Seed the RNGs before building prompts: transform() picks each prompt prefix
# with random.choice, so without a fixed seed the prompts (and therefore the
# reported metrics) change from run to run.
SEED = 42
set_seed(SEED)
# Add the `text` prompt column, then evaluate every model in `models` on it.
dataset = transform_dataset(dataset, transform)
data = evaluate_models(models, dataset_name, dataset)

Map:   0%|          | 0/75 [00:00<?, ? examples/s]

Creating CSV from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

bigscience/bloomz-1b7 ropes evaluation complete.
 {'f1': 0.4274809160305344, 'accuracy': 0.7466666666666667, 'precision': 0.5, 'recall': 0.37333333333333335}


Map:   0%|          | 0/75 [00:00<?, ? examples/s]

Creating CSV from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

bigscience/bloom-1b7 ropes evaluation complete.
 {'f1': 0.27884615384615385, 'accuracy': 0.38666666666666666, 'precision': 0.5, 'recall': 0.19333333333333333}


config.json:   0%|          | 0.00/805 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

Map:   0%|          | 0/75 [00:00<?, ? examples/s]

Creating CSV from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

alonzogarbanzo/Bloom-1b7-ropes-IT-baseline ropes evaluation complete.
 {'f1': 0.3697478991596639, 'accuracy': 0.5866666666666667, 'precision': 0.5, 'recall': 0.29333333333333333}


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Map:   0%|          | 0/75 [00:00<?, ? examples/s]

Creating CSV from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

alonzogarbanzo/Bloom-1b7-winograd-wsc-IT-baseline ropes evaluation complete.
 {'f1': 0.0625, 'accuracy': 0.06666666666666667, 'precision': 0.5, 'recall': 0.03333333333333333}


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Map:   0%|          | 0/75 [00:00<?, ? examples/s]

Creating CSV from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

alonzogarbanzo/Bloom-1b7-ropes-Cont-IT-Step2 ropes evaluation complete.
 {'f1': 0.364406779661017, 'accuracy': 0.5733333333333334, 'precision': 0.5, 'recall': 0.2866666666666667}


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Map:   0%|          | 0/75 [00:00<?, ? examples/s]

Creating CSV from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

alonzogarbanzo/Bloom-1b7-glue-mrpc-Cont-IT-Step3 ropes evaluation complete.
 {'f1': 0.025974025974025976, 'accuracy': 0.02666666666666667, 'precision': 0.5, 'recall': 0.013333333333333334}


Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

Map:   0%|          | 0/75 [00:00<?, ? examples/s]

Creating CSV from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

alonzogarbanzo/Bloom-1b7-dialogsum-Cont-IT-Step4 ropes evaluation complete.
 {'f1': 0.2718446601941748, 'accuracy': 0.37333333333333335, 'precision': 0.5, 'recall': 0.18666666666666668}


config.json:   0%|          | 0.00/833 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.5k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.95G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

Map:   0%|          | 0/75 [00:00<?, ? examples/s]

Creating CSV from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

alonzogarbanzo/Bloom-1b7-creative-writing-Cont-IT-Step5 ropes evaluation complete.
 {'f1': 0.2346938775510204, 'accuracy': 0.30666666666666664, 'precision': 0.5, 'recall': 0.15333333333333332}


In [None]:
import os
import zipfile

def zip_directory(folder_path, output_filename):
    """Write every file under `folder_path` into a deflate-compressed zip archive.

    Entries are stored relative to the folder's parent, so the folder's own name
    is the top-level directory inside the archive ('./ropes' -> 'ropes/...').

    Args:
        folder_path: directory to archive; a trailing path separator is ignored.
        output_filename: path of the zip file to create (overwritten if present).

    Raises:
        NotADirectoryError: if `folder_path` is not an existing directory
            (previously this silently produced an empty archive).
    """
    # Normalise first: for './ropes/' dirname() would return the folder itself
    # and the top-level 'ropes/' prefix would be dropped from every entry.
    folder_path = os.path.normpath(folder_path)
    if not os.path.isdir(folder_path):
        raise NotADirectoryError(f"Not a directory: {folder_path!r}")

    parent_dir = os.path.dirname(folder_path)
    output_abspath = os.path.abspath(output_filename)

    with zipfile.ZipFile(output_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(folder_path):
            # Sort for a deterministic traversal and entry order.
            dirs.sort()
            for file in sorted(files):
                file_path = os.path.join(root, file)
                # Never add the archive to itself when it lives inside the folder.
                if os.path.abspath(file_path) == output_abspath:
                    continue
                # Path inside the archive: relative to the folder's parent.
                arcname = os.path.relpath(file_path, parent_dir)
                zipf.write(file_path, arcname)

# Bundle the ./ropes directory (where evaluate_models writes its CSV and JSON
# files) into a single archive.
folder_path = './ropes'  # directory to archive
output_filename = 'ropes.zip'  # archive to create, relative to the working directory
zip_directory(folder_path, output_filename)
