pandas==2.2.2
transformers==4.29.0
torch==2.3.0
datasets==2.19.0
accelerate
openprompt>=1.0.1
scikit-learn

In [1]:
!pip install openprompt==1.0.1 transformers==4.21.1 pandas scikit-learn torch==2.3.0 datasets==2.19.0 accelerate

Collecting openprompt==1.0.1
  Downloading openprompt-1.0.1-py3-none-any.whl (146 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m146.4/146.4 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting transformers==4.21.1
  Downloading transformers-4.21.1-py3-none-any.whl (4.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.7/4.7 MB[0m [31m26.5 MB/s[0m eta [36m0:00:00[0m
Collecting datasets==2.19.0
  Downloading datasets-2.19.0-py3-none-any.whl (542 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m43.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.31.0-py3-none-any.whl (309 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m309.4/309.4 kB[0m [31m31.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting sentencepiece==0.1.96 (from openprompt==1.0.1)
  Downloading sentencepiece-0.1.96-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (

In [2]:
!pip install importlib-metadata



In [3]:
%cd /content/drive/MyDrive/ANLP_indiv_project

/content/drive/MyDrive/ANLP_indiv_project


In [4]:
import pandas as pd
import torch
from sklearn.metrics import f1_score
from openprompt.plms import load_plm
from openprompt.prompts import ManualTemplate
from openprompt.data_utils import InputExample
from openprompt.prompts import ManualVerbalizer
from openprompt import PromptForClassification
from openprompt import PromptDataLoader
from datasets import load_dataset

In [17]:
import logging
# Initialize logging
logging.basicConfig(level=logging.INFO)

#Checking encoder LLM ZSL

In [34]:
class prompting:
    def __init__(self, model="roberta-base"):
        if model == "roberta-base":
            self.checkpoint = ("roberta", "roberta-base")
        elif model == "roberta-large":
            self.checkpoint = ("roberta", "roberta-large")
        elif model == "bert":
            self.checkpoint = ("bert", "bert-base-uncased")
        elif model == "deberta-base":
            self.checkpoint = ("deberta-v3", "microsoft/deberta-v3-base")
        elif model == "deberta-large":
            self.checkpoint = ("deberta-v3", "microsoft/deberta-v3-large")
        elif model == "xlm-roberta":
            self.checkpoint = ("xlm-roberta-base", "xlm-roberta-base")
        else:
            raise Exception("Select one of the following models: roberta-base, roberta-large, bert, deberta-base, deberta-large, xlm")

    def predict(self, template, verb_h, verb_nh, data):
        plm, tokenizer, model_config, WrapperClass = load_plm(self.checkpoint[0], self.checkpoint[1])

        promptTemplate = ManualTemplate(
            text=f'{{"placeholder":"text_a"}} {template} {{"mask"}}',
            tokenizer=tokenizer,
        )

        classes = ["1", "0"]

        label_words = {
            "1": verb_h,
            "0": verb_nh
        }

        if isinstance(data, str):
            dataset = [InputExample(guid=0, text_a=data)]
        elif isinstance(data, pd.DataFrame):
            dataset = [InputExample(guid=i, text_a=txt) for i, txt in enumerate(data["text"])]
        elif isinstance(data, list) and all(isinstance(t, str) for t in data):
            dataset = [InputExample(guid=i, text_a=txt) for i, txt in enumerate(data)]
        else:
            raise ValueError('Input data must be either a string or a pandas DataFrame.')

        promptVerbalizer = ManualVerbalizer(
            classes=classes,
            label_words=label_words,
            tokenizer=tokenizer,
        )

        promptModel = PromptForClassification(
            template=promptTemplate,
            plm=plm,
            verbalizer=promptVerbalizer,
        )

        data_loader = PromptDataLoader(
            dataset=dataset,
            tokenizer=tokenizer,
            template=promptTemplate,
            tokenizer_wrapper_class=WrapperClass
        )

        predictions = []

        promptModel.eval()
        with torch.no_grad():
            for batch in data_loader:
                logits = promptModel(batch)
                preds = torch.argmax(logits, dim=-1)
                predictions.extend([classes[p] for p in preds.cpu().numpy().tolist()])

        mapper = {"0": "non-hate", "1": "hate"}

        return [mapper[k] for k in predictions]

# Load datasets
def load_datasets():
    datasets = {
        "DAVIDSON": "datasets/sampled_DAVIDSON.csv",
        "DYNABENCH": "datasets/sampled_DYNABENCH.csv",
        "MHS": "datasets/sampled_MHS.csv",
        "MLMA": "datasets/sampled_MLMA.csv"
    }
    return datasets

# Function to calculate macro-F1 score
def calculate_macro_f1(labels, preds):
    return f1_score(labels, preds, average='macro')

def tokenize_batch(batch, tokenizer):
    try:
        return tokenizer(batch, padding=True, truncation=True)
    except Exception as e:
        logging.error(f"Error tokenizing batch: {e}")
        return None

In [None]:
# Main function to run the evaluation
def main():
    models = ["roberta-base", "roberta-large", "bert", "deberta-base", "deberta-large", "xlm-roberta"]
    datasets = load_datasets()
    template = "This text is"
    verb_h = ["toxic"]
    verb_nh = ["respectful"]

    results = []

    for model in models:
        prompt_model = prompting(model=model)
        for name, data in datasets.items():
            predictions = prompt_model.predict(template, verb_h, verb_nh, data)
            labels = data['label'].tolist()
            macro_f1 = calculate_macro_f1(labels, predictions)
            results.append((model, name, macro_f1))
            print(f"Macro-F1 Score for {model} on {name}: {macro_f1}")

    # Print all results
    for result in results:
        model, dataset_name, macro_f1 = result
        print(f"Model: {model}, Dataset: {dataset_name}, Macro-F1 Score: {macro_f1}")


In [10]:
!pwd

/content/drive/MyDrive/ANLP_indiv_project


In [None]:

if __name__ == "__main__":
    main()

tokenizing: 24783it [00:32, 765.59it/s]


Separating the model inferences

In [37]:
from transformers import AutoTokenizer
def roberta_base_inference():
  model = "roberta-base"
  datasets= load_datasets()
  template = "This text is"
  verb_h = ["toxic"]
  verb_nh = ["respectful"]
  tokenizer = AutoTokenizer.from_pretrained(model)

  results = []

  prompt_model = prompting(model=model)
  for name, path in datasets.items():
          try:
              logging.info(f"Loading dataset {name} from {path}")
              data = pd.read_csv(path)
          except Exception as e:
              logging.error(f"Error loading dataset {name} from {path}: {e}")
              continue

          logging.info(f"Processing dataset {name}, type: {type(data)}")
          if not isinstance(data, pd.DataFrame):
              logging.error(f"Error: {name} is not a DataFrame")
              continue

          batch_size = 1000
          for i in range(0, len(data), batch_size):
              batch = data.iloc[i:i+batch_size]
              logging.info(f"Tokenizing batch {i // batch_size + 1}")
              texts = batch['text'].tolist()  # Ensure this is a list of strings

              # Log the type and length of the texts for debugging
              logging.info(f"Type of texts: {type(texts)}, length of texts: {len(texts)}")

              predictions = prompt_model.predict(template, verb_h, verb_nh, texts)
              labels = batch['label'].tolist()
              # Process predictions and labels as needed
              logging.info(f"Processed batch {i // batch_size + 1}")

In [15]:
print(load_datasets().items)

Loading dataset from datasets/processed_DAVIDSON.csv
Loading dataset from datasets/processed_DYNABENCH.csv
Loading dataset from datasets/processed_MHS.csv
Loading dataset from datasets/processed_MLMA.csv
<built-in method items of dict object at 0x7fb0bedd4dc0>


In [38]:
roberta_base_inference()

tokenizing: 1000it [00:01, 803.90it/s]


KeyboardInterrupt: 