In [3]:
!pip install -r /content/drive/MyDrive/ANLP_indiv_project/requirements.txt



In [4]:
#For fine-tuned LLM

import pandas as pd
import torch
from datasets import Dataset
from tqdm import tqdm

from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    DataCollatorWithPadding,
)

class prompting_FT:

        def __init__(self, model="flant5"):
            if model == "flant5":
                self.checkpoint = "google/flan-t5-xl"
            elif model == "mt0":
                self.checkpoint = "bigscience/mt0-xxl"
            else:
                raise Exception("Select one of the following models: flant5 or mt0")

            self.tokenizer = AutoTokenizer.from_pretrained(self.checkpoint)
            self.model = AutoModelForSeq2SeqLM.from_pretrained(self.checkpoint, torch_dtype="auto", device_map="auto")

        def build_prompt(self, prompt_template: str, output_indicator: str, input_text: str):
            if prompt_template:
                prompt = f"{prompt_template} {input_text} {output_indicator}"
            else:
                raise NotImplementedError("Insert a template")
            return prompt

        def predict(self, prompt_template: str, output_indicator: str, data):
            with torch.no_grad():

                if isinstance(data, str):
                    texts = [self.build_prompt(prompt_template, output_indicator, data)]
                elif isinstance(data, pd.DataFrame):
                    texts = data['text'].tolist()
                    texts = [self.build_prompt(prompt_template, output_indicator, t) for t in texts]
                elif isinstance(data, list) and all(isinstance(t, str) for t in data):
                    texts = [self.build_prompt(prompt_template, output_indicator, t) for t in data]
                else:
                    raise ValueError('Input data must be either a string or a pandas DataFrame.')

                raw_dataset = Dataset.from_dict({"text": texts})

                proc_dataset = raw_dataset.map(
                    lambda x: self.tokenizer(
                        x["text"], truncation=True
                    ),  # truncate by default to maximum model length
                    batched=True,
                    load_from_cache_file=False,
                    desc="Running tokenizer on dataset",
                    remove_columns=["text"],
                )

                proc_dataset.set_format("torch")

                loader = torch.utils.data.DataLoader(
                    proc_dataset,
                    shuffle=False,
                    batch_size=512, #default
                    collate_fn=DataCollatorWithPadding(self.tokenizer),
                )

                predictions = []
                for i, batch in tqdm(
                    enumerate(loader), desc=self.checkpoint, total=len(texts) // 512
                ):
                    inputs = {k: v.to(self.model.device) for k, v in batch.items()}
                    outputs = self.model.generate(**inputs)

                    decoded = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)
                    predictions.extend(decoded)

                predictions =  list(map(str.lower, predictions))
            return predictions

In [5]:
#For running encoder.py

import pandas as pd
#from typing import List
import torch
#from datasets import Dataset
#from tqdm import tqdm

from openprompt.plms import load_plm
from openprompt.prompts import ManualTemplate
from openprompt.data_utils import InputExample
from openprompt.prompts import ManualVerbalizer
from openprompt import PromptForClassification
from openprompt import PromptDataLoader

class prompting:
        def __init__(self, model="roberta-base"):
            if model == "roberta-base":
                self.checkpoint = ("roberta","roberta-base")
            elif model == "roberta-large":
                self.checkpoint = ("roberta","roberta-large")
            elif model == "bert":
                self.checkpoint = ("bert","bert-base-uncased")
            elif model == "deberta-base":
                self.checkpoint = ("deberta-v3","microsoft/deberta-v3-base")
            elif model == "deberta-large":
                self.checkpoint = ("deberta-v3","microsoft/deberta-v3-large")
            elif model == "xlm-roberta":
                self.checkpoint = ("xlm-roberta-base","xlm-roberta-base")

            else:
                raise Exception("Select one of the following models: roberta-base, roberta-large, bert, deberta-base, deberta-large, xlm")

        def predict(self, template, verb_h, verb_nh, data):
            plm, tokenizer, model_config, WrapperClass = load_plm(self.checkpoint[0], self.checkpoint[1])

            promptTemplate = ManualTemplate(
                 text = f'{{"placeholder":"text_a"}} {template} {{"mask"}}',
                 tokenizer = tokenizer,
                 )

            classes = ["1", "0"]

            label_words = {
                "1": verb_h,
                "0": verb_nh
                }

            print(label_words)


            if isinstance(data, str):
                dataset = [InputExample(guid = 0,
                                        text_a = data)]
            elif isinstance(data, pd.DataFrame):
                dataset = [InputExample(guid = i,
                                        text_a = txt)
                                        for i, txt in enumerate(data["text"])]
            elif isinstance(data, list) and all(isinstance(t, str) for t in data):
                dataset = [InputExample(guid = i,
                                        text_a = txt)
                                        for i, txt in enumerate(data)]
            else:
                raise ValueError('Input data must be either a string or a pandas DataFrame.')


            promptVerbalizer = ManualVerbalizer(
                classes = classes,
                label_words = label_words,
                tokenizer = tokenizer,
            )

            promptModel = PromptForClassification(
                template = promptTemplate,
                plm = plm,
                verbalizer = promptVerbalizer,
            )

            data_loader = PromptDataLoader(
                dataset = dataset,
                tokenizer = tokenizer,
                template = promptTemplate,
                tokenizer_wrapper_class=WrapperClass
            )

            #promptModel = promptModel.cuda()

            predictions = []

            promptModel.eval()
            with torch.no_grad():
                for batch in data_loader:
                    batch = batch.to('cpu')#.cuda()
                    logits = promptModel(batch)
                    preds = torch.argmax(logits, dim = -1)
                    predictions.extend([classes[p] for p in preds.cpu().numpy().tolist()])

            mapper = {"0": "toxic", "1": "respectful"}

            return [mapper[k] for k in predictions]




In [6]:
# Load datasets
def load_datasets():
    datasets = {
        "DAVIDSON": "datasets/sampled_reduced_DAVIDSON.csv",
        #"DYNABENCH": "datasets/sampled_reduced_DYNABENCH.csv", #the tokenization keeps getting looped here
        #"MHS": "datasets/sampled_reduced_MHS.csv",
        #"MLMA": "datasets/sampled_reduced_MLMA.csv"
    }
    return datasets

# Function to calculate macro-F1 score
from sklearn.metrics import f1_score
def calculate_macro_f1(labels, preds):
    return f1_score(labels, preds, average='macro')

In [12]:
#from encoder_lms import prompting

prompt_template = "This text is"
verb_h = ["toxic"]
verb_nh = ["respectful"]

# Models: roberta-base, roberta-large, bert, deberta-base, deberta-large, xlm-roberta
enc_lms = prompting("roberta-base")

# The input can be a dataframe, a text or a list of texts
enc_lms.predict(prompt_template, verb_h, verb_nh, [
                "You seem nice.", "I hope you white devils get skin cancer and die"])




{'1': ['toxic'], '0': ['respectful']}


tokenizing: 2it [00:00, 438.69it/s]


['non-hate', 'hate']

In [6]:
!python --version

Python 3.10.12


In [25]:
#from encoder_lms import prompting

prompt_template = "This text is"
verb_h = ["toxic"]
verb_nh = ["respectful"]

texts = [
    "934 8616 i got a missed call from yo bitch",
    "RT @KINGTUNCHI_: Fucking with a bad bitch you gone need some money lil homie!",
    "RT @eanahS__: @1inkkofrosess lol my credit ain't no where near good, but I know the right man for the job.. that ho nice though!",
    "RT @Maxin_Betha Wipe the cum out of them faggot RT @80sbaby4life Contact lens is wildin can't see shit",
    "Niggas cheat on they bitch and don't expect no pay back WHATSOEVER. Yall just as naive as these bitches.",
    "Mother asked what I wanted for Christmas this year. Replied with for her to not go crazy, and my sister to not be a bitch.",
    "That's real dedication.. &#171;@arigold_88 This bitch got diarrhea and still here, shitting up the bathroom.&#187;",
    "Kiss IMO this conductor, it's 4:30 AM have some courtesy you bitch.",
    "My momma keep talking to me like bitch gtf I'm on Twitter.",
    "RT @karlos_budah: I kno this bitch a thot she wanna meet my momma now",
    "like a bitch pic is flirting nowadays",
    "@Nien_Nunb what did you search? Gay redneck episode 1 play?",
    "Halloween's over, and bitches still playing roles.",
    "fucking queer",
    "Why bitches quick to have a nigga baby, then be suprised when she taking care of it by herself?",
    "I stopped snapping on women and calling them bitches.",
    "@KeebitHalal @LogansWarning gotta love it when the islamofascist cow tries to make a retort about age, typical muzzie idiocy",
    "Idgaf about you flat chested hoes feelings either, you a grown ass man nigga.",
    "So yall give farwells to Lulu yet did all but shit on Kris? Ok bitches... ok",
    "@StunningStina their tortillas are trash though.",
    "RT @_sheswatsPOPPIN: js met a lil bitch wit a long name mufucka had more letters den Tennessee, I go gorilla ina pussy, RIP&#128527;",
    "i need one of these around the world twitter bitches cause these local facebook hoes aint cuttin it lol",
    "RT @JeffDuncan_: JSU coach Omar Johnson: \"It's what u do against the ball. U throw it, hit it, catch it. Doesn't matter if the Yankees are &#8230;",
    "i seen a nigga dressed to impress, hop out da bus n a bitch wit a phatty, get off at his stop too. Say somethin...GOD THROWIN U DA LOB",
    "2 or 3 hoes for everything &#128527;&#128520;&#128526;",
    "If I had hair, I would seriously go for a perm now. Genetics is a stone cold bitch.",
    "Ima start friend zoning these hoes &#128514;&#128514;&#128514;",
    "RT @bdha: http://t.co/zPrnmY7kQR What a shit heel.",
    "This bitch pregnant again &#128064;&#128514;&#128514;",
    "RT @davegetnmoney: We don't want no skinny bitches get out! WE WANT BIG BOOTY BITCHES!",
    "RT @VibeLord: your opinion of me is highly irrelevant bitch",
    "RT @BigSean: Suck my dick bitch RT @MaizeNBlue23: @bigsean not only are you terrible. You are why hip hop is dead",
    "RT @rebellious_midy: Ugly bitches definitely disguste me &#128534; ewww yuck!",
    "This hoe taken by the world smh",
    "RT @CauseWereGuys: How to get unlimited pussy in high school http://t.co/0yaDezrREr",
    "RT @lovexlilyy: don't play games with a girl who can play better, bitch.",
    "I'm just tryna get to sleep before the birds start getting rowdy.",
    "Kiss imik this bitch just added another 50 seconds to her story I think its a new record.",
    "Inde jus got me all hype at work!! I knew my bitch Candace ain't die",
    "@panchoftf red and yellow it should be then",
    "I, for one, welcome our new Eastern Panhandle overlords, and hope they do a better job than the yokels who got us here in the first place.",
    "@ronsonsolaqa I love you too pussy.... Lmaooo",
    "@DrunkAtChurch @MzYummyDread Slide up on a bad bitch/Dat bitch look good",
    "@strawberrycoca ling ling krunkchip",
    "@saucygyro she's a dyke &amp; looks like she's 12",
    "RT @dnumberbefore_2: She used to be more fun as a hoe.. this new you is to positive",
    "RT @YUNGRAJEE: No nigga ever ask before he hit either &#8220;@jaybeeTRENDz: Body counts don't matter! YALL treat all bitches the same anyway, st&#8230;",
    "&#8220;@AshlynWynns: I'm a \"mud shark\" because I have black friends. Makes perfect sense. &#128514;&#128557;&#128514;&#128557; #idiotic&#8221; @",
    "Shylock &amp; Wongs: 3 Incidents of Democrat Bigotry In 3 Weeks - Media Mum http://t.co/rX8ynPg1Yd via @BreitbartNews"]

# Models: roberta-base, roberta-large, bert, deberta-base, deberta-large, xlm-roberta
enc_lms = prompting("roberta-base")

# The input can be a dataframe, a text or a list of texts
predictions = enc_lms.predict(prompt_template, verb_h, verb_nh, texts)
predictions = ["1" if item == "toxic" else "0" for item in predictions]
actual_values = [
    "1", "1", "0", "1", "1", "1", "1", "1", "1", "1",
    "1", "1", "1", "1", "1", "1", "1", "1", "1", "0",
    "1", "1", "0", "1", "1", "1", "1", "1", "1", "1",
    "1", "1", "1", "1", "1", "1", "0", "1", "1", "0",
    "0", "1", "1", "0", "1", "1", "1", "0", "0"
]
labels = actual_values
macro_f1 = calculate_macro_f1(labels, predictions)
print(f"Macro-F1 Score for roberta-base on sampled data: {macro_f1}")




{'1': ['toxic'], '0': ['respectful']}


tokenizing: 49it [00:00, 801.59it/s]


Macro-F1 Score for roberta-base on sampled data: 0.125


Testing performance on different datasets

In [7]:
%cd /content/drive/MyDrive/ANLP_indiv_project

/content/drive/MyDrive/ANLP_indiv_project


In [27]:
models = ["roberta-base", "roberta-large", "bert"]
datasets = load_datasets()
template = "This text is"
verb_h = ["toxic"]
verb_nh = ["respectful"]

results = []

for model in models:
    prompt_model = prompting(model=model)
    for name, path in datasets.items():
        data = pd.read_csv(path)
        predictions = prompt_model.predict(template, verb_h, verb_nh, data)
        # Convert 'hate' to 1 and 'non-hate' to 0
        predictions = [1 if item == "toxic" else 0 for item in predictions]
        labels = data['label'].tolist()
        macro_f1 = calculate_macro_f1(labels, predictions)
        results.append((model, name, macro_f1))
        print(f"Macro-F1 Score for {model} on {name}: {macro_f1}")

# Print all results
for result in results:
    model, dataset_name, macro_f1 = result
    print(f"Model: {model}, Dataset: {dataset_name}, Macro-F1 Score: {macro_f1}")



{'1': ['toxic'], '0': ['respectful']}


tokenizing: 248it [00:00, 286.18it/s]


Macro-F1 Score for roberta-base on DAVIDSON: 0.13888888888888892




config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

{'1': ['toxic'], '0': ['respectful']}


tokenizing: 248it [00:00, 459.10it/s]


Macro-F1 Score for roberta-large on DAVIDSON: 0.13888888888888887




config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

{'1': ['toxic'], '0': ['respectful']}


tokenizing: 248it [00:00, 693.94it/s]


Macro-F1 Score for bert on DAVIDSON: 0.2603080957063258


KeyError: 'deberta-v3'

In [None]:
models = ["flant5","mt0"]
datasets = load_datasets()
template = "This text is"
verb_h = ["toxic"]
verb_nh = ["respectful"]

results = []

for model in models:
    prompt_model = prompting_FT(model=model)
    for name, path in datasets.items():
        data = pd.read_csv(path)
        predictions = prompt_model.predict(template, verb_h, data)
        # Convert 'hate' to 1 and 'non-hate' to 0
        predictions = [1 if item == verb_h else 0 for item in predictions]
        labels = data['label'].tolist()
        macro_f1 = calculate_macro_f1(labels, predictions)
        results.append((model, name, macro_f1))
        print(f"Macro-F1 Score for {model} on {name}: {macro_f1}")

# Print all results
for result in results:
    model, dataset_name, macro_f1 = result
    print(f"Model: {model}, Dataset: {dataset_name}, Macro-F1 Score: {macro_f1}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [28]:
from openprompt.plms import _MODEL_CLASSES

print(_MODEL_CLASSES.keys())


dict_keys(['bert', 'roberta', 'albert', 'gpt', 'gpt2', 't5', 't5-lm', 'opt'])
