In [1]:
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, TrainingArguments, logging
from tqdm import tqdm
logging.set_verbosity_error()
from peft import LoraConfig, PeftConfig, PeftModel
from trl import SFTTrainer
import torch
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix
)
import numpy as np
import os
import sys
import gc
# Put the sibling "milestone 2" directory on the import path so the
# preprocessing module that lives there can be imported from this notebook.
milestone_2_path = os.path.abspath("../milestone 2")
sys.path.append(milestone_2_path)

# Import the module object itself (not only its names) so it can be reloaded:
# reload() re-executes the module, picking up edits made since the kernel started.
import import_preprocess
from importlib import reload
reload(import_preprocess)

# Pull the helpers from the freshly reloaded module.
from import_preprocess import ImportPreprocess, convert_labels_to_string, convert_labels_to_int

2025-01-12 13:16:38.886789: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-12 13:16:38.886820: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-12 13:16:38.887963: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-01-12 13:16:38.893817: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Some initial testing with different Llama models

In [2]:
# # model_id = "meta-llama/Llama-3.2-3B"
# model_id = "meta-llama/Llama-3.2-3B-Instruct"
# tokenizer = AutoTokenizer.from_pretrained(model_id)
#
# # gptq_config = GPTQConfig(
# #     bits=bits,
# #     tokenizer=tokenizer,
# #     use_exllama=False,
# #     use_cuda_fp16=True,
# #     dataset="c4-new"
# # )
#
# model = AutoModelForCausalLM.from_pretrained(
#     model_id,
#     device_map="cuda:0"
# )
#
# model.config.use_cache = False
#
# pipe = pipeline(
#     "text-generation",
#     model=model,
#     tokenizer=tokenizer,
#     # torch_dtype=torch.float16,
#     # device="",
# )
# # quantized_model.save_pretrained(f"test-{model_name}-{bits}bit")

In [3]:
# tokenizer.pad_token_id = tokenizer.eos_token_id
# model.config.use_cache = False
#
# messages = [{"role": "user", "content": "Tell me something unexpected about austria"}]
#
# prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# # outputs = pipe(prompt, max_new_tokens=120, do_sample=True, temperature=0.7)
# outputs = pipe(prompt, max_new_tokens=1000)#, temperature=1.0)
# print(outputs[0]["generated_text"])

# Load data

In [2]:
# Build the preprocessing helper, load the train/val/test splits and derive a
# balanced training set.
# NOTE: this rebinds the name `import_preprocess` from the imported module to an
# instance of ImportPreprocess.
import_preprocess = ImportPreprocess()
import_preprocess.import_train_val_test()
import_preprocess.create_balanced_dataset(n_samples=7000)

# The X_* attributes are not used in this notebook, hence the throwaway `_`.
_, y_train, S_train, y_train_multi = (
    import_preprocess.X_train,
    import_preprocess.y_train,
    import_preprocess.S_train,
    import_preprocess.y_train_multi,
)
_, y_val, S_val, y_val_multi = (
    import_preprocess.X_val,
    import_preprocess.y_val,
    import_preprocess.S_val,
    import_preprocess.y_val_multi,
)
_, y_test, S_test, y_test_multi = (
    import_preprocess.X_test,
    import_preprocess.y_test,
    import_preprocess.S_test,
    import_preprocess.y_test_multi,
)
_, y_balanced, S_balanced, y_balanced_multi = (
    import_preprocess.X_train_balanced,
    import_preprocess.y_train_balanced,
    import_preprocess.S_train_balanced,
    import_preprocess.y_train_multi_balanced,
)

# Report the size of every split, one per line.
print(
    f"Number of training samples: {len(y_train)}",
    f"Number of validation samples: {len(y_val)}",
    f"Number of test samples: {len(y_test)}",
    f"Number of balanced samples: {len(y_balanced)}",
    sep="\n",
)

# One frame per split with the columns: comment, label, multi_label.
train_df = pd.DataFrame(dict(comment=S_train, label=y_train, multi_label=y_train_multi))
val_df = pd.DataFrame(dict(comment=S_val, label=y_val, multi_label=y_val_multi))
test_df = pd.DataFrame(dict(comment=S_test, label=y_test, multi_label=y_test_multi))
balanced_df = pd.DataFrame(dict(comment=S_balanced, label=y_balanced, multi_label=y_balanced_multi))

Number of training samples: 14000
Number of validation samples: 2000
Number of test samples: 4000
Number of balanced samples: 14000


In [3]:
def generate_prompt(entry):
    """Build the supervised prompt (instruction, text and gold label) for one row.

    Args:
        entry: Mapping-like row providing the ``"comment"`` and ``"label"`` keys.

    Returns:
        str: Three newline-separated lines with surrounding whitespace stripped.
    """
    # NOTE(review): the wording `or not "sexist"` looks like a misplaced quote
    # (the label itself is "not sexist"); it is kept verbatim because the
    # inference prompt must stay identical to the training prompt.
    instruction = 'Classify the text into "sexist" or not "sexist", and return the answer as the corresponding label.'
    prompt_lines = [
        instruction,
        f'text: {entry["comment"]}',
        f'label: {entry["label"]}',
    ]
    return "\n".join(prompt_lines).strip()

def generate_test_prompt(entry):
    """Build the inference prompt for one row, leaving the label open.

    Args:
        entry: Mapping-like row providing the ``"comment"`` key.

    Returns:
        str: The instruction and text lines followed by a bare ``label:`` line,
        with surrounding whitespace stripped so the prompt ends in ``label:``.
    """
    # NOTE(review): the wording `or not "sexist"` looks like a misplaced quote;
    # it is kept verbatim so that it matches the prompt used for training.
    instruction = 'Classify the text into "sexist" or not "sexist", and return the answer as the corresponding label.'
    prompt_lines = [
        instruction,
        f'text: {entry["comment"]}',
        "label:",
    ]
    return "\n".join(prompt_lines).strip()

# Splits used for training/validation get the prompt that includes the gold label ...
for frame in (train_df, val_df, balanced_df):
    frame["text"] = frame.apply(generate_prompt, axis=1)

# ... while the test split gets the open-ended prompt the model has to complete.
test_df["text"] = test_df.apply(generate_test_prompt, axis=1)

# Show the class distribution of the training split.
train_df["label"].value_counts()

label
not sexist    10602
sexist         3398
Name: count, dtype: int64

In [4]:
# Wrap every pandas frame in a `datasets.Dataset` (same order as the targets on the left).
train_data, val_data, balanced_data, test_data = (
    Dataset.from_pandas(frame)
    for frame in (train_df, val_df, balanced_df, test_df)
)

# Evaluate model without fine-tuning

In [5]:
def clear_memory():
    """Drop the notebook-level ``model`` and ``tokenizer`` and release cached memory.

    Each name is removed independently, so one of them being undefined does not
    keep the other one alive. Calling this when neither exists is a no-op apart
    from the garbage collection and CUDA cache flush.
    """
    namespace = globals()
    for name in ("model", "tokenizer"):
        # pop() with a default never raises, unlike `del` on a missing name.
        namespace.pop(name, None)

    # Collect first so freed tensors are actually returned to the caching
    # allocator before its cache is emptied.
    gc.collect()
    torch.cuda.empty_cache()

In [8]:
# Free any previously loaded model before loading the base checkpoint.
clear_memory()

model_id = "meta-llama/Llama-3.2-3B-Instruct"

# Tokenizer: reuse the end-of-sequence token as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token_id = tokenizer.eos_token_id

# Model: place it on the first CUDA device and turn the generation cache off.
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda:0")
model.config.use_cache = False

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [9]:
def predict(test_data, model, tokenizer, categories, temperature=0.01):
    """Generate a label for every prompt in ``test_data`` and map it to a category.

    Args:
        test_data: Indexable collection (supports ``len`` and integer indexing)
            whose items expose the prompt under the ``"text"`` key.
        model: Model handed to the text-generation pipeline.
        tokenizer: Tokenizer handed to the text-generation pipeline.
        categories: Allowed label strings; the generated answer is lower-cased
            and must equal one of them exactly.
        temperature: Temperature forwarded to the pipeline.

    Returns:
        list[str]: One entry per prompt, either the matching category or
        ``"none"`` when the generated answer matches no category.
    """
    # The pipeline does not depend on the prompt, so build it once instead of
    # once per sample.
    pipe = pipeline(
        task="text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=2,
        temperature=temperature,
    )
    categories = list(categories)

    y_pred = []
    for i in tqdm(range(len(test_data))):
        prompt = test_data[i]["text"]
        result = pipe(prompt)

        # The generated text contains the prompt; the answer is whatever follows
        # the last "label:" marker.
        answer = result[0]["generated_text"].split("label:")[-1].strip().lower()
        y_pred.append(answer if answer in categories else "none")

    return y_pred

In [10]:
def evaluate(y_true, y_pred):
    """Print overall accuracy, per-class accuracy, a classification report and a confusion matrix.

    Labels are encoded by their position in ``["not sexist", "sexist", "none"]``.
    Any other string is encoded as ``-1``: it counts as an error in the overall
    accuracy and is left out of the report and the confusion matrix, which are
    restricted to the three known labels.

    Args:
        y_true: Iterable of gold label strings.
        y_pred: Iterable of predicted label strings, aligned with ``y_true``.

    Returns:
        None. All results are printed.
    """
    labels = ["not sexist", "sexist", "none"]
    mapping = {label: idx for idx, label in enumerate(labels)}
    label_ids = list(range(len(labels)))

    def encode(values):
        # Unknown strings map to -1; an explicit dtype keeps empty input valid.
        return np.array([mapping.get(value, -1) for value in values], dtype=int)

    y_true_mapped = encode(y_true)
    y_pred_mapped = encode(y_pred)

    # Overall accuracy
    accuracy = accuracy_score(y_true=y_true_mapped, y_pred=y_pred_mapped)
    print(f'Accuracy: {accuracy:.3f}')

    # Per-class accuracy, in ascending label order, over the classes present in y_true
    for label in np.unique(y_true_mapped):
        mask = y_true_mapped == label
        label_accuracy = accuracy_score(y_true_mapped[mask], y_pred_mapped[mask])
        # -1 must not be used as a list index: it would be reported as "none".
        label_name = labels[label] if label >= 0 else "<unmapped>"
        print(f'Accuracy for label {label_name}: {label_accuracy:.3f}')

    # Classification report
    class_report = classification_report(
        y_true=y_true_mapped,
        y_pred=y_pred_mapped,
        target_names=labels,
        labels=label_ids,
        zero_division=0,
    )
    print('\nClassification Report:')
    print(class_report)

    # Confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true_mapped, y_pred=y_pred_mapped, labels=label_ids)
    print('\nConfusion Matrix:')
    print(conf_matrix)

In [11]:
# Score the model that has not been fine-tuned on the test prompts.
y_pred = predict(test_data, model, tokenizer, categories=["not sexist", "sexist"])
evaluate(y_true=y_test, y_pred=y_pred)

100%|██████████| 4000/4000 [03:07<00:00, 21.38it/s]

Accuracy: 0.666
Accuracy for label not sexist: 0.650
Accuracy for label sexist: 0.713

Classification Report:
              precision    recall  f1-score   support

  not sexist       0.88      0.65      0.75      3030
      sexist       0.40      0.71      0.51       970
        none       0.00      0.00      0.00         0

    accuracy                           0.67      4000
   macro avg       0.42      0.45      0.42      4000
weighted avg       0.76      0.67      0.69      4000


Confusion Matrix:
[[1971 1055    4]
 [ 276  692    2]
 [   0    0    0]]





# Define functions for fine-tuning

In [12]:
def find_all_linear_names(model):
    """Collect the leaf names of all ``torch.nn.Linear`` sub-modules of ``model``.

    The leaf name is the last dot-separated component of the module path.
    ``lm_head`` is always excluded (the original author notes this is needed
    for 16-bit training).

    Args:
        model: A ``torch.nn.Module`` whose sub-modules are inspected.

    Returns:
        list[str]: Unique leaf names in sorted order, so the result is
        reproducible across runs.
    """
    linear_names = {
        name.split('.')[-1]
        for name, module in model.named_modules()
        if isinstance(module, torch.nn.Linear)
    }
    linear_names.discard('lm_head')  # needed for 16 bit
    return sorted(linear_names)

In [13]:
def fine_tune(model, tokenizer, train_dataset, eval_dataset, fine_tuned_dir, new_model_dir, epochs):
    """Fine-tune ``model`` with LoRA adapters, then merge and save the result.

    Args:
        model: Causal language model to fine-tune.
        tokenizer: Tokenizer belonging to ``model``.
        train_dataset: Dataset passed to the trainer for training.
        eval_dataset: Dataset passed to the trainer for evaluation.
        fine_tuned_dir: Directory for trainer output and for the saved adapter
            and tokenizer.
        new_model_dir: Directory for the merged model and tokenizer.
        epochs: Number of training epochs.

    Returns:
        tuple: ``(merged_model, tokenizer)``.
    """
    # Attach LoRA adapters to every linear layer except the output head.
    modules = find_all_linear_names(model)

    peft_config = LoraConfig(
        lora_alpha=16,
        lora_dropout=0,
        r=64,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=modules,
    )

    training_arguments = TrainingArguments(
        output_dir=fine_tuned_dir,                # directory for trainer output / checkpoints
        num_train_epochs=epochs,                  # number of training epochs
        per_device_train_batch_size=1,            # batch size per device during training
        gradient_accumulation_steps=8,            # number of steps before performing a backward/update pass
        gradient_checkpointing=True,              # use gradient checkpointing to save memory
        optim="paged_adamw_32bit",
        logging_steps=1,
        learning_rate=2e-4,                       # learning rate, based on QLoRA paper
        weight_decay=0.001,
        fp16=True,
        bf16=False,
        max_grad_norm=0.3,                        # max gradient norm based on QLoRA paper
        max_steps=-1,
        warmup_ratio=0.03,                        # warmup ratio based on QLoRA paper
        group_by_length=False,
        lr_scheduler_type="cosine",               # use cosine learning rate scheduler
        eval_strategy="steps",                    # evaluate at step intervals (not per epoch)
        eval_steps=0.2,                           # a float < 1 is a fraction of the total training steps
    )

    trainer = SFTTrainer(
        model=model,
        args=training_arguments,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        peft_config=peft_config,
        tokenizer=tokenizer
    )

    trainer.train()

    # Save the adapter and tokenizer to the directory passed by the caller,
    # never to a notebook-level global.
    trainer.save_model(fine_tuned_dir)
    tokenizer.save_pretrained(fine_tuned_dir)

    # Reload the saved adapter on top of the model and fold it into the weights.
    # NOTE(review): `model` is the object that was handed to the trainer; confirm
    # that re-wrapping it (rather than a freshly loaded base model) is intended.
    model = PeftModel.from_pretrained(model, fine_tuned_dir)
    model = model.merge_and_unload()

    model.save_pretrained(new_model_dir)
    tokenizer.save_pretrained(new_model_dir)

    del trainer

    return model, tokenizer

# Fine-tune model on train set

In [14]:
# Start from a clean slate: drop whatever model is currently loaded.
clear_memory()

model_id = "meta-llama/Llama-3.2-3B-Instruct"
output_dir = "llama-3.2-fine-tuned-train"
new_model_dir = "Llama-3.2-3B-Instruct-Sexism-Classification-Train"

# Tokenizer: reuse the end-of-sequence token as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token_id = tokenizer.eos_token_id

# Model: load on the first CUDA device with the generation cache turned off.
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda:0")
model.config.use_cache = False

# One epoch on the original training split, evaluated on the validation split.
model, tokenizer = fine_tune(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_data,
    eval_dataset=val_data,
    fine_tuned_dir=output_dir,
    new_model_dir=new_model_dir,
    epochs=1,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  trainer = SFTTrainer(


Map:   0%|          | 0/14000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

{'loss': 33.9803, 'grad_norm': 5.636348247528076, 'learning_rate': 3.7735849056603773e-06, 'epoch': 0.0005714285714285715}
{'loss': 31.6195, 'grad_norm': 5.700258255004883, 'learning_rate': 7.547169811320755e-06, 'epoch': 0.001142857142857143}
{'loss': 31.1078, 'grad_norm': 4.9772820472717285, 'learning_rate': 1.1320754716981132e-05, 'epoch': 0.0017142857142857142}
{'loss': 32.858, 'grad_norm': 5.032081604003906, 'learning_rate': 1.509433962264151e-05, 'epoch': 0.002285714285714286}
{'loss': 32.1841, 'grad_norm': 5.080230712890625, 'learning_rate': 1.8867924528301888e-05, 'epoch': 0.002857142857142857}
{'loss': 30.8043, 'grad_norm': 5.183412075042725, 'learning_rate': 2.2641509433962265e-05, 'epoch': 0.0034285714285714284}
{'loss': 30.3596, 'grad_norm': 4.974921703338623, 'learning_rate': 2.641509433962264e-05, 'epoch': 0.004}
{'loss': 31.3661, 'grad_norm': nan, 'learning_rate': 2.641509433962264e-05, 'epoch': 0.004571428571428572}
{'loss': 32.875, 'grad_norm': 5.208718299865723, 'lear



{'loss': 15.0099, 'grad_norm': 2.0205161571502686, 'learning_rate': 0.00016767078845955876, 'epoch': 0.2862857142857143}
{'loss': 13.3975, 'grad_norm': 1.8167221546173096, 'learning_rate': 0.00016753437306906444, 'epoch': 0.28685714285714287}
{'loss': 11.3787, 'grad_norm': 1.818273901939392, 'learning_rate': 0.00016739772622663868, 'epoch': 0.2874285714285714}
{'loss': 13.5452, 'grad_norm': 2.116971015930176, 'learning_rate': 0.00016726084840059367, 'epoch': 0.288}
{'loss': 15.1088, 'grad_norm': 1.8673913478851318, 'learning_rate': 0.0001671237400600334, 'epoch': 0.2885714285714286}
{'loss': 15.7039, 'grad_norm': 2.574392795562744, 'learning_rate': 0.00016698640167485168, 'epoch': 0.28914285714285715}
{'loss': 13.5244, 'grad_norm': 2.1919968128204346, 'learning_rate': 0.00016684883371573093, 'epoch': 0.2897142857142857}
{'loss': 14.2572, 'grad_norm': 1.8558319807052612, 'learning_rate': 0.00016671103665414014, 'epoch': 0.29028571428571426}
{'loss': 15.2577, 'grad_norm': 2.0854070186614



{'loss': 14.3645, 'grad_norm': 1.9327473640441895, 'learning_rate': 1.0602912141225752e-05, 'epoch': 0.8577142857142858}
{'loss': 14.9206, 'grad_norm': 2.0385379791259766, 'learning_rate': 1.052010566152808e-05, 'epoch': 0.8582857142857143}
{'loss': 13.2511, 'grad_norm': 2.249260425567627, 'learning_rate': 1.0437605844844045e-05, 'epoch': 0.8588571428571429}
{'loss': 17.2176, 'grad_norm': 2.009589910507202, 'learning_rate': 1.0355412973914802e-05, 'epoch': 0.8594285714285714}
{'loss': 13.6912, 'grad_norm': 2.049252986907959, 'learning_rate': 1.0273527330429455e-05, 'epoch': 0.86}
{'loss': 11.2514, 'grad_norm': 1.7137641906738281, 'learning_rate': 1.0191949195024175e-05, 'epoch': 0.8605714285714285}
{'loss': 12.8583, 'grad_norm': 1.8488352298736572, 'learning_rate': 1.011067884728134e-05, 'epoch': 0.8611428571428571}
{'loss': 10.8574, 'grad_norm': 2.212052583694458, 'learning_rate': 1.0029716565728408e-05, 'epoch': 0.8617142857142858}
{'loss': 15.5583, 'grad_norm': 1.9721678495407104, '

In [15]:
# Score the model returned by the preceding fine-tuning cell on the test prompts.
y_pred = predict(test_data, model, tokenizer, categories=["not sexist", "sexist"])
evaluate(y_true=y_test, y_pred=y_pred)

100%|██████████| 4000/4000 [03:11<00:00, 20.86it/s]


Accuracy: 0.882
Accuracy for label not sexist: 0.945
Accuracy for label sexist: 0.688

Classification Report:
              precision    recall  f1-score   support

  not sexist       0.90      0.94      0.92      3030
      sexist       0.80      0.69      0.74       970
        none       0.00      0.00      0.00         0

    accuracy                           0.88      4000
   macro avg       0.57      0.54      0.55      4000
weighted avg       0.88      0.88      0.88      4000


Confusion Matrix:
[[2863  167    0]
 [ 303  667    0]
 [   0    0    0]]


# Fine-tune model on balanced data set

In [16]:
model_id = "meta-llama/Llama-3.2-3B-Instruct"
output_dir_base = "llama-3.2-fine-tuned-balanced"
new_model_dir_base = "Llama-3.2-3B-Instruct-Sexism-Classification-Balanced"

# Disabled experiment: repeated fine-tuning rounds that restart from the
# previously merged model.
# for i in range(5):
# if i != 0:
#     model_id = f"{new_model_dir_base}-{i}"
epochs = 1

# Directory names carry the epoch count as a suffix.
output_dir = f"{output_dir_base}-{epochs}"
new_model_dir = f"{new_model_dir_base}-{epochs}"

# Drop whatever model is currently loaded before loading the base checkpoint again.
clear_memory()

# Tokenizer: reuse the end-of-sequence token as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token_id = tokenizer.eos_token_id

# Model: load on the first CUDA device with the generation cache turned off.
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda:0")
model.config.use_cache = False

# Train on the balanced split, evaluate on the validation split.
model, tokenizer = fine_tune(
    model=model,
    tokenizer=tokenizer,
    train_dataset=balanced_data,
    eval_dataset=val_data,
    fine_tuned_dir=output_dir,
    new_model_dir=new_model_dir,
    epochs=epochs,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  trainer = SFTTrainer(


Map:   0%|          | 0/14000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

{'loss': 30.765, 'grad_norm': 5.251967430114746, 'learning_rate': 3.7735849056603773e-06, 'epoch': 0.0005714285714285715}
{'loss': 31.2821, 'grad_norm': 4.6755805015563965, 'learning_rate': 7.547169811320755e-06, 'epoch': 0.001142857142857143}
{'loss': 31.2689, 'grad_norm': 5.382378578186035, 'learning_rate': 1.1320754716981132e-05, 'epoch': 0.0017142857142857142}
{'loss': 30.5612, 'grad_norm': 5.037851333618164, 'learning_rate': 1.509433962264151e-05, 'epoch': 0.002285714285714286}
{'loss': 32.3039, 'grad_norm': 5.508377552032471, 'learning_rate': 1.8867924528301888e-05, 'epoch': 0.002857142857142857}
{'loss': 34.0859, 'grad_norm': 5.3038411140441895, 'learning_rate': 2.2641509433962265e-05, 'epoch': 0.0034285714285714284}
{'loss': 29.5747, 'grad_norm': 5.524142265319824, 'learning_rate': 2.641509433962264e-05, 'epoch': 0.004}
{'loss': 31.6042, 'grad_norm': nan, 'learning_rate': 2.641509433962264e-05, 'epoch': 0.004571428571428572}
{'loss': 30.3224, 'grad_norm': 4.376418113708496, 'le



{'loss': 15.3345, 'grad_norm': 2.4086215496063232, 'learning_rate': 0.00016767078845955876, 'epoch': 0.2862857142857143}
{'loss': 15.3803, 'grad_norm': 2.285611391067505, 'learning_rate': 0.00016753437306906444, 'epoch': 0.28685714285714287}
{'loss': 11.7627, 'grad_norm': 2.3156628608703613, 'learning_rate': 0.00016739772622663868, 'epoch': 0.2874285714285714}
{'loss': 11.43, 'grad_norm': 2.351402997970581, 'learning_rate': 0.00016726084840059367, 'epoch': 0.288}
{'loss': 13.6528, 'grad_norm': 2.4265129566192627, 'learning_rate': 0.0001671237400600334, 'epoch': 0.2885714285714286}
{'loss': 17.1748, 'grad_norm': 2.5221033096313477, 'learning_rate': 0.00016698640167485168, 'epoch': 0.28914285714285715}
{'loss': 13.5508, 'grad_norm': 2.1582894325256348, 'learning_rate': 0.00016684883371573093, 'epoch': 0.2897142857142857}
{'loss': 13.0364, 'grad_norm': 2.7783362865448, 'learning_rate': 0.00016671103665414014, 'epoch': 0.29028571428571426}
{'loss': 13.1939, 'grad_norm': 2.2178521156311035,



{'loss': 12.7431, 'grad_norm': 2.965944290161133, 'learning_rate': 1.0602912141225752e-05, 'epoch': 0.8577142857142858}
{'loss': 12.8226, 'grad_norm': 3.0520377159118652, 'learning_rate': 1.052010566152808e-05, 'epoch': 0.8582857142857143}
{'loss': 11.7517, 'grad_norm': 3.0872230529785156, 'learning_rate': 1.0437605844844045e-05, 'epoch': 0.8588571428571429}
{'loss': 14.2737, 'grad_norm': 3.169109582901001, 'learning_rate': 1.0355412973914802e-05, 'epoch': 0.8594285714285714}
{'loss': 14.0069, 'grad_norm': 2.955272674560547, 'learning_rate': 1.0273527330429455e-05, 'epoch': 0.86}
{'loss': 11.7563, 'grad_norm': 3.008347272872925, 'learning_rate': 1.0191949195024175e-05, 'epoch': 0.8605714285714285}
{'loss': 14.0436, 'grad_norm': 3.0531046390533447, 'learning_rate': 1.011067884728134e-05, 'epoch': 0.8611428571428571}
{'loss': 13.5146, 'grad_norm': 3.426321506500244, 'learning_rate': 1.0029716565728408e-05, 'epoch': 0.8617142857142858}
{'loss': 12.6892, 'grad_norm': 3.3877508640289307, 'l

In [17]:
# Score the model returned by the preceding (balanced-data) fine-tuning cell on the test prompts.
y_pred = predict(test_data, model, tokenizer, categories=["not sexist", "sexist"])
evaluate(y_true=y_test, y_pred=y_pred)

100%|██████████| 4000/4000 [03:00<00:00, 22.11it/s]

Accuracy: 0.855
Accuracy for label not sexist: 0.857
Accuracy for label sexist: 0.849

Classification Report:
              precision    recall  f1-score   support

  not sexist       0.95      0.86      0.90      3030
      sexist       0.66      0.85      0.74       970
        none       0.00      0.00      0.00         0

    accuracy                           0.86      4000
   macro avg       0.53      0.57      0.55      4000
weighted avg       0.88      0.86      0.86      4000


Confusion Matrix:
[[2597  433    0]
 [ 146  824    0]
 [   0    0    0]]





In [12]:
# y_pred = predict(test_data, model, tokenizer, ["not sexist", "sexist"])
# evaluate(y_test, y_pred)

100%|██████████| 4000/4000 [04:48<00:00, 13.85it/s]

Accuracy: 0.869
Accuracy for label not sexist: 0.884
Accuracy for label sexist: 0.820

Classification Report:
              precision    recall  f1-score   support

  not sexist       0.94      0.88      0.91      3030
      sexist       0.69      0.82      0.75       970
        none       0.00      0.00      0.00         0

    accuracy                           0.87      4000
   macro avg       0.54      0.57      0.55      4000
weighted avg       0.88      0.87      0.87      4000


Confusion Matrix:
[[2680  350    0]
 [ 175  795    0]
 [   0    0    0]]





In [17]:
# # fine-tuned on train set
# y_pred = predict(test_data, model, tokenizer, ["not sexist", "sexist"])
# evaluate(y_test, y_pred)

100%|██████████| 4000/4000 [04:43<00:00, 14.12it/s]

Accuracy: 0.880
Accuracy for label not sexist: 0.943
Accuracy for label sexist: 0.684

Classification Report:
              precision    recall  f1-score   support

  not sexist       0.90      0.94      0.92      3030
      sexist       0.79      0.68      0.73       970
        none       0.00      0.00      0.00         0

    accuracy                           0.88      4000
   macro avg       0.57      0.54      0.55      4000
weighted avg       0.88      0.88      0.88      4000


Confusion Matrix:
[[2858  172    0]
 [ 307  663    0]
 [   0    0    0]]





In [5]:
# model_id = "meta-llama/Llama-3.2-3B-Instruct"
# tokenizer = AutoTokenizer.from_pretrained(model_id)
#
# model = AutoModelForCausalLM.from_pretrained(
#     model_id,
#     device_map="cuda:0"
# )
#
# model.config.use_cache = False
# tokenizer.pad_token_id = tokenizer.eos_token_id

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [105]:
# pipe = pipeline(task="text-generation",
#                 model=model,
#                 tokenizer=tokenizer,
#                 max_new_tokens=2,
#                 temperature=0.1)
#
# prompt = f"""
# Classify the text into "sexist" or "not sexist", and return the answer as the corresponding label.
# text: I don't hate women
# label:
# """.strip()
# result = pipe(prompt)
# print(result[0]['generated_text'])

Device set to use cuda:0


Classify the text into "sexist" or "not sexist", and return the answer as the corresponding label.
text: I don't hate women
label: not sexist


In [31]:
# len(tokenizer.tokenize("Pretty sure it's because they stumbled on porn when young and that traumatized them, so they keep repeating that trauma. Its very common in sexually abused women to get into shit like BDSM, because it gives them a sense of control over their trauma."))

50