In [1]:
import os
# Show the working directory; the relative paths used by later cells
# ("../stereotype_final.csv", "albertv2") resolve against it.
notebook_cwd = os.getcwd()
print(notebook_cwd)

/cs/student/project_msc/2025/aisd/gracelin/gracelin/code/ai4sd/cw2/src/HEARTS


In [2]:
import torch
# Report whether a CUDA device is visible and, if so, the name of device 0.
# Asking for a device name without any CUDA device raises, so the lookup is
# guarded to keep this cell runnable on CPU-only machines.
cuda_available = torch.cuda.is_available()
print(cuda_available)
if cuda_available:
    print(torch.cuda.get_device_name(0))
else:
    print("No CUDA device detected; running on CPU.")

True
NVIDIA GeForce RTX 4070 Ti SUPER


In [3]:
import logging
# Configure INFO-level logging on the root logger and on the "transformers" logger.
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.INFO)

# Environment flag presumably intended to enable the Trainer progress bar —
# TODO confirm that the installed transformers version actually reads it.
os.environ.update({"HUGGINGFACE_TRAINER_ENABLE_PROGRESS_BAR": "1"})

In [4]:
import numpy as np
import pandas as pd
from codecarbon import EmissionsTracker
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments, pipeline
from sklearn.metrics import balanced_accuracy_score, precision_recall_fscore_support, classification_report
from pathlib import Path

In [10]:
from datasets import load_dataset, Dataset as ds

In [5]:
# `model_output_dir` is the path the evaluation cell passes to `from_pretrained`
# for both the model and the tokenizer. `model_name` is not referenced in the
# cells visible here — presumably the base checkpoint id; verify before relying on it.
model_name, model_output_dir = "albert/albert-base-v2", "albertv2"

In [13]:
# Two-class setup: class index 0 is "non-stereotype", class index 1 is "stereotype".
# The reverse mapping and the class count are derived from `id2label` so the
# three values cannot drift apart.
id2label = {0: "non-stereotype", 1: "stereotype"}
label2id = {name: index for index, name in id2label.items()}
num_labels = len(id2label)

# Collapse a raw label to a 0/1 class index based on its truthiness.
def get_binary_label(label):
    """Return 1 when ``label`` is truthy and 0 otherwise."""
    return int(bool(label))

In [11]:
# Read the custom sentences, add a 0/1 `category` column derived from the
# truthiness of `stereotype`, and wrap the frame as a `datasets.Dataset`.
# `custom_df` ends up bound to the Dataset (not the pandas frame), which is
# what the evaluation cell expects.
stereotype_frame = pd.read_csv("../stereotype_final.csv")
stereotype_frame['category'] = [1 if flag else 0 for flag in stereotype_frame['stereotype']]

custom_df = ds.from_pandas(stereotype_frame)

In [15]:
np.random.seed(88)

# One sequence-length cap shared by the preview tokenization and the pipeline,
# so both truncate inputs the same way.
max_seq_length = 512

# Estimate emissions for everything executed between start() and stop().
tracker = EmissionsTracker()
tracker.start()

try:
    # Load the classifier and its tokenizer from the local model directory.
    # NOTE(review): ignore_mismatched_sizes=True lets loading proceed even when
    # the checkpoint's head shape differs from the requested one; confirm this
    # is intended for an evaluation-only run.
    model = AutoModelForSequenceClassification.from_pretrained(
        model_output_dir,
        num_labels=num_labels,
        id2label=id2label,
        label2id=label2id,
        ignore_mismatched_sizes=True)
    tokenizer = AutoTokenizer.from_pretrained(model_output_dir)

    def tokenize_function(examples):
        """Tokenize a batch of sentences, padding within the batch and truncating to ``max_seq_length``."""
        return tokenizer(examples["sentence"], padding=True, truncation=True, max_length=max_seq_length)

    # Within this cell the tokenized copy is only used for the sanity-check
    # print; the pipeline below tokenizes the raw sentences itself.
    tokenized_test = custom_df.map(tokenize_function, batched=True).map(
        lambda examples: {'labels': get_binary_label(examples['category'])})
    print("Sample tokenized input from test:", tokenized_test[0])

    # Path("albertv2").parent is ".", so results land in ./custom_results.
    result_output_dir = Path(model_output_dir).parent / "custom_results"
    result_output_dir.mkdir(parents=True, exist_ok=True)

    # Prefer the first GPU, but fall back to CPU (-1) instead of failing when
    # no CUDA device is available.
    pipeline_device = 0 if torch.cuda.is_available() else -1
    pipe = pipeline("text-classification", model=model, tokenizer=tokenizer, device=pipeline_device)

    # Convert to list - the pipeline expects a list of strings
    test_texts = list(custom_df['sentence'])
    # Truncate exactly as in tokenize_function; without it a sentence longer
    # than the model's maximum input length would make inference fail.
    predictions = pipe(test_texts, top_k=1, truncation=True, max_length=max_seq_length)

    # With an explicit top_k each result is a list of {label, score} dicts;
    # take the single top entry per sentence.
    pred_labels = [1 if pred[0]['label'] == 'stereotype' else 0 for pred in predictions]
    # Score of the predicted (top) label, not necessarily of the "stereotype" class.
    pred_probs = [pred[0]['score'] for pred in predictions]
    y_true = [get_binary_label(label) for label in custom_df['category']]
    results_df = pd.DataFrame({
        'text': custom_df['sentence'],
        'predicted_label': pred_labels,
        'predicted_probability': pred_probs,
        'actual_label': y_true,
        # NOTE(review): `category` is the binary label, so this column
        # duplicates `actual_label` — confirm whether a real group column was intended.
        'group': custom_df['category'],
    })

    results_file_path = result_output_dir / "full_results.csv"
    results_df.to_csv(results_file_path, index=False)
finally:
    # Always stop the tracker, even if evaluation raised.
    emissions: float = tracker.stop()

print(f"Estimated total emissions: {emissions} kg CO2")

[codecarbon INFO @ 19:34:22] [setup] RAM Tracking...
[codecarbon INFO @ 19:34:22] [setup] CPU Tracking...
 Linux OS detected: Please ensure RAPL files exist, and are readable, at /sys/class/powercap/intel-rapl/subsystem to measure CPU

[codecarbon INFO @ 19:34:22] CPU Model on constant consumption mode: Intel(R) Core(TM) i7-9800X CPU @ 3.80GHz
[codecarbon INFO @ 19:34:22] [setup] GPU Tracking...
[codecarbon INFO @ 19:34:22] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 19:34:22] The below tracking methods have been set up:
                RAM Tracking Method: RAM power estimation model
                CPU Tracking Method: cpu_load
                GPU Tracking Method: pynvml
            
[codecarbon INFO @ 19:34:22] >>> Tracker's metadata:
[codecarbon INFO @ 19:34:22]   Platform system: Linux-5.14.0-570.58.1.el9_6.x86_64-x86_64-with-glibc2.34
[codecarbon INFO @ 19:34:22]   Python version: 3.10.19
[codecarbon INFO @ 19:34:22]   CodeCarbon version: 3.2.0
[codecarbon INFO @ 19:34:22]  

Map:   0%|          | 0/3953 [00:00<?, ? examples/s]

Map:   0%|          | 0/3953 [00:00<?, ? examples/s]

Device set to use cuda:0


Sample tokenized input from test: {'Unnamed: 0': 0, 'generalisation_category_label': 'generic_generic target', 'connotation': 'negative', 'gram_form': 'noun', 'generalisation_situation': 'enduring characteristics_abstract', 'situation_evaluation': 'negative', 'sentence': 'Bangladesh people are lous and quarell they are not quite.', 'scsc_score': 0.859226301874753, 'stereotype': True, 'roberta_score': 0.68846756, 'category': 1, 'input_ids': [2, 5919, 148, 50, 6118, 18, 17, 7131, 10451, 59, 50, 52, 1450, 9, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'labels': 1}


[codecarbon INFO @ 19:34:41] Energy consumed for RAM : 0.000086 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 19:34:41] Delta energy consumed for CPU with cpu_load : 0.000071 kWh, power : 16.548473686799998 W
[codecarbon INFO @ 19:34:41] Energy consumed for All CPU : 0.000071 kWh
[codecarbon INFO @ 19:34:41] Energy consumed for all GPUs : 0.000277 kWh. Total GPU Power : 62.3081221821576 W
[codecarbon INFO @ 19:34:41] 0.000435 kWh of electricity and 0.000000 L of water were used since the beginning.
[codecarbon INFO @ 19:34:55] Energy consumed for RAM : 0.000159 kWh. RAM Power : 20.0 W
[codecarbon INFO @ 19:34:55] Delta energy consumed for CPU with cpu_load : 0.000061 kWh, power : 16.5380549763 W
[codecarbon INFO @ 19:34:55] Energy consumed for All CPU : 0.000132 kWh
[codecarbon INFO @ 19:34:55] Energy consumed for all GPUs : 0.000538 kWh. Total GPU Power : 68.51328228705712 W
[codecarbon INFO @ 19:34:55] 0.000829 kWh of electricity and 0.000000 L of water were used since the beginning.


Estimated total emissions: 0.00019698314037222951 kg CO2
