# Task A approach using DeepSeek R1 Distill for Sequence Classification

In [1]:
MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
# MODEL_NAME = "Qwen/Qwen2.5-1.5B"

In [2]:
LANG = "eng"

In [3]:
import torch
import random
import transformers
import numpy as np


def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    transformers.set_seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


def is_mps_available() -> bool:
    return torch.backends.mps.is_available() and torch.backends.mps.is_built()


def is_cuda_available() -> bool:
    return torch.cuda.is_available()


def get_device() -> str:
    return "mps" if is_mps_available() else "cuda" if is_cuda_available() else "cpu"

In [None]:
RANDOM_SEED = 42

set_seed(RANDOM_SEED)

DEVICE = get_device()
print(f"Device: {DEVICE}")

Device: cuda


In [5]:
import torch

torch.cuda.empty_cache()

In [6]:
TRAIN_FILE_NAME = "./data/public_data_test/track_a/train/eng.csv"

DEV_FILE_NAME = "./data/public_data_test/track_a/dev/eng.csv"

TEST_FILE_NAME = "./data/public_data_test/track_a/test/eng.csv"

In [7]:
import pandas as pd

def load_data(file):
    return pd.read_csv(file)

In [8]:
df_train_full = load_data(TRAIN_FILE_NAME)

df_dev_test = load_data(DEV_FILE_NAME)

df_test = load_data(TEST_FILE_NAME)

In [9]:
df_train_full.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2768 entries, 0 to 2767
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   id        2768 non-null   object
 1   text      2768 non-null   object
 2   anger     2768 non-null   int64 
 3   fear      2768 non-null   int64 
 4   joy       2768 non-null   int64 
 5   sadness   2768 non-null   int64 
 6   surprise  2768 non-null   int64 
dtypes: int64(5), object(2)
memory usage: 151.5+ KB


In [None]:
df_train_full.head()

Unnamed: 0,id,text,anger,fear,joy,sadness,surprise
0,eng_train_track_a_00001,"Colorado, middle of nowhere.",0,1,0,0,1
1,eng_train_track_a_00002,This involved swimming a pretty large lake tha...,0,1,0,0,0
2,eng_train_track_a_00003,It was one of my most shameful experiences.,0,1,0,1,0
3,eng_train_track_a_00004,"After all, I had vegetables coming out my ears...",0,0,0,0,0
4,eng_train_track_a_00005,Then the screaming started.,0,1,0,1,1


In [9]:
labels = [label for label in df_train_full.keys() if label not in ['id', 'text']]
id2label = {idx:label for idx, label in enumerate(labels)}
label2id = {label:idx for idx, label in enumerate(labels)}

labels

['anger', 'fear', 'joy', 'sadness', 'surprise']

In [10]:
id2label

{0: 'anger', 1: 'fear', 2: 'joy', 3: 'sadness', 4: 'surprise'}

In [11]:
label2id

{'anger': 0, 'fear': 1, 'joy': 2, 'sadness': 3, 'surprise': 4}

In [12]:
from sklearn.model_selection import train_test_split

df_train, df_dev = train_test_split(df_train_full, test_size=0.2, random_state=RANDOM_SEED)

df_train = df_train.reset_index(drop=True)
df_dev = df_dev.reset_index(drop=True)

In [12]:
df_train = df_train_full.copy()
df_dev = df_dev_test.copy()

In [13]:
from datasets import Dataset, DatasetDict

ds_train = Dataset.from_pandas(df_train)
ds_dev = Dataset.from_pandas(df_dev)
# ds_dev_test = Dataset.from_pandas(df_dev_test)
ds_test = Dataset.from_pandas(df_test)

# ds_train_shuffled = ds_train.shuffle(seed=RANDOM_SEED)

dataset = DatasetDict({
    'train': ds_train,
    'dev': ds_dev,
    # 'dev_test': ds_dev_test,
    'test': ds_test,
})

In [14]:
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'text', 'anger', 'fear', 'joy', 'sadness', 'surprise'],
        num_rows: 2768
    })
    dev: Dataset({
        features: ['id', 'text', 'anger', 'fear', 'joy', 'sadness', 'surprise'],
        num_rows: 116
    })
    test: Dataset({
        features: ['id', 'text', 'anger', 'fear', 'joy', 'sadness', 'surprise'],
        num_rows: 2767
    })
})

In [15]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

In [16]:
MAX_LEN = 256


def preprocess_dataset(ds):
    # take a batch of texts
    text = ds['text']

    # encode them
    encoding = tokenizer(
        text,
        padding="max_length",
        truncation=True,
        max_length=MAX_LEN
    )

    # add labels
    labels_batch = {k: ds[k] for k in ds.keys() if k in labels}

    # create numpy array of shape (batch_size, num_labels)
    labels_matrix = np.zeros((len(text), len(labels)))

    # fill numpy array
    for idx, label in enumerate(labels):
        labels_matrix[:, idx] = labels_batch[label]

    encoding["labels"] = labels_matrix.tolist()

    return encoding


tokenized_datasets = dataset.map(preprocess_dataset, batched=True)  # remove_columns=dataset['train'].column_names
tokenized_datasets.set_format("torch")

Map:   0%|          | 0/2768 [00:00<?, ? examples/s]

Map:   0%|          | 0/116 [00:00<?, ? examples/s]

Map:   0%|          | 0/2767 [00:00<?, ? examples/s]

In [17]:
example = tokenized_datasets['train'][0]
print(example.keys())

dict_keys(['id', 'text', 'anger', 'fear', 'joy', 'sadness', 'surprise', 'input_ids', 'attention_mask', 'labels'])


In [18]:
tokenizer.decode(example['input_ids'])

'<｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁sentence｜><｜end▁of▁se

In [19]:
example['labels']

tensor([0., 1., 0., 0., 1.])

In [20]:
[id2label[idx] for idx, label in enumerate(example['labels']) if label == 1.0]

['fear', 'surprise']

# Load model

In [17]:
import torch
from transformers import BitsAndBytesConfig

quantization_config = BitsAndBytesConfig(
    load_in_4bit = True, # enable 4-bit quantization
    bnb_4bit_quant_type = 'nf4', # information theoretically optimal dtype for normally distributed weights
    bnb_4bit_use_double_quant = True, # quantize quantized weights //insert xzibit meme
    bnb_4bit_compute_dtype = torch.bfloat16 # optimized fp format for ML
)

In [18]:
from peft import LoraConfig

lora_config = LoraConfig(
    r = 4, # the dimension of the low-rank matrices
    lora_alpha = 16, # scaling factor for LoRA activations vs pre-trained weight activations
    target_modules = ['q_proj', 'k_proj', 'v_proj', 'o_proj'],
    lora_dropout = 0.2, # dropout probability of the LoRA layers
    bias = 'none', # wether to train bias weights, set to 'none' for attention layers
    task_type = 'SEQ_CLS'
)

  warn(


In [19]:
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    problem_type="multi_label_classification",
    quantization_config=quantization_config,
    num_labels=len(labels),
    id2label=id2label,
    label2id=label2id,
)

`low_cpu_mem_usage` was None, now default to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at deepseek-ai/DeepSeek-R1-Distill-Llama-8B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [20]:
from peft import prepare_model_for_kbit_training

model = prepare_model_for_kbit_training(model)

In [21]:
from peft import get_peft_model

model = get_peft_model(model, lora_config)
model

PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): LlamaForSequenceClassification(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaSdpaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.2, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=4, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=4, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              

In [22]:
# If a pad_token_id is defined in the configuration,
# it finds the last token that is not a padding token in each row.
# If no pad_token_id is defined, it simply takes the last value in each row of the batch.
model.config.pad_token_id = tokenizer.pad_token_id
model.config.use_cache = False
model.config.pretraining_tp = 1

In [23]:
model.print_trainable_parameters()

trainable params: 3,428,352 || all params: 7,508,373,504 || trainable%: 0.0457


# Measure the initial performance of the model

In [28]:
df_dev

Unnamed: 0,id,text,anger,fear,joy,sadness,surprise
0,eng_dev_track_a_00001,Older sister (23 at the time) is a Scumbag Stacy.,1,0,0,0,0
1,eng_dev_track_a_00002,"And I laughed like this: garhahagar, because m...",0,1,0,0,0
2,eng_dev_track_a_00003,It overflowed and brown shitty diarrhea water ...,1,1,0,1,1
3,eng_dev_track_a_00004,Its very dark and foggy.,0,1,0,0,0
4,eng_dev_track_a_00005,"Then she tried to, like, have sex with/strangl...",1,1,0,0,1
...,...,...,...,...,...,...,...
111,eng_dev_track_a_00112,My heart was beating fast from excitement.,0,0,1,0,0
112,eng_dev_track_a_00113,A fraying rope stretches down from the rafters.,0,1,0,0,1
113,eng_dev_track_a_00114,so i cried my eyes out and did the drawing.,0,0,0,1,0
114,eng_dev_track_a_00115,Never been so close to a group ass-wooping in ...,1,1,0,0,1


In [31]:
import torch

from tqdm import tqdm
from sklearn.metrics import (
    f1_score,
    accuracy_score,
    # confusion_matrix, classification_report, balanced_accuracy_score,
)


def get_performance_metrics(y_true, y_pred):
    f1_macro = f1_score(y_true, y_pred, average='macro')
    f1_micro = f1_score(y_true, y_pred, average='micro')
    acc = accuracy_score(y_true, y_pred)
    # bal_acc = balanced_accuracy_score(y_true, y_pred)
    # cm = confusion_matrix(y_true, y_pred, labels=labels)
    # cr = classification_report(y_true, y_pred, labels=labels)

    return {
        'f1-macro': f1_macro,
        'f1-micro': f1_micro,
        'accuracy': acc,
        # 'balanced_accuracy': bal_acc,
        # 'confusion_matrix': cm,
        # 'classification_report': cr
    }


def measure_initial_performance(model, tokenizer, ds_test, device, max_length=512, batch_size=64):
    model.to(device)
    model.eval()

    input_ids = ds_test['input_ids']
    attention_mask = ds_test['attention_mask']
    labels = ds_test['labels']

    predictions = []
    for i in tqdm(range(0, len(input_ids), batch_size)):
        batch_input_ids = input_ids[i:i + batch_size]
        batch_attention_mask = attention_mask[i:i + batch_size]

        batch_inputs = {
            'input_ids': batch_input_ids.to(device),
            'attention_mask': batch_attention_mask.to(device),
        }

        with torch.no_grad():
            output = model(**batch_inputs)

            logits = output.logits

            # apply sigmoid + threshold
            sigmoid = torch.nn.Sigmoid()
            probs = sigmoid(logits.squeeze().cpu())
            preds = np.zeros(probs.shape)
            preds[np.where(probs >= 0.5)] = 1

            predictions.append(torch.tensor(preds))

    # concatenate all predictions for all batches
    predictions = torch.cat(predictions, dim=0).cpu().numpy()
    # print(predictions)

    performance = get_performance_metrics(labels, predictions)
    return performance


initial_performance = measure_initial_performance(
    model,
    tokenizer,
    tokenized_datasets['dev'],
    DEVICE,
    max_length=MAX_LEN,
    batch_size=16
)

100%|██████████| 8/8 [00:11<00:00,  1.46s/it]


In [32]:
initial_performance

{'f1-macro': 0.2885353742067357,
 'f1-micro': 0.3374485596707819,
 'accuracy': 0.0}

# Train the model

In [24]:
import torch

from transformers import EvalPrediction
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score


# source: https://jesusleal.io/2021/04/21/Longformer-multilabel-classification/
def multi_label_metrics(predictions, labels, threshold=0.5):
    # first, apply sigmoid on predictions which are of shape (batch_size, num_labels)
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(torch.Tensor(predictions))

    # next, use threshold to turn them into integer predictions
    y_pred = np.zeros(probs.shape)
    y_pred[np.where(probs >= threshold)] = 1

    # finally, compute metrics
    y_true = labels

    f1_macro_average = f1_score(y_true=y_true, y_pred=y_pred, average='macro')
    f1_micro_average = f1_score(y_true=y_true, y_pred=y_pred, average='micro')
    roc_auc = roc_auc_score(y_true, y_pred, average = 'micro')
    accuracy = accuracy_score(y_true, y_pred)

    # return as dictionary
    metrics = {
        'f1_macro': f1_macro_average,
        'f1_micro': f1_micro_average,
        'roc_auc': roc_auc,
        'accuracy': accuracy
    }

    return metrics


def compute_metrics(p: EvalPrediction):
    preds = (
        p.predictions[0]
        if isinstance(p.predictions, tuple)
        else p.predictions
    )

    result = multi_label_metrics(
        predictions=preds,
        labels=p.label_ids
    )

    return result

In [32]:
tokenized_datasets['train'][0]['labels'].type()

'torch.FloatTensor'

In [33]:
tokenized_datasets['train']['input_ids'][0]

tensor([  2403,    518,   3055,    582,   1486,     13, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643, 151643,
        151643, 151643, 151643, 151643, 

In [34]:
#forward pass
outputs = model(
    input_ids=tokenized_datasets['train']['input_ids'][0].unsqueeze(0),
    labels=tokenized_datasets['train'][0]['labels'].unsqueeze(0)
)
outputs

SequenceClassifierOutputWithPast(loss=tensor(1.6129, grad_fn=<ToCopyBackward0>), logits=tensor([[ 2.8273, -1.8645, -3.7161,  0.3752, -0.7185]],
       grad_fn=<ToCopyBackward0>), past_key_values=None, hidden_states=None, attentions=None)

In [27]:
!rm -rf $OUTPUT_DIR/

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [25]:
EXPERIMENT_NUMBER = "003"
OUTPUT_DIR = f'./runs/track_a/{MODEL_NAME}/{LANG}_exp_{EXPERIMENT_NUMBER}'

In [26]:
OUTPUT_DIR

'./runs/track_a/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/eng_exp_003'

In [27]:
# METRIC_NAME = "f1_micro"
METRIC_NAME = "eval_loss"

params = {
    "learning_rate": 2e-5,  # 2e-4, 5e-5
    "num_train_epochs": 10,
    "weight_decay": 2e-2,
    "weight_ratio": 0.01,
    "batch_size": 24,
    "seed": RANDOM_SEED,
}

In [28]:
from transformers import DataCollatorWithPadding

collate_fn = DataCollatorWithPadding(tokenizer=tokenizer)

In [30]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    learning_rate=params["learning_rate"],
    per_device_train_batch_size=params["batch_size"],
    per_device_eval_batch_size=params["batch_size"],
    num_train_epochs=params["num_train_epochs"],
    weight_decay=params["weight_decay"],
    # eval_strategy='epoch',
    # save_strategy='epoch',
    eval_strategy='steps',
    eval_steps=50,
    # save_strategy='no',
    load_best_model_at_end=True,
    metric_for_best_model=METRIC_NAME,
    logging_dir="./logs",
    logging_steps=0.1,
    seed=params["seed"],
)

comet_ml is installed but the Comet API Key is not configured. Please set the `COMET_API_KEY` environment variable to enable Comet logging. Check out the documentation for other ways of configuring it: https://www.comet.com/docs/v2/guides/experiment-management/configure-sdk/#set-the-api-key


In [31]:
from transformers import Trainer, EarlyStoppingCallback

trainer = Trainer(
    model,
    training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["dev"],
    tokenizer=tokenizer,
    data_collator=collate_fn,
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=7)],
)

  trainer = Trainer(


[2025-01-29 14:35:54,508] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)


In [None]:
train_result = trainer.train()



Step,Training Loss,Validation Loss,F1 Macro,F1 Micro,Roc Auc,Accuracy
50,No log,0.78585,0.319198,0.409222,0.577942,0.077586
100,No log,0.645289,0.331587,0.403909,0.59074,0.086207
150,0.733200,0.571739,0.342862,0.439344,0.613608,0.137931
200,0.733200,0.505904,0.469168,0.554517,0.683534,0.224138
250,0.486500,0.440056,0.597984,0.658824,0.753825,0.344828




In [33]:
metrics = train_result.metrics
max_train_samples = len(ds_train)
metrics["train_samples"] = min(max_train_samples, len(ds_train))
trainer.log_metrics("train", metrics)
trainer.save_metrics("train", metrics)
trainer.save_state()

***** train metrics *****
  epoch                    =         5.0
  total_flos               = 138252112GF
  train_loss               =      0.3687
  train_runtime            =  1:12:54.02
  train_samples            =        2768
  train_samples_per_second =       6.328
  train_steps_per_second   =       0.265


In [34]:
trainer.save_model(f'{OUTPUT_DIR}/fine-tuned-model')

In [35]:
import json

with open(f'{OUTPUT_DIR}/config.json', "w") as f:
    json.dump(params, f)

# Evaluate

In [35]:
trainer.evaluate()

{'eval_loss': 0.43654534220695496,
 'eval_f1_macro': 0.7192262143641622,
 'eval_f1_micro': 0.7371428571428571,
 'eval_roc_auc': 0.8107842034203421,
 'eval_accuracy': 0.41379310344827586,
 'eval_runtime': 11.1312,
 'eval_samples_per_second': 10.421,
 'eval_steps_per_second': 0.539,
 'epoch': 5.0359712230215825}

# Make predictions

In [52]:
import os
import torch
import pandas as pd
from tqdm import tqdm


def make_predictions(
    model,
    tokenizer,
    df,
    labels,
    id2label,
    label2id,
    device,
    file_path,
    max_len=MAX_LEN,
    batch_size=8
):
    model.eval()

    all_ids = []
    all_predictions = []
    with torch.no_grad():
        for i in tqdm(range(0, len(df), batch_size)):
            ids = df[i:i + batch_size].id.tolist()
            texts = df[i:i + batch_size].text.tolist()

            inputs = tokenizer(
                texts,
                padding="max_length",
                truncation=True,
                max_length=max_len,
                return_tensors="pt",
            )

            input_ids = inputs['input_ids'].to(device)
            attention_mask = inputs['attention_mask'].to(device)

            outputs = model(input_ids, attention_mask)
            logits = outputs.logits

            # apply sigmoid + threshold
            sigmoid = torch.nn.Sigmoid()
            probs = sigmoid(logits.squeeze().cpu())
            predictions = np.zeros(probs.shape)
            predictions[np.where(probs >= 0.5)] = 1

            all_ids.extend(ids)
            all_predictions.extend(predictions)

    # Create a list of dictionaries for each row
    rows = []
    for idx, text_id in enumerate(all_ids):
        row = {"id": text_id}
        row.update({label: int(all_predictions[idx][label2id[label]]) for label in labels})
        rows.append(row)

    # Convert the list of dictionaries to a DataFrame
    df_predictions = pd.DataFrame(rows)

    # Create the csv file
    os.makedirs(os.path.dirname(file_path), exist_ok=True)

    df_predictions.to_csv(file_path, index=False)

    return df_predictions


In [121]:
df_dev_test

Unnamed: 0,id,text,anger,fear,joy,sadness,surprise
0,eng_dev_track_a_00001,Older sister (23 at the time) is a Scumbag Stacy.,,,,,
1,eng_dev_track_a_00002,"And I laughed like this: garhahagar, because m...",,,,,
2,eng_dev_track_a_00003,It overflowed and brown shitty diarrhea water ...,,,,,
3,eng_dev_track_a_00004,Its very dark and foggy.,,,,,
4,eng_dev_track_a_00005,"Then she tried to, like, have sex with/strangl...",,,,,
...,...,...,...,...,...,...,...
111,eng_dev_track_a_00112,My heart was beating fast from excitement.,,,,,
112,eng_dev_track_a_00113,A fraying rope stretches down from the rafters.,,,,,
113,eng_dev_track_a_00114,so i cried my eyes out and did the drawing.,,,,,
114,eng_dev_track_a_00115,Never been so close to a group ass-wooping in ...,,,,,


In [53]:
df_predictions = make_predictions(
    trainer.model,
    tokenizer,
    # df_dev_test,
    df_test,
    labels,
    id2label,
    label2id,
    DEVICE,
    f'{OUTPUT_DIR}/submission/track_a/pred_{LANG}.csv',
    max_len=MAX_LEN,
    batch_size=8,
)

100%|██████████| 346/346 [04:24<00:00,  1.31it/s]


In [54]:
df_predictions

Unnamed: 0,id,anger,fear,joy,sadness,surprise
0,eng_test_track_a_00001,1,1,0,0,0
1,eng_test_track_a_00002,0,1,0,1,0
2,eng_test_track_a_00003,1,1,0,0,0
3,eng_test_track_a_00004,0,1,0,0,0
4,eng_test_track_a_00005,0,1,1,0,0
...,...,...,...,...,...,...
2762,eng_test_track_a_02763,0,0,1,0,0
2763,eng_test_track_a_02764,0,0,1,0,0
2764,eng_test_track_a_02765,0,0,1,0,0
2765,eng_test_track_a_02766,0,0,1,0,0


In [55]:
!echo $OUTPUT_DIR

./runs/track_a/deepseek-ai/DeepSeek-R1-Distill-Llama-8B/eng_exp_001


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [56]:
!cd ./Submodules/SemEval2025-Task11-Evaluation && python check_submission.py \
    -s /root/repos/SemEval-2025/emotion-detection-semeval-2025/$OUTPUT_DIR/submission/track_a

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)



Checklist:
+--------------------------+----------+-------------------------------------------------+
| Item                     | Status   | Comment                                         |
| Submission folder.       | Pass     | Found valid folder: track_a                     |
+--------------------------+----------+-------------------------------------------------+
| Submission folder.       | Pass     | Folder name: track_a, starts with "track_"      |
+--------------------------+----------+-------------------------------------------------+
| Task name.               | Pass     | Task: A                                         |
+--------------------------+----------+-------------------------------------------------+
| Prediction files.        | Pass     | Found 1 prediction files: pred_eng.csv          |
+--------------------------+----------+-------------------------------------------------+
| Prediction files.        | Pass     | All prediction files are in the correct format. 

In [57]:
!cd /root/repos/SemEval-2025/emotion-detection-semeval-2025/$OUTPUT_DIR/submission && \
    zip -r track_a.zip track_a/

  adding: track_a/ (stored 0%)
  adding: track_a/pred_eng.csv (deflated 90%)


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
