# Author: Seunghee Kim
- Created on: 2024-12-05

In [1]:
import pandas as pd
import numpy as np
from datasets import Dataset, DatasetDict
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
import torch

In [None]:
class CFG:
    """Settings for one fine-tuning run: hyperparameters, checkpoint, and file locations."""

    # Run identifier; it is embedded in the output directory and the
    # prediction file names defined at the bottom of this class.
    EXP_NUM = 1

    # Checkpoint name passed to the tokenizer and the classification model.
    model = 'microsoft/deberta-v3-xsmall'

    # Optimisation hyperparameters.
    SEED = 1
    EPOCH = 3
    LR = 5e-5
    WEIGHT_DECAY = 0.01
    BATCH_SIZE = 16

    # Checkpointing / evaluation cadence and the metric used to pick the best model.
    SAVE_STRATEGY = 'epoch'
    EVALUATION_STRATEGY = 'epoch'
    METRIC = 'f1'

    # Input CSV files.
    train_df_path = './df_final_train_v1.csv'
    valid_df_path = './df_final_valid_v1.csv'
    test_df_v1_path = './df_final_test_v1.csv'
    test_df_v2_path = './df_final_test_v2.csv'

    # Output locations, all derived from EXP_NUM.
    model_output_path = f'./results_{EXP_NUM}'
    test_v1_inference_path = f'test_v1_predictions_{EXP_NUM}.csv'
    test_v2_inference_path = f'test_v2_predictions_{EXP_NUM}.csv'
    


In [3]:
# Load each split and shuffle its rows with the configured seed. The index is
# rebuilt afterwards so positional order and index labels agree, which matters
# later when predictions are written back into the test frames as a new column.
train_df = (
    pd.read_csv(CFG.train_df_path)
    .sample(frac=1, random_state=CFG.SEED)
    .reset_index(drop=True)
)
valid_df = (
    pd.read_csv(CFG.valid_df_path)
    .sample(frac=1, random_state=CFG.SEED)
    .reset_index(drop=True)
)
test_df_v1 = (
    pd.read_csv(CFG.test_df_v1_path)
    .sample(frac=1, random_state=CFG.SEED)
    .reset_index(drop=True)
)
test_df_v2 = (
    pd.read_csv(CFG.test_df_v2_path)
    .sample(frac=1, random_state=CFG.SEED)
    .reset_index(drop=True)
)



def preprocess_data(df):
    """Build a `Dataset` from only the 'text' and 'label' columns of `df`.

    Args:
        df: pandas DataFrame that contains at least 'text' and 'label' columns.

    Returns:
        The result of `Dataset.from_pandas` on the two selected columns.
    """
    kept_columns = ['text', 'label']
    text_and_label = df[kept_columns]
    return Dataset.from_pandas(text_and_label)

# Convert every DataFrame split into a Dataset, in the same order as before:
# train, valid, test v1, test v2.
train_dataset, valid_dataset, test_v1_dataset, test_v2_dataset = [
    preprocess_data(frame)
    for frame in (train_df, valid_df, test_df_v1, test_df_v2)
]

# The tokenizer is loaded from the checkpoint named in the configuration.
model_name = CFG.model
tokenizer = AutoTokenizer.from_pretrained(model_name)

def tokenize_function(examples):
    """Tokenize the 'text' field of `examples` with the module-level tokenizer.

    Every sequence is truncated and padded to a fixed length of 512.

    Args:
        examples: mapping with a 'text' entry (used below with `map(..., batched=True)`).

    Returns:
        Whatever the tokenizer call returns for the given texts.
    """
    texts = examples['text']
    fixed_length_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 512,
    }
    return tokenizer(texts, **fixed_length_options)

def _to_model_inputs(split):
    """Tokenize one split, rename 'label' to 'labels', and expose torch tensors."""
    encoded = split.map(tokenize_function, batched=True)
    # NOTE(review): the rename is presumably required because the model/Trainer
    # look for a column called 'labels' -- confirm against the transformers docs.
    encoded = encoded.rename_column("label", "labels")
    encoded.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])
    return encoded


# Same processing for all four splits, in the original order.
train_dataset = _to_model_inputs(train_dataset)
valid_dataset = _to_model_inputs(valid_dataset)
test_v1_dataset = _to_model_inputs(test_v1_dataset)
test_v2_dataset = _to_model_inputs(test_v2_dataset)

# Sequence classifier with a two-class head, loaded from the configured checkpoint.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

def compute_metrics(pred):
    """Compute accuracy, precision, recall and F1 for a prediction object.

    Args:
        pred: object exposing `label_ids` (reference labels) and `predictions`
            (per-class scores; the predicted class is the argmax over the last axis).

    Returns:
        Dict with the keys "accuracy", "precision", "recall" and "f1".
        Precision, recall and F1 use sklearn's `average='binary'` setting.
    """
    y_true = pred.label_ids
    y_pred = pred.predictions.argmax(-1)
    # The fourth element of the returned tuple (support) is not used.
    prf_scores = precision_recall_fscore_support(y_true, y_pred, average='binary')
    return {
        "accuracy": accuracy_score(y_true, y_pred),
        "precision": prf_scores[0],
        "recall": prf_scores[1],
        "f1": prf_scores[2],
    }

training_args = TrainingArguments(
    # Where checkpoints are written; at most one is kept on disk.
    output_dir=CFG.model_output_path,
    save_total_limit=1,
    # Optimisation settings.
    learning_rate=CFG.LR,
    weight_decay=CFG.WEIGHT_DECAY,
    num_train_epochs=CFG.EPOCH,
    per_device_train_batch_size=CFG.BATCH_SIZE,
    per_device_eval_batch_size=CFG.BATCH_SIZE,
    seed=CFG.SEED,
    # Evaluate and save on the configured schedule, then reload the checkpoint
    # with the highest value of the configured metric once training ends.
    evaluation_strategy=CFG.EVALUATION_STRATEGY,
    save_strategy=CFG.SAVE_STRATEGY,
    metric_for_best_model=CFG.METRIC,
    greater_is_better=True,
    load_best_model_at_end=True,
)

trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
)

# Fine-tune the model.
trainer.train()

# Score the validation split with the model the trainer holds after training.
eval_results = trainer.evaluate()
print("Validation Results:\n", eval_results)


# test v1

# Run the trained model on the first test split.
test_v1_predictions = trainer.predict(test_v1_dataset)

# Reference labels as returned by the trainer.
test_v1_labels = test_v1_predictions.label_ids

# Predicted class = index of the largest score along the last axis.
test_v1_scores = torch.tensor(test_v1_predictions.predictions)
test_v1_preds = torch.argmax(test_v1_scores, dim=-1).numpy()


def compute_metrics_for_test(labels, preds):
    """Compute accuracy, precision, recall and F1 from labels and predicted classes.

    Args:
        labels: reference class labels.
        preds: predicted class labels, aligned with `labels`.

    Returns:
        Dict with the keys "accuracy", "precision", "recall" and "f1".
        Precision, recall and F1 use sklearn's `average='binary'` setting.
    """
    # Keep the first three values; the trailing support entry is not needed.
    precision, recall, f1 = precision_recall_fscore_support(
        labels, preds, average='binary'
    )[:3]
    acc = accuracy_score(labels, preds)
    return dict(accuracy=acc, precision=precision, recall=recall, f1=f1)

# Attach the predicted class to each row of the test v1 frame. The frame was
# shuffled and reindexed before the Dataset was built from it, so the
# prediction order matches the row order.
test_df_v1['predicted_label'] = test_v1_preds

# Report the test v1 metrics.
test_metrics = compute_metrics_for_test(test_v1_labels, test_v1_preds)
print("Test v1 Inference Results:", test_metrics)

# Persist the frame together with its predictions, without the index column.
test_df_v1.to_csv(path_or_buf=CFG.test_v1_inference_path, index=False)



# test v2
# Run the trained model on the second test split and take the argmax over the
# last axis of the returned scores as the predicted class.
test_v2_predictions = trainer.predict(test_v2_dataset)
test_v2_preds = torch.argmax(torch.tensor(test_v2_predictions.predictions), dim=-1).numpy()
test_v2_labels = test_v2_predictions.label_ids

# compute_metrics_for_test is already defined in the test v1 section; the
# byte-for-byte duplicate definition that used to sit here was redundant and
# has been removed so there is a single source of truth for the test metrics.
test_metrics = compute_metrics_for_test(test_v2_labels, test_v2_preds)

print("Test v2 Inference Results:", test_metrics)

# The test v2 frame was shuffled and reindexed before the Dataset was built
# from it, so predictions line up with its rows. Save frame + predictions.
test_df_v2['predicted_label'] = test_v2_preds
test_df_v2.to_csv(CFG.test_v2_inference_path, index=False)

Map:   0%|          | 0/9000 [00:00<?, ? examples/s]

Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at skt/kobert-base-v1 and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False)
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mgyg9325[0m ([33munig[0m). Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/1689 [00:00<?, ?it/s]

You're using a XLNetTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


{'loss': 0.6769, 'learning_rate': 3.5198342214328006e-05, 'epoch': 0.89}


  0%|          | 0/57 [00:00<?, ?it/s]

Checkpoint destination directory ./results_1\checkpoint-563 already exists and is non-empty.Saving will proceed but saved results may be invalid.


{'eval_loss': 0.7074477076530457, 'eval_accuracy': 0.43555555555555553, 'eval_precision': 0.046875, 'eval_recall': 0.006666666666666667, 'eval_f1': 0.011673151750972763, 'eval_runtime': 13.5276, 'eval_samples_per_second': 66.53, 'eval_steps_per_second': 4.214, 'epoch': 1.0}
{'loss': 0.6895, 'learning_rate': 2.039668442865601e-05, 'epoch': 1.78}


  0%|          | 0/57 [00:00<?, ?it/s]

{'eval_loss': 0.674920916557312, 'eval_accuracy': 0.6088888888888889, 'eval_precision': 0.5628205128205128, 'eval_recall': 0.9755555555555555, 'eval_f1': 0.7138211382113822, 'eval_runtime': 13.6736, 'eval_samples_per_second': 65.82, 'eval_steps_per_second': 4.169, 'epoch': 2.0}
{'loss': 0.6971, 'learning_rate': 5.595026642984015e-06, 'epoch': 2.66}


  0%|          | 0/57 [00:00<?, ?it/s]

  _warn_prf(average, modifier, msg_start, len(result))


{'eval_loss': 0.6775010824203491, 'eval_accuracy': 0.5, 'eval_precision': 0.0, 'eval_recall': 0.0, 'eval_f1': 0.0, 'eval_runtime': 13.7817, 'eval_samples_per_second': 65.304, 'eval_steps_per_second': 4.136, 'epoch': 3.0}
{'train_runtime': 1297.7489, 'train_samples_per_second': 20.805, 'train_steps_per_second': 1.301, 'train_loss': 0.6886849908732747, 'epoch': 3.0}


  0%|          | 0/57 [00:00<?, ?it/s]

Validation Results:
 {'eval_loss': 0.674920916557312, 'eval_accuracy': 0.6088888888888889, 'eval_precision': 0.5628205128205128, 'eval_recall': 0.9755555555555555, 'eval_f1': 0.7138211382113822, 'eval_runtime': 13.6986, 'eval_samples_per_second': 65.7, 'eval_steps_per_second': 4.161, 'epoch': 3.0}


  0%|          | 0/57 [00:00<?, ?it/s]

Test v1 Inference Results: {'accuracy': 0.6055555555555555, 'precision': 0.5608194622279129, 'recall': 0.9733333333333334, 'f1': 0.7116165718927701}


  0%|          | 0/57 [00:00<?, ?it/s]

Test v2 Inference Results: {'accuracy': 0.5611111111111111, 'precision': 0.5371120107962213, 'recall': 0.8844444444444445, 'f1': 0.6683459277917716}
