In [None]:
import pandas as pd
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, DataCollatorWithPadding, TrainingArguments, Trainer
from sklearn.metrics import precision_recall_fscore_support, accuracy_score
import numpy as np

# Read the three CSV splits into one Hugging Face dataset object; the split
# names used as keys here ("train", "valid", "test") are the names the rest of
# the script indexes by.
dataset = load_dataset(
    "csv",
    data_files={
        "train": "df_train.csv",
        "valid": "df_valid.csv",
        "test": "df_test.csv",
    },
)

# Tokenize the text column of a batch of examples
def preprocess_function(examples, tokenizer):
    """Run ``tokenizer`` over the ``'sentence'`` entry of ``examples``.

    Args:
        examples: Mapping with a ``'sentence'`` key holding the text(s) to
            tokenize.
        tokenizer: Callable invoked with the text as its only positional
            argument plus the fixed truncation/padding keyword options below.

    Returns:
        Whatever ``tokenizer`` returns for that call.
    """
    # Every sequence is truncated and padded to exactly 128 tokens.
    tokenizer_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 128,
    }
    sentences = examples["sentence"]
    return tokenizer(sentences, **tokenizer_options)

# Metrics callback: accuracy plus weighted precision / recall / F1
def compute_metrics(p):
    """Score a prediction object against its reference labels.

    Args:
        p: Object exposing ``predictions`` (array whose last axis is reduced
            with ``argmax`` to obtain the predicted class) and ``label_ids``
            (the reference labels).

    Returns:
        Dict with the keys ``'accuracy'``, ``'f1'``, ``'precision'`` and
        ``'recall'``; the last three use ``average='weighted'``.
    """
    predicted = p.predictions.argmax(-1)
    expected = p.label_ids

    # The fourth element of the returned tuple is not used here.
    weighted_scores = precision_recall_fscore_support(expected, predicted, average='weighted')
    weighted_precision, weighted_recall, weighted_f1 = weighted_scores[:3]

    return {
        'accuracy': accuracy_score(expected, predicted),
        'f1': weighted_f1,
        'precision': weighted_precision,
        'recall': weighted_recall,
    }

# Checkpoint identifiers to compare; each one is passed to `from_pretrained`
# for both the tokenizer and the sequence-classification model in the tuning
# loop below.
model_names = [
    "w11wo/javanese-bert-small-imdb-classifier",
    "w11wo/javanese-gpt2-small-imdb-classifier",
    "w11wo/javanese-distilbert-small-imdb-classifier"
]

# Hyperparameter grid: every model is trained once per
# (learning rate, batch size) pair, i.e. 3 models x 3 rates x 2 sizes = 18 runs.
learning_rates = [5e-5, 2e-5, 1e-5]
batch_sizes = [16, 32]
# Number of training epochs, shared by every run.
num_epochs = 3

# One dict of validation metrics is appended here per completed run.
results = []

# Hyperparameter tuning: exhaustive grid search over
# model x learning rate x batch size. Each combination fine-tunes a fresh copy
# of the checkpoint and records its validation metrics in `results`.
for model_name in model_names:
    # The tokenizer and the tokenized splits depend only on the checkpoint, so
    # they are built once per model rather than once per hyperparameter pair.
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Some tokenizers (GPT-2 style) define no padding token, which makes
    # padding='max_length' in preprocess_function fail. Fall back to the
    # end-of-sequence token in that case.
    if tokenizer.pad_token is None and tokenizer.eos_token is not None:
        tokenizer.pad_token = tokenizer.eos_token

    # Preprocess data
    tokenized_data = dataset.map(lambda x: preprocess_function(x, tokenizer), batched=True)
    tokenized_data = tokenized_data.rename_column("label", "labels")  # Rename label column to 'labels' for Trainer

    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    for learning_rate in learning_rates:
        for batch_size in batch_sizes:
            print(f"\nTraining and evaluating model: {model_name} with learning_rate={learning_rate} and batch_size={batch_size}")

            # Reload the model for every run so each combination starts from
            # the same pretrained weights. The classification head is sized for
            # 4 labels; ignore_mismatched_sizes lets a checkpoint head of a
            # different size be replaced instead of raising.
            model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=4, ignore_mismatched_sizes=True)

            # Keep the model config in sync with the tokenizer's padding token
            # so padded batches (batch size > 1) can be handled by models that
            # locate the last real token via pad_token_id.
            if model.config.pad_token_id is None and tokenizer.pad_token_id is not None:
                model.config.pad_token_id = tokenizer.pad_token_id

            # Define training arguments
            # NOTE(review): newer transformers releases rename
            # `evaluation_strategy` to `eval_strategy` - confirm against the
            # installed version.
            training_args = TrainingArguments(
                output_dir=f'./results/{model_name}/lr_{learning_rate}_bs_{batch_size}',
                learning_rate=learning_rate,
                per_device_train_batch_size=batch_size,
                per_device_eval_batch_size=batch_size,
                num_train_epochs=num_epochs,
                weight_decay=0.01,
                evaluation_strategy="epoch",
                save_strategy="epoch",
                load_best_model_at_end=True,
                push_to_hub=False
            )

            # Initialize Trainer
            trainer = Trainer(
                model=model,
                args=training_args,
                train_dataset=tokenized_data["train"],
                eval_dataset=tokenized_data["valid"],
                tokenizer=tokenizer,
                data_collator=data_collator,
                compute_metrics=compute_metrics
            )

            # Train the model
            trainer.train()

            # Evaluate on validation set using predict to get predictions
            validation_results = trainer.predict(tokenized_data["valid"])
            metrics = compute_metrics(validation_results)

            # Store results
            results.append({
                'Model': model_name,
                'Learning Rate': learning_rate,
                'Batch Size': batch_size,
                'Accuracy': metrics['accuracy'],
                'F1 Score': metrics['f1'],
                'Precision': metrics['precision'],
                'Recall': metrics['recall']
            })

# Collect the per-run metric dicts into a table (one row per run) and print it
# under a heading.
results_df = pd.DataFrame(results)
print("\nHyperparameter Tuning Results:", results_df, sep="\n")

In [None]:
# Show the results table again; presumably this is a separate notebook cell
# (see the `In [None]:` marker), where a trailing bare expression is rendered
# as the cell's output.
results_df
