In [1]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, default_data_collator, get_linear_schedule_with_warmup
from peft import get_peft_config, get_peft_model, get_peft_model_state_dict, PrefixTuningConfig, TaskType
from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm import tqdm
import torch
import os
import pandas as pd

# Read the MELD train / dev / test splits from data/MELD into DataFrames.
train_df, valid_df, test_df = (
    pd.read_csv("data/MELD/train_sent_emo.csv"),
    pd.read_csv("data/MELD/dev_sent_emo.csv"),
    pd.read_csv("data/MELD/test_sent_emo.csv"),
)


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Apply prefix tuning to solve the emotion recognition task
# First, do the language modeling task with prefix tuning
# The prefixes are the emotions in the dataset; the model learns to predict the next word conditioned on the prefix

def get_data_loader(df, tokenizer, max_length=128, batch_size=16):
    """
    Create a DataLoader over the tokenized 'Utterance' column of ``df``.

    Every utterance is truncated/padded to ``max_length`` tokens. Only the
    tokenizer outputs are included in each example; no other column of
    ``df`` is added to the features.

    Args:
        df: DataFrame with an 'Utterance' column of text.
        tokenizer: Callable tokenizer accepting a list of strings together
            with ``truncation``, ``padding`` and ``max_length`` keyword
            arguments and returning a mapping of field name to per-example
            lists (e.g. a Hugging Face tokenizer).
        max_length: Fixed sequence length used for truncation and padding.
        batch_size: Number of examples per batch.

    Returns:
        A DataLoader whose batches are collated by ``default_data_collator``,
        one ``(batch, max_length)`` tensor per tokenizer output field.

    Raises:
        KeyError: If ``df`` has no 'Utterance' column.
    """
    utterances = df['Utterance'].tolist()

    features = []
    if utterances:  # skip the tokenizer call entirely for an empty frame
        # Tokenize the whole column in one call and WITHOUT return_tensors:
        # tokenizing row by row with return_tensors='pt' yields (1, max_length)
        # tensors per example, which the collator then stacks into
        # (batch, 1, max_length) instead of (batch, max_length).
        encodings = tokenizer(
            utterances,
            truncation=True,
            padding='max_length',
            max_length=max_length,
        )
        # Re-shape the column-wise tokenizer output into one dict per example
        # so the collator can build the batch tensors itself.
        features = [
            {field: values[idx] for field, values in encodings.items()}
            for idx in range(len(utterances))
        ]

    return DataLoader(
        features,
        batch_size=batch_size,
        collate_fn=default_data_collator,
    )

