In [1]:
import pandas as pd
import os
from pathlib import Path
import random
import re
from tqdm.notebook import tqdm

from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM
import torch

DSDIR = Path(os.environ["DSDIR"])
DATA_PATH = DSDIR / "meld_fidle"

In [2]:
mode = "train"
WAV_DATA = DATA_PATH / f"{mode}_wav"
PT_DATA = DATA_PATH / f"{mode}_pt"
CSV_DF = DATA_PATH / f"{mode}_sent_emo.csv"

In [3]:
df = pd.read_csv(CSV_DF)
df["question"] = ""
df["answer"] = ""
df

Unnamed: 0,Sr No.,Utterance,Speaker,Emotion,Sentiment,Dialogue_ID,Utterance_ID,Season,Episode,StartTime,EndTime,question,answer
0,1,Why do all youre coffee mugs have numbers on ...,Mark,surprise,positive,0,0,3,19,"00:14:38,127","00:14:40,378",,
1,2,Oh. Thats so Monica can keep track. That way ...,Rachel,anger,negative,0,1,3,19,"00:14:40,629","00:14:47,385",,
2,3,Y'know what?,Rachel,neutral,neutral,0,2,3,19,"00:14:56,353","00:14:57,520",,
3,19,"Come on, Lydia, you can do it.",Joey,neutral,neutral,1,0,1,23,"0:10:44,769","0:10:46,146",,
4,20,Push!,Joey,joy,positive,1,1,1,23,"0:10:46,146","0:10:46,833",,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2605,2760,"Yeah, I mean, come on Ross, no one will even n...",Rachel,neutral,neutral,279,11,6,4,"00:14:35,457","00:14:40,211",,
2606,2761,They’re not listening too me?,Ross,surprise,negative,279,12,6,4,"00:14:42,256","00:14:43,840",,
2607,2762,Of course they’re listening to you! Everybody ...,Rachel,neutral,neutral,279,13,6,4,"00:14:44,008","00:14:48,511",,
2608,2763,Monica you really think I should try this phas...,Ross,neutral,neutral,279,14,6,4,"00:14:48,138","00:14:52,390",,


In [4]:
# Initialize the model and its tokenizer
model = AutoModelForCausalLM.from_pretrained(
    DSDIR / "HuggingFace_Models/meta-llama/Meta-Llama-3-8B-Instruct",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,  # Allow using code that was not written by HuggingFace
    attn_implementation="flash_attention_2"  # Optimize the model with Flash Attention
).to("cuda")
tokenizer = AutoTokenizer.from_pretrained(DSDIR / "HuggingFace_Models/meta-llama/Meta-Llama-3-8B-Instruct")
tokenizer.pad_token_id = tokenizer.eos_token_id

def generation(prompt, **gen_parameters):
    """Generate text from a prompt and print it."""
    model_inp = tokenizer(prompt, return_tensors="pt").to("cuda")
    # the generate() method is a succession of forward (auto-regressive) 
    out = model.generate(input_ids=model_inp["input_ids"], pad_token_id=tokenizer.pad_token_id, **gen_parameters)
    return tokenizer.decode(out[0])

You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [7]:
def gen_quest_ans(utterence, speaker, emotion):
    prompt1 = f"""Write a question and an answer that ask who is the speaker or what is his emotion.
Context: The speaker is Monica. He or she is sadness. He or she said "I'm gonna miss you!".
Question:
Who is the speaker in this dialogue?
Answer:
Monica, who is saying "I'm gonna miss you!" and expressing sadness.<|end_of_text|>
Context: The speaker is {speaker}. He or she is {emotion}. He or she said "{utterence}".
Question:"""

    prompt2 = f"""Write a question and an answer that ask who is the speaker or what is his emotion.
Context: The speaker is Chandler. The emotion of the speach is joy. He or she said "You're a genius!".
Question:
What is the speaker's emotion?
Answer:
The speaker's emotion is Joy.<|end_of_text|>
Context: The speaker is {speaker}. He or she is {emotion}. He or she said "{utterence}".
Question:"""

    prompt = random.choice([prompt1, prompt2])
    retry = True

    while retry:
        try:
            out = generation(prompt, do_sample=True, max_new_tokens=300)
            pattern = r"""Question:.*Question:([A-Za-zà-ÿ\s0-9,.;:?!'"]*).*Answer:"""
            reg_match = re.search(pattern, out, re.DOTALL)
            question = reg_match[1].strip()
        
            pattern = r"""Answer:.*Answer:([A-Za-zà-ÿ\s0-9,.;:?!'"]*).*"""
            pattern = r"""Answer:([A-Za-zà-ÿ\s0-9,.;:?!'"]*).*"""
            reg_match = re.search(pattern, out, re.DOTALL)
            answer = reg_match[1].strip()
            retry = False

        except:
            retry = True
            
    return question, answer

In [9]:
idx = 4
gen_quest_ans(df.loc[idx]["Utterance"], df.loc[idx]["Speaker"], df.loc[idx]["Emotion"])

('Who is the speaker in this dialogue and what is his emotion?',
 'Monica, who is saying "I\'m gonna miss you!" and expressing sadness.')

In [10]:
loop = tqdm(range(len(df)))

for idx in loop:
    if (PT_DATA / f"dia{df.loc[idx]['Dialogue_ID']}_utt{df.loc[idx]['Utterance_ID']}.pt").exists():
        question, answer = gen_quest_ans(df.loc[idx]["Utterance"], df.loc[idx]["Speaker"], df.loc[idx]["Emotion"])
        df.loc[idx, "question"] = question
        df.loc[idx, "answer"] = answer
    else:
        df.drop(idx, inplace=True)

df.reset_index(drop=True, inplace=True)
df.to_csv(f"{mode}_sent_emo.csv", index=False)
df

  0%|          | 0/2610 [00:00<?, ?it/s]

Unnamed: 0,Sr No.,Utterance,Speaker,Emotion,Sentiment,Dialogue_ID,Utterance_ID,Season,Episode,StartTime,EndTime,question,answer
0,1,Why do all youre coffee mugs have numbers on ...,Mark,surprise,positive,0,0,3,19,"00:14:38,127","00:14:40,378",Who is the speaker in this dialogue?,"Monica, who is saying ""I'm gonna miss you!"" an..."
1,2,Oh. Thats so Monica can keep track. That way ...,Rachel,anger,negative,0,1,3,19,"00:14:40,629","00:14:47,385",What is the speaker's emotion?,The speaker's emotion is Joy.
2,3,Y'know what?,Rachel,neutral,neutral,0,2,3,19,"00:14:56,353","00:14:57,520",Who is the speaker?,The speaker's emotion is Joy.
3,19,"Come on, Lydia, you can do it.",Joey,neutral,neutral,1,0,1,23,"0:10:44,769","0:10:46,146",The speaker is trying to,The speaker's emotion is Joy.
4,20,Push!,Joey,joy,positive,1,1,1,23,"0:10:46,146","0:10:46,833",Who is the speaker in this dialogue?,"Monica, who is saying ""I'm gonna miss you!"" an..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2605,2760,"Yeah, I mean, come on Ross, no one will even n...",Rachel,neutral,neutral,279,11,6,4,"00:14:35,457","00:14:40,211",Who is the speaker?,The speaker's emotion is Joy.
2606,2761,They’re not listening too me?,Ross,surprise,negative,279,12,6,4,"00:14:42,256","00:14:43,840",Who is the speaker?,The speaker's emotion is Joy.
2607,2762,Of course they’re listening to you! Everybody ...,Rachel,neutral,neutral,279,13,6,4,"00:14:44,008","00:14:48,511",Who is the speaker in this dialogue?,"Monica, who is saying ""I'm gonna miss you!"" an..."
2608,2763,Monica you really think I should try this phas...,Ross,neutral,neutral,279,14,6,4,"00:14:48,138","00:14:52,390",Who is the speaker in this dialogue?,"Monica, who is saying ""I'm gonna miss you!"" an..."
