In [None]:
import os
import json
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnableLambda
from key import OPENAI_KEY, LANGSMITH_KEY # Import your own keys
from operator import itemgetter
import time

# Publish the imported keys and the tracing flag as environment variables.
os.environ.update(
    {
        "OPENAI_API_KEY": OPENAI_KEY,
        "LANGCHAIN_TRACING_V2": "true",
        "LANGCHAIN_API_KEY": LANGSMITH_KEY,
    }
)

In [None]:
text_dir = "../data/text/Subtask_2_test.json" # path to json file


def _read_json(path):
    """Parse the JSON document at `path` and close the file afterwards.

    Using a `with` block releases the file handle as soon as parsing is done
    instead of leaving it open until the garbage collector reclaims it.
    """
    with open(path) as handle:
        return json.load(handle)


anno = _read_json(text_dir)
video_captions = _read_json("eval_proc_out.json") # processed video captions
explained_emotions = _read_json("emotion_explainations.json") # explanations for train set conversations with all emotions

In [None]:
# Chat model and the parser that turns its reply into a plain string.
model = ChatOpenAI(model="gpt-3.5-turbo-1106")
output_parser = StrOutputParser()

# Embedding function plus the saved FAISS index that is queried with it.
embeddings = OpenAIEmbeddings()
db = FAISS.load_local(folder_path="all_emotion_index", embeddings=embeddings)

In [None]:
# One pipeline input per annotated conversation: its utterances together with
# the caption entry stored under the stringified conversation ID.
batches = [
    dict(
        conversation=entry["conversation"],
        scene=video_captions[str(entry["conversation_ID"])],
    )
    for entry in anno
]

In [None]:
# Prompt template for the emotion-recognition chain. The {example}, {scene}
# and {conversation} placeholders are the template variables supplied by the
# pipeline that is built from this string; everything else is sent to the
# model as written, so rewording it changes the model's input.
emotion_rag = """
You are a die-hard fan of the popular Friends TV show. 
You have all the knowledge of all the seasons and are familiar with all the characters. 
Your task is to recognize emotions in utterances. 
Here's an annotated example with recognized emotion and explanation:

{example}

Like above example annotate the following Conversation:
Context for the scene is given below:
{scene}

Conversation:
{conversation}

Classify the emotional state of the speaker in each utterance into ONLY one out of the 6 emotions:
Anger, Disgust, Fear, Joy, Sadness, Surprise. 
The emotion of the speaker is determined by the context of the conversation. 
Give explanation for your classification using the context. Only Use the above 6 emotion categories. 
If the emotion is not in any category, is a mix of several categories, or is ambiguous, 
classify the state as "Neutral".  Sarcastic comments may be categorized as Neutral.
Format the output as JSON as the given example. No plain text.
"""

In [None]:
def format_convo(convo):
    """Render a conversation as numbered ``speaker: text`` lines.

    Each utterance (a mapping with "speaker" and "text" keys) becomes
    ``N. speaker: text`` preceded by a newline, numbered from 1 in iteration
    order. An empty conversation yields an empty string.
    """
    return "".join(
        f'\n{position}. {turn["speaker"]}: {turn["text"]}'
        for position, turn in enumerate(convo, start=1)
    )

def to_json(d):
    """Serialize `d` to a JSON string using the default `json.dumps` settings."""
    encoded = json.dumps(d)
    return encoded
    
def retrieve_example(convo):
    """Return the stored annotated example closest to `convo`, as JSON text.

    The conversation is formatted with `format_convo` and used as the query
    for `db.similarity_search`. The first line of the top hit's page content,
    stripped of surrounding whitespace, is the key looked up in
    `explained_emotions`; that entry is returned serialized with `json.dumps`.
    """
    query = format_convo(convo)
    top_hit = db.similarity_search(query)[0]
    example_key = top_hit.page_content.split("\n", 1)[0].strip()
    return json.dumps(explained_emotions[example_key])

emotion_rag_prompt = ChatPromptTemplate.from_template(emotion_rag)

# Map an incoming record to the three variables of the prompt template:
# the conversation as JSON, a retrieved annotated example, and the scene text.
prompt_inputs = {
    "conversation": itemgetter("conversation") | RunnableLambda(to_json),
    "example": itemgetter("conversation") | RunnableLambda(retrieve_example),
    "scene": itemgetter("scene"),
}

# record -> filled prompt -> chat model -> output parser
emotion_pipeline = prompt_inputs | emotion_rag_prompt | model | output_parser

In [None]:
# Labelled results, keyed by conversation ID. Starts empty for a fresh run;
# swap in the commented line below to continue from a previously saved file.
emotion_eval_labelled = dict()
# emotion_eval_labelled = json.load(open("emotion_eval_labelled.json")) # if resuming

In [None]:
# Re-run this cell until all the conversations are processed successfully;
# conversations that already have a stored result are skipped.
save_step = 10
total = len(anno)
for i, a in enumerate(anno):
    conv_id = a["conversation_ID"]
    # Results added below are keyed by conv_id as-is, whereas a results file
    # reloaded from JSON has string keys, so both forms are checked.
    if str(conv_id) in emotion_eval_labelled or conv_id in emotion_eval_labelled:
        continue
    batch = {"conversation": a["conversation"], "scene": video_captions[str(conv_id)]}

    try:
        # Both the model call and the JSON parse can fail for a single
        # conversation; keeping them in one guarded step lets the loop move on
        # and leaves the conversation unlabelled so a later re-run retries it.
        out = emotion_pipeline.invoke(batch)
        emotion_eval_labelled[conv_id] = {"conversation_ID": conv_id,
                                          "conversation": json.loads(out)}
        print("[{}/{}] Processed Conv {}".format(i+1, total, conv_id))
    except Exception as err:
        # `Exception` (not a bare except) so Ctrl-C still interrupts the run;
        # the error is shown so repeated failures can be diagnosed.
        print("[{}/{}] Failed to Process Conv {}".format(i+1, total, conv_id), "-", repr(err))

    if i%save_step == 0:
        print("json dump...")
        # Periodic checkpoint; the `with` block flushes and closes the file.
        with open("emotion_eval_labelled.json", "w") as checkpoint:
            json.dump(emotion_eval_labelled, checkpoint)
    time.sleep(0.20)

In [None]:
# Final save of all labelled conversations; the `with` block guarantees the
# file is flushed and closed once the dump completes.
with open("emotion_eval_labelled.json", "w") as results_file:
    json.dump(emotion_eval_labelled, results_file)