In [2]:
import pandas as pd
import ollama
import os
from tqdm import tqdm

from langchain_community.llms import Ollama
from langchain_core.prompts import PromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import LLMChain

In [6]:
# Shared settings for the cells below.
llm_model = 'llama3'               # model name handed to Ollama
fnl_data_dir = '../FinalDataset'   # folder containing trainFinal.csv / testFinal.csv
rslt_dir = '../models&results'     # folder where generated summaries are written

In [7]:
# Load the cleaned train/test splits; any row containing a missing value is dropped.
try:
    train_df = pd.read_csv(os.path.join(fnl_data_dir, 'trainFinal.csv')).dropna()
    test_df = pd.read_csv(os.path.join(fnl_data_dir, 'testFinal.csv')).dropna()
except FileNotFoundError:
    # Only a genuinely missing file gets the "run Notebook 1" hint. Other
    # failures (parse errors, permissions, ...) propagate untouched so the
    # message never misreports their cause.
    print(f"ERROR: Cleaned data files not found in '{fnl_data_dir}'.")
    print("Please run Notebook 1 first to generate these files.")
    raise  # bare raise keeps the original traceback intact

In [9]:
# Zero-shot experiment: ask the LLM for a one-sentence summary of every test
# dialogue, then write all generated summaries to a CSV in rslt_dir.

# Create the output folder before the long-running loop so a missing
# directory cannot make the final to_csv call fail after all the work is done.
os.makedirs(rslt_dir, exist_ok=True)

zero_shot_summaries = []
for _idx, row in tqdm(test_df.iterrows(), total=len(test_df), desc="Zero-Shot Summaries"):
    prompt = f"""
    Summarize the following emotionally sensitive dialogue in a single, concise sentence.
    The primary emotion of the dialogue is: {row['emotion']}

    Dialogue:
    {row['dialogue']}

    Summary:
    """
    try:
        response = ollama.generate(model=llm_model, prompt=prompt)
        summary = response['response'].strip()
    except Exception as e:
        # Best-effort: report the failure and record a sentinel so that one
        # failed request does not abort the whole run.
        print(f"Error querying Ollama for conv_id {row['conv_id']}: {e}")
        summary = 'OLLAMA_ERROR'
    zero_shot_summaries.append({'conv_id': row['conv_id'], 'generated_summary': summary})

# Save the results
zero_shot_results_df = pd.DataFrame(zero_shot_summaries)
zero_shot_results_path = os.path.join(rslt_dir, 'llm_zero_shot_summaries.csv')
zero_shot_results_df.to_csv(zero_shot_results_path, index=False)
print(f"Zero-shot LLM summaries saved to '{zero_shot_results_path}'")


Zero-Shot Summaries: 100%|██████████| 1776/1776 [1:56:37<00:00,  3.94s/it] 

Zero-shot LLM summaries saved to '../models&results/llm_zero_shot_summaries.csv'





In [10]:
print(f"STARTING EXPERIMENT: RAG (FEW-SHOT) LLM ({llm_model})")

# Step 1: embed every training dialogue and index it for similarity search.
print("\nbuilding the knowledge base for RAG...")
embedding_model_name = "all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

print("Creating vector store...")
train_dialogues = train_df["dialogue"].tolist()
vector_store = FAISS.from_texts(texts=train_dialogues, embedding=embeddings)
print("Vector store created successfully.")


STARTING EXPERIMENT: RAG (FEW-SHOT) LLM (llama3)

building the knowledge base for RAG...


  embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)


Creating vector store...
Vector store created successfully.


In [11]:

# Few-shot prompt used by the RAG experiment.
# Placeholders: three example dialogue/summary pairs
# (example_dialogue_N / example_summary_N for N = 1..3), plus the emotion and
# dialogue of the new conversation to be summarized.
rag_prompt_template = """
INSTRUCTION: You are an expert in summarizing emotionally sensitive conversations.
Use the following three examples of dialogues and their summaries as a guide.

EXAMPLE 1:
Dialogue: {example_dialogue_1}
Summary: {example_summary_1}

EXAMPLE 2:
Dialogue: {example_dialogue_2}
Summary: {example_summary_2}

EXAMPLE 3:
Dialogue: {example_dialogue_3}
Summary: {example_summary_3}

Now, using these examples as a guide, summarize the following new dialogue in a single, concise sentence.
The primary emotion of the new dialogue is: {emotion}

New Dialogue:
{dialogue}

Final Summary:
"""

In [12]:
# Names of every placeholder that appears in rag_prompt_template.
rag_input_variables = [
    "example_dialogue_1", "example_summary_1",
    "example_dialogue_2", "example_summary_2",
    "example_dialogue_3", "example_summary_3",
    "emotion", "dialogue",
]

# Wire the prompt and the Ollama-backed model into a single chain.
rag_prompt = PromptTemplate(
    template=rag_prompt_template,
    input_variables=rag_input_variables,
)
llm = Ollama(model=llm_model)
rag_chain = LLMChain(llm=llm, prompt=rag_prompt)


  llm = Ollama(model=llm_model)
  rag_chain = LLMChain(llm=llm, prompt=rag_prompt)


In [13]:
# RAG experiment: for every test dialogue, retrieve the three most similar
# training dialogues, use them (with their reference summaries) as few-shot
# examples, and ask the chain for a one-sentence summary.

# Build the dialogue -> reference-summary lookup once instead of scanning
# train_df for every retrieved document. keep="first" mirrors the previous
# `.iloc[0]` choice when the same dialogue text occurs more than once.
summary_by_dialogue = (
    train_df.drop_duplicates(subset="dialogue", keep="first")
    .set_index("dialogue")["target_summary"]
    .to_dict()
)

n_examples = 3  # the prompt template has exactly three example slots

rag_summaries = []
for _idx, row in tqdm(test_df.iterrows(), total=len(test_df), desc="RAG Summaries"):
    try:
        # Retrieval and example lookup live inside the try block so that a
        # failure on one row is recorded as RAG_ERROR instead of aborting the
        # whole multi-hour run.
        retrvd_docs = vector_store.similarity_search(row['dialogue'], k=n_examples)
        if len(retrvd_docs) < n_examples:
            raise ValueError(
                f"expected {n_examples} retrieved examples, got {len(retrvd_docs)}"
            )

        chain_inputs = {"emotion": row['emotion'], "dialogue": row['dialogue']}
        for slot, doc in enumerate(retrvd_docs[:n_examples], start=1):
            chain_inputs[f"example_dialogue_{slot}"] = doc.page_content
            chain_inputs[f"example_summary_{slot}"] = summary_by_dialogue[doc.page_content]

        response = rag_chain.invoke(chain_inputs)
        summary = response['text'].strip()
    except Exception as e:
        # Best-effort: report the failure and record a sentinel for this row.
        print(f"Error with RAG chain for conv_id {row['conv_id']}: {e}")
        summary = 'RAG_ERROR'
    rag_summaries.append({'conv_id': row['conv_id'], 'generated_summary': summary})

RAG Summaries: 100%|██████████| 1776/1776 [3:19:16<00:00,  6.73s/it]  


In [15]:
# Persist the RAG summaries collected in rag_summaries as a CSV in rslt_dir.
# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing.
os.makedirs(rslt_dir, exist_ok=True)
rag_results_df = pd.DataFrame(rag_summaries)
rag_results_path = os.path.join(rslt_dir, 'llm_rag_summaries.csv')
rag_results_df.to_csv(rag_results_path, index=False)
print(f"RAG summaries saved to '{rag_results_path}'")

RAG summaries saved to '../models&results/llm_rag_summaries.csv'
