In [5]:
import json
import os
from dotenv import load_dotenv
from athina.llms.openai_service import OpenAiService
from athina.keys import OpenAiApiKey

# Load environment variables via python-dotenv before reading the key below.
load_dotenv()

# Read the OpenAI key from the process environment (None when the variable
# is unset) and hand it to athina's key holder.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OpenAiApiKey.set_key(OPENAI_API_KEY)

# Model name kept as a notebook-level constant.
DEFAULT_MODEL = "gpt-3.5-turbo"

In [6]:
# LOADERS

dataset_file_path = '../athina/datasets/conversations.json'

def load_data(file_path=None):
    """Read a JSON dataset from disk and return the parsed content.

    Args:
        file_path: Optional path to a JSON file. When omitted (``None``), the
            module-level ``dataset_file_path`` is used; it is looked up at
            call time, so reassigning it in a later cell still takes effect.

    Returns:
        The entire dataset, exactly as produced by ``json.load``.

    Raises:
        FileNotFoundError: If the file does not exist.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    path = dataset_file_path if file_path is None else file_path

    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, which would garble or reject non-ASCII text on some systems.
    with open(path, 'r', encoding='utf-8') as file:
        return json.load(file)

def load_single_conversation():
    """Return the first conversation of the dataset with ``None`` entries removed.

    Returns:
        A list holding every element of the dataset's first row that is not
        ``None``, in the original order.
    """
    first_conversation = load_data()[0]
    return list(filter(lambda message: message is not None, first_conversation))

def load_conversations():
    """Return every conversation in the dataset, each wrapped in a dict.

    Returns:
        A list with one ``{"messages": [...]}`` dict per dataset row, where
        the inner list keeps the row's elements that are not ``None``.
    """
    conversations = []
    for row in load_data():
        messages = [message for message in row if message is not None]
        conversations.append({"messages": messages})
    return conversations
    

In [7]:
# Take the first three conversations as a small test batch
conversations = load_conversations()[:3]

conversations

[{'messages': ['Customer: Hi, Im interested in purchasing a new smartphone. Can you help me choose the best one?',
   "Salesman: I'm sorry I cannot help you with that.",
   'Customer: Im looking for a laptop that can handle graphic design software. Any recommendations?',
   'Salesman: Absolutely! In order to find the best laptop for your needs, could you provide me with more details about the specific graphic design software youll be using?',
   'Customer: Im considering upgrading my companys server. What options do you have available?',
   'Salesman: Thank you for considering us for your server upgrade. To better understand your requirements, could you please share some information about your current server setup and the specific needs you have in mind?',
   'Customer: Im in the market for a new smartwatch. Can you help me choose the right one?',
   'Salesman: Absolutely! Before we proceed, could you let me know what features are most important to you in a smartwatch? That way, I can 

In [9]:
from athina.evals import ConversationResolution

# Run the ConversationResolution eval over the selected conversations.
# NOTE(review): presumably a conversation is marked failed when its
# resolution score falls below failure_threshold — confirm against athina docs.
res = (
    ConversationResolution(failure_threshold=0.5)
    .run_batch(data=conversations)
)

res.to_df()

Unnamed: 0,messages,display_name,failed,grade_reason,runtime,model,conversation_resolution
0,"[Customer: Hi, Im interested in purchasing a new smartphone. Can you help me choose the best one?, Salesman: I'm sorry I cannot help you with that., Customer: Im looking for a laptop that can handle graphic design software. Any recommendations?, Salesman: Absolutely! In order to find the best laptop for your needs, could you provide me with more details about the specific graphic design software youll be using?, Customer: Im considering upgrading my companys server. What options do you have ...",Conversation Resolution,False,"The following messages were not resolved:\n\n-""Hi, Im interested in purchasing a new smartphone. Can you help me choose the best one?"" (Resolution: Unresolved)\n: The AI's response did not address the user's request for help in choosing the best smartphone.\n",5601,gpt-3.5-turbo,0.833333
1,"[Customer: Hi, Im interested in learning more about your health products., Salesman: Great! Im happy to help. Tell me, what specific health concerns do you have?, Customer: Ive been experiencing digestive issues lately and Im looking for a solution., Salesman: I understand how frustrating that can be. Many of our customers have found relief with our digestive health supplements. Would you like me to provide more information?, Customer: Ive tried different products before, but nothing seems t...",Conversation Resolution,False,All messages were resolved,5920,gpt-3.5-turbo,1.0
2,"[Customer: Hi, Im interested in investing in the stock market. Can you help me?, Salesman: Of course! Id be happy to assist you. Lets start by getting to know each other. What are your investment goals and risk tolerance?, Customer: Ive had some bad experiences with financial advisors in the past. How can I trust you?, Salesman: I understand your concerns. Trust is crucial in this industry. Let me share some success stories from my previous clients and provide you with references to vouch fo...",Conversation Resolution,False,All messages were resolved,5023,gpt-3.5-turbo,1.0
