In [None]:
!pip install -q together
!pip install openai==0.27.8
!pip install zhipuai

Collecting openai==0.27.8
  Downloading openai-0.27.8-py3-none-any.whl.metadata (13 kB)
Downloading openai-0.27.8-py3-none-any.whl (73 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.6/73.6 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.54.4
    Uninstalling openai-1.54.4:
      Successfully uninstalled openai-1.54.4
Successfully installed openai-0.27.8
Collecting zhipuai
  Downloading zhipuai-2.1.5.20230904-py3-none-any.whl.metadata (10 kB)
Collecting pyjwt<2.9.0,>=2.8.0 (from zhipuai)
  Downloading PyJWT-2.8.0-py3-none-any.whl.metadata (4.2 kB)
Downloading zhipuai-2.1.5.20230904-py3-none-any.whl (104 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PyJWT-2.8.0-py3-none-any.whl (22 kB)
Installing collected packages: pyjwt, zhipuai
  Attempting uninstall: pyjw

In [None]:
# API keys are read from Colab's `userdata` store instead of being hard-coded
# in the notebook.
from google.colab import userdata
# Key for the Together client constructed further down.
TOGETHER_API_KEY = userdata.get('TOGETHER_API_KEY')

In [None]:
from together import Together
import openai
from zhipuai import ZhipuAI

import os
import re
import json
import numpy as np
import requests

import pandas as pd
from collections import Counter

In [None]:
# One handle per provider; every key comes from Colab's `userdata` store.

# Together client: used by the LLaMA and Qwen evaluation loops.
together = Together(api_key=TOGETHER_API_KEY)
# The openai module is configured through a module-level key, not a client object.
openai.api_key = userdata.get('OpenAI_API_Key')

GLM_API_Key = userdata.get('GLM_API_Key')  # https://www.bigmodel.cn/usercenter/apikeys
# ZhipuAI client: used by the GLM evaluation loop.
client = ZhipuAI(api_key=GLM_API_Key)

In [None]:
def _single_string_param_tool(name, description, param_name, param_description):
    """Build one function-tool schema whose only (required) argument is a string."""
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": {
                    param_name: {
                        "type": "string",
                        "description": param_description,
                    },
                },
                "required": [param_name],
                "additionalProperties": False,
            },
        },
    }


# Tool schemas offered to the models under evaluation. The order matters:
# the prompt cell below addresses the entries by index.
tools_list = [
    _single_string_param_tool(
        "RAG",
        "Retrieve the relevant section in the given knowledge base when the user asks information about courses or syllabus.",
        "query",
        "The original question that the user asks exactly, no need to rephrase.",
    ),
    _single_string_param_tool(
        "get_current_weather",
        "Fetches the current weather for a given location.",
        "location",
        "The name of the city and country (e.g., 'San Francisco, US').",
    ),
    _single_string_param_tool(
        "financial_news_report",
        "Retrieve recent financial news when the user asks for it.",
        "query",
        "The original question that the user asks exactly, no need to rephrase.",
    ),
]

# Look up each tool's "function" schema once so the template below stays readable.
_rag_fn = tools_list[0]['function']
_weather_fn = tools_list[1]['function']
_news_fn = tools_list[2]['function']

# System prompt that describes the three tools and the textual
# <function=...>{...}</function> call format that parse_tool_response looks for.
toolPrompt = f"""
You have access to the following functions:

Use the function '{_rag_fn['name']}' to '{_rag_fn['description']}'.
The parameters are: {json.dumps(_rag_fn['parameters']['properties'])}, where {_rag_fn['parameters']['required']} are required.

Use the function '{_weather_fn['name']}' to '{_weather_fn['description']}':
The parameters are: {json.dumps(_weather_fn['parameters']['properties'])}, where {_weather_fn['parameters']['required']} are required.

Use the function '{_news_fn['name']}' to '{_news_fn['description']}':
The parameters are: {json.dumps(_news_fn['parameters']['properties'])}, where {_news_fn['parameters']['required']} are required.

If you choose to call a function ONLY reply in the following format with no prefix or suffix:

<function=example_function_name>{{\"example_name\": \"example_value\"}}</function>

Reminder:
- Function calls MUST follow the specified format, start with <function= and end with </function>
- Required parameters MUST be specified
- Only call one function at a time
- Put the entire function call reply on one line
- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls

"""

In [None]:
def parse_tool_response(response_message):
    """
    Extract the function call a model made, if any.

    Two formats are recognised, in this order:
      1. A native tool call in ``response_message.tool_calls`` (only the first
         entry is used).
      2. A textual call inside ``response_message.content`` of the form
         ``<function=NAME>{...JSON arguments...}</function>``.

    Args:
        response_message: ``response.choices[0].message``. Only its
            ``tool_calls`` and ``content`` attributes are read; either may be
            missing, ``None`` or empty.

    Returns:
        dict | None: ``{"function": <name>, "arguments": <decoded JSON>}``, or
        ``None`` when no call is present or its arguments are not valid JSON.
    """
    tool_calls = getattr(response_message, "tool_calls", None)
    if tool_calls:
        first_call = tool_calls[0].function
        try:
            arguments = json.loads(first_call.arguments)
        except (json.JSONDecodeError, TypeError) as error:
            # Same reporting as the textual path below: a malformed call is
            # "no usable call", not a reason to abort a whole evaluation run.
            print(f"Error parsing function arguments: {error}")
            return None

        return {
            "function": first_call.name,
            "arguments": arguments,
        }

    # A message can carry neither a tool call nor any text; re.search would
    # raise TypeError on a None content.
    content = getattr(response_message, "content", None)
    if not content:
        return None

    # Accepts the proper closing tag as well as the malformed "<function>" /
    # "<function/...>" variants as terminator.
    function_regex = r"<function=([a-zA-Z_]\w*)>(\{.*?\})\s*(?:</function>|<function(?:/[\w]*)?>)"
    match = re.search(function_regex, content)
    if not match:
        return None

    function_name, args_string = match.groups()
    try:
        args = json.loads(args_string.strip())
    except json.JSONDecodeError as error:
        print(f"Error parsing function arguments: {error}")
        return None

    return {
        "function": function_name,
        "arguments": args,
    }

In [None]:
def load_queries_and_functions(csv_path_list):
    """
    Load labelled queries from one or more CSV files into a single mapping.

    Every file must provide a 'query' column and a 'function_to_call' column.
    Rows of all files are concatenated in the given order. Queries that occur
    more than once are reported on stdout and collapse to a single dictionary
    entry (the last occurrence wins), so the result can hold fewer entries than
    the printed total.

    Args:
        csv_path_list (list | str | os.PathLike): Paths of the CSV files to
            read. A single path is accepted as well and treated as a
            one-element list.

    Returns:
        dict: Query text -> value of 'function_to_call' for that query.

    Raises:
        ValueError: If a file lacks the 'query' or 'function_to_call' column.
    """
    # A bare path would otherwise be iterated character by character.
    if isinstance(csv_path_list, (str, os.PathLike)):
        csv_path_list = [csv_path_list]

    all_queries = []
    all_functions = []

    for csv_path in csv_path_list:
        data = pd.read_csv(csv_path)

        # Fail early with a clear message instead of a KeyError further down.
        if 'query' not in data.columns or 'function_to_call' not in data.columns:
            raise ValueError("CSV file must contain 'query' and 'function_to_call' columns.")

        all_queries.extend(data['query'])
        all_functions.extend(data['function_to_call'])

    assert len(all_queries) == len(all_functions)
    print('Total query number:', len(all_queries))

    # Surface duplicates explicitly, because dict() below drops them silently.
    for item, count in Counter(all_queries).items():
        if count > 1:
            print(f"'{item}' is duplicated {count} times")

    return dict(zip(all_queries, all_functions))

# Paths of the two labelled-query CSV files (located under /content); both are
# merged into one query -> expected-function mapping.
csv_path_list = ['/content/combined_data.csv', '/content/combined_fake_data.csv']
query_function_dict = load_queries_and_functions(csv_path_list)

Total query number: 305
'Will there be any guest speakers in Robotic Perception?' is duplicated 2 times


In [None]:
# Number of unique queries: duplicated queries collapse into one dictionary
# entry, so this can be lower than the "Total query number" printed by the loader.
print(len(query_function_dict))

304


# LLaMA function call accuracy

In [None]:
# Evaluate LLaMA 3.1 8B: the tool prompt is sent as the system message and the
# tool schemas are additionally passed through the `tools` argument.
incorrect_queries = []
correct_tool_usage = 0

for query, expected_function in query_function_dict.items():
    response = together.chat.completions.create(
        model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        messages=[
            {"role": "system", "content": toolPrompt},
            {"role": "user", "content": query},
        ],
        max_tokens=1024,
        temperature=0,
        tools=tools_list,
        tool_choice="auto",
    )

    reply = response.choices[0].message
    parsed_response = parse_tool_response(reply)

    # Correct only when a call was parsed AND it names the expected function;
    # everything else is kept together with the raw text for inspection.
    if not parsed_response or parsed_response.get("function") != expected_function:
        incorrect_queries.append((query, reply.content))
    else:
        correct_tool_usage += 1

total_queries = len(query_function_dict)
accuracy = correct_tool_usage / total_queries * 100
print(f"\nAccuracy: {accuracy:.2f}%")
print(f"Total Correct: {correct_tool_usage} / {total_queries}")

print("\nIncorrect Queries:")
for q, c in incorrect_queries:
    # Query, model text, then an empty line as separator.
    print(q, c, "", sep="\n")


Accuracy: 99.67%
Total Correct: 303 / 304

Incorrect Queries:
Please tell me when and where 585 will be held.
I'm not aware of any information about an event called "585". Could you provide more context or clarify what you are referring to?



# Qwen function call accuracy

In [None]:
# Evaluate Qwen 2.5 7B. No `tools` / `tool_choice` arguments are sent here:
# the call format comes only from the system prompt, and the call is parsed
# back out of the reply by parse_tool_response.
incorrect_queries = []
correct_tool_usage = 0

for query, expected_function in query_function_dict.items():
    response = together.chat.completions.create(
        model="Qwen/Qwen2.5-7B-Instruct-Turbo",
        messages=[
            {"role": "system", "content": toolPrompt},
            {"role": "user", "content": query},
        ],
        max_tokens=1024,
        temperature=0,
    )

    reply = response.choices[0].message
    parsed_response = parse_tool_response(reply)

    # Correct only when a call was parsed AND it names the expected function.
    if not parsed_response or parsed_response.get("function") != expected_function:
        incorrect_queries.append((query, reply.content))
    else:
        correct_tool_usage += 1

total_queries = len(query_function_dict)
accuracy = correct_tool_usage / total_queries * 100
print(f"\nAccuracy: {accuracy:.2f}%")
print(f"Total Correct: {correct_tool_usage} / {total_queries}")

print("\nIncorrect Queries:")
for q, c in incorrect_queries:
    # Query, model text, then an empty line as separator.
    print(q, c, "", sep="\n")


Accuracy: 95.72%
Total Correct: 291 / 304

Incorrect Queries:
Tell me about the time line for the project for Natural Language Processing
The information about the project timeline for Natural Language Processing is not available in the current knowledge base. I would recommend checking the project management tools or documents for detailed information. If you need help with understanding NLP concepts or other related information, I can assist with that.

Tell me the email of teaching assistance Matthew Finlayson for Natural Language Processing
The knowledge base does not contain specific contact information for individual teaching assistants. I recommend checking the course page or contacting the department directly for this information.

How much is the project worth for deep learning?
The value or worth of a project in deep learning can vary widely depending on several factors such as the specific goals of the project, the complexity of the tasks involved, the size and quality of t

# GLM function call accuracy

In [None]:
# Evaluate GLM-4-9B through the ZhipuAI client: tool prompt as system message
# plus the tool schemas via `tools`. No sampling parameters are set here.
incorrect_queries = []
correct_tool_usage = 0

for query, expected_function in query_function_dict.items():
    response = client.chat.completions.create(
        model="glm-4-9b",
        messages=[
            {"role": "system", "content": toolPrompt},
            {"role": "user", "content": query},
        ],
        tools=tools_list,
        tool_choice="auto",
    )

    reply = response.choices[0].message
    parsed_response = parse_tool_response(reply)

    # Correct only when a call was parsed AND it names the expected function.
    if not parsed_response or parsed_response.get("function") != expected_function:
        incorrect_queries.append((query, reply.content))
    else:
        correct_tool_usage += 1

total_queries = len(query_function_dict)
accuracy = correct_tool_usage / total_queries * 100
print(f"\nAccuracy: {accuracy:.2f}%")
print(f"Total Correct: {correct_tool_usage} / {total_queries}")

print("\nIncorrect Queries:")
for q, c in incorrect_queries:
    # Query, model text, then an empty line as separator.
    print(q, c, "", sep="\n")


Accuracy: 90.46%
Total Correct: 275 / 304

Incorrect Queries:
What is the main focus of the computing and visualization class?
The main focus of the computing and visualization class is to provide students with a comprehensive understanding of computational methods and their applications in data analysis, scientific research, and other fields. This class covers topics such as programming, data structures, algorithms, and visualization techniques. Students will learn how to analyze and visualize complex data sets, develop efficient algorithms, and use various tools and software for data processing and visualization. The course aims to equip students with the necessary skills to tackle real-world problems using computational methods and visualization techniques.

In text as data, what type of homework is assigned, and what percentage of the grade does it account for?
The percentage of the grade that homework accounts for in text as data can vary depending on the specific course and inst

# GPT function call accuracy

In [None]:
def parse_tool_response_GPT(response_message):
    """
    Extract the first native tool call from a GPT chat message.

    A call is only recognised when the message carries at least one entry in
    ``tool_calls`` and its ``content`` is ``None``.

    Args:
        response_message: ``response.choices[0].message``. Only its
            ``tool_calls`` and ``content`` attributes are read.

    Returns:
        dict | None: ``{"function": <name>, "arguments": <decoded JSON>}``, or
        ``None`` when there is no tool call, the message also contains text, or
        the arguments are not valid JSON.
    """
    # getattr avoids an AttributeError on messages without these attributes.
    tool_calls = getattr(response_message, "tool_calls", None)
    content = getattr(response_message, "content", None)

    # `not tool_calls` also covers an empty list, which would otherwise raise
    # IndexError on tool_calls[0].
    if not tool_calls or content is not None:
        return None

    func_info = tool_calls[0].function
    try:
        arguments = json.loads(func_info.arguments)
    except (json.JSONDecodeError, TypeError) as error:
        # A malformed call counts as "no usable call" instead of aborting the run.
        print(f"Error parsing function arguments: {error}")
        return None

    return {
        'function': func_info.name,
        'arguments': arguments,
    }

In [None]:
# Evaluate GPT-4 through the openai module. Unlike the other runs, this one
# uses a short generic system message instead of toolPrompt and relies on the
# native `tools` argument only.
incorrect_queries = []
correct_tool_usage = 0

for query, expected_function in query_function_dict.items():
    messages = [
        {"role": "system", "content": "You are a helpful customer support assistant. Use the supplied tools to assist the user. And only call one function at a time."},
        {"role": "user", "content": query},
    ]

    response = openai.ChatCompletion.create(
        model="gpt-4",
        messages=messages,
        max_tokens=1024,
        temperature=0,
        tools=tools_list,
        tool_choice="auto"
    )

    reply = response.choices[0].message
    parsed_response = parse_tool_response_GPT(reply)

    # Correct only when a call was parsed AND it names the expected function.
    if parsed_response is None or parsed_response.get("function") != expected_function:
        incorrect_queries.append((query, reply.content))
    else:
        correct_tool_usage += 1

total_queries = len(query_function_dict)
accuracy = correct_tool_usage / total_queries * 100
print(f"\nAccuracy: {accuracy:.2f}%")
print(f"Total Correct: {correct_tool_usage} / {total_queries}")

print("\nIncorrect Queries:")
for q, c in incorrect_queries:
    # Query, model text, then an empty line as separator.
    print(q, c, "", sep="\n")


Accuracy: 99.67%
Total Correct: 303 / 304

Incorrect Queries:
Tell me the email of teaching assistance Matthew Finlayson for Natural Language Processing
I'm sorry, but I can't assist with that.

