In [15]:
!pip install -q together
!pip install -q FlagEmbedding
!pip install -q peft
!pip install -q faiss-gpu
!pip install openai==0.27.8



In [10]:
from FlagEmbedding import FlagModel
from FlagEmbedding import FlagReranker

import os
import torch
import faiss
import re
import json
import numpy as np
import requests
from tqdm import tqdm

import openai
from together import Together

In [9]:
from google.colab import userdata

# Both API keys are read from Colab's secret store (userdata) rather than
# being written into the notebook.
TOGETHER_API_KEY = userdata.get("TOGETHER_API_KEY")
# Client object used by the Together-hosted model cells (Qwen and Llama).
together = Together(api_key=TOGETHER_API_KEY)

# The openai 0.x client takes its key as a module-level attribute.
openai.api_key = userdata.get("OpenAI_API_Key")

In [6]:
import pandas as pd

def get_query_and_rag(data_path):
    """Read the evaluation CSV and return three of its columns as lists.

    Args:
        data_path: Path (or any source accepted by ``pd.read_csv``) of a CSV
            containing the columns ``query``, ``RAG_retrieval_results`` and
            ``reference_answer``.

    Returns:
        A tuple ``(queries, rag_sections, reference_answers)`` of plain Python
        lists, one entry per CSV row, in file order.

    Raises:
        KeyError: If one of the three expected columns is missing.
    """
    table = pd.read_csv(data_path)

    # Pull the columns in a fixed order so the unpacking below lines up with
    # the names returned to the caller.
    wanted_columns = ('query', 'RAG_retrieval_results', 'reference_answer')
    queries, rag_sections, reference_answers = (
        table[column].tolist() for column in wanted_columns
    )

    # Sanity check that the three lists stay aligned row by row.
    assert len(queries) == len(rag_sections) == len(reference_answers)

    return queries, rag_sections, reference_answers

# Load the question / retrieved-context / reference-answer triples that every
# model cell below iterates over.
data_path = "/content/All_RAG_results.csv"
(queries, rag_sections, reference_answers) = get_query_and_rag(data_path)

# 1. Qwen

In [11]:
# One generated answer per (query, retrieved context) pair, in input order.
# Re-initialised here so re-running the cell does not append duplicates.
Qwen_all_response = []

# System prompt; the GPT-4 and llama cells further down reuse this same variable.
answer_prompt = "You are a very helpful assistant. Please answer user's question according to given information. Trust the given information, it is completely align with the user's question."

# zip() has no length, so tqdm needs an explicit total to show a percentage
# and an ETA instead of a bare iteration counter.
for query, rag in tqdm(zip(queries, rag_sections), total=len(queries)):
    # Chat payload: fixed system instruction plus the question and its
    # retrieved context formatted as two markdown sections.
    new_messages = [
        {
            "role": "system",
            "content": answer_prompt,
        },
        {
            "role": "user",
            "content": f"""
## Question:
{query}

## Information:
{rag}
"""
        },
    ]

    # Served through the Together API client created earlier in the notebook.
    qwen_res = together.chat.completions.create(
        model="Qwen/Qwen2.5-7B-Instruct-Turbo",
        messages=new_messages,
        max_tokens=1000,
        temperature=0.9,
    )

    # Keep only the text of the first returned choice.
    Qwen_all_response.append(qwen_res.choices[0].message.content)

305it [07:12,  1.42s/it]


In [12]:
# Column-wise bundle of the inputs and Qwen's answers; each list becomes one
# CSV column, written without the DataFrame index.
qwen_csv_dict = {
    'query': queries,
    'reference_answer': reference_answers,
    'RAG_retrieval_results': rag_sections,
    'Agent_responds': Qwen_all_response,
}
df = pd.DataFrame(data=qwen_csv_dict)
df.to_csv(path_or_buf='/content/Qwen_responds.csv', index=False)

# 2. GPT-4

In [13]:
# One generated answer per (query, retrieved context) pair, in input order.
# Re-initialised here so re-running the cell does not append duplicates.
GPT_4_all_response = []

# zip() has no length, so tqdm needs an explicit total to show a percentage
# and an ETA instead of a bare iteration counter.
for query, rag in tqdm(zip(queries, rag_sections), total=len(queries)):
    # Same message layout as the Qwen cell: the shared system prompt
    # (answer_prompt, defined in the Qwen cell) plus question and context.
    new_messages = [
        {
            "role": "system",
            "content": answer_prompt,
        },
        {
            "role": "user",
            "content": f"""
## Question:
{query}

## Information:
{rag}
"""
        },
    ]

    # Legacy (openai<1.0) chat completion call.
    # NOTE(review): no temperature is passed here and max_tokens is 1024,
    # whereas the Together calls in this notebook use temperature=0.9 and
    # max_tokens=1000 - confirm the difference is intentional.
    gpt_response = openai.ChatCompletion.create(
        model="gpt-4",  # Specify the ChatGPT-4 model
        messages=new_messages,
        max_tokens=1024,
    )

    # Keep only the text of the first returned choice.
    GPT_4_all_response.append(gpt_response.choices[0].message.content)

305it [10:31,  2.07s/it]


In [14]:
# Column-wise bundle of the inputs and GPT-4's answers; each list becomes one
# CSV column, written without the DataFrame index.
gpt_csv_dict = {
    'query': queries,
    'reference_answer': reference_answers,
    'RAG_retrieval_results': rag_sections,
    'Agent_responds': GPT_4_all_response,
}
df_gpt = pd.DataFrame(data=gpt_csv_dict)
df_gpt.to_csv(path_or_buf='/content/GPT_responds.csv', index=False)

# 3. Llama

In [16]:
# One generated answer per (query, retrieved context) pair, in input order.
# Re-initialised here so re-running the cell does not append duplicates.
llama_all_response = []

# zip() has no length, so tqdm needs an explicit total to show a percentage
# and an ETA instead of a bare iteration counter.
for query, rag in tqdm(zip(queries, rag_sections), total=len(queries)):
    # Same message layout as the other model cells: the shared system prompt
    # (answer_prompt, defined in the Qwen cell) plus question and context.
    new_messages = [
        {
            "role": "system",
            "content": answer_prompt,
        },
        {
            "role": "user",
            "content": f"""
## Question:
{query}

## Information:
{rag}
"""
        },
    ]

    # Served through the Together API client created earlier in the notebook,
    # with the same sampling settings as the Qwen call.
    llama_res = together.chat.completions.create(
        model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
        messages=new_messages,
        max_tokens=1000,
        temperature=0.9,
    )

    # Keep only the text of the first returned choice.
    llama_all_response.append(llama_res.choices[0].message.content)

305it [06:54,  1.36s/it]


In [17]:
# Column-wise bundle of the inputs and Llama's answers; each list becomes one
# CSV column, written without the DataFrame index.
llama_csv_dict = {
    'query': queries,
    'reference_answer': reference_answers,
    'RAG_retrieval_results': rag_sections,
    'Agent_responds': llama_all_response,
}
df_llama = pd.DataFrame(data=llama_csv_dict)
df_llama.to_csv(path_or_buf='/content/llama_responds.csv', index=False)