In [None]:
! rm -r /content/SearchQuery2FuncCall
!git clone https://github.com/XiaoLIUau/SearchQuery2FuncCall.git

In [None]:
%pip install --upgrade pip
%pip install --disable-pip-version-check \
    torch==1.13.1 \
    torchdata==0.5.1

%pip install --upgrade accelerate\

%pip install \
    transformers==4.28.1 \
    datasets==2.11.0 \
    evaluate==0.4.0 \
    rouge_score==0.1.2 \
    google.generativeai \
    langchain \
    cohere

In [None]:
from time import sleep,time
from getpass import getpass
import pandas as pd
import evaluate
from langchain import PromptTemplate, LLMChain

In [None]:
from SearchQuery2FuncCall.setup_dataset import text2json, load_n_process_data

# Paths used to build and load the query -> function-call dataset.
RAW_DATASET_TXT = '/content/SearchQuery2FuncCall/Dataset.txt'
PROCESSED_DATASET_JSON = '/content/q2f_dataset.json'
# Alternative dataset that was tried here: '/content/non_search_examples.json'

# NOTE(review): text2json presumably writes PROCESSED_DATASET_JSON as a side
# effect -- confirm against SearchQuery2FuncCall/setup_dataset.py.
text2json(RAW_DATASET_TXT)
q2f_datasets = load_n_process_data(PROCESSED_DATASET_JSON)
q2f_datasets

In [None]:
# define prompt format
def prompt_template():
    """Build the few-shot prompt template that maps a search query to an API call.

    The template consists of the routing instruction, a list of worked
    examples, and a final line holding the ``{input}`` placeholder for the
    query to classify. Every example and the final query line share one
    format::

        <User Query>: “query”, <API Call>: “answer”

    so the model sees the same pattern it is asked to complete. The markers
    already carry their own punctuation, so none is added around them (the
    previous version produced ``<User Query>::`` and ``,, <API Call>: :``).

    Returns:
        str: A template containing exactly one ``{input}`` placeholder and no
        other curly braces, suitable for ``PromptTemplate`` with
        ``input_variables=["input"]``.
    """
    start_prompt = '<User Query>:'
    end_prompt = ', <API Call>: '
    instruction = """Instruction: Given a search query, then route to different backend components based on the search intent.
1. If the search is about unit conversion, return API function UnitConvert(SourceUnit, TargetUnit, SourceValue).
2. If the search is about calculation, return API function Calculate(Equation).
3. If the search is about other search intent, return API function Search().
Handle input queries in different language styles. Cover common unit conversion and calculation operations.
"""
    # (query, expected API call) pairs; each answer stays on a single line so
    # the model is not taught to emit a line break inside a call.
    examples = [
        ('ft to cm', 'UnitConvert(SourceUnit:foot, TargetUnit:centimeter, SourceValue:1)'),
        ('how many ounces in 5.8 kilograms', 'UnitConvert(SourceUnit:kilogram, TargetUnit:ounce, SourceValue:5.8)'),
        ('two to the power of 10', 'Calculate(2^10)'),
        ('2001-1989', 'Calculate(2001-1989)'),
        ('what is chatgpt', 'Search()'),
        ('primary year 1 maths calculation checklist', 'Search()'),
        ('what are different length units', 'Search()'),
        ('Natural logarithm of -3/18', 'Calculate(ln(-3/18))'),
    ]
    example_lines = [
        f'{start_prompt} “{query}”{end_prompt}“{api_call}”'
        for query, api_call in examples
    ]
    # The last piece is a plain (non f-) string so `{input}` survives as the
    # placeholder that PromptTemplate fills in later.
    template = (
        instruction
        + '\nExamples:\n'
        + '\n'.join(example_lines)
        + '\n\n'
        + start_prompt + ' “{input}”' + end_prompt
    )
    return template

In [None]:
""" # Get model api key """
def load_api_key_from_file(file_path):
    """Read an API key from a text file.

    Args:
        file_path: Path of the file holding the key.

    Returns:
        The file's contents with leading and trailing whitespace removed.
    """
    with open(file_path, 'r') as key_file:
        raw_contents = key_file.read()
    return raw_contents.strip()

"""# Cohere Model API"""
def setup_model_cohere():
    """Create the LangChain Cohere LLM object.

    The API key is read from ``api_key_cohere.txt`` (a path relative to the
    current working directory) through ``load_api_key_from_file``.

    Returns:
        The ``langchain.llms.Cohere`` instance built from that key.
    """
    # Local import: the Cohere wrapper is only loaded when this backend is used.
    from langchain.llms import Cohere

    cohere_key = load_api_key_from_file("api_key_cohere.txt")
    # NOTE(review): temperature is tiny but non-zero -- presumably to get
    # near-deterministic completions; confirm whether exactly 0 is accepted.
    return Cohere(cohere_api_key=cohere_key, temperature=0.0000000001)

"""# Palm API"""
def setup_model_palm():
    """Create the LangChain Google PaLM LLM object.

    The API key is read from ``api_key_palm.txt`` (a path relative to the
    current working directory) through ``load_api_key_from_file``.

    Returns:
        The ``langchain.llms.GooglePalm`` instance built from that key, with
        temperature 0.0.
    """
    # Local imports: only loaded when the PaLM backend is used.
    from langchain.llms import GooglePalm
    # NOTE(review): `palm` is not referenced below; the import is kept so the
    # function still imports google.generativeai exactly as before.
    import google.generativeai as palm

    palm_key = load_api_key_from_file("api_key_palm.txt")
    return GooglePalm(google_api_key=palm_key, temperature = 0.0)

def searchQuery2(input, llm):
    """Send one search query through the few-shot prompt and return the completion.

    Args:
        input: The search query text, substituted into the template's
            ``{input}`` placeholder.
        llm: The LLM object handed to ``LLMChain`` (for example the result of
            ``setup_model_cohere()``).

    Returns:
        Whatever ``LLMChain.predict`` returns for this query.
    """
    query_prompt = PromptTemplate(
        template=prompt_template(),
        input_variables=["input"],
    )
    chain = LLMChain(prompt=query_prompt, llm=llm)
    return chain.predict(input=input)

def extractOutputString(input_string):
    """Normalise a raw model completion into a bare API-call string.

    All whitespace (spaces, tabs, newlines) is removed, then at most one
    wrapping double-quote character is dropped from each end. Both the
    straight quote (") and the typographic quotes (“ ”) are recognised,
    because the few-shot prompt shows its answers wrapped in “ ” and the
    model tends to echo that style.

    Args:
        input_string: Raw text returned by the model.

    Returns:
        str: The completion without whitespace and without one leading and
        one trailing quote character, if present.
    """
    # str.startswith / str.endswith accept a tuple of candidates.
    quote_chars = ('"', '\u201c', '\u201d')
    compact = "".join(input_string.split())
    if compact.startswith(quote_chars):
        compact = compact[1:]
    if compact.endswith(quote_chars):
        compact = compact[:-1]
    return compact

In [None]:
# NOTE: joining 100 empty strings yields 99 dashes; not used in this cell.
dash_line = '-'.join('' for x in range(100))

# llm = setup_model_palm()
llm = setup_model_cohere()

# Evaluate on a fixed 20-example window of the test split. The split is sliced
# once and both columns are read from the same batch.
test_batch = q2f_datasets['test'][50:70]
inputs = test_batch['input']
outputs = test_batch['output']
# inputs = q2f_datasets['test']['input']
# outputs = q2f_datasets['test']['output']

# One LLM call per query. The loop variable is `query`, not `input`, so the
# builtin input() is not shadowed at notebook scope for later cells. Results
# are appended one at a time so that completions gathered before an
# interruption (e.g. an API error) remain available in API_outputs.
API_outputs = []
for query in inputs:
    raw_completion = searchQuery2(query, llm)
    API_outputs.append(extractOutputString(raw_completion))

zipped_summaries = list(zip(inputs, outputs, API_outputs))

df = pd.DataFrame(zipped_summaries, columns = ['inputs', 'outputs', 'API_outputs'])
df

In [None]:
# Load the ROUGE metric through the `evaluate` library.
rouge = evaluate.load('rouge')

# References are cut to the number of predictions collected, so a partial
# inference run can still be scored.
API_model_results = rouge.compute(predictions=API_outputs, references=outputs[:len(API_outputs)], use_aggregator=True, use_stemmer=True)

print('API MODEL ROUGE SCORES:', API_model_results, sep='\n')

In [None]:
# Load the BLEU metric through the `evaluate` library.
bleu = evaluate.load('bleu')

# Trim the references to the number of predictions actually collected, the same
# way the ROUGE cell does. Without this, a partial inference run (fewer
# API_outputs than outputs) makes BLEU fail on mismatched lengths while ROUGE
# still scores. When the lengths already match, the slice is a no-op.
# NOTE: this overwrites API_model_results from the ROUGE cell.
API_model_results = bleu.compute(
    predictions=API_outputs,
    references=outputs[:len(API_outputs)],
)

print('API MODEL BLEU SCORES:')
print(API_model_results)