In [None]:
! rm -r /content/SearchQuery2FuncCall
!git clone https://github.com/XiaoLIUau/SearchQuery2FuncCall.git

Cloning into 'SearchQuery2FuncCall'...
remote: Enumerating objects: 88, done.[K
remote: Counting objects: 100% (88/88), done.[K
remote: Compressing objects: 100% (57/57), done.[K
remote: Total 88 (delta 55), reused 61 (delta 30), pack-reused 0[K
Receiving objects: 100% (88/88), 61.83 KiB | 3.64 MiB/s, done.
Resolving deltas: 100% (55/55), done.


In [None]:
%pip install --upgrade pip
%pip install --disable-pip-version-check \
    torch==1.13.1 \
    torchdata==0.5.1

%pip install --upgrade accelerate\

%pip install \
    transformers==4.28.1 \
    datasets==2.11.0 \
    evaluate==0.4.0 \
    rouge_score==0.1.2 \
    google.generativeai \
    langchain \
    cohere

[0m

In [None]:
from time import sleep,time
from getpass import getpass
import pandas as pd
import evaluate
from langchain import PromptTemplate, LLMChain

# Load and process dataset

In [None]:
from SearchQuery2FuncCall.setup_dataset import load_n_process_data, text2json

raw_dataset_path = '/content/SearchQuery2FuncCall/Dataset.txt'
dataset_json_path = '/content/q2f_dataset.json'
# Alternative input file: '/content/non_search_examples.json'

# Convert the raw text dataset first, then load the resulting JSON file.
text2json(raw_dataset_path)
q2f_datasets = load_n_process_data(dataset_json_path)
q2f_datasets

Saved 340 examples to 'q2f_dataset.json'.
Separated 87 Search() examples to 'search_examples.json'.
Separated 253 non-Search() examples to 'non_search_examples.json'.
Downloading and preparing dataset json/default to /root/.cache/huggingface/datasets/json/default-bb97d4d31ca1e4f7/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset json downloaded and prepared to /root/.cache/huggingface/datasets/json/default-bb97d4d31ca1e4f7/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['input', 'output'],
        num_rows: 260
    })
    test: Dataset({
        features: ['input', 'output'],
        num_rows: 80
    })
})

# Define prompt template


In [None]:
# define prompt format
def prompt_template():
    """Build the few-shot prompt that maps a search query to an API call.

    Returns:
        str: The routing instruction and worked examples, followed by a final
        ``Input:“{input}”, Output:`` line. ``{input}`` is left as a literal
        placeholder for the caller to fill in with the query.
    """
    start_prompt = 'Input:'
    end_prompt = ', Output:'
    # Each example is kept on a single line: a line break in the middle of an
    # API call would show the model a multi-line answer format.
    instruction = f"""Instruction: Given a search query, then route to different backend components based on the search intent.
1. If the search is about unit conversion, return API function UnitConvert(SourceUnit, TargetUnit, SourceValue).
2. If the search is about calculation, return API function Calculate(Equation).
3. If the search is about other search intent, return API function Search().
* For unit conversion: common unit conversion in length, mass, time, area, speed, temperature, volume should be covered. And it should be consistent for the same unit throughout. E.g. it should always be “foot”, it cannot be “feet” or “ft” in API calls.
* For calculation: common operation such as +, -, *, /, pow, log, ln, exp, tan(h), sin(h), cos(h), factorial should be covered. And it should be consistent for the same operation throughout. E.g. it should always be “ * ”, it cannot be “x” or “X” in API calls.
Handle input queries in different language styles. Cover common unit conversion and calculation operations.

Examples:
{start_prompt} “ft to cm” {end_prompt} “UnitConvert(SourceUnit:foot, TargetUnit:centimeter, SourceValue:1)”
{start_prompt} “how many ounces in 5.8 kilograms” {end_prompt} “UnitConvert(SourceUnit:kilogram, TargetUnit:ounce, SourceValue:5.8)”
{start_prompt} “two to the power of 10” {end_prompt} “Calculate(2^10)”
{start_prompt} “2001-1989” {end_prompt} “Calculate(2001-1989)”
{start_prompt} “what is chatgpt” {end_prompt} “Search()”
{start_prompt} “primary year 1 maths calculation checklist” {end_prompt} “Search()”
{start_prompt} “what are different length units” {end_prompt} “Search()”
{start_prompt} “Natural logarithm of -3/18” {end_prompt} “Calculate(ln(-3/18))”

"""
    # The query placeholder is appended outside the f-string so that "{input}"
    # stays a literal template variable.
    template = instruction + start_prompt + '“{input}”' + end_prompt + '\n'
    return template

# Load proprietary model API key
Here we use Cohere and Palm models

Note: Please upload a text file containing your model API key to the current folder.

>Name the file ***'api_key_cohere.txt'*** or ***'api_key_palm.txt'***, depending on the model you use.




In [None]:
""" # Get model api key """
def load_api_key_from_file(file_path):
    """Return the contents of a text file with surrounding whitespace removed.

    Args:
        file_path: Path of the file holding the API key.

    Returns:
        str: The file's text, stripped of leading and trailing whitespace.
    """
    with open(file_path, 'r') as key_file:
        return key_file.read().strip()

"""# Cohere Model API"""
def setup_model_cohere():
    """Create a LangChain ``Cohere`` LLM using a key read from a local file.

    The key is read from ``api_key_cohere.txt``, resolved relative to the
    current working directory.

    Returns:
        Cohere: The configured LLM wrapper.
    """
    from langchain.llms import Cohere

    cohere_key = load_api_key_from_file("api_key_cohere.txt")
    # NOTE(review): presumably a near-zero stand-in for deterministic decoding;
    # confirm whether an exact 0.0 is accepted by this wrapper.
    return Cohere(cohere_api_key=cohere_key, temperature=0.0000000001)

"""# Palm API"""
def setup_model_palm():
    """Create a LangChain ``GooglePalm`` LLM using a key read from a local file.

    The key is read from ``api_key_palm.txt``, resolved relative to the
    current working directory.

    Returns:
        GooglePalm: The LLM wrapper, built with ``temperature=0.0``.
    """
    # Imported lazily so the Palm client is only needed when this backend is used.
    from langchain.llms import GooglePalm

    api_key_file = "api_key_palm.txt"
    Palm_API_KEY = load_api_key_from_file(api_key_file)

    # The key is handed straight to the wrapper; nothing else is configured here.
    return GooglePalm(google_api_key=Palm_API_KEY, temperature=0.0)


# Define Search Query function to generate the desired output using LangChain

In [None]:
def searchQuery2(input, llm):
    """Run one search query through the few-shot prompt and return the raw LLM text.

    Args:
        input: The search query substituted into the prompt's ``input`` variable.
        llm: The language model object passed to ``LLMChain``.

    Returns:
        The value returned by ``LLMChain.predict`` for this query.
    """
    chain = LLMChain(
        prompt=PromptTemplate(template=prompt_template(), input_variables=["input"]),
        llm=llm,
    )
    return chain.predict(input=input)

# Post-process the generation output from the LLM

In [None]:
def extractOutputString(input_string):
    """Normalize a model answer to a bare, whitespace-free API call string.

    All whitespace (including line breaks) is removed, then at most one quote
    character is dropped from the start and at most one from the end.

    Args:
        input_string: Raw text to normalize.

    Returns:
        str: The compacted text without one leading/trailing quote character.
    """
    quote_chars = ('“', '”', "'", '"')
    compact = "".join(input_string.split())
    # Slicing (rather than indexing) keeps the empty string safe.
    if compact[:1] in quote_chars:
        compact = compact[1:]
    if compact[-1:] in quote_chars:
        compact = compact[:-1]
    return compact

# Generate outputs for given test dataset
Here we are using the test dataset

In [None]:
dash_line = '-' * 99  # same 99-dash separator as before, written directly

llm = setup_model_palm()
# llm = setup_model_cohere()

# Range of the test split to run; by default this covers the whole split.
index_s = 0
index_e = index_s + len(q2f_datasets['test'])
test_slice = q2f_datasets['test'][index_s:index_e]
inputs = test_slice['input']
outputs = test_slice['output']

API_outputs = []

# The loop variable is `query`, not `input`, so the built-in input() is not
# shadowed in the notebook's global namespace after this cell runs.
for query in inputs:
    API_output = searchQuery2(query, llm)
    API_output = extractOutputString(API_output)
    API_outputs.append(API_output)


zipped_summaries = list(zip(inputs, outputs, API_outputs))

df = pd.DataFrame(zipped_summaries, columns=['inputs', 'outputs', 'API_outputs'])
df

Unnamed: 0,inputs,outputs,API_outputs
0,Natural logarithm of 2.71828,Calculate(ln(2.71828)),Calculate(ln(2.71828))
1,current population of India,Search(),Search()
2,Convert 0.5 meters to centimeters,"UnitConvert(SourceUnit:meter,TargetUnit:centim...","UnitConvert(SourceUnit:meter,TargetUnit:centim..."
3,convert 60 miles to kilometers,"UnitConvert(SourceUnit:mile,TargetUnit:kilomet...","UnitConvert(SourceUnit:mile,TargetUnit:kilomet..."
4,convert 273.15 degrees celsius to kelvin,"UnitConvert(SourceUnit:celsius,TargetUnit:kelv...","UnitConvert(SourceUnit:celsius,TargetUnit:kelv..."
...,...,...,...
75,convert 250 liters to milliliters,"UnitConvert(SourceUnit:liter,TargetUnit:millil...","UnitConvert(SourceUnit:liter,TargetUnit:millil..."
76,Change 500 grams to pounds,"UnitConvert(SourceUnit:gram,TargetUnit:pound,S...","UnitConvert(SourceUnit:gram,TargetUnit:pound,S..."
77,What is machine learning?,Search(),Search()
78,convert 2.25 centimeters to inches,"UnitConvert(SourceUnit:centimeter,TargetUnit:i...","UnitConvert(SourceUnit:centimeter,TargetUnit:i..."


# Evaluate using ROUGE and BLEU scores


In [None]:
# Both metrics are scored against the same reference slice. Previously only
# ROUGE truncated the references to the number of predictions, so the two
# metrics could be handed lists of different lengths.
references = outputs[:len(API_outputs)]

# ROUGE
rouge = evaluate.load('rouge')
rouge_results = rouge.compute(
    predictions=API_outputs,
    references=references,
    use_aggregator=True,
    use_stemmer=True,
)

print('API MODEL ROUGE SCORES:')
print(rouge_results)

# BLEU
bleu = evaluate.load('bleu')
bleu_results = bleu.compute(
    predictions=API_outputs,
    references=references,
)

print('API MODEL BLEU SCORES:')
print(bleu_results)

# Kept for backward compatibility: this name used to end up holding the BLEU scores.
API_model_results = bleu_results

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

API MODEL ROUGE SCORES:
{'rouge1': 0.9792455808080808, 'rouge2': 0.6571130952380952, 'rougeL': 0.9786600378787879, 'rougeLsum': 0.9796527777777778}


Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

API MODEL BLEU SCORES:
{'bleu': 0.9031304448754985, 'precisions': [0.9669172932330827, 0.9401709401709402, 0.904950495049505, 0.8588235294117647], 'brevity_penalty': 0.9850749060157432, 'length_ratio': 0.9851851851851852, 'translation_length': 665, 'reference_length': 675}


## Results

* Test examples for index 50:70 in the test dataset

### Cohere

API MODEL ROUGE SCORES:
{'rouge1': 0.9642857142857142, 'rouge2': 0.6060606060606061, 'rougeL': 0.9642857142857142, 'rougeLsum': 0.9642857142857142}

API MODEL BLEU SCORES:
{'bleu': 0.6720708576427871, 'precisions': [0.75, 0.7083333333333334, 0.6554054054054054, 0.5859375], 'brevity_penalty': 1.0, 'length_ratio': 1.3146853146853146, 'translation_length': 188, 'reference_length': 143}

### Palm

API MODEL ROUGE SCORES:
{'rouge1': 0.9854545454545456, 'rouge2': 0.7849999999999999, 'rougeL': 0.9854545454545456, 'rougeLsum': 0.9854545454545456}

API MODEL BLEU SCORES:
{'bleu': 0.9159590736349433, 'precisions': [0.9623655913978495, 0.9337349397590361, 0.8972602739726028, 0.873015873015873], 'brevity_penalty': 1.0, 'length_ratio': 1.0, 'translation_length': 186, 'reference_length': 186}

