In [1]:
import csv
import os
import pprint
import random
import subprocess
import time

import google.generativeai as palm
import pandas as pd
from openai import OpenAI

In [2]:
# Prefer API keys supplied through environment variables so that real
# credentials never have to be typed into (and saved with) the notebook.
# The original placeholder strings remain as the fallback when the
# variables are unset.
palm.configure(api_key=os.environ.get('GOOGLE_API_KEY', 'YOUR-GOOGLEPALM-API-KEY-HERE'))

client = OpenAI(
  api_key=os.environ.get('OPENAI_API_KEY', "YOUR_OPENAI_API_KEY_HERE"),
)

In [3]:
# GooglePaLM: keep only the models that advertise the 'generateText'
# generation method, then use the first one as the default `model`.
models = list(
    filter(
        lambda candidate: 'generateText' in candidate.supported_generation_methods,
        palm.list_models(),
    )
)
model = models[0].name
print(model)

models/text-bison-001


In [4]:
def talktomodel(prompt, modelName):
    """Send `prompt` to the selected LLM backend and return its reply.

    Args:
        prompt: The complete prompt text to submit.
        modelName: "LLaMa2" runs the local llama.cpp binary, "ChatGPT" calls
            the OpenAI chat-completions API with gpt-3.5-turbo, and any other
            value falls through to Google PaLM text generation using the
            module-level `model`.

    Returns:
        For "LLaMa2", the process's standard output decoded as UTF-8 text.
        For "ChatGPT", the content of the first completion choice.
        Otherwise, the `result` attribute of the PaLM completion.
    """
    if modelName == "LLaMa2":
        args = ("../../llama.cpp/main", "-m", "../../llama.cpp/models/llama2-7b-chat/llama-2-7b-chat.Q4_0.gguf", "-p", prompt, "-n", "2048")

        # Only stdout is captured (as before). Decode the bytes instead of
        # calling str() on the (stdout, stderr) tuple, which produced a
        # "(b'...', None)" repr rather than the generated text.
        finished = subprocess.run(args, stdout=subprocess.PIPE)
        return finished.stdout.decode("utf-8", errors="replace")

    if modelName == "ChatGPT":
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
        )
        return completion.choices[0].message.content

    completion = palm.generate_text(
        model=model,
        prompt=prompt,
        temperature=0,
        # The maximum length of the response
        max_output_tokens=800,
    )
    return completion.result

In [5]:
def add_to_csv(i, task, pertModel, modelName, inputID, inputText, pertMethod, pertText, origOut, pertOut, origTime, pertTime):
    """Append one result record to the CSV for this run configuration.

    The file name is derived from the perturbation model, iteration index,
    task and target model name. A header row is written first whenever the
    file is missing or empty.
    """
    csvName = ''.join(['LLMFunctionsBy', pertModel, '_Iteration', str(i), '_', task, '_', modelName, '.csv'])

    # A header is needed unless the file already exists with some content.
    needs_header = not (os.path.isfile(csvName) and os.path.getsize(csvName) != 0)

    header = ['InputTextID', 'InputText', 'PerturbationID', 'PerturbedText', 'OriginalOutput', 'PerturbedOutput', 'OriginalTime', 'PerturbedTime']
    record = [inputID, inputText, pertMethod, pertText, origOut, pertOut, origTime, pertTime]

    with open(csvName, 'a', newline='', encoding='utf-8') as out_file:
        out = csv.writer(out_file)
        if needs_header:
            out.writerow(header)
        out.writerow(record)

In [6]:
def introduce_typos(input_text, modelName):
    """Ask `modelName` to add typos to `input_text`.

    The text is wrapped in single quotes and followed by the instruction
    before being sent through `talktomodel`. If the reply is falsy (for
    example None or an empty string) the original text is returned unchanged.
    """
    instruction = "Please add many typos for each sentence in this text."
    reply = talktomodel("'" + input_text + "' " + instruction, modelName)
    return reply or input_text

In [7]:
def delete_characters(input_text, modelName):
    """Ask `modelName` to randomly delete letters from `input_text`.

    Returns the model's reply, or `input_text` itself when the reply is
    falsy (None, empty string, ...).
    """
    instruction = "Please delete alphabets randomly for each sentence in this text."
    quoted = "'" + input_text + "' "
    reply = talktomodel(quoted + instruction, modelName)
    if not reply:
        return input_text
    return reply

In [8]:
def shuffle_characters(input_text, modelName):
    """Ask `modelName` to shuffle the characters of random words.

    The quoted `input_text` plus the instruction is sent via `talktomodel`;
    a falsy reply falls back to the unmodified `input_text`.
    """
    instruction = "Please shuffle the characters of random words for each sentence in this text."
    reply = talktomodel("'" + input_text + "' " + instruction, modelName)
    return reply if reply else input_text

In [9]:
def add_characters(input_text, modelName):
    """Ask `modelName` to insert random English letters into `input_text`.

    Returns the perturbed text from `talktomodel`, or the original
    `input_text` when that reply is falsy.
    """
    instruction = "Please add random english alphabet characters for each sentence in this text."
    request = "'" + input_text + "' " + instruction
    perturbed = talktomodel(request, modelName)
    return perturbed or input_text

In [10]:
def to_leet(input_text, modelName):
    """Ask `modelName` to rewrite random words of `input_text` in l33t form.

    A falsy reply from `talktomodel` results in `input_text` being returned
    untouched.
    """
    instruction = "Please convert random words for each sentence in this text to l33t format, where a,e,i,o is replaced by 4,3,1,0 respectively."
    reply = talktomodel("'" + input_text + "' " + instruction, modelName)
    if reply:
        return reply
    return input_text

In [11]:
def add_spaces(input_text, modelName):
    """Ask `modelName` to scatter extra spaces through `input_text`.

    Returns the reply from `talktomodel`; when that reply is falsy the
    original `input_text` is returned instead.
    """
    instruction = "Please add lots of spaces randomly for each sentence in-between this text."
    spaced = talktomodel("'" + input_text + "' " + instruction, modelName)
    return spaced or input_text

In [12]:
def add_words(input_text, modelName):
    """Ask `modelName` to insert random fruit words into `input_text`.

    The instruction lists the words to choose from (Apple, Pear, Banana,
    Grape). A falsy reply falls back to the unmodified `input_text`.
    """
    instruction = "Please add random words for each sentence in this text. Choose from the following: Apple, Pear, Banana, Grape"
    request = "'" + input_text + "' " + instruction
    reply = talktomodel(request, modelName)
    return reply if reply else input_text

In [13]:
def swap_characters(input_text, modelName):
    """Ask `modelName` to swap two characters inside random words.

    Sends the quoted `input_text` followed by the instruction through
    `talktomodel` and returns the reply, or `input_text` if the reply is
    falsy.
    """
    instruction = "Please swap any two characters in random words for each sentence in this text."
    reply = talktomodel("'" + input_text + "' " + instruction, modelName)
    if not reply:
        return input_text
    return reply

In [14]:
def antonym_replacement(input_text, modelName):
    """Ask `modelName` for text semantically opposite to `input_text`.

    Returns the reply from `talktomodel`, falling back to `input_text`
    when the reply is falsy.
    """
    instruction = "Please give me text that is semantically opposite to each sentence in this text."
    opposite = talktomodel("'" + input_text + "' " + instruction, modelName)
    return opposite or input_text

In [15]:
def synonym_replacement(input_text, modelName):
    """Ask `modelName` to paraphrase every sentence of `input_text`.

    Args:
        input_text: The text to perturb.
        modelName: Backend name forwarded to `talktomodel`.

    Returns:
        The reply from `talktomodel`, or `input_text` unchanged when the
        reply is falsy (None, empty string, ...).
    """
    # Instruction wording corrected: it previously read "paraphrase the each
    # sentence", which is the literal text the model receives.
    prompt = "Please paraphrase each sentence in this text."
    new_input = "'" + input_text + "' " + prompt
    model_output = talktomodel(new_input, modelName)

    return model_output if model_output else input_text

In [16]:
def replace_random_sentence(input_text, modelName):
    """Ask `modelName` to replace random sentences with a Lorem-ipsum line.

    Returns the reply from `talktomodel`; a falsy reply yields the original
    `input_text`.
    """
    instruction = "Please replace random sentences in this text with this sentence: Lorem ipsum dolor sit amet."
    request = "'" + input_text + "' " + instruction
    reply = talktomodel(request, modelName)
    return reply or input_text

In [17]:
def remove_random_sentence(input_text, modelName):
    """Ask `modelName` to drop random sentences from `input_text`.

    Args:
        input_text: The text to perturb.
        modelName: Backend name forwarded to `talktomodel`.

    Returns:
        The reply from `talktomodel`, or `input_text` unchanged when the
        reply is falsy (None, empty string, ...).
    """
    # Typo fixed ("Plrease" -> "Please"): this string is sent verbatim to
    # the model as the instruction.
    prompt = "Please remove random sentences from this text."
    new_input = "'" + input_text + "' " + prompt
    model_output = talktomodel(new_input, modelName)

    return model_output if model_output else input_text

In [18]:
# Instruction appended to the input text for each task name. Tasks that are
# not listed here are sent to the model without any appended instruction
# (see process_row).
prompt_mapping = dict([
    ("sentiment analysis",
     "Analyse the sentiment of this text as positive, negative or neutral."),
    ("text summarization",
     "Summarize this text in five sentences."),
    ("information retrieval",
     "List the top 10 information from this text."),
    ("toxicity detection",
     "Check whether this text contains toxic or spam content and say yes, no or unknown. Also provide reasons."),
    ("news classification",
     "Categorize the news text into the following categories: World, Sports, Business, Science/Technology."),
])

In [19]:
def _timed_query(text, modelName):
    """Send `text` to `modelName` and return (reply, elapsed seconds)."""
    start_time = time.time()
    reply = talktomodel(text, modelName)
    return reply, time.time() - start_time


def process_row(row, iteration, modelName, pertModel):
    """Run every perturbation for one input row and log each comparison.

    The unperturbed text is sent to the target model once. Then, for each
    perturbation, `pertModel` rewrites the raw input, the rewritten text is
    sent to the target model, and one record is appended through
    `add_to_csv`.

    Args:
        row: Mapping-like record providing 'task', 'input' and 'inputID'.
        iteration: Iteration index, forwarded to `add_to_csv`.
        modelName: Name of the model under test.
        pertModel: Name of the model that generates the perturbations.

    Task handling:
        * sentiment analysis / toxicity detection / news classification:
          the task instruction from `prompt_mapping` is appended to both the
          original and the perturbed text; the default perturbation set is
          used.
        * text summarization / information retrieval: the instruction is
          appended as well, but sentence-level perturbations replace the
          space-adding and character-swapping ones.
        * any task missing from `prompt_mapping` (question answering): the
          text is sent as-is with the default perturbation set.
    """
    task = row['task']
    input_text = row['input']
    inputID = row['inputID']
    time.sleep(1.0)

    # (label written to the CSV, perturbation function), in execution order.
    default_perturbations = [
        ("Introducing Typos", introduce_typos),
        ("Deleting Characters", delete_characters),
        ("Replacing Synonyms", synonym_replacement),
        ("Adding random words", add_words),
        ("Converting to l33t format", to_leet),
        ("Shuffling characters in each word", shuffle_characters),
        ("Adding spaces in text", add_spaces),
        ("Adding random characters", add_characters),
        ("Swapping two random characters in each word", swap_characters),
        ("Replacing words with their antonyms", antonym_replacement),
    ]
    summary_ir_perturbations = [
        ("Introducing Typos", introduce_typos),
        ("Deleting Characters", delete_characters),
        ("Replacing Synonyms", synonym_replacement),
        ("Adding random words", add_words),
        ("Replacing random sentences", replace_random_sentence),
        ("Removing random sentences", remove_random_sentence),
        ("Converting to l33t format", to_leet),
        ("Adding random characters", add_characters),
        ("Replacing words with their antonyms", antonym_replacement),
        ("Shuffling characters in each word", shuffle_characters),
    ]

    if task in prompt_mapping:
        if task in ("sentiment analysis", "toxicity detection", "news classification"):
            perturbations = default_perturbations
        elif task in ("text summarization", "information retrieval"):
            perturbations = summary_ir_perturbations
        else:
            # A mapped task without a perturbation set: nothing to run.
            return
        prompt = prompt_mapping[task]
    else:
        # task = question answering: the input already is the question.
        perturbations = default_perturbations
        prompt = None

    def build_query(text):
        # Append the task instruction when there is one; otherwise pass the
        # text through untouched.
        return text if prompt is None else text + " " + prompt

    orig_new_text = build_query(input_text)
    origOut, origTime = _timed_query(orig_new_text, modelName)

    for pertMethod, perturb in perturbations:
        pert_new_text = build_query(perturb(input_text, pertModel))
        pertOut, pertTime = _timed_query(pert_new_text, modelName)
        add_to_csv(iteration, task, pertModel, modelName, inputID, orig_new_text, pertMethod, pert_new_text, origOut, pertOut, origTime, pertTime)

In [20]:
def run(pertModelChoice, targetModelChoice, task_choice, num_iters):
    """Run `num_iters` passes of one robustness task over its input CSV.

    Model choices map '2' to ChatGPT and '3' to LLaMa2; anything else means
    GooglePaLM. Task choices '1', '2', '3', '5' and '6' select a specific
    task and input file; anything else means Sentiment Analysis. Every row
    of the selected file is handed to `process_row`, followed by a
    one-second pause.
    """
    model_by_choice = {'2': "ChatGPT", '3': "LLaMa2"}
    pertModel = model_by_choice.get(str(pertModelChoice), "GooglePaLM")
    targetModel = model_by_choice.get(str(targetModelChoice), "GooglePaLM")

    print("Testing target model: " + targetModel + " with perturbations by: " + pertModel)
    print("\n")

    # choice -> (display name, input file)
    task_by_choice = {
        '1': ("Information Retrieval", 'Robustness/ir_inputs.csv'),
        '2': ("News Classification", 'Robustness/nc_inputs.csv'),
        '3': ("Question Answering", 'Robustness/qa_inputs.csv'),
        '5': ("Toxicity Detection", 'Robustness/td_inputs.csv'),
        '6': ("Text Summarization", 'Robustness/ts_inputs.csv'),
    }
    fallback = ("Sentiment Analysis", 'Robustness/sa_inputs.csv')
    task, csv_file_path = task_by_choice.get(str(task_choice), fallback)
    df = pd.read_csv(csv_file_path)

    print("Testing Task: " + task)
    print("\n")

    for i in range(int(num_iters)):
        for _, row in df.iterrows():
            process_row(row, i, targetModel, pertModel)
            time.sleep(1)

In [21]:
def main():
    """Interactive driver: prompt for tasks and iteration counts, then run.

    The perturbation model and the model under test are both fixed to
    choice '1' (GooglePaLM). The user enters one or more comma-separated
    task numbers and, for each task, an iteration count. Every combination
    is passed to `run` with the iteration count as an int: the entered
    value when it is an integer in 1..100, otherwise 10.
    """
    print("Welcome to the METAL Framework - Test LLMs using Metamorphic Testing")
    print("\n")

    print("Please note that due to LLM limitations, only the Robustness QA can be tested using LLM-based perturbations")
    print("\n")

    # The perturbation model is fixed here rather than prompted for.
    print("Using Model 1 - GooglePaLM to create perturbations")
    print("\n")
    pert_model_choice = '1'
    pert_model_choices = [choice.strip() for choice in pert_model_choice.split(',')]

    for pmc in pert_model_choices:
        # The model under test is fixed as well.
        print("Using Model 1 - GooglePaLM to test")
        print("\n")
        model_choice = '1'
        model_choices = [choice.strip() for choice in model_choice.split(',')]

        for mc in model_choices:
            task_choice = input("Choose the Robustness task you want to test the model against. Enter 1 for Information Retrieval, 2 for News Classification, 3 for Question Answering, 4 for Sentiment Analysis, 5 for Toxicity Detection, 6 for Text Summarization (Defaults to Sentiment Analysis). Use comma-separated values for multiple options (Eg. 1,2): ")
            print("\n")

            # Strip every item, not just the whole answer: "1, 2" used to
            # yield " 2", which `run` silently treated as the default task.
            # Empty items are kept so that a blank answer still selects the
            # documented default.
            task_choices = [choice.strip() for choice in task_choice.split(',')]

            for tc in task_choices:
                raw_iters = input("Enter the number of iterations you want task " + str(tc) + " to run (Limit to 100, defaults to 10): ")
                print("\n")
                try:
                    num_iters = int(raw_iters)
                except ValueError:
                    print("Invalid input. Defaulting to 10 iterations.")
                    print("\n")
                    num_iters = 10
                else:
                    if not 1 <= num_iters <= 100:
                        print("Number of iterations outside range. Defaulting to 10 iterations")
                        print("\n")
                        num_iters = 10

                run(pmc, mc, tc, num_iters)

In [22]:
# Start the interactive driver; it prompts for the task(s) and the number of
# iterations before running the tests.
main()

Welcome to the METAL Framework - Test LLMs using Metamorphic Testing


Please note that due to LLM limitations, only the Robustness QA can be tested using LLM-based perturbations


Using Model 1 - GooglePaLM to create perturbations


Using Model 1 - GooglePaLM to test




Choose the Robustness task you want to test the model against. Enter 1 for Information Retrieval, 2 for News Classification, 3 for Question Answering, 4 for Sentiment Analysis, 5 for Toxicity Detection, 6 for Text Summarization (Defaults to Sentiment Analysis). Use comma-separated values for multiple options (Eg. 1,2):  4






Enter the number of iterations you want task 4 to run (Limit to 100, defaults to 10):  1




Testing target model: GooglePaLM with perturbations by: GooglePaLM


Testing Task: Sentiment Analysis




InternalServerError: 500 An internal error has occurred. Please retry or report in https://developers.generativeai.google/guide/troubleshooting