| \n", - " | \n", - " | \n", - " | task | \n", - "Average | \n", - "agnews | \n", - "cr | \n", - "mr | \n", - "sst-5 | \n", - "sst2 | \n", - "subj | \n", - "trec | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|
| meta_llm | \n", - "downstream_llm | \n", - "optimizer | \n", - "use_task_desc | \n", - "\n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " |
| Llama-3-70B | \n", - "Llama-3-70B | \n", - "DE | \n", - "False | \n", - "79.74 | \n", - "86.67 ± 0.29 | \n", - "91.83 ± 0.76 | \n", - "91.50 ± 0.87 | \n", - "50.17 ± 3.25 | \n", - "95.17 ± 3.69 | \n", - "70.67 ± 6.83 | \n", - "72.17 ± 3.69 | \n", - "
| GA | \n", - "False | \n", - "78.26 | \n", - "85.50 ± 2.29 | \n", - "89.83 ± 2.25 | \n", - "88.83 ± 4.65 | \n", - "51.67 ± 2.25 | \n", - "95.33 ± 2.02 | \n", - "68.83 ± 8.75 | \n", - "67.83 ± 5.77 | \n", - "||
| Llama-3-8B | \n", - "Llama-3-70B | \n", - "DE | \n", - "False | \n", - "74.52 | \n", - "85.00 ± 1.80 | \n", - "91.67 ± 1.15 | \n", - "90.67 ± 2.36 | \n", - "38.00 ± 14.31 | \n", - "92.00 ± 8.67 | \n", - "58.00 ± 10.44 | \n", - "66.33 ± 16.74 | \n", - "
| GA | \n", - "False | \n", - "74.50 | \n", - "86.50 ± 1.50 | \n", - "83.00 ± 5.07 | \n", - "84.33 ± 5.30 | \n", - "51.67 ± 2.36 | \n", - "91.33 ± 3.79 | \n", - "61.00 ± 7.00 | \n", - "63.67 ± 10.05 | \n", - "||
| init | \n", - "Llama-3-70B | \n", - "init | \n", - "False | \n", - "72.81 | \n", - "86.67 ± 2.02 | \n", - "93.17 ± 1.76 | \n", - "89.67 ± 4.48 | \n", - "39.83 ± 31.32 | \n", - "93.17 ± 3.69 | \n", - "54.00 ± 7.26 | \n", - "53.17 ± 17.86 | \n", - "
| \n", - " | \n", - " | \n", - " | task | \n", - "Average | \n", - "agnews | \n", - "cr | \n", - "mr | \n", - "sst-5 | \n", - "sst2 | \n", - "subj | \n", - "trec | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|
| meta_llm | \n", - "downstream_llm | \n", - "optimizer | \n", - "use_task_desc | \n", - "\n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " |
| Llama-3-8B | \n", - "Llama-3-70B | \n", - "DE | \n", - "False | \n", - "74.52 | \n", - "85.00 ± 1.80 | \n", - "91.67 ± 1.15 | \n", - "90.67 ± 2.36 | \n", - "38.00 ± 14.31 | \n", - "92.00 ± 8.67 | \n", - "58.00 ± 10.44 | \n", - "66.33 ± 16.74 | \n", - "
| True | \n", - "78.12 | \n", - "87.33 ± 0.76 | \n", - "93.00 ± 0.00 | \n", - "87.33 ± 1.04 | \n", - "51.33 ± 3.40 | \n", - "94.17 ± 4.65 | \n", - "59.17 ± 9.25 | \n", - "74.50 ± 5.41 | \n", - "|||
| GA | \n", - "False | \n", - "74.50 | \n", - "86.50 ± 1.50 | \n", - "83.00 ± 5.07 | \n", - "84.33 ± 5.30 | \n", - "51.67 ± 2.36 | \n", - "91.33 ± 3.79 | \n", - "61.00 ± 7.00 | \n", - "63.67 ± 10.05 | \n", - "||
| True | \n", - "76.50 | \n", - "85.67 ± 1.04 | \n", - "89.83 ± 3.18 | \n", - "91.17 ± 3.01 | \n", - "46.67 ± 2.25 | \n", - "91.50 ± 8.26 | \n", - "66.33 ± 6.25 | \n", - "64.33 ± 2.25 | \n", - "
| \n", - " | \n", - " | \n", - " | task | \n", - "Average | \n", - "agnews | \n", - "cr | \n", - "sst-5 | \n", - "subj | \n", - "
|---|---|---|---|---|---|---|---|---|
| meta_llm | \n", - "downstream_llm | \n", - "optimizer | \n", - "use_task_desc | \n", - "\n", - " | \n", - " | \n", - " | \n", - " | \n", - " |
| Llama-3-70B | \n", - "Llama-3-70B | \n", - "DE | \n", - "False | \n", - "74.83 | \n", - "86.67 ± 0.29 | \n", - "91.83 ± 0.76 | \n", - "50.17 ± 3.25 | \n", - "70.67 ± 6.83 | \n", - "
| gpt-4o | \n", - "DE | \n", - "False | \n", - "73.92 | \n", - "82.17 ± 0.58 | \n", - "92.67 ± 3.33 | \n", - "51.33 ± 1.53 | \n", - "69.50 ± 2.60 | \n", - "
| \n", - " | \n", - " | \n", - " | task | \n", - "Average | \n", - "agnews | \n", - "cr | \n", - "mr | \n", - "sst-5 | \n", - "sst2 | \n", - "subj | \n", - "trec | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|
| meta_llm | \n", - "downstream_llm | \n", - "optimizer | \n", - "use_task_desc | \n", - "\n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " |
| Llama-3-70B | \n", - "Llama-3-70B | \n", - "DE | \n", - "False | \n", - "79.74 | \n", - "86.67 ± 0.29 | \n", - "91.83 ± 0.76 | \n", - "91.50 ± 0.87 | \n", - "50.17 ± 3.25 | \n", - "95.17 ± 3.69 | \n", - "70.67 ± 6.83 | \n", - "72.17 ± 3.69 | \n", - "
| GA | \n", - "False | \n", - "78.26 | \n", - "85.50 ± 2.29 | \n", - "89.83 ± 2.25 | \n", - "88.83 ± 4.65 | \n", - "51.67 ± 2.25 | \n", - "95.33 ± 2.02 | \n", - "68.83 ± 8.75 | \n", - "67.83 ± 5.77 | \n", - "
| \n", - " | \n", - " | \n", - " | task | \n", - "Average | \n", - "agnews | \n", - "cr | \n", - "mr | \n", - "sst-5 | \n", - "sst2 | \n", - "subj | \n", - "trec | \n", - "
|---|---|---|---|---|---|---|---|---|---|---|---|
| meta_llm | \n", - "downstream_llm | \n", - "optimizer | \n", - "use_task_desc | \n", - "\n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " | \n", - " |
| Llama-3-8B | \n", - "Llama-3-70B | \n", - "DE | \n", - "False | \n", - "74.52 | \n", - "85.00 ± 1.80 | \n", - "91.67 ± 1.15 | \n", - "90.67 ± 2.36 | \n", - "38.00 ± 14.31 | \n", - "92.00 ± 8.67 | \n", - "58.00 ± 10.44 | \n", - "66.33 ± 16.74 | \n", - "
| GA | \n", - "False | \n", - "74.50 | \n", - "86.50 ± 1.50 | \n", - "83.00 ± 5.07 | \n", - "84.33 ± 5.30 | \n", - "51.67 ± 2.36 | \n", - "91.33 ± 3.79 | \n", - "61.00 ± 7.00 | \n", - "63.67 ± 10.05 | \n", - "
| \n", - " | \n", - " | \n", - " | task | \n", - "Average | \n", - "agnews | \n", - "cr | \n", - "sst-5 | \n", - "subj | \n", - "
|---|---|---|---|---|---|---|---|---|
| meta_llm | \n", - "downstream_llm | \n", - "optimizer | \n", - "use_task_desc | \n", - "\n", - " | \n", - " | \n", - " | \n", - " | \n", - " |
| Llama-3-70B | \n", - "gpt-4o | \n", - "DE | \n", - "False | \n", - "73.92 | \n", - "82.17 ± 0.58 | \n", - "92.67 ± 3.33 | \n", - "51.33 ± 1.53 | \n", - "69.50 ± 2.60 | \n", - "
| init | \n", - "gpt-4o | \n", - "init | \n", - "False | \n", - "65.71 | \n", - "85.50 ± 0.87 | \n", - "82.33 ± 20.71 | \n", - "37.83 ± 19.43 | \n", - "57.17 ± 12.10 | \n", - "
| \n", - " | meta_llm | \n", - "optimizer | \n", - "prompt | \n", - "test_score | \n", - "
|---|---|---|---|---|
| 76 | \n", - "Llama8B | \n", - "DE | \n", - "Let's follow the instructions step-by-step to generate a better prompt.\\r\\n\\r\\n1. Identify the different parts between Prompt 1 and Prompt 2:\\r\\n\\r\\nPrompt 1: Your task is to choose a type of the question, from Description, Entity, Expression, Human, Location and Number.\\r\\nPrompt 2: You are given a question. You need to detect which category better describes the question. Answer with \"Description\", \"Entity\", \"Expression\", \"Human\", \"Location\", and \"Number\".\\r\\n\\r\\nDifferent parts:\\r\\n\\r\\n* \"Your task is to choose\" vs \"You are given a question and need to detect\"\\r\\n* \"a type of the question\" vs \"which category\"\\r\\n* \"Description, Entity, Expression, Human, Location and Number\" vs not changed\\r\\n\\r\\n2. Randomly mutate the different parts:\\r\\n\\r\\n* \"Your task is to choose\" -> \"The goal is to determine\"\\r\\n* \"a type of the question\" -> \"the nature of the inquiry\"\\r\\n* \"Description, Entity, Expression, Human, Location and Number\" -> \"categories: Description, Entity, Expression, Human, Location, and Number Type\"\\r\\n\\r\\n3. Crossover the different parts with Prompt 3 and generate a final prompt:\\r\\n\\r\\nPrompt 3: Identify the category that corresponds to this sentence: | \n", - "0.270 | \n", - "
| 41 | \n", - "NaN | \n", - "NaN | \n", - "Please select the correct classification for this sentence: Description, Entity, Expression, Human, Location, or Number. | \n", - "0.330 | \n", - "
| 73 | \n", - "Llama70B | \n", - "DE | \n", - "You are required to categorize the given statement into its correct category: Description, Entity, Expression, Human, Location, or Number. | \n", - "0.430 | \n", - "
| 77 | \n", - "Llama8B | \n", - "DE | \n", - "Determine the relevant category for this text based on the categories: Description, Entity, Expression, Human, Location, or Number. | \n", - "0.440 | \n", - "
| 81 | \n", - "Llama8B | \n", - "GA | \n", - "Assign the given sentence to one of the six categories (Description, Entity, Expression, Human, Location, or Number) and indicate the corresponding question type. | \n", - "0.515 | \n", - "
| 40 | \n", - "NaN | \n", - "NaN | \n", - "Determine the type of the given question and choose from Description, Entity, Expression, Human, Location and Number. | \n", - "0.595 | \n", - "
| 80 | \n", - "Llama70B | \n", - "GA | \n", - "Categorize the question into one of the six types - Description, Entity, Expression, Human, Location, or Number - and provide the relevant label. | \n", - "0.620 | \n", - "
| 78 | \n", - "Llama70B | \n", - "GA | \n", - "Identify the most suitable category (Description, Entity, Expression, Human, Location, or Number) for the provided text or question. | \n", - "0.630 | \n", - "
| 83 | \n", - "Llama8B | \n", - "GA | \n", - "Classify an English question into its corresponding category from the list ['Description', 'Entity', 'Expression', 'Human', 'Location', 'Number'] without providing additional information. | \n", - "0.660 | \n", - "
| 74 | \n", - "Llama70B | \n", - "DE | \n", - "As you examine the question, your task is to choose a type, from Description, Entity, Expression, Human, Location, or Number, by analyzing the input. | \n", - "0.665 | \n", - "
| 39 | \n", - "NaN | \n", - "NaN | \n", - "Please perform Question Classification task. Given the question, assign a label from ['Description', 'Entity', 'Expression', 'Human', 'Location', 'Number']. Return label only without any other text | \n", - "0.670 | \n", - "
| 82 | \n", - "Llama8B | \n", - "GA | \n", - "You are given a question. You need to detect which category better describes the question. Answer with \"Description\", \"Entity\", \"Expression\", \"Human\", \"Location\", and \"Number\". | \n", - "0.685 | \n", - "
| 72 | \n", - "Llama70B | \n", - "DE | \n", - "Your task is to choose a type of the question, from Description, Entity, Expression, Human, Location and Number. | \n", - "0.730 | \n", - "
| 79 | \n", - "Llama70B | \n", - "GA | \n", - "Your task is to choose a type of the question, from Description, Entity, Expression, Human, Location and Number. | \n", - "0.730 | \n", - "
| 75 | \n", - "Llama8B | \n", - "DE | \n", - "Your task is to choose a type of the question, from Description, Entity, Expression, Human, Location and Number. | \n", - "0.755 | \n", - "
| \n", - " | meta_llm | \n", - "optimizer | \n", - "prompt | \n", - "test_score | \n", - "
|---|---|---|---|---|
| 36 | \n", - "NaN | \n", - "NaN | \n", - "classify each sentence as either \"objective\" or \"subjective\". | \n", - "0.465 | \n", - "
| 38 | \n", - "NaN | \n", - "NaN | \n", - "evaluate each sentence as either objective or subjective. | \n", - "0.545 | \n", - "
| 65 | \n", - "Llama8B | \n", - "DE | \n", - "Determine whether the given text is expressing a subjective or objective sentiment and assign a label from ['subjective', 'objective'] using the provided instruction. | \n", - "0.570 | \n", - "
| 70 | \n", - "Llama8B | \n", - "GA | \n", - "Examiner, categorize movie reviews as objective or subjective, pinpointing their level of neutrality, and provide a detailed breakdown of each category, highlighting its distinct characteristics. | \n", - "0.590 | \n", - "
| 37 | \n", - "NaN | \n", - "NaN | \n", - "Your task is to classify the comment \"subjective\" or \"objective\". | \n", - "0.610 | \n", - "
| 68 | \n", - "Llama70B | \n", - "GA | \n", - "evaluate the given sentences and determine whether they are subjective or objective. | \n", - "0.615 | \n", - "
| 66 | \n", - "Llama70B | \n", - "GA | \n", - "Determine whether the provided statement is objective, conveying factual information, or subjective, expressing a personal viewpoint or bias. | \n", - "0.680 | \n", - "
| 69 | \n", - "Llama8B | \n", - "GA | \n", - "Determine the tone of the input text, classifying it as objective or subjective by identifying and explaining the linguistics features that contribute to its emotional or informative nature. | \n", - "0.685 | \n", - "
| 64 | \n", - "Llama8B | \n", - "DE | \n", - "identify whether the given sentence was expressing an objective or a subjective opinion. | \n", - "0.695 | \n", - "
| 62 | \n", - "Llama70B | \n", - "DE | \n", - "Considering its content, identify the nature of a passage as expressing a subjective or objective opinion from its wording. | \n", - "0.695 | \n", - "
| 71 | \n", - "Llama8B | \n", - "GA | \n", - "Analyze the provided sentences and categorize them as subjective or objective opinions, preserving their nuance and accuracy. | \n", - "0.695 | \n", - "
| 63 | \n", - "Llama8B | \n", - "DE | \n", - "Assess the given phrase and categorize it as either topic-neutral or opinion-based, determining whether it falls under attitude or reality. | \n", - "0.700 | \n", - "
| 67 | \n", - "Llama70B | \n", - "GA | \n", - "Classify the provided text as expressing either a factual or personal viewpoint, accompanied by a thorough justification for your categorization. | \n", - "0.770 | \n", - "
| 60 | \n", - "Llama70B | \n", - "DE | \n", - "Given an expression, you need to judge whether it represents subjective or objective opinion by assessing the context and meaning. | \n", - "0.780 | \n", - "
| 61 | \n", - "Llama70B | \n", - "DE | \n", - "Assess the given sentence and determine whether it is into subjective or objective opinion, evaluating the given sentences and their sentiment. | \n", - "0.785 | \n", - "
| \n", - " | meta_llm | \n", - "optimizer | \n", - "prompt | \n", - "test_score | \n", - "
|---|---|---|---|---|
| 22 | \n", - "Llama8B | \n", - "GA | \n", - "Determine the sentiment of the input text, categorizing it as positive, negative, optimistic, pessimistic, or neutral. | \n", - "0.805 | \n", - "
| 14 | \n", - "Llama70B | \n", - "DE | \n", - "As a sentiment analyzer, evaluate the input | \n", - "0.825 | \n", - "
| 12 | \n", - "Llama70B | \n", - "DE | \n", - "As a sentiment classifier, examine the user review statement and identify the sentiment orientation as either positive or negative, while understanding the meaning and any relevant context. | \n", - "0.855 | \n", - "
| 23 | \n", - "Llama8B | \n", - "GA | \n", - "Evaluate the sentiment of the provided statement and categorize it as either a 'positive' or 'negative' sentiment. | \n", - "0.870 | \n", - "
| 17 | \n", - "Llama8B | \n", - "DE | \n", - "Examine the statement and determine the emotional resonance of the text, evaluating whether it belongs to positive sentiment or a negative opinion. | \n", - "0.885 | \n", - "
| 19 | \n", - "Llama70B | \n", - "GA | \n", - "Given a sentence, classify it as either positive or negative sentiment. | \n", - "0.905 | \n", - "
| 15 | \n", - "Llama8B | \n", - "DE | \n", - "Given a sentence, classify it as either positive or negative sentiment. | \n", - "0.910 | \n", - "
| 21 | \n", - "Llama8B | \n", - "GA | \n", - "Identify the sentiment of the input text and determine its emotional tone as either 'positive' or 'negative', taking into account the nuances of the text and its context. | \n", - "0.915 | \n", - "
| 26 | \n", - "NaN | \n", - "NaN | \n", - "Given a tweet, classify it as having a positive or negative sentiment. | \n", - "0.915 | \n", - "
| 20 | \n", - "Llama70B | \n", - "GA | \n", - "Classify the sentiment of the provided sentence as either \"positive\" or \"negative\". | \n", - "0.925 | \n", - "
| 25 | \n", - "NaN | \n", - "NaN | \n", - "Your task is to classify the comment \"positive\" or \"negative\". | \n", - "0.930 | \n", - "
| 18 | \n", - "Llama70B | \n", - "GA | \n", - "Label the provided sentence with either \"positive\" or \"negative\" sentiment. | \n", - "0.945 | \n", - "
| 24 | \n", - "NaN | \n", - "NaN | \n", - "Please perform Sentiment Classification task. Given the sentence, assign a sentiment label from ['negative', 'positive']. Return label only without any other text. | \n", - "0.950 | \n", - "
| 16 | \n", - "Llama8B | \n", - "DE | \n", - "Please perform Sentiment Classification task. Given the sentence, assign a sentiment label from ['negative', 'positive']. Return label only without any other text. | \n", - "0.950 | \n", - "
| 13 | \n", - "Llama70B | \n", - "DE | \n", - "Please perform Sentiment Classification task. Given the sentence, assign a sentiment label from ['negative', 'positive']. Return label only without any other text. | \n", - "0.955 | \n", - "
| \n", - " | meta_llm | \n", - "optimizer | \n", - "prompt | \n", - "test_score | \n", - "
|---|---|---|---|---|
| 7 | \n", - "Llama70B | \n", - "GA | \n", - "Determine the primary theme of a given news article and classify it under one of the four categories: World, Sports, Tech, or Business. | \n", - "0.845 | \n", - "
| 8 | \n", - "Llama70B | \n", - "GA | \n", - "Determine the most suitable category (World, Sports, Business, or Tech) for a news article based on its dominant subject matter. | \n", - "0.845 | \n", - "
| 23 | \n", - "NaN | \n", - "NaN | \n", - "Give the main topic of the news article and then choose from World, Sports, Tech and Business. | \n", - "0.845 | \n", - "
| 3 | \n", - "Llama8B | \n", - "DE | \n", - "Choose a word from World, Sports, Business and Tech to categorize the given text. | \n", - "0.850 | \n", - "
| 6 | \n", - "Llama70B | \n", - "GA | \n", - "Determine the primary subject of the provided news article and classify it under one of the following categories: World, Sports, Business, or Tech. | \n", - "0.855 | \n", - "
| 2 | \n", - "Llama70B | \n", - "DE | \n", - "The goal is to identify the journal article according to its primary theme and determine whether it belongs to the World, Sports, Business, or Tech category.</prompt | \n", - "0.855 | \n", - "
| 5 | \n", - "Llama8B | \n", - "DE | \n", - "Give the main topic of the news article and then choose from World, Sports, Tech and Business. | \n", - "0.855 | \n", - "
| 4 | \n", - "Llama8B | \n", - "DE | \n", - "Classify the media report into one of the below-listed sections: World, Sports, Business, or Tech, considering the main topic. | \n", - "0.860 | \n", - "
| 22 | \n", - "NaN | \n", - "NaN | \n", - "Choose a word from World, Sports, Business and Tech to categorize the given text. | \n", - "0.870 | \n", - "
| 1 | \n", - "Llama70B | \n", - "DE | \n", - "Your task is to identify the subject of the news piece and classify it into one of four categories: World, Sports, Business and Tech. | \n", - "0.875 | \n", - "
| 11 | \n", - "Llama8B | \n", - "GA | \n", - "Classify the given news article into one of four categories (World, Sports, Business, or Tech) based on its main theme or subject matter. | \n", - "0.880 | \n", - "
| 10 | \n", - "Llama8B | \n", - "GA | \n", - "Classify a news article into one of four themed categories - World, Sports, Business, or Tech - based on its primary focus and emphasis. | \n", - "0.885 | \n", - "
| 21 | \n", - "NaN | \n", - "NaN | \n", - "Your responsibility is to assign a news article to one of four categories: World, Sports, Business, or Tech, based on its main idea. | \n", - "0.885 | \n", - "
| 9 | \n", - "Llama8B | \n", - "GA | \n", - "Your responsibility is to assign a news article to one of four categories: World, Sports, Business, or Tech, based on its main idea. | \n", - "0.885 | \n", - "
| 0 | \n", - "Llama70B | \n", - "DE | \n", - "Classify the topic of the following news as \"World\", \"Sports\", \"Tech\" or \"Business\". | \n", - "0.890 | \n", - "
| \n", - " | meta_llm | \n", - "optimizer | \n", - "prompt | \n", - "test_score | \n", - "
|---|---|---|---|---|
| 28 | \n", - "Llama8B | \n", - "DE | \n", - "Analyze the phrase and categorize its emotional tone into one of the following labels: positive or negative as a sentiment classifier. | \n", - "0.770 | \n", - "
| 34 | \n", - "Llama8B | \n", - "GA | \n", - "Determine the sentiment of the input, classifying it as positive or negative emotional tone. | \n", - "0.785 | \n", - "
| 31 | \n", - "Llama70B | \n", - "GA | \n", - "Classify the sentiment of the provided sentence or review as either \"positive\" or \"negative\", indicating the attitude towards the subject. | \n", - "0.835 | \n", - "
| 29 | \n", - "NaN | \n", - "NaN | \n", - "Given a tweet, classify it as having a positive or negative sentiment. | \n", - "0.845 | \n", - "
| 33 | \n", - "Llama8B | \n", - "GA | \n", - "Assess the given text and categorize it as either a positive or negative sentiment. | \n", - "0.850 | \n", - "
| 29 | \n", - "Llama8B | \n", - "DE | \n", - "Given a statement, classify it as expressing a positive or negative opinion. | \n", - "0.885 | \n", - "
| 24 | \n", - "Llama70B | \n", - "DE | \n", - "Your task is to classify the comment \"positive\" or \"negative\". | \n", - "0.895 | \n", - "
| 25 | \n", - "Llama70B | \n", - "DE | \n", - "Given an online message, your task is to classify it as expressing a \"positive\" or \"negative\" opinion, considering whether it is written with a favorable or unfavorable attitude. | \n", - "0.915 | \n", - "
| 32 | \n", - "Llama70B | \n", - "GA | \n", - "Please perform Sentiment Classification task. Given the sentence, assign a sentiment label from ['negative', 'positive']. Return label only without any other text | \n", - "0.915 | \n", - "
| 35 | \n", - "Llama8B | \n", - "GA | \n", - "Please perform Sentiment Classification task. Given the sentence, assign a sentiment label from ['negative', 'positive']. Return label only without any other text | \n", - "0.920 | \n", - "
| 28 | \n", - "NaN | \n", - "NaN | \n", - "Your task is to classify the comment \"positive\" or \"negative\". | \n", - "0.920 | \n", - "
| 26 | \n", - "Llama70B | \n", - "DE | \n", - "Please perform Sentiment Classification task. Given the sentence, assign a sentiment label from ['negative', 'positive']. Return label only without any other text | \n", - "0.925 | \n", - "
| 30 | \n", - "Llama70B | \n", - "GA | \n", - "Your task is to classify the comment \"positive\" or \"negative\". | \n", - "0.925 | \n", - "
| 27 | \n", - "Llama8B | \n", - "DE | \n", - "Your task is to classify the comment \"positive\" or \"negative\". | \n", - "0.925 | \n", - "
| 27 | \n", - "NaN | \n", - "NaN | \n", - "Please perform Sentiment Classification task. Given the sentence, assign a sentiment label from ['negative', 'positive']. Return label only without any other text | \n", - "0.925 | \n", - "
| \n", - " | meta_llm | \n", - "optimizer | \n", - "prompt | \n", - "test_score | \n", - "
|---|---|---|---|---|
| 32 | \n", - "NaN | \n", - "NaN | \n", - "Your objective is to analyze the movie review and allocate it to one of five categories, from terrible to great. | \n", - "0.040 | \n", - "
| 39 | \n", - "Llama8B | \n", - "DE | \n", - "Analyze the movie criticism provided to you into one of five categories based on the sentiment: terrible, bad, okay, good, or great, while considering the context and tone of the movie. | \n", - "0.350 | \n", - "
| 40 | \n", - "Llama8B | \n", - "DE | \n", - "Analyze the given text and assign it to one of the following categories: terrible, bad, okay, good, or great, considering the relevant context. | \n", - "0.365 | \n", - "
| 41 | \n", - "Llama8B | \n", - "DE | \n", - "Evaluate the text provided and categorize movie reviews into one of the following categories: terrible, bad, okay, good, or great. | \n", - "0.470 | \n", - "
| 45 | \n", - "Llama8B | \n", - "GA | \n", - "Evaluate the emotional tone of the text and categorize it into one of five sentiment categories (terrible, bad, okay, good, or great) based on the presence of positive and negative sentiments, providing a precise and nuanced classification. | \n", - "0.500 | \n", - "
| 37 | \n", - "Llama70B | \n", - "DE | \n", - "Classify the movie review provided to you into one of five categories based on the sentiment: terrible, bad, okay, good, or great. | \n", - "0.505 | \n", - "
| 47 | \n", - "Llama8B | \n", - "GA | \n", - "Identify the sentiment of the given text and categorize it as 'terrible', 'bad', 'okay', 'good', or 'great' based on its tone and language, outputting the corresponding sentiment label. | \n", - "0.520 | \n", - "
| 31 | \n", - "NaN | \n", - "NaN | \n", - "In this task, you are given movie reviews. Based on it, classify it to one of the five classes: (1) terrible, (2) bad, (3) okay, (4) good, and (5) great. | \n", - "0.535 | \n", - "
| 36 | \n", - "Llama70B | \n", - "DE | \n", - "Based on the given movie review, rate it into one of five ratings based on the sentiment: terrible, bad, okay, good, or great. | \n", - "0.560 | \n", - "
| 43 | \n", - "Llama70B | \n", - "GA | \n", - "Assign a sentiment label ('terrible', 'bad', 'okay', 'good', or 'great') to the provided movie review, reflecting the overall emotional tone of the text. | \n", - "0.560 | \n", - "
| 38 | \n", - "Llama70B | \n", - "DE | \n", - "Analyze the sentence and categorize it into one of five categories based on the sentiment: terrible, bad, okay, good, or great. | \n", - "0.560 | \n", - "
| 44 | \n", - "Llama70B | \n", - "GA | \n", - "Rate the emotional tone of the provided text as terrible, bad, okay, good, or great, reflecting the overall sentiment expressed. | \n", - "0.570 | \n", - "
| 42 | \n", - "Llama70B | \n", - "GA | \n", - "Determine the sentiment of the given text by labeling it as 'terrible', 'bad', 'okay', 'good', or 'great' according to the author's expressed emotions. | \n", - "0.605 | \n", - "
| 46 | \n", - "Llama8B | \n", - "GA | \n", - "Classify the provided comment according to its sentiment intensity, assigning a label from ['terrible', 'bad', 'okay', 'good', 'great'] without providing additional context. | \n", - "0.615 | \n", - "
| 30 | \n", - "NaN | \n", - "NaN | \n", - "Please perform Sentiment Classification task. Given the sentence, assign a sentiment label from ['terrible', 'bad', 'okay', 'good', 'great']. Return label only without any other text. | \n", - "0.620 | \n", - "
| \n", - " | meta_llm | \n", - "optimizer | \n", - "prompt | \n", - "test_score | \n", - "
|---|---|---|---|---|
| 50 | \n", - "Llama70B | \n", - "DE | \n", - "You will be responsible for evaluating the emotional tone in the input message and classify it as expressing a positive or negative opinion. | \n", - "0.835 | \n", - "
| 59 | \n", - "Llama8B | \n", - "GA | \n", - "Determine the emotional tone of the given text by deciphering its meaning and context, and categorize it as either positive or negative sentiment. | \n", - "0.855 | \n", - "
| 35 | \n", - "NaN | \n", - "NaN | \n", - "Given a tweet, classify it as having a positive or negative sentiment. | \n", - "0.890 | \n", - "
| 49 | \n", - "Llama70B | \n", - "DE | \n", - "Given a sentence, classify it as either positive or negative sentiment. | \n", - "0.895 | \n", - "
| 58 | \n", - "Llama8B | \n", - "GA | \n", - "Evaluate the emotional tone and sentiment of the provided text, categorizing its emotional connotation as 'strongly positive', 'positive', 'neutral', 'negative', or 'strongly negative', and provide a nuanced intensity level if needed, or 'positive' or 'negative' if the sentiment is straightforward. | \n", - "0.905 | \n", - "
| 56 | \n", - "Llama70B | \n", - "GA | \n", - "Identify the emotional tone of the text, categorizing it as either \"positive\" or \"negative\" sentiment. | \n", - "0.910 | \n", - "
| 53 | \n", - "Llama8B | \n", - "DE | \n", - "Analyze a review and classify it as expressing a positive or negative opinion. | \n", - "0.910 | \n", - "
| 57 | \n", - "Llama8B | \n", - "GA | \n", - "Given a tweet, classify it as having a positive or negative sentiment. | \n", - "0.915 | \n", - "
| 48 | \n", - "Llama70B | \n", - "DE | \n", - "Your task is to classify the comment \"positive\" or \"negative\". | \n", - "0.945 | \n", - "
| 34 | \n", - "NaN | \n", - "NaN | \n", - "Your task is to classify the comment \"positive\" or \"negative\". | \n", - "0.945 | \n", - "
| 52 | \n", - "Llama8B | \n", - "DE | \n", - "Examine the review and classify it as having a positive or negative sentiment, while considering the tone and context. | \n", - "0.950 | \n", - "
| 51 | \n", - "Llama8B | \n", - "DE | \n", - "Your task is to classify the comment \"positive\" or \"negative\". | \n", - "0.950 | \n", - "
| 55 | \n", - "Llama70B | \n", - "GA | \n", - "Your task is to classify the comment \"positive\" or \"negative\". | \n", - "0.950 | \n", - "
| 54 | \n", - "Llama70B | \n", - "GA | \n", - "Your task is to classify the comment \"positive\" or \"negative\". | \n", - "0.950 | \n", - "
| 33 | \n", - "NaN | \n", - "NaN | \n", - "Please perform Sentiment Classification task. Given the sentence, assign a sentiment label from ['negative', 'positive']. Return label only without any other text. | \n", - "0.960 | \n", - "
| \n", - " | task | \n", - "optimizer | \n", - "meta_llm | \n", - "downstream_llm | \n", - "evaluation_llm | \n", - "random_seed | \n", - "prompt | \n", - "train_score | \n", - "test_score | \n", - "
|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", - "agnews | \n", - "evopromptde | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "42 | \n", - "You will be given a news article and asked to classify it as World, Sports, Business and Tech, depending on its main topic. | \n", - "0.95 | \n", - "0.885 | \n", - "
| 1 | \n", - "agnews | \n", - "evopromptde | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "47 | \n", - "Your task is to classify the news item as \"World\", \"Sports\", \"Tech\" or \"Business\". | \n", - "0.90 | \n", - "0.890 | \n", - "
| 2 | \n", - "agnews | \n", - "evopromptde | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "69 | \n", - "The objective is to assign a news article to one of the following categories: World, Sports, Business, or Tech, based on its main topic. | \n", - "1.00 | \n", - "0.880 | \n", - "
| 3 | \n", - "agnews | \n", - "evopromptga | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "42 | \n", - "Choose a word from World, Sports, Business and Tech to categorize the given text. | \n", - "1.00 | \n", - "0.830 | \n", - "
| 4 | \n", - "agnews | \n", - "evopromptga | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "47 | \n", - "Classify the given news article into one of the four categories ['World', 'Sports', 'Business', or 'Tech'] based on its primary theme and main topic, ensuring accurate categorization. | \n", - "1.00 | \n", - "0.880 | \n", - "
| 5 | \n", - "agnews | \n", - "evopromptga | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "69 | \n", - "Classify the given news article into one of the four main categories: World, Sports, Business, or Tech, based on the article's topic. | \n", - "0.95 | \n", - "0.870 | \n", - "
| 6 | \n", - "cr | \n", - "evopromptde | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "42 | \n", - "Consider customer reviews and analyze them to determine their emotional tone, classifying them as expressing either positive or negative sentiment. | \n", - "0.95 | \n", - "0.785 | \n", - "
| 7 | \n", - "cr | \n", - "evopromptde | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "47 | \n", - "As a sentiment classifier, examine the text passage for sentiment in customer reviews, by assessing the overall emotional direction, and classify the expression as either positive or negative. | \n", - "0.95 | \n", - "0.855 | \n", - "
| 8 | \n", - "cr | \n", - "evopromptde | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "69 | \n", - "You will be tasked with analyzing text to determine its emotional tone, identifying whether it expresses a positive or negative sentiment, while considering the broader context. | \n", - "1.00 | \n", - "0.855 | \n", - "
| 9 | \n", - "cr | \n", - "evopromptga | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "42 | \n", - "Classify this customer review as expressing either a \"positive\" or \"negative\" sentiment, analyzing its tone and content. | \n", - "1.00 | \n", - "0.940 | \n", - "
| 10 | \n", - "cr | \n", - "evopromptga | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "47 | \n", - "Classify the provided text as having positive or negative sentiment, determining the corresponding sentiment label from ['negative', 'positive']. | \n", - "1.00 | \n", - "0.930 | \n", - "
| 11 | \n", - "cr | \n", - "evopromptga | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "69 | \n", - "Given a review, classify it as expressing a positive or negative sentiment. | \n", - "0.95 | \n", - "0.930 | \n", - "
| 12 | \n", - "mr | \n", - "evopromptde | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "42 | \n", - "Given a tweet, classify it as having a positive or negative sentiment. | \n", - "0.95 | \n", - "0.885 | \n", - "
| 13 | \n", - "mr | \n", - "evopromptde | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "47 | \n", - "To categorize the sentiment of text snippets in a movie review, classify the sentiment as either 'negative' or 'positive', taking into consideration | \n", - "0.95 | \n", - "0.760 | \n", - "
| 14 | \n", - "mr | \n", - "evopromptde | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "69 | \n", - "Given a tweet, classify it as having a positive or negative sentiment. | \n", - "0.95 | \n", - "0.890 | \n", - "
| 15 | \n", - "mr | \n", - "evopromptga | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "42 | \n", - "Classify a movie review as expressing either 'negative' or 'positive' sentiment by identifying the underlying emotion. | \n", - "1.00 | \n", - "0.915 | \n", - "
| 16 | \n", - "mr | \n", - "evopromptga | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "47 | \n", - "Based on the provided movie review, please classify its sentiment as either \"positive\" or \"negative\" according to the given binary sentiment annotations. | \n", - "1.00 | \n", - "0.905 | \n", - "
| 17 | \n", - "mr | \n", - "evopromptga | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "69 | \n", - "Classify the movie review text, determining whether it expresses a positive or negative sentiment, and output the corresponding label ('positive' or 'negative') | \n", - "1.00 | \n", - "0.850 | \n", - "
| 18 | \n", - "sst-5 | \n", - "evopromptde | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "42 | \n", - "Examine the movie critique and allocate it to one of the following categories: terrible, bad, okay, good, great, while considering the sentiment. | \n", - "0.50 | \n", - "0.450 | \n", - "
| 19 | \n", - "sst-5 | \n", - "evopromptde | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "47 | \n", - "The object is to classify movie reviews into one of the following categories: terrible, bad, okay, good, or great, based on the sentiment. | \n", - "0.65 | \n", - "0.570 | \n", - "
| 20 | \n", - "sst-5 | \n", - "evopromptde | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "69 | \n", - "Analyze movie reviews and classify them into one of the following categories: terrible, bad, okay, good, or great. | \n", - "0.55 | \n", - "0.400 | \n", - "
| 21 | \n", - "sst-5 | \n", - "evopromptga | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "42 | \n", - "Classify the sentiment of the given movie review among ['terrible', 'bad', 'okay', 'good', 'great'] based on its emotional tone, leveraging your language understanding | \n", - "0.65 | \n", - "0.565 | \n", - "
| 22 | \n", - "sst-5 | \n", - "evopromptga | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "47 | \n", - "Classify the text corresponding to a movie review into one of the following five sentiment categories: terrible, bad, okay, good | \n", - "0.65 | \n", - "0.490 | \n", - "
| 23 | \n", - "sst-5 | \n", - "evopromptga | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "69 | \n", - "Transform the given movie review to one of the following sentiment categories: terrible, bad, okay, good, or great, accurately capturing its essence. Return a corresponding sentiment label from the list. | \n", - "0.60 | \n", - "0.470 | \n", - "
| 24 | \n", - "sst2 | \n", - "evopromptde | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "42 | \n", - "Please perform Sentiment Classification task. Given the sentence, assign a sentiment label from ['negative', 'positive']. Return label only without any other text. | \n", - "1.00 | \n", - "0.940 | \n", - "
| 25 | \n", - "sst2 | \n", - "evopromptde | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "47 | \n", - "As a sentiment classifier, consider a movie review sentence and analyze its emotional tone, classifying it as either positive or negative sentiment, while taking into account its meaning and context. | \n", - "1.00 | \n", - "0.815 | \n", - "
| 26 | \n", - "sst2 | \n", - "evopromptde | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "69 | \n", - "Examine the written opinion in a movie review and classify it as either \"positive\" or \"negative\", considering the sentence meaning and relevant context. | \n", - "1.00 | \n", - "0.880 | \n", - "
| 27 | \n", - "sst2 | \n", - "evopromptga | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "42 | \n", - "Please perform Sentiment Classification task. Given the sentence, assign a sentiment label from ['negative', 'positive']. Return label only without any other text. | \n", - "1.00 | \n", - "0.945 | \n", - "
| 28 | \n", - "sst2 | \n", - "evopromptga | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "47 | \n", - "Determine whether the given movie review expresses strongly positive or strongly negative sentiment. | \n", - "1.00 | \n", - "0.935 | \n", - "
| 29 | \n", - "sst2 | \n", - "evopromptga | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "69 | \n", - "Given a movie review, use context and sentiment cues to predict whether it's 'positive' or 'negative', and return the corresponding label. | \n", - "1.00 | \n", - "0.910 | \n", - "
| 30 | \n", - "subj | \n", - "evopromptde | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "42 | \n", - "Your task is to examine sentences from movie reviews and understand the purpose of the utterance, and then determine the intention behind the statement, by classifying them as either subjective or objective | \n", - "0.80 | \n", - "0.750 | \n", - "
| 31 | \n", - "subj | \n", - "evopromptde | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "47 | \n", - "determine the type of a sentence fragment for its tone, and identify whether it has a certain nuance, carrying a particular sentiment, or having a subjective or objective tone, to classify it as subjective or objective. | \n", - "0.75 | \n", - "0.585 | \n", - "
| 32 | \n", - "subj | \n", - "evopromptde | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "69 | \n", - "Determine the intent of the given text and designate it as either subjective or objective, taking into account the meaning and context | \n", - "0.85 | \n", - "0.630 | \n", - "
| 33 | \n", - "subj | \n", - "evopromptga | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "42 | \n", - "Classify the sentence and determine its perspective, distinguishing between subjective expressions of personal opinions or feelings and objective presentations of factual information, while considering linguistic nuances, contextual clues, and subtle linguistic cues. | \n", - "0.80 | \n", - "0.705 | \n", - "
| 34 | \n", - "subj | \n", - "evopromptga | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "47 | \n", - "Review the provided sentence and categorize it as either subjective (emotive) or objective (neutral), based on its linguistic and emotional characteristics. | \n", - "0.75 | \n", - "0.695 | \n", - "
| 35 | \n", - "subj | \n", - "evopromptga | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "69 | \n", - "Given a statement, classify it as expressing a subjective or objective opinion. | \n", - "0.80 | \n", - "0.755 | \n", - "
| 36 | \n", - "trec | \n", - "evopromptde | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "42 | \n", - "Analyze the question to determine its type by including categories of Description, Entity, Expression, Human, Location, and Number; categorize the question into one of the following categories: Description, Entity, Expression, Human, Location, or Number, while considering the meaning and relevant context. | \n", - "0.55 | \n", - "0.630 | \n", - "
| 37 | \n", - "trec | \n", - "evopromptde | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "47 | \n", - "You are given a question. You need to detect which category better describes the question. Answer with \"Description\", \"Entity\", \"Expression\", \"Human\", \"Location\", and \"Number\". | \n", - "0.55 | \n", - "0.645 | \n", - "
| 38 | \n", - "trec | \n", - "evopromptde | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "69 | \n", - "Your task is to choose a type of the question, from Description, Entity, Expression, Human, Location and Number. | \n", - "0.45 | \n", - "0.750 | \n", - "
| 39 | \n", - "trec | \n", - "evopromptga | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "42 | \n", - "Please classify each question into its primary category from the following options: Description, Entity, Expression, Human, Location, Number. | \n", - "0.65 | \n", - "0.605 | \n", - "
| 40 | \n", - "trec | \n", - "evopromptga | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "47 | \n", - "For each question, select the primary category (Description, Entity, Expression, Human, Location, or Number). | \n", - "0.55 | \n", - "0.685 | \n", - "
| 41 | \n", - "trec | \n", - "evopromptga | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "meta-llama/Meta-Llama-3-70B-Instruct | \n", - "meta-llama\\Meta-Llama-3-8B-Instruct | \n", - "69 | \n", - "Recognize the primary category of each question, choosing from Description, Entity, Expression, Human, Location, or Number, and return the most fitting label. | \n", - "0.70 | \n", - "0.615 | \n", - "
| \n", - " | score | \n", - "test_score | \n", - "
|---|---|---|
| score | \n", - "1.000000 | \n", - "0.891781 | \n", - "
| test_score | \n", - "0.891781 | \n", - "1.000000 | \n", - "
| \n", - " | score | \n", - "test_score | \n", - "
|---|---|---|
| score | \n", - "1.000000 | \n", - "0.769682 | \n", - "
| test_score | \n", - "0.769682 | \n", - "1.000000 | \n", - "
| \n", - " | \n", - " | train_score | \n", - "test_score | \n", - "diff | \n", - "
|---|---|---|---|---|
| meta_llm | \n", - "optimizer | \n", - "\n", - " | \n", - " | \n", - " |
| Llama-70B | \n", - "DE | \n", - "89.76 | \n", - "79.74 | \n", - "-10.02 | \n", - "
| GA | \n", - "92.14 | \n", - "78.26 | \n", - "-13.88 | \n", - "|
| Llama-8B | \n", - "DE | \n", - "81.67 | \n", - "74.52 | \n", - "-7.14 | \n", - "
| GA | \n", - "85.00 | \n", - "74.50 | \n", - "-10.50 | \n", - "