From 4cb7a65914474488680796f951707329ee025129 Mon Sep 17 00:00:00 2001 From: finitearth Date: Sat, 12 Oct 2024 11:14:38 +0200 Subject: [PATCH 1/8] implemented random selector --- notebooks/getting_started.ipynb | 212 +++++++++++++++++- poetry.lock | 126 ++++++++++- promptolution/config.py | 4 + .../base_exemplar_selector.py | 6 + .../exemplar_selectors/random_selector.py | 19 ++ promptolution/helpers.py | 5 + promptolution/llms/api_llm.py | 9 +- promptolution/predictors/base_predictor.py | 39 ++-- promptolution/predictors/classificator.py | 47 ++-- promptolution/tasks/classification_tasks.py | 30 ++- pyproject.toml | 1 + 11 files changed, 436 insertions(+), 62 deletions(-) create mode 100644 promptolution/exemplar_selectors/base_exemplar_selector.py create mode 100644 promptolution/exemplar_selectors/random_selector.py diff --git a/notebooks/getting_started.ipynb b/notebooks/getting_started.ipynb index 5807cd8..9401734 100644 --- a/notebooks/getting_started.ipynb +++ b/notebooks/getting_started.ipynb @@ -64,9 +64,18 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\tzehl\\Documents\\programming\\promptolution\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "from promptolution.helpers import run_experiment\n", "from promptolution.config import Config" @@ -81,7 +90,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -90,31 +99,220 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "config = Config(\n", " task_name=\"agnews\",\n", " ds_path=\"../data_sets/cls/agnews/\",\n", - " n_steps=3,\n", + " n_steps=8,\n", " optimizer=\"evopromptga\",\n", " meta_llm=\"meta-llama/Meta-Llama-3-8B-Instruct\",\n", " evaluation_llm=\"meta-llama/Meta-Llama-3-8B-Instruct\",\n", " downstream_llm=\"meta-llama/Meta-Llama-3-8B-Instruct\",\n", " api_token=token,\n", + " prepend_examplars=True,\n", + " n_examplars=5\n", ")" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Your job is to determine whether a news article belongs to the World, Sports, Business, or Tech category based on its primary theme.\\nVa. Tech speeds up Mac OS X supercomputer by almost 20 OCTOBER 26, 2004 (MACCENTRAL) - Virginia Tech #39;s all-Mac OS X supercomputer at the university #39;s Terascale Computing Facility made headlines last year when it was determined to be the third-fastest supercomputer in the world.I would categorize this news article as \"Tech\". The primary theme of the article is about a Virginia Tech supercomputer and its speed, mentioning Mac OS X and referring to it as the third-fastest supercomputer in the world.', 'Your job is to determine whether a news article belongs to the World, Sports, Business, or Tech category based on its primary theme.\\nCellphone That Detects Bad Breath Siemens Mobile, the German telecommunications company, has announced that it is working on a mobile phone that makes users aware when they have bad breath.I would categorize this news article under the \"Tech\" category. The primary theme of the article is a new smartphone feature being developed by Siemens Mobile, which is a technology company.', 'Your job is to determine whether a news article belongs to the World, Sports, Business, or Tech category based on its primary theme.\\nMariners fire Melvin after two seasons NOTES He indicated to reporters Sunday that he still wanted to manage the Mariners, but Melvin might get another chance elsewhere. Bavasi said he called an undisclosed club to suggest Melvin be considered for a managerial vacancy.I would categorize this news article as \"Sports\" as it revolves around the firing of a manager in a professional sports team, specifically the Seattle Mariners.', 'Your job is to determine whether a news article belongs to the World, Sports, Business, or Tech category based on its primary theme.\\nMariners fire Melvin after two seasons NOTES He indicated to reporters Sunday that he still wanted to manage the Mariners, but Melvin might get another chance elsewhere. Bavasi said he called an undisclosed club to suggest Melvin be considered for a managerial vacancy.Based on the primary theme, I would categorize this news article under the Sports category. The article is about the Seattle Mariners firing their manager, Scott Servais, and Bavasi suggesting he be considered for another managerial vacancy, making it a sports-related news article.', 'Your job is to determine whether a news article belongs to the World, Sports, Business, or Tech category based on its primary theme.\\nVenture by Time Warner And Microsoft Unsettles EU A Bethesda company owned by Microsoft Corp. and Time Warner Inc. is the subject of a European Union investigation into whether the company #39;s software will give the two media giants a monopoly in copyright protection.Based on the primary theme of the article, I would categorize it as \"Business\". The article discusses a European Union investigation into a business deal or merger between Time Warner and Microsoft, which is likely to have significant implications for the business industry.']\n", + "[\"You will be required to classify a news article as World, Sports, Business, or Tech based on its primary topic.\\nUnlikely Visionary Behind Private Race to Space SpaceShipOne visionary Peter Diamandis wasn't thinking about history as he stood in the Mojave desert and watched a small, shuttlecock-shaped craft glide back to Earth having nudged the edge of space. He just thought it looked beautiful.I would classify this news article as Tech. The primary topic is Peter Diamandis and his private space program, SpaceShipOne, which is a technological achievement in the field of space exploration.\", 'You will be required to classify a news article as World, Sports, Business, or Tech based on its primary topic.\\nNew fertility treatment \\'closer\\' Women may be able to have unfertilised eggs routinely frozen during fertility treatment rather than just embryos after research.I would classify this news article as \"Health/Science\" (which could also fit under \"World\")', 'You will be required to classify a news article as World, Sports, Business, or Tech based on its primary topic.\\nWell, it worked for Bill So let me get this straight: Barry Bonds used steroids, but he didn #39;t inhale?***Mr. Schwarzenegger, meet your new lieutenant governor.I would classify this news article as Sports. The primary topic appears to be about an athlete, Barry Bonds, and his use of steroids.', 'You will be required to classify a news article as World, Sports, Business, or Tech based on its primary topic.\\nMariners fire Melvin after two seasons NOTES He indicated to reporters Sunday that he still wanted to manage the Mariners, but Melvin might get another chance elsewhere. Bavasi said he called an undisclosed club to suggest Melvin be considered for a managerial vacancy.I would classify this news article as Sports.', 'You will be required to classify a news article as World, Sports, Business, or Tech based on its primary topic.\\nIBM updates WebSphere Application Server software OCTOBER 06, 2004 (COMPUTERWORLD) - IBM today unveiled a new version of its WebSphere Application Server software, which is designed to protect applications from server downtime and boost enterprise efforts to build a service-oriented architecture (SOA).I would classify this news article as Tech. The article is primarily about IBM updating its WebSphere Application Server software, which is a topic related to technology.']\n", + "['You will be given a news article and asked to classify it as World, Sports, Business and Tech, depending on its main topic.\\nGuarding of Ukraine central election commission relieved KIEV, November 3 (Itar-Tass) - Water cannons and armoured personnel carriers were removed from the territory of the Ukrainian central election commission on Wednesday.I would classify this news article as World.', 'You will be given a news article and asked to classify it as World, Sports, Business and Tech, depending on its main topic.\\nUpdate 6: Dutch Steelmaker Buying ISG in \\\\$4.5B Deal Steel tycoon Lakshmi Mittal announced plans Monday to take over US steelmaker International Steel Group as part of a three-way merger to create a global metals mammoth.I would classify this news article as Business.', 'You will be given a news article and asked to classify it as World, Sports, Business and Tech, depending on its main topic.\\nAustralia red-faced BUOYANT New Zealand captain Stephen Fleming was relieved to have enlivened a dormant summer after his Black Caps conjured a stunning four-wicket win against Australia in the opening Sports', 'You will be given a news article and asked to classify it as World, Sports, Business and Tech, depending on its main topic.\\nInjury fears allayed over Celtic #39;s McNamara Injury fears over Celtic skipper Jackie McNamara have subsided after a scan revealed he has not broken his ankle. It was initially feared that McNamara had broken his ankle after a clash against Barcelona #39;s Ludovic Giuly on Tuesday night.I would classify this article as **Sports**, as it is about an athlete (Jackie McNamara) from a sports team (Celtic) and an injury he sustained during a match.', 'You will be given a news article and asked to classify it as World, Sports, Business and Tech, depending on its main topic.\\nNASA Develops Robust Artificial Intelligence for Planetary Rovers NASA is planning to add a strong dose of artificial intelligence (AI) to planetary rovers to make them much more self-reliant, capable of making basic decisions during a mission. Scientists are developing very complex AI software that enables a higher level of robotic intelligence.I would classify this news article as **Tech**.']\n", + "['Organize the given news article into its most fitting category (World, Sports, Business, or Tech) according to its core theme or subject matter.\\nCellphone That Detects Bad Breath Siemens Mobile, the German telecommunications company, has announced that it is working on a mobile phone that makes users aware when they have bad breath.I would categorize this news article under the category of \"Tech\". The core theme of the article is about a new innovative technology being developed by Siemens Mobile, a company that specializes in telecommunications and mobile technologies. The article is discussing a specific feature of a mobile phone that detects bad breath, which falls under the broader category of technology and innovation.', 'Organize the given news article into its most fitting category (World, Sports, Business, or Tech) according to its core theme or subject matter.\\nHenson plays for Cowboys, now will he start? Now Henson is waiting like everyone else to find out if he #39;ll soon be the starting quarterback. Coach Bill Parcells today said starter Vinny Testaverde is questionable because of a sore shoulder and back.I would categorize this news article under \"Sports\", specifically under the category of NFL (National Football League) news. The article is discussing the Dallas Cowboys and their quarterback situation, with a focus on Henson\\'s potential to start and the uncertainty surrounding Vinny Testaverde\\'s injury status.', 'Organize the given news article into its most fitting category (World, Sports, Business, or Tech) according to its core theme or subject matter.\\nRyanair may face Belgian courts Budget airline Ryanair is obliged to be taken to court under EU law if it fails to pay back illegal state aid for flying to Belgium\\'s Charleroi airport.I would categorize this news article as \"Business\". The core theme of the article is that Ryanair, a budget airline, may face Belgian courts due to allegations of unpaid state aid, which is a business-related issue.', 'Organize the given news article into its most fitting category (World, Sports, Business, or Tech) according to its core theme or subject matter.\\nMicrosoft delays SP2 auto update Slowdown will give big companies more time to hunt down compatibility problems, the company says.I would categorize this news article as \"Tech\" because it primarily discusses software updates and their impact on computer systems, specifically the delay of SP2 auto update by Microsoft.', 'Organize the given news article into its most fitting category (World, Sports, Business, or Tech) according to its core theme or subject matter.\\nShould Your Next Car Be New or Used? Used cars have a lot to offer -- if you know what you\\'re doing.I would organize this news article into the category of \"Business\" since its core theme is about purchasing vehicles, specifically whether to buy a new or used car.']\n", + "['Categorize the given news article into one of the four categories (World, Sports, Business, or Tech) based on its primary theme.\\nDolby, Intel sound off for developers Companies plan tools for improving high-end audio for PCs with Intel inside.I would categorize this news article under \"Tech\" as the primary theme is the partnership between Dolby and Intel to develop tools for improving high-end audio for PCs with Intel processors.', 'Categorize the given news article into one of the four categories (World, Sports, Business, or Tech) based on its primary theme.\\nExplosion marks first day of Iraq campaigns Violence marred Iraq #39;s first official day of parliamentary election campaigning Wednesday as interim Iraqi Prime Minister Ayad Allawi announced his candidacy for the Jan. 30 I would categorize this news article under the category \"World\". The primary theme of the article is the political situation in Iraq, specifically the start of parliamentary election campaigns and the announcement of a candidate, which is a significant international event.', 'Categorize the given news article into one of the four categories (World, Sports, Business, or Tech) based on its primary theme.\\nYukos to stop oil exports to China Russian oil giant Yukos has decided to halt two-thirds of its oil exports to China, raising the stakes in its yearlong legal battle with the authorities.I would categorize this news article into the \"Business\" category, as the primary theme is the halt of oil exports to China by the Russian oil giant Yukos, which is a business-related issue.', 'Categorize the given news article into one of the four categories (World, Sports, Business, or Tech) based on its primary theme.\\nHalliburton to Restructure KBR Unit (Reuters) Reuters - Halliburton Co. said on\\\\Thursday it would restructure its KBR unit and may shed the\\\\business if the oil services company\\'s stock performance keeps\\\\lagging its peers.Based on the primary theme of the article, I would categorize it into the \"Business\" category. The article is focused on Halliburton\\'s plans to restructure its KBR unit and potentially sell it off due to lagging stock performance, making it a business-related news story.', \"Categorize the given news article into one of the four categories (World, Sports, Business, or Tech) based on its primary theme.\\nNL Wrap: Ramirez Shines in Cubs Victory Over Reds (Reuters) Reuters - Aramis Ramirez homered three times and\\\\drove in all of Chicago's runs as the Cubs edged the Cincinnati\\\\Reds 5-4 in National League play at the Great American Ballpark\\\\on Thursday.The primary theme of this news article is Sports, as it is about a baseball game played by the Chicago Cubs against the Cincinnati Reds.\"]\n", + "['Assign a topic category to the given news: World, Sports, Business, or Tech.\\nSchwab ex-CEO to get almost \\\\$10M SAN FRANCISCO (CBS.MW) -- Charles Schwab Corp. is paying David Pottruck, ousted as chief executive officer in July, a lump-sum payment of \\\\$6.I would categorize this news under \"Business\".', 'Assign a topic category to the given news: World, Sports, Business, or Tech.\\nQB Tate to Lead Injured Hawkeyes Vs. PSU (AP) AP - No. 25 Iowa is having a hard time keeping its tailbacks healthy.I would assign the topic to the category \"Sports\".', 'Assign a topic category to the given news: World, Sports, Business, or Tech.\\nNew fertility treatment \\'closer\\' Women may be able to have unfertilised eggs routinely frozen during fertility treatment rather than just embryos after research.I would assign this topic to the category \"Health/Science\" (which is often split into multiple categories such as World, Science, Medicine, etc.). The topic is related to medical research and treatment, specifically in the area of fertility.', 'Assign a topic category to the given news: World, Sports, Business, or Tech.\\nInflation remains well contained: US Fed US Federal Reserve said Wednesday that rate hikes can continue at a quot;measured pace quot; as quot;inflation and longer-term inflation expectations remain well contained quot;.I would assign this news to the topic category: Business', 'Assign a topic category to the given news: World, Sports, Business, or Tech.\\nA young monk opens a rare window of candor in Tibet He confirms Chinese are diluting traditional Buddhist culture and the monks\\' role.I would categorize this news under \"World\".']\n", + "['Your task is to classify the news item as \"World\", \"Sports\", \"Tech\" or \"Business\".\\nDolby, Intel sound off for developers Companies plan tools for improving high-end audio for PCs with Intel inside.I would classify this news item as \"Tech\".', 'Your task is to classify the news item as \"World\", \"Sports\", \"Tech\" or \"Business\".\\nU.S. Stocks Climb as Oil Prices Drop NEW YORK (Reuters) - The biggest one-day drop in oil prices since September 2001 helped send U.S. stock prices higher on Wednesday.I would classify this news item as \"Business\".', 'Your task is to classify the news item as \"World\", \"Sports\", \"Tech\" or \"Business\".\\nUN war crime tribunal calls first witness in Milosevic defense <b>...</b> The International Criminal Tribunal for Former Yugoslavia (ICTY) Tuesday called the first witness in the defense case of former Yugoslav President Slobodan Milosevic I would classify this news item as \"World\".', 'Your task is to classify the news item as \"World\", \"Sports\", \"Tech\" or \"Business\".\\nNetworks in a Duel of Technology Some television networks are racing to develop eye-catching score boxes. Fox, for example, now has fire coming out of its baseball scores.I would classify this news item as \"Tech\".', 'Your task is to classify the news item as \"World\", \"Sports\", \"Tech\" or \"Business\".\\nAustralia red-faced BUOYANT New Zealand captain Stephen Fleming was relieved to have enlivened a dormant summer after his Black Caps conjured a stunning four-wicket win against Australia in the opening I classify this news item as \"Sports\".']\n", + "['Your objective is to classify a news article into one of four themes: World, Sports, Business and Tech.\\nInjury fears allayed over Celtic #39;s McNamara Injury fears over Celtic skipper Jackie McNamara have subsided after a scan revealed he has not broken his ankle. It was initially feared that McNamara had broken his ankle after a clash against Barcelona #39;s Ludovic Giuly on Tuesday night.I would classify this news article into the theme: Sports.', \"Your objective is to classify a news article into one of four themes: World, Sports, Business and Tech.\\nNintendo's innards Wondering what your shiny new Nintendo DS looks like on the inside? Lik Sang saves you the voided warranty with a new photo gallery showing a detailed dissection of the new handheld game machine. Missing LinksI would classify this news article into the theme: Tech\", 'Your objective is to classify a news article into one of four themes: World, Sports, Business and Tech.\\nHackers Exploit Microsoft\\'s JPEG Flaw (AP) AP - In a harbinger of security threats to come, hackers have exploited a newly announced flaw in Microsoft Corp. programs and begun circulating malicious code hidden in images that use the popular JPEG format.I would classify this news article into the \"Tech\" theme. The article discusses a security flaw in Microsoft\\'s programs and how hackers are exploiting it, which is a topic related to technology.', 'Your objective is to classify a news article into one of four themes: World, Sports, Business and Tech.\\nNew bid to end talks deadlock Northern Ireland Secretary Paul Murphy and Irish foreign minister Brian Cowen will chair talks involving unionists and nationalists at Stormont aimed at ending the stand-off over proposed changes to devolution.I would classify this news article into the \"World\" theme, as it deals with international relations, politics, and a specific geographical region (Northern Ireland).', 'Your objective is to classify a news article into one of four themes: World, Sports, Business and Tech.\\nGuarding of Ukraine central election commission relieved KIEV, November 3 (Itar-Tass) - Water cannons and armoured personnel carriers were removed from the territory of the Ukrainian central election commission on Wednesday.I would classify this news article into the \"World\" theme, as it is focused on a political event in Ukraine.']\n", + "['In this task, you are given a news article. Your task is to classify the article to one out of the four topics \"World\", \"Sports\", \"Business\", \"Tech\" if the article\"s main topic is relevant to the world, sports, business, and technology, correspondingly. If you are not sure about the topic, choose the closest option.\\nJapan Expresses #39;serious Concern #39; About N Korean Missiles In <b>...</b> BEIJING (AFP) - Japanese negotiators told North Korea they had quot;serious concern quot; about its missile program in talks that extended for a second day in Beijing, a Japanese diplomat said.I would classify this article as \"World\". The article discusses international relations, specifically the talks between Japan and North Korea about North Korea\\'s missile program, which is a global affairs topic.', 'In this task, you are given a news article. Your task is to classify the article to one out of the four topics \"World\", \"Sports\", \"Business\", \"Tech\" if the article\"s main topic is relevant to the world, sports, business, and technology, correspondingly. If you are not sure about the topic, choose the closest option.\\nJuve Lead Is Cut AC Milan closed the gap on leaders Juventus to four points after receiving a helping hand from city-rivals Inter. Having already won their game at Chievo earlier in the day thanks to on-loan Chelsea striker I would classify this article as \"Sports\".', 'In this task, you are given a news article. Your task is to classify the article to one out of the four topics \"World\", \"Sports\", \"Business\", \"Tech\" if the article\"s main topic is relevant to the world, sports, business, and technology, correspondingly. If you are not sure about the topic, choose the closest option.\\nJapan Expresses #39;serious Concern #39; About N Korean Missiles In <b>...</b> BEIJING (AFP) - Japanese negotiators told North Korea they had quot;serious concern quot; about its missile program in talks that extended for a second day in Beijing, a Japanese diplomat said.I would classify this article as \"World\". The article appears to be discussing a meeting or talks between Japan and North Korea regarding North Korea\\'s missile program, which is a global political issue.', 'In this task, you are given a news article. Your task is to classify the article to one out of the four topics \"World\", \"Sports\", \"Business\", \"Tech\" if the article\"s main topic is relevant to the world, sports, business, and technology, correspondingly. If you are not sure about the topic, choose the closest option.\\nMartha Stewart Appeals Conviction Lawyers for Martha Stewart told a federal appeals court that she suffered a \"\"fundamentally unfair\"\" trial at the hands of prosecutors who incorrectly suggested she was accused of insider trading.I would classify this article as \"Business\". The article discusses Martha Stewart\\'s appeal of her conviction, which is related to a business/corporate issue (insider trading).', 'In this task, you are given a news article. Your task is to classify the article to one out of the four topics \"World\", \"Sports\", \"Business\", \"Tech\" if the article\"s main topic is relevant to the world, sports, business, and technology, correspondingly. If you are not sure about the topic, choose the closest option.\\nDynogen enters drug licensing deal Dynogen Pharmaceuticals Inc. of Waltham said it licensed a late-stage drug candidate from Mitsubishi Pharma Corp. Terms weren\\'t disclosed. The compound, pumosetrag, is thought to be helpful to patients suffering from a variety of gastrointestinal conditions, such as irritable bowel syndrome. Dynogen, a privately held firm, said it plans to start new clinical trials with pumosetrag next year. Mitsubishi, part ...I would classify this article as \"Business\". The main topic of the article is about Dynogen Pharmaceuticals Inc. licensing a drug candidate from Mitsubishi Pharma Corp., which is a business-related news.']\n", + "['Identify the central topic of the news article and place it into one of the following categories: World, Sports, Tech, or Business.\\nSun refreshes development tools line Refreshing its development tools arsenal, Sun Microsystems on Tuesday revealed plans for upcoming versions of Java Studio Creator and is unleashing upgrades to the NetBeans open source platform and Java Studio Enterprise.The central topic of the news article is: Tech', 'Identify the central topic of the news article and place it into one of the following categories: World, Sports, Tech, or Business.\\nDynogen enters drug licensing deal Dynogen Pharmaceuticals Inc. of Waltham said it licensed a late-stage drug candidate from Mitsubishi Pharma Corp. Terms weren\\'t disclosed. The compound, pumosetrag, is thought to be helpful to patients suffering from a variety of gastrointestinal conditions, such as irritable bowel syndrome. Dynogen, a privately held firm, said it plans to start new clinical trials with pumosetrag next year. Mitsubishi, part ...The central topic of the news article is \"Dynogen enters drug licensing deal with Mitsubishi Pharma Corp.\" and it falls under the category of **Business**.', 'Identify the central topic of the news article and place it into one of the following categories: World, Sports, Tech, or Business.\\nMac users face rare virus The malware disables Mac OS X\\'s built-in firewall, steals personal information and can destroy data.The central topic of the news article is \"Mac users face rare virus\" and it falls under the category of **Tech**.', 'Identify the central topic of the news article and place it into one of the following categories: World, Sports, Tech, or Business.\\nYahoo Jazzes Up Mobile Search Yahoo Jazzes Up Mobile Search\\\\\\\\Google SMS started serving search results to mobile users in the US just a few days back. Now Yahoo! follows it with even more comprehensive feature set. Google SMS is currently text based for the results; Yahoo! jazzes them up with maps, website icons, as well ...The central topic of the news article is \"Yahoo! Jazzes Up Mobile Search\" and falls under the category of **Tech**.', 'Identify the central topic of the news article and place it into one of the following categories: World, Sports, Tech, or Business.\\nUpdate 6: Dutch Steelmaker Buying ISG in \\\\$4.5B Deal Steel tycoon Lakshmi Mittal announced plans Monday to take over US steelmaker International Steel Group as part of a three-way merger to create a global metals mammoth.The central topic of the news article is: Acquisition of International Steel Group by a Dutch Steelmaker\\n\\nCategory: Business']\n", + "['Give the main topic of the news article and then choose from World, Sports, Tech and Business.\\nWTO Rules Against EU Protection of Goods (AP) AP - The European Union said Friday its system of protecting names for regionally produced foods such as Champagne and Parma ham was strengthened by an initial ruling from the World Trade Organization.The main topic of the news article is \"WTO Rules Against EU Protection of Goods\".\\n\\nI would categorize this article under \"World\".', 'Give the main topic of the news article and then choose from World, Sports, Tech and Business.\\nBentley hoping to avoid getting tripped up by Post A year ago, the Bentley Falcons ended C.W. Post\\'s two-year reign as Northeast-10 Conference football champions. Now the Pioneers have a chance to return the favor. Sitting atop the conference standings, C.W. Post (4-1, 4-0) hosts Bentley tomorrow and can inflict serious damage on any hopes the Falcons have of repeating.The main topic of this news article is \"Football: Bentley vs C.W. Post\".\\n\\nI choose: Sports', 'Give the main topic of the news article and then choose from World, Sports, Tech and Business.\\nHurricane Ivan Threatens Barbados, East Caribbean MIAMI (Reuters) - Powerful Hurricane Ivan, the latest in a busy storm season, rushed over the Atlantic Ocean on Monday and threatened the eastern Caribbean Windward Islands.The main topic of the news article is: \"Hurricane Ivan Threatens Barbados, East Caribbean\"\\n\\nI would categorize this article under: **World**', 'Give the main topic of the news article and then choose from World, Sports, Tech and Business.\\nFrance moves US plutonium south France sends plutonium from scrapped US nuclear warheads south under heavy guard to be reprocessed.The main topic of the news article is: France moves US plutonium\\n\\nCategory: World', 'Give the main topic of the news article and then choose from World, Sports, Tech and Business.\\nSuns Defeat Jazz 115-102 (AP) AP - Steve Nash had 20 points and 14 assists to lead the Phoenix Suns to their eighth straight win, 115-102 over the Utah Jazz on Tuesday night.Main topic of the news article: Suns defeat Jazz in basketball game\\n\\nCategory: Sports']\n", + "['Classify the topic of the following news as \"World\", \"Sports\", \"Tech\" or \"Business\".\\nCellphone That Detects Bad Breath Siemens Mobile, the German telecommunications company, has announced that it is working on a mobile phone that makes users aware when they have bad breath.I would classify the topic of this news as \"Tech\". The article is about a new technology development, specifically a mobile phone feature that detects and alerts users about bad breath, which is a technological innovation.', 'Classify the topic of the following news as \"World\", \"Sports\", \"Tech\" or \"Business\".\\nNetworks in a Duel of Technology Some television networks are racing to develop eye-catching score boxes. Fox, for example, now has fire coming out of its baseball scores.I would classify the topic of this news as \"Tech\"', 'Classify the topic of the following news as \"World\", \"Sports\", \"Tech\" or \"Business\".\\nEgypt Sees Move Towards Israel-Palestinian Peace JERUSALEM (Reuters) - Egypt\\'s official news agency said Tuesday significant progress had been made in international efforts to end Israeli-Palestinian violence, but both sides to the conflict termed talk of a deal premature.I would classify the topic of this news as \"World\".', 'Classify the topic of the following news as \"World\", \"Sports\", \"Tech\" or \"Business\".\\nDolby, Intel sound off for developers Companies plan tools for improving high-end audio for PCs with Intel inside.I would classify the topic of this news as \"Tech\".', 'Classify the topic of the following news as \"World\", \"Sports\", \"Tech\" or \"Business\".\\nAmericans playing it cool in the pool ATHENS Michael Phelps might receive his last gold medal of the Athens Olympics in a way no one would envision. I would classify this topic as \"Sports\".']\n", + "['Classify news articles according to their primary theme, selecting from one of the following categories: World, Sports, Business, or Tech.\\nSpartans outplayed by George Washington in first round of BB amp;T <b>...</b> Washington, DC - The No. 11 MSU basketball team lost 96-83 to George Washington Saturday afternoon in the first-round of the BB amp;T Classic.I would classify this news article as \"Sports\".', 'Classify news articles according to their primary theme, selecting from one of the following categories: World, Sports, Business, or Tech.\\nYukos faces \\\\$6.2bn tax deadline Russian oil giant Yukos says it is about to miss a deadline on a back tax payment of more than \\\\$6bn due from 2002.I would classify this news article as: Business', \"Classify news articles according to their primary theme, selecting from one of the following categories: World, Sports, Business, or Tech.\\nHackers Exploit Microsoft's JPEG Flaw (AP) AP - In a harbinger of security threats to come, hackers have exploited a newly announced flaw in Microsoft Corp. programs and begun circulating malicious code hidden in images that use the popular JPEG format.I would classify this news article as **Tech**.\", \"Classify news articles according to their primary theme, selecting from one of the following categories: World, Sports, Business, or Tech.\\nMinister offers apology for touchy remark Minister of Foreign Affairs Mark Chen () yesterday apologized for the words used to condemn a comment by his Singaporean counterpart who warned against Taiwan independence at a United Nations meeting last week.I would classify this news article as **World**. The article is about a minister's apology for a remark related to a United Nations meeting, which falls within the category of international relations and foreign affairs.\", 'Classify news articles according to their primary theme, selecting from one of the following categories: World, Sports, Business, or Tech.\\nBekele Succeeds Mentor Gebrselassie ATHENS (Reuters) - Ethiopian Kenenisa Bekele succeeded his mentor Haile Gebrselassie as the Olympic 10,000 meters champion Friday after an astonishing final lap in the longest track event.I would classify this news article as \"Sports\".']\n", + "['Choose a word from World, Sports, Business and Tech to categorize the given text.\\nMoody #39;s raises Dell #39;s senior unsecured debt rating NEW YORK, Aug 18 - Moody #39;s Investors Service said on Wednesday it raised the senior unsecured debt rating of Dell Inc. (DELL.O: Quote, Profile, Research) with a stable outlook citing the firm #39;s strong balance sheet and solid operating and financial ...I would categorize this text under \"Business\".', 'Choose a word from World, Sports, Business and Tech to categorize the given text.\\nAmericans playing it cool in the pool ATHENS Michael Phelps might receive his last gold medal of the Athens Olympics in a way no one would envision. I would categorize the given text under \"Sports\".', 'Choose a word from World, Sports, Business and Tech to categorize the given text.\\nSchwab ex-CEO to get almost \\\\$10M SAN FRANCISCO (CBS.MW) -- Charles Schwab Corp. is paying David Pottruck, ousted as chief executive officer in July, a lump-sum payment of \\\\$6.I would categorize the given text under \"Business\".', 'Choose a word from World, Sports, Business and Tech to categorize the given text.\\nHonor System Used for Flu Shots at U.S. Chain Stores By GRETCHEN PARKER BALTIMORE (AP) -- At the Walgreens flu shot clinic, the honor system is in place. If you tell nurses there you have a chronic heart problem, you can get a shot...I would categorize the given text under \"Tech\" because it mentions a specific retail store, Walgreens, and a technological concept, i.e., the \"honor system\", which is typically applied digitally in various applications, including online transactions and digital requests.', 'Choose a word from World, Sports, Business and Tech to categorize the given text.\\nAmerican Alvin Harrison Banned for Four Years Olympic 400 meter relay gold medallist Alvin Harrison has accepted a four-year ban for drugs violations, the United States Anti-Doping Agency said on Tuesday.I would categorize the given text under \"Sports\" as it is related to a sports personality (Alvin Harrison) and his ban due to drug violations in a specific sport (Olympic 400 meter relay).']\n", + "['Based on the main theme of given the news article, categorize it into World, Sports, Business, or Tech.\\nRosy outlook from Greenspan fails to sway many economists WASHINGTON : The upbeat outlook from Federal Reserve Chairman Alan Greenspan has failed to convince many economists, who argue that the United States is likely to see tepid growth at best in the near future.Based on the main theme of the article, I would categorize it into Business. The article is discussing the outlook for the US economy and the opinions of economists, which falls under the category of business news.', 'Based on the main theme of given the news article, categorize it into World, Sports, Business, or Tech.\\nIBM updates WebSphere Application Server software OCTOBER 06, 2004 (COMPUTERWORLD) - IBM today unveiled a new version of its WebSphere Application Server software, which is designed to protect applications from server downtime and boost enterprise efforts to build a service-oriented architecture (SOA).I would categorize this news article as: **Tech**', \"Based on the main theme of given the news article, categorize it into World, Sports, Business, or Tech.\\nSony keeps PSP production low, focuses on PS2 Low initial shipment figures for Sony #39;s forthcoming PlayStation Portable are down to a decision to focus on manufacturing the profitable PlayStation 2 hardware, according to a spokesperson for the Japanese giant.I would categorize this news article as **Tech**. The article discusses Sony's production decisions related to their gaming consoles, specifically the PlayStation Portable (PSP) and the PlayStation 2 (PS2), which are both technology-related topics.\", 'Based on the main theme of given the news article, categorize it into World, Sports, Business, or Tech.\\nJuve Lead Is Cut AC Milan closed the gap on leaders Juventus to four points after receiving a helping hand from city-rivals Inter. Having already won their game at Chievo earlier in the day thanks to on-loan Chelsea striker Based on the main theme of the article, I would categorize it as \"Sports\". The article discusses a soccer match and the standings of teams in the league, which is a clear indication that it belongs to the Sports category.', 'Based on the main theme of given the news article, categorize it into World, Sports, Business, or Tech.\\nDefiant Sharon sets out Gaza plan Israeli PM Ariel Sharon pushes ahead with an accelerated plan to withdraw from the Gaza Strip despite opposition.Based on the main theme of the news article, I would categorize it as **World**. The article is focused on a political event in the Middle East, specifically the plan of Israeli Prime Minister Ariel Sharon to withdraw from the Gaza Strip, which is a significant international event.']\n" + ] + } + ], "source": [ "df = run_experiment(config)" ] }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
promptscore
0Your job is to determine whether a news articl...1.00
8In this task, you are given a news article. Yo...0.90
9Identify the central topic of the news article...0.90
1You will be required to classify a news articl...0.85
4Categorize the given news article into one of ...0.85
6Your task is to classify the news item as \"Wor...0.85
7Your objective is to classify a news article i...0.85
3Organize the given news article into its most ...0.75
12Classify news articles according to their prim...0.75
13Choose a word from World, Sports, Business and...0.75
2You will be given a news article and asked to ...0.60
5Assign a topic category to the given news: Wor...0.60
10Give the main topic of the news article and th...0.60
14Based on the main theme of given the news arti...0.55
11Classify the topic of the following news as \"W...0.40
\n", + "
" + ], + "text/plain": [ + " prompt score\n", + "0 Your job is to determine whether a news articl... 1.00\n", + "8 In this task, you are given a news article. Yo... 0.90\n", + "9 Identify the central topic of the news article... 0.90\n", + "1 You will be required to classify a news articl... 0.85\n", + "4 Categorize the given news article into one of ... 0.85\n", + "6 Your task is to classify the news item as \"Wor... 0.85\n", + "7 Your objective is to classify a news article i... 0.85\n", + "3 Organize the given news article into its most ... 0.75\n", + "12 Classify news articles according to their prim... 0.75\n", + "13 Choose a word from World, Sports, Business and... 0.75\n", + "2 You will be given a news article and asked to ... 0.60\n", + "5 Assign a topic category to the given news: Wor... 0.60\n", + "10 Give the main topic of the news article and th... 0.60\n", + "14 Based on the main theme of given the news arti... 0.55\n", + "11 Classify the topic of the following news as \"W... 0.40" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "You will be required to classify a news article as World, Sports, Business, or Tech based on its primary topic.\n", + "Unlikely Visionary Behind Private Race to Space SpaceShipOne visionary Peter Diamandis wasn't thinking about history as he stood in the Mojave desert and watched a small, shuttlecock-shaped craft glide back to Earth having nudged the edge of space. He just thought it looked beautiful.I would classify this news article as Tech. The primary topic is Peter Diamandis and his private space program, SpaceShipOne, which is a technological achievement in the field of space exploration.\n", + "You will be required to classify a news article as World, Sports, Business, or Tech based on its primary topic.\n", + "New fertility treatment 'closer' Women may be able to have unfertilised eggs routinely frozen during fertility treatment rather than just embryos after research.I would classify this news article as \"Health/Science\" (which could also fit under \"World\")\n", + "You will be required to classify a news article as World, Sports, Business, or Tech based on its primary topic.\n", + "Well, it worked for Bill So let me get this straight: Barry Bonds used steroids, but he didn #39;t inhale?***Mr. Schwarzenegger, meet your new lieutenant governor.I would classify this news article as Sports. The primary topic appears to be about an athlete, Barry Bonds, and his use of steroids.\n", + "You will be required to classify a news article as World, Sports, Business, or Tech based on its primary topic.\n", + "Mariners fire Melvin after two seasons NOTES He indicated to reporters Sunday that he still wanted to manage the Mariners, but Melvin might get another chance elsewhere. Bavasi said he called an undisclosed club to suggest Melvin be considered for a managerial vacancy.I would classify this news article as Sports.\n", + "You will be required to classify a news article as World, Sports, Business, or Tech based on its primary topic.\n", + "IBM updates WebSphere Application Server software OCTOBER 06, 2004 (COMPUTERWORLD) - IBM today unveiled a new version of its WebSphere Application Server software, which is designed to protect applications from server downtime and boost enterprise efforts to build a service-oriented architecture (SOA).I would classify this news article as Tech. The article is primarily about IBM updating its WebSphere Application Server software, which is a topic related to technology.\n", + "You will be required to classify a news article as World, Sports, Business, or Tech based on its primary topic.\n" + ] + } + ], + "source": [ + "print(df.loc[1, \"prompt\"])" + ] + }, { "cell_type": "code", "execution_count": 11, diff --git a/poetry.lock b/poetry.lock index c3c7763..1938d9c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -1376,6 +1376,17 @@ files = [ {file = "jiter-0.5.0.tar.gz", hash = "sha256:1d916ba875bcab5c5f7d927df998c4cb694d27dceddf3392e58beaf10563368a"}, ] +[[package]] +name = "joblib" +version = "1.4.2" +description = "Lightweight pipelining with Python functions" +optional = false +python-versions = ">=3.8" +files = [ + {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, + {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, +] + [[package]] name = "jsonpatch" version = "1.33" @@ -3361,6 +3372,106 @@ tensorflow = ["safetensors[numpy]", "tensorflow (>=2.11.0)"] testing = ["h5py (>=3.7.0)", "huggingface-hub (>=0.12.1)", "hypothesis (>=6.70.2)", "pytest (>=7.2.0)", "pytest-benchmark (>=4.0.0)", "safetensors[numpy]", "setuptools-rust (>=1.5.2)"] torch = ["safetensors[numpy]", "torch (>=1.10)"] +[[package]] +name = "scikit-learn" +version = "1.5.2" +description = "A set of python modules for machine learning and data mining" +optional = false +python-versions = ">=3.9" +files = [ + {file = "scikit_learn-1.5.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:299406827fb9a4f862626d0fe6c122f5f87f8910b86fe5daa4c32dcd742139b6"}, + {file = "scikit_learn-1.5.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:2d4cad1119c77930b235579ad0dc25e65c917e756fe80cab96aa3b9428bd3fb0"}, + {file = "scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c412ccc2ad9bf3755915e3908e677b367ebc8d010acbb3f182814524f2e5540"}, + {file = "scikit_learn-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3a686885a4b3818d9e62904d91b57fa757fc2bed3e465c8b177be652f4dd37c8"}, + {file = "scikit_learn-1.5.2-cp310-cp310-win_amd64.whl", hash = "sha256:c15b1ca23d7c5f33cc2cb0a0d6aaacf893792271cddff0edbd6a40e8319bc113"}, + {file = "scikit_learn-1.5.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:03b6158efa3faaf1feea3faa884c840ebd61b6484167c711548fce208ea09445"}, + {file = "scikit_learn-1.5.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:1ff45e26928d3b4eb767a8f14a9a6efbf1cbff7c05d1fb0f95f211a89fd4f5de"}, + {file = "scikit_learn-1.5.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f763897fe92d0e903aa4847b0aec0e68cadfff77e8a0687cabd946c89d17e675"}, + {file = "scikit_learn-1.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8b0ccd4a902836493e026c03256e8b206656f91fbcc4fde28c57a5b752561f1"}, + {file = "scikit_learn-1.5.2-cp311-cp311-win_amd64.whl", hash = "sha256:6c16d84a0d45e4894832b3c4d0bf73050939e21b99b01b6fd59cbb0cf39163b6"}, + {file = "scikit_learn-1.5.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:f932a02c3f4956dfb981391ab24bda1dbd90fe3d628e4b42caef3e041c67707a"}, + {file = "scikit_learn-1.5.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:3b923d119d65b7bd555c73be5423bf06c0105678ce7e1f558cb4b40b0a5502b1"}, + {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"}, + {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"}, + {file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"}, + {file = "scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5"}, + {file = "scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908"}, + {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3"}, + {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12"}, + {file = "scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f"}, + {file = "scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"}, + {file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"}, + {file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"}, + {file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca64b3089a6d9b9363cd3546f8978229dcbb737aceb2c12144ee3f70f95684b7"}, + {file = "scikit_learn-1.5.2-cp39-cp39-win_amd64.whl", hash = "sha256:3bed4909ba187aca80580fe2ef370d9180dcf18e621a27c4cf2ef10d279a7efe"}, + {file = "scikit_learn-1.5.2.tar.gz", hash = "sha256:b4237ed7b3fdd0a4882792e68ef2545d5baa50aca3bb45aa7df468138ad8f94d"}, +] + +[package.dependencies] +joblib = ">=1.2.0" +numpy = ">=1.19.5" +scipy = ">=1.6.0" +threadpoolctl = ">=3.1.0" + +[package.extras] +benchmark = ["matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "pandas (>=1.1.5)"] +build = ["cython (>=3.0.10)", "meson-python (>=0.16.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)"] +docs = ["Pillow (>=7.1.2)", "matplotlib (>=3.3.4)", "memory_profiler (>=0.57.0)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "polars (>=0.20.30)", "pooch (>=1.6.0)", "pydata-sphinx-theme (>=0.15.3)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)", "sphinx (>=7.3.7)", "sphinx-copybutton (>=0.5.2)", "sphinx-design (>=0.5.0)", "sphinx-design (>=0.6.0)", "sphinx-gallery (>=0.16.0)", "sphinx-prompt (>=1.4.0)", "sphinx-remove-toctrees (>=1.0.0.post1)", "sphinxcontrib-sass (>=0.3.4)", "sphinxext-opengraph (>=0.9.1)"] +examples = ["matplotlib (>=3.3.4)", "pandas (>=1.1.5)", "plotly (>=5.14.0)", "pooch (>=1.6.0)", "scikit-image (>=0.17.2)", "seaborn (>=0.9.0)"] +install = ["joblib (>=1.2.0)", "numpy (>=1.19.5)", "scipy (>=1.6.0)", "threadpoolctl (>=3.1.0)"] +maintenance = ["conda-lock (==2.5.6)"] +tests = ["black (>=24.3.0)", "matplotlib (>=3.3.4)", "mypy (>=1.9)", "numpydoc (>=1.2.0)", "pandas (>=1.1.5)", "polars (>=0.20.30)", "pooch (>=1.6.0)", "pyamg (>=4.0.0)", "pyarrow (>=12.0.0)", "pytest (>=7.1.2)", "pytest-cov (>=2.9.0)", "ruff (>=0.2.1)", "scikit-image (>=0.17.2)"] + +[[package]] +name = "scipy" +version = "1.14.1" +description = "Fundamental algorithms for scientific computing in Python" +optional = false +python-versions = ">=3.10" +files = [ + {file = "scipy-1.14.1-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:b28d2ca4add7ac16ae8bb6632a3c86e4b9e4d52d3e34267f6e1b0c1f8d87e389"}, + {file = "scipy-1.14.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:d0d2821003174de06b69e58cef2316a6622b60ee613121199cb2852a873f8cf3"}, + {file = "scipy-1.14.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:8bddf15838ba768bb5f5083c1ea012d64c9a444e16192762bd858f1e126196d0"}, + {file = "scipy-1.14.1-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:97c5dddd5932bd2a1a31c927ba5e1463a53b87ca96b5c9bdf5dfd6096e27efc3"}, + {file = "scipy-1.14.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ff0a7e01e422c15739ecd64432743cf7aae2b03f3084288f399affcefe5222d"}, + {file = "scipy-1.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8e32dced201274bf96899e6491d9ba3e9a5f6b336708656466ad0522d8528f69"}, + {file = "scipy-1.14.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8426251ad1e4ad903a4514712d2fa8fdd5382c978010d1c6f5f37ef286a713ad"}, + {file = "scipy-1.14.1-cp310-cp310-win_amd64.whl", hash = "sha256:a49f6ed96f83966f576b33a44257d869756df6cf1ef4934f59dd58b25e0327e5"}, + {file = "scipy-1.14.1-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:2da0469a4ef0ecd3693761acbdc20f2fdeafb69e6819cc081308cc978153c675"}, + {file = "scipy-1.14.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:c0ee987efa6737242745f347835da2cc5bb9f1b42996a4d97d5c7ff7928cb6f2"}, + {file = "scipy-1.14.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3a1b111fac6baec1c1d92f27e76511c9e7218f1695d61b59e05e0fe04dc59617"}, + {file = "scipy-1.14.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:8475230e55549ab3f207bff11ebfc91c805dc3463ef62eda3ccf593254524ce8"}, + {file = "scipy-1.14.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:278266012eb69f4a720827bdd2dc54b2271c97d84255b2faaa8f161a158c3b37"}, + {file = "scipy-1.14.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fef8c87f8abfb884dac04e97824b61299880c43f4ce675dd2cbeadd3c9b466d2"}, + {file = "scipy-1.14.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b05d43735bb2f07d689f56f7b474788a13ed8adc484a85aa65c0fd931cf9ccd2"}, + {file = "scipy-1.14.1-cp311-cp311-win_amd64.whl", hash = "sha256:716e389b694c4bb564b4fc0c51bc84d381735e0d39d3f26ec1af2556ec6aad94"}, + {file = "scipy-1.14.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:631f07b3734d34aced009aaf6fedfd0eb3498a97e581c3b1e5f14a04164a456d"}, + {file = "scipy-1.14.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:af29a935803cc707ab2ed7791c44288a682f9c8107bc00f0eccc4f92c08d6e07"}, + {file = "scipy-1.14.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:2843f2d527d9eebec9a43e6b406fb7266f3af25a751aa91d62ff416f54170bc5"}, + {file = "scipy-1.14.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:eb58ca0abd96911932f688528977858681a59d61a7ce908ffd355957f7025cfc"}, + {file = "scipy-1.14.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:30ac8812c1d2aab7131a79ba62933a2a76f582d5dbbc695192453dae67ad6310"}, + {file = "scipy-1.14.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f9ea80f2e65bdaa0b7627fb00cbeb2daf163caa015e59b7516395fe3bd1e066"}, + {file = "scipy-1.14.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:edaf02b82cd7639db00dbff629995ef185c8df4c3ffa71a5562a595765a06ce1"}, + {file = "scipy-1.14.1-cp312-cp312-win_amd64.whl", hash = "sha256:2ff38e22128e6c03ff73b6bb0f85f897d2362f8c052e3b8ad00532198fbdae3f"}, + {file = "scipy-1.14.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1729560c906963fc8389f6aac023739ff3983e727b1a4d87696b7bf108316a79"}, + {file = "scipy-1.14.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:4079b90df244709e675cdc8b93bfd8a395d59af40b72e339c2287c91860deb8e"}, + {file = "scipy-1.14.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e0cf28db0f24a38b2a0ca33a85a54852586e43cf6fd876365c86e0657cfe7d73"}, + {file = "scipy-1.14.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:0c2f95de3b04e26f5f3ad5bb05e74ba7f68b837133a4492414b3afd79dfe540e"}, + {file = "scipy-1.14.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b99722ea48b7ea25e8e015e8341ae74624f72e5f21fc2abd45f3a93266de4c5d"}, + {file = "scipy-1.14.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5149e3fd2d686e42144a093b206aef01932a0059c2a33ddfa67f5f035bdfe13e"}, + {file = "scipy-1.14.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e4f5a7c49323533f9103d4dacf4e4f07078f360743dec7f7596949149efeec06"}, + {file = "scipy-1.14.1-cp313-cp313-win_amd64.whl", hash = "sha256:baff393942b550823bfce952bb62270ee17504d02a1801d7fd0719534dfb9c84"}, + {file = "scipy-1.14.1.tar.gz", hash = "sha256:5a275584e726026a5699459aa72f828a610821006228e841b94275c4a7c08417"}, +] + +[package.dependencies] +numpy = ">=1.23.5,<2.3" + +[package.extras] +dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy (==1.10.0)", "pycodestyle", "pydevtool", "rich-click", "ruff (>=0.0.292)", "types-psutil", "typing_extensions"] +doc = ["jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.13.1)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<=7.3.7)", "sphinx-design (>=0.4.0)"] +test = ["Cython", "array-api-strict (>=2.0)", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"] + [[package]] name = "seaborn" version = "0.13.2" @@ -3525,6 +3636,17 @@ files = [ doc = ["reno", "sphinx"] test = ["pytest", "tornado (>=4.5)", "typeguard"] +[[package]] +name = "threadpoolctl" +version = "3.5.0" +description = "threadpoolctl" +optional = false +python-versions = ">=3.8" +files = [ + {file = "threadpoolctl-3.5.0-py3-none-any.whl", hash = "sha256:56c1e26c150397e58c4926da8eeee87533b1e32bef131bd4bf6a2f45f3185467"}, + {file = "threadpoolctl-3.5.0.tar.gz", hash = "sha256:082433502dd922bf738de0d8bcc4fdcbf0979ff44c42bd40f5af8a282f6fa107"}, +] + [[package]] name = "tiktoken" version = "0.7.0" @@ -4050,4 +4172,4 @@ multidict = ">=4.0" [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "7f2a00d58b72f3b7cec0991808ffc354c5f10f87e1c78bd9ed4d5369932d243d" +content-hash = "6c9aacc81e214e934481f8764b4ecf4db4366f0860952bf045649e2b405f83a5" diff --git a/promptolution/config.py b/promptolution/config.py index 10000f5..12473fd 100644 --- a/promptolution/config.py +++ b/promptolution/config.py @@ -31,6 +31,8 @@ class Config: downstream_bs (int): Batch size for local downstream LLM. Defaults to None. api_token (str): API token for different APIs, as implemented in LLM classes. Defaults to None. meta_prompt (str): Prompt template for the meta LLM. Defaults to None. + prepend_examplars (bool): rather to do examplar search and prepend few-shot examples. Defaults to False. + n_examplars (int): how many examplars to prepend. Only used if prepend_examplars is True. Defaults to 5. """ task_name: str = None @@ -51,6 +53,8 @@ class Config: downstream_bs: Optional[int] = None api_token: Optional[str] = None meta_prompt: Optional[str] = None + prepend_examplars: Optional[bool] = False + n_examplars: Optional[int] = 5 def __post_init__(self): """Validate the configuration after initialization.""" diff --git a/promptolution/exemplar_selectors/base_exemplar_selector.py b/promptolution/exemplar_selectors/base_exemplar_selector.py new file mode 100644 index 0000000..82deb61 --- /dev/null +++ b/promptolution/exemplar_selectors/base_exemplar_selector.py @@ -0,0 +1,6 @@ +class BaseExemplarSelector: + def __init__(self): + pass + + def select_exemplars(self, prompt, n_examples: int = 5): + pass diff --git a/promptolution/exemplar_selectors/random_selector.py b/promptolution/exemplar_selectors/random_selector.py new file mode 100644 index 0000000..212ed05 --- /dev/null +++ b/promptolution/exemplar_selectors/random_selector.py @@ -0,0 +1,19 @@ +from promptolution.exemplar_selectors.base_exemplar_selector import BaseExemplarSelector + + +class RandomSelector(BaseExemplarSelector): + def __init__(self, task, predictor): + super().__init__() + self.task = task + self.predictor = predictor + + def select_exemplars(self, prompt, n_examples: int = 5): + # use shape for evaluation st correct or incorrect can be identified + examples = [] + while len(examples) < n_examples: + score, seq = self.task.evaluate(prompt, self.predictor, n_samples=1, return_seq=True) + if score == 1: + examples.append(seq[0]) + prompt = "\n".join(examples + [prompt]) + + return prompt diff --git a/promptolution/helpers.py b/promptolution/helpers.py index 11f942d..c1fdb46 100644 --- a/promptolution/helpers.py +++ b/promptolution/helpers.py @@ -6,6 +6,7 @@ import pandas as pd from promptolution.config import Config +from promptolution.exemplar_selectors.random_selector import RandomSelector from promptolution.llms import get_llm from promptolution.optimizers import get_optimizer from promptolution.predictors import Classificator @@ -54,6 +55,10 @@ def run_optimization(config: Config): prompts = optimizer.optimize(n_steps=config.n_steps) + if config.prepend_examplars: + selector = RandomSelector(task, predictor) + prompts = [selector.select_exemplars(p, n_examples=config.n_examplars) for p in prompts] + return prompts diff --git a/promptolution/llms/api_llm.py b/promptolution/llms/api_llm.py index df3410e..1c34709 100644 --- a/promptolution/llms/api_llm.py +++ b/promptolution/llms/api_llm.py @@ -72,14 +72,11 @@ def __init__(self, model_id: str, token: str = None): ValueError: If an unknown model identifier is provided. """ if "claude" in model_id: - ANTHROPIC_API_KEY = open("anthropictoken.txt", "r").read() if token is None else token - self.model = ChatAnthropic(model=model_id, api_key=ANTHROPIC_API_KEY) + self.model = ChatAnthropic(model=model_id, api_key=token) elif "gpt" in model_id: - OPENAI_API_KEY = open("openaitoken.txt", "r").read() if token is None else token - self.model = ChatOpenAI(model=model_id, api_key=OPENAI_API_KEY) + self.model = ChatOpenAI(model=model_id, api_key=token) else: - DEEPINFRA_API_KEY = open("deepinfratoken.txt", "r").read() if token is None else token - self.model = ChatDeepInfra(model_name=model_id, deepinfra_api_token=DEEPINFRA_API_KEY) + self.model = ChatDeepInfra(model_name=model_id, deepinfra_api_token=token) def get_response(self, prompts: List[str]) -> List[str]: """Get responses for a list of prompts in a synchronous manner. diff --git a/promptolution/predictors/base_predictor.py b/promptolution/predictors/base_predictor.py index 941ee9a..f256ae6 100644 --- a/promptolution/predictors/base_predictor.py +++ b/promptolution/predictors/base_predictor.py @@ -12,37 +12,30 @@ class BasePredictor: This class defines the interface that all concrete predictor implementations should follow. Attributes: - model_id (str): Identifier for the model used by the predictor. - classes (List[str]): List of possible class labels for classification tasks. + llm: The language model used for generating predictions. + Methods: predict: An abstract method that should be implemented by subclasses to make predictions based on prompts and input data. """ - def __init__(self, model_id, classes, *args, **kwargs): - """Initialize the BasePredictor. + def __init__(self, llm): + """Initialize the Classificator. Args: - model_id (str): Identifier for the model to use. - classes (List[str]): List of possible class labels. - *args: Variable length argument list. - **kwargs: Arbitrary keyword arguments. + llm: The language model to use for predictions. + classes (List[str]): The list of valid class labels. """ - self.model_id = model_id - self.classes = classes + self.llm = llm - @abstractmethod - def predict( - self, - prompts: List[str], - xs: np.ndarray, - ) -> np.ndarray: + def predict(self, prompts: List[str], xs: np.ndarray, return_seq: bool = False) -> np.ndarray: """Abstract method to make predictions based on prompts and input data. Args: prompts (List[str]): List of prompts to use for prediction. xs (np.ndarray): Array of input data. + return_seq (bool, optional): rather to return the generating sequence Returns: np.ndarray: Array of predictions. @@ -50,6 +43,20 @@ def predict( Raises: NotImplementedError: If not implemented by a subclass. """ + + if isinstance(prompts, str): + prompts = [prompts] + + inputs = [prompt + "\n" + x for prompt in prompts for x in xs] + outputs = self.llm.get_response(inputs) + preds = self._extract_preds(outputs, (len(prompts), len(xs))) + + if return_seq: + return preds, [i + "\n" + o for i, o in zip(inputs, outputs)] + + return preds + + def _extract_preds(self, preds, shape): raise NotImplementedError diff --git a/promptolution/predictors/classificator.py b/promptolution/predictors/classificator.py index 7cf6fe9..c4941e1 100644 --- a/promptolution/predictors/classificator.py +++ b/promptolution/predictors/classificator.py @@ -28,39 +28,36 @@ def __init__(self, llm, classes, *args, **kwargs): llm: The language model to use for predictions. classes (List[str]): The list of valid class labels. """ - self.llm = llm + super().__init__(llm) self.classes = classes - def predict( - self, - prompts: List[str], - xs: np.ndarray, - ) -> np.ndarray: - """Predict classes for given prompts and input data. + # def predict( + # self, + # prompts: List[str], + # xs: np.ndarray, + # ) -> np.ndarray: + # """Predict classes for given prompts and input data. - This method generates predictions using the language model and then - extracts the predicted class from the model's output. + # This method generates predictions using the language model and then + # extracts the predicted class from the model's output. - Args: - prompts (List[str]): The list of prompts to use for prediction. - xs (np.ndarray): The input data array. - - Returns: - np.ndarray: A 2D array of predicted classes, with shape (len(prompts), len(xs)). + # Args: + # prompts (List[str]): The list of prompts to use for prediction. + # xs (np.ndarray): The input data array. - Note: - The method concatenates each prompt with each input data point, - passes it to the language model, and then extracts the first word - in the response that matches a class in self.classes. - """ - if isinstance(prompts, str): - prompts = [prompts] + # Returns: + # np.ndarray: A 2D array of predicted classes, with shape (len(prompts), len(xs)). - preds = self.llm.get_response([prompt + "\n" + x for prompt in prompts for x in xs]) + # Note: + # The method concatenates each prompt with each input data point, + # passes it to the language model, and then extracts the first word + # in the response that matches a class in self.classes. + # """ + def _extract_preds(self, preds, shape): response = [] for pred in preds: - predicted_class = "" + predicted_class = self.classes[0] # use first class as default pred for word in pred.split(" "): word = "".join([c for c in word if c.isalnum()]) if word in self.classes: @@ -69,5 +66,5 @@ def predict( response.append(predicted_class) - response = np.array(response).reshape(len(prompts), len(xs)) + response = np.array(response).reshape(*shape) return response diff --git a/promptolution/tasks/classification_tasks.py b/promptolution/tasks/classification_tasks.py index 9da7a17..efa200f 100644 --- a/promptolution/tasks/classification_tasks.py +++ b/promptolution/tasks/classification_tasks.py @@ -2,9 +2,10 @@ import json from pathlib import Path -from typing import Dict, List, Literal, Optional +from typing import Callable, Dict, List, Literal, Optional import numpy as np +from sklearn.metrics import accuracy_score from promptolution.predictors.base_predictor import BasePredictor from promptolution.tasks.base_task import BaseTask @@ -25,8 +26,9 @@ class ClassificationTask(BaseTask): xs (Optional[np.ndarray]): Input data for the task. ys (Optional[np.ndarray]): Ground truth labels for the task. classes (Optional[List]): List of possible class labels. - split (Literal["dev", "test"]): Dataset split to use. seed (int): Random seed for reproducibility. + split (Literal["dev", "test"]): Dataset split to use. + metric (Callable): Metric to use as an evaluation score for the prompts. Inherits from: BaseTask: The base class for tasks in the promptolution library. @@ -38,6 +40,7 @@ def __init__( task_id: str = "Classification Task", seed: int = 42, split: Literal["dev", "test"] = "dev", + metric: Callable = accuracy_score, ): """Initialize the ClassificationTask. @@ -46,6 +49,7 @@ def __init__( dataset_path (str): Path to the dataset description JSON file. seed (int, optional): Random seed for reproducibility. Defaults to 42. split (Literal["dev", "test"], optional): Dataset split to use. Defaults to "dev". + metric (Callable): Metric to use as an evaluation score for the prompts. Defaults to sklearn's accuracy. """ self.task_id: str = task_id self.path: Path = dataset_path @@ -56,6 +60,7 @@ def __init__( self.ys: Optional[np.ndarray] = None self.classes: Optional[List] = None self.split: Literal["dev", "test"] = split + self.metric = metric self._parse_task() self.reset_seed(seed) @@ -95,7 +100,12 @@ def _parse_task(self): self.ys = np.array(ys) def evaluate( - self, prompts: List[str], predictor: BasePredictor, n_samples: int = 20, subsample: bool = True + self, + prompts: List[str], + predictor: BasePredictor, + n_samples: int = 20, + subsample: bool = True, + return_seq: bool = False, ) -> np.ndarray: """Evaluate a set of prompts using a given predictor. @@ -104,6 +114,7 @@ def evaluate( predictor (BasePredictor): Predictor to use for evaluation. n_samples (int, optional): Number of samples to use if subsampling. Defaults to 20. subsample (bool, optional): Whether to use subsampling. Defaults to True. + return_seq (bool, optional): rather to return the generating sequence Returns: np.ndarray: Array of accuracy scores for each prompt. @@ -120,10 +131,17 @@ def evaluate( ys_subsample = self.ys[indices] # Make predictions on the subsample - preds = predictor.predict(prompts, xs_subsample) + preds = predictor.predict(prompts, xs_subsample, return_seq=return_seq) + + if return_seq: + preds, seqs = preds + + scores = np.array([self.metric(ys_subsample, pred) for pred in preds]) + + if return_seq: + return scores, seqs - # Calculate accuracy: number of correct predictions / total number of predictions per prompt - return np.mean(preds == ys_subsample, axis=1) + return scores def reset_seed(self, seed: int = None): """Reset the random seed.""" diff --git a/pyproject.toml b/pyproject.toml index 659f202..2527b96 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ langchain-core = "^0.2.29" langchain-community = "^0.2.12" pandas = "^2.2.2" tqdm = "^4.66.5" +scikit-learn = "^1.5.2" [tool.poetry.group.dev.dependencies] matplotlib = "^3.9.2" From 4eb0fb0359528205dd65502e2a4280e33c83d9e4 Mon Sep 17 00:00:00 2001 From: finitearth Date: Sun, 13 Oct 2024 20:10:52 +0200 Subject: [PATCH 2/8] added random search selector --- notebooks/getting_started.ipynb | 195 +++++++----------- promptolution/config.py | 5 + promptolution/exemplar_selectors/__init__.py | 29 +++ .../base_exemplar_selector.py | 45 +++- .../random_search_selector.py | 39 ++++ .../exemplar_selectors/random_selector.py | 41 +++- promptolution/helpers.py | 6 +- promptolution/optimizers/__init__.py | 2 +- promptolution/optimizers/evoprompt_de.py | 2 +- promptolution/optimizers/evoprompt_ga.py | 4 +- promptolution/optimizers/opro.py | 2 +- promptolution/predictors/base_predictor.py | 6 +- promptolution/predictors/classificator.py | 23 --- promptolution/tasks/classification_tasks.py | 5 +- 14 files changed, 237 insertions(+), 167 deletions(-) create mode 100644 promptolution/exemplar_selectors/__init__.py create mode 100644 promptolution/exemplar_selectors/random_search_selector.py diff --git a/notebooks/getting_started.ipynb b/notebooks/getting_started.ipynb index 9401734..95ac1a9 100644 --- a/notebooks/getting_started.ipynb +++ b/notebooks/getting_started.ipynb @@ -99,7 +99,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -107,50 +107,28 @@ " task_name=\"agnews\",\n", " ds_path=\"../data_sets/cls/agnews/\",\n", " n_steps=8,\n", - " optimizer=\"evopromptga\",\n", + " optimizer=\"evopromptde\",\n", " meta_llm=\"meta-llama/Meta-Llama-3-8B-Instruct\",\n", " evaluation_llm=\"meta-llama/Meta-Llama-3-8B-Instruct\",\n", " downstream_llm=\"meta-llama/Meta-Llama-3-8B-Instruct\",\n", " api_token=token,\n", " prepend_examplars=True,\n", - " n_examplars=5\n", + " exemplar_selector=\"random_search\",\n", ")" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 13, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "['Your job is to determine whether a news article belongs to the World, Sports, Business, or Tech category based on its primary theme.\\nVa. Tech speeds up Mac OS X supercomputer by almost 20 OCTOBER 26, 2004 (MACCENTRAL) - Virginia Tech #39;s all-Mac OS X supercomputer at the university #39;s Terascale Computing Facility made headlines last year when it was determined to be the third-fastest supercomputer in the world.I would categorize this news article as \"Tech\". The primary theme of the article is about a Virginia Tech supercomputer and its speed, mentioning Mac OS X and referring to it as the third-fastest supercomputer in the world.', 'Your job is to determine whether a news article belongs to the World, Sports, Business, or Tech category based on its primary theme.\\nCellphone That Detects Bad Breath Siemens Mobile, the German telecommunications company, has announced that it is working on a mobile phone that makes users aware when they have bad breath.I would categorize this news article under the \"Tech\" category. The primary theme of the article is a new smartphone feature being developed by Siemens Mobile, which is a technology company.', 'Your job is to determine whether a news article belongs to the World, Sports, Business, or Tech category based on its primary theme.\\nMariners fire Melvin after two seasons NOTES He indicated to reporters Sunday that he still wanted to manage the Mariners, but Melvin might get another chance elsewhere. Bavasi said he called an undisclosed club to suggest Melvin be considered for a managerial vacancy.I would categorize this news article as \"Sports\" as it revolves around the firing of a manager in a professional sports team, specifically the Seattle Mariners.', 'Your job is to determine whether a news article belongs to the World, Sports, Business, or Tech category based on its primary theme.\\nMariners fire Melvin after two seasons NOTES He indicated to reporters Sunday that he still wanted to manage the Mariners, but Melvin might get another chance elsewhere. Bavasi said he called an undisclosed club to suggest Melvin be considered for a managerial vacancy.Based on the primary theme, I would categorize this news article under the Sports category. The article is about the Seattle Mariners firing their manager, Scott Servais, and Bavasi suggesting he be considered for another managerial vacancy, making it a sports-related news article.', 'Your job is to determine whether a news article belongs to the World, Sports, Business, or Tech category based on its primary theme.\\nVenture by Time Warner And Microsoft Unsettles EU A Bethesda company owned by Microsoft Corp. and Time Warner Inc. is the subject of a European Union investigation into whether the company #39;s software will give the two media giants a monopoly in copyright protection.Based on the primary theme of the article, I would categorize it as \"Business\". The article discusses a European Union investigation into a business deal or merger between Time Warner and Microsoft, which is likely to have significant implications for the business industry.']\n", - "[\"You will be required to classify a news article as World, Sports, Business, or Tech based on its primary topic.\\nUnlikely Visionary Behind Private Race to Space SpaceShipOne visionary Peter Diamandis wasn't thinking about history as he stood in the Mojave desert and watched a small, shuttlecock-shaped craft glide back to Earth having nudged the edge of space. He just thought it looked beautiful.I would classify this news article as Tech. The primary topic is Peter Diamandis and his private space program, SpaceShipOne, which is a technological achievement in the field of space exploration.\", 'You will be required to classify a news article as World, Sports, Business, or Tech based on its primary topic.\\nNew fertility treatment \\'closer\\' Women may be able to have unfertilised eggs routinely frozen during fertility treatment rather than just embryos after research.I would classify this news article as \"Health/Science\" (which could also fit under \"World\")', 'You will be required to classify a news article as World, Sports, Business, or Tech based on its primary topic.\\nWell, it worked for Bill So let me get this straight: Barry Bonds used steroids, but he didn #39;t inhale?***Mr. Schwarzenegger, meet your new lieutenant governor.I would classify this news article as Sports. The primary topic appears to be about an athlete, Barry Bonds, and his use of steroids.', 'You will be required to classify a news article as World, Sports, Business, or Tech based on its primary topic.\\nMariners fire Melvin after two seasons NOTES He indicated to reporters Sunday that he still wanted to manage the Mariners, but Melvin might get another chance elsewhere. Bavasi said he called an undisclosed club to suggest Melvin be considered for a managerial vacancy.I would classify this news article as Sports.', 'You will be required to classify a news article as World, Sports, Business, or Tech based on its primary topic.\\nIBM updates WebSphere Application Server software OCTOBER 06, 2004 (COMPUTERWORLD) - IBM today unveiled a new version of its WebSphere Application Server software, which is designed to protect applications from server downtime and boost enterprise efforts to build a service-oriented architecture (SOA).I would classify this news article as Tech. The article is primarily about IBM updating its WebSphere Application Server software, which is a topic related to technology.']\n", - "['You will be given a news article and asked to classify it as World, Sports, Business and Tech, depending on its main topic.\\nGuarding of Ukraine central election commission relieved KIEV, November 3 (Itar-Tass) - Water cannons and armoured personnel carriers were removed from the territory of the Ukrainian central election commission on Wednesday.I would classify this news article as World.', 'You will be given a news article and asked to classify it as World, Sports, Business and Tech, depending on its main topic.\\nUpdate 6: Dutch Steelmaker Buying ISG in \\\\$4.5B Deal Steel tycoon Lakshmi Mittal announced plans Monday to take over US steelmaker International Steel Group as part of a three-way merger to create a global metals mammoth.I would classify this news article as Business.', 'You will be given a news article and asked to classify it as World, Sports, Business and Tech, depending on its main topic.\\nAustralia red-faced BUOYANT New Zealand captain Stephen Fleming was relieved to have enlivened a dormant summer after his Black Caps conjured a stunning four-wicket win against Australia in the opening Sports', 'You will be given a news article and asked to classify it as World, Sports, Business and Tech, depending on its main topic.\\nInjury fears allayed over Celtic #39;s McNamara Injury fears over Celtic skipper Jackie McNamara have subsided after a scan revealed he has not broken his ankle. It was initially feared that McNamara had broken his ankle after a clash against Barcelona #39;s Ludovic Giuly on Tuesday night.I would classify this article as **Sports**, as it is about an athlete (Jackie McNamara) from a sports team (Celtic) and an injury he sustained during a match.', 'You will be given a news article and asked to classify it as World, Sports, Business and Tech, depending on its main topic.\\nNASA Develops Robust Artificial Intelligence for Planetary Rovers NASA is planning to add a strong dose of artificial intelligence (AI) to planetary rovers to make them much more self-reliant, capable of making basic decisions during a mission. Scientists are developing very complex AI software that enables a higher level of robotic intelligence.I would classify this news article as **Tech**.']\n", - "['Organize the given news article into its most fitting category (World, Sports, Business, or Tech) according to its core theme or subject matter.\\nCellphone That Detects Bad Breath Siemens Mobile, the German telecommunications company, has announced that it is working on a mobile phone that makes users aware when they have bad breath.I would categorize this news article under the category of \"Tech\". The core theme of the article is about a new innovative technology being developed by Siemens Mobile, a company that specializes in telecommunications and mobile technologies. The article is discussing a specific feature of a mobile phone that detects bad breath, which falls under the broader category of technology and innovation.', 'Organize the given news article into its most fitting category (World, Sports, Business, or Tech) according to its core theme or subject matter.\\nHenson plays for Cowboys, now will he start? Now Henson is waiting like everyone else to find out if he #39;ll soon be the starting quarterback. Coach Bill Parcells today said starter Vinny Testaverde is questionable because of a sore shoulder and back.I would categorize this news article under \"Sports\", specifically under the category of NFL (National Football League) news. The article is discussing the Dallas Cowboys and their quarterback situation, with a focus on Henson\\'s potential to start and the uncertainty surrounding Vinny Testaverde\\'s injury status.', 'Organize the given news article into its most fitting category (World, Sports, Business, or Tech) according to its core theme or subject matter.\\nRyanair may face Belgian courts Budget airline Ryanair is obliged to be taken to court under EU law if it fails to pay back illegal state aid for flying to Belgium\\'s Charleroi airport.I would categorize this news article as \"Business\". The core theme of the article is that Ryanair, a budget airline, may face Belgian courts due to allegations of unpaid state aid, which is a business-related issue.', 'Organize the given news article into its most fitting category (World, Sports, Business, or Tech) according to its core theme or subject matter.\\nMicrosoft delays SP2 auto update Slowdown will give big companies more time to hunt down compatibility problems, the company says.I would categorize this news article as \"Tech\" because it primarily discusses software updates and their impact on computer systems, specifically the delay of SP2 auto update by Microsoft.', 'Organize the given news article into its most fitting category (World, Sports, Business, or Tech) according to its core theme or subject matter.\\nShould Your Next Car Be New or Used? Used cars have a lot to offer -- if you know what you\\'re doing.I would organize this news article into the category of \"Business\" since its core theme is about purchasing vehicles, specifically whether to buy a new or used car.']\n", - "['Categorize the given news article into one of the four categories (World, Sports, Business, or Tech) based on its primary theme.\\nDolby, Intel sound off for developers Companies plan tools for improving high-end audio for PCs with Intel inside.I would categorize this news article under \"Tech\" as the primary theme is the partnership between Dolby and Intel to develop tools for improving high-end audio for PCs with Intel processors.', 'Categorize the given news article into one of the four categories (World, Sports, Business, or Tech) based on its primary theme.\\nExplosion marks first day of Iraq campaigns Violence marred Iraq #39;s first official day of parliamentary election campaigning Wednesday as interim Iraqi Prime Minister Ayad Allawi announced his candidacy for the Jan. 30 I would categorize this news article under the category \"World\". The primary theme of the article is the political situation in Iraq, specifically the start of parliamentary election campaigns and the announcement of a candidate, which is a significant international event.', 'Categorize the given news article into one of the four categories (World, Sports, Business, or Tech) based on its primary theme.\\nYukos to stop oil exports to China Russian oil giant Yukos has decided to halt two-thirds of its oil exports to China, raising the stakes in its yearlong legal battle with the authorities.I would categorize this news article into the \"Business\" category, as the primary theme is the halt of oil exports to China by the Russian oil giant Yukos, which is a business-related issue.', 'Categorize the given news article into one of the four categories (World, Sports, Business, or Tech) based on its primary theme.\\nHalliburton to Restructure KBR Unit (Reuters) Reuters - Halliburton Co. said on\\\\Thursday it would restructure its KBR unit and may shed the\\\\business if the oil services company\\'s stock performance keeps\\\\lagging its peers.Based on the primary theme of the article, I would categorize it into the \"Business\" category. The article is focused on Halliburton\\'s plans to restructure its KBR unit and potentially sell it off due to lagging stock performance, making it a business-related news story.', \"Categorize the given news article into one of the four categories (World, Sports, Business, or Tech) based on its primary theme.\\nNL Wrap: Ramirez Shines in Cubs Victory Over Reds (Reuters) Reuters - Aramis Ramirez homered three times and\\\\drove in all of Chicago's runs as the Cubs edged the Cincinnati\\\\Reds 5-4 in National League play at the Great American Ballpark\\\\on Thursday.The primary theme of this news article is Sports, as it is about a baseball game played by the Chicago Cubs against the Cincinnati Reds.\"]\n", - "['Assign a topic category to the given news: World, Sports, Business, or Tech.\\nSchwab ex-CEO to get almost \\\\$10M SAN FRANCISCO (CBS.MW) -- Charles Schwab Corp. is paying David Pottruck, ousted as chief executive officer in July, a lump-sum payment of \\\\$6.I would categorize this news under \"Business\".', 'Assign a topic category to the given news: World, Sports, Business, or Tech.\\nQB Tate to Lead Injured Hawkeyes Vs. PSU (AP) AP - No. 25 Iowa is having a hard time keeping its tailbacks healthy.I would assign the topic to the category \"Sports\".', 'Assign a topic category to the given news: World, Sports, Business, or Tech.\\nNew fertility treatment \\'closer\\' Women may be able to have unfertilised eggs routinely frozen during fertility treatment rather than just embryos after research.I would assign this topic to the category \"Health/Science\" (which is often split into multiple categories such as World, Science, Medicine, etc.). The topic is related to medical research and treatment, specifically in the area of fertility.', 'Assign a topic category to the given news: World, Sports, Business, or Tech.\\nInflation remains well contained: US Fed US Federal Reserve said Wednesday that rate hikes can continue at a quot;measured pace quot; as quot;inflation and longer-term inflation expectations remain well contained quot;.I would assign this news to the topic category: Business', 'Assign a topic category to the given news: World, Sports, Business, or Tech.\\nA young monk opens a rare window of candor in Tibet He confirms Chinese are diluting traditional Buddhist culture and the monks\\' role.I would categorize this news under \"World\".']\n", - "['Your task is to classify the news item as \"World\", \"Sports\", \"Tech\" or \"Business\".\\nDolby, Intel sound off for developers Companies plan tools for improving high-end audio for PCs with Intel inside.I would classify this news item as \"Tech\".', 'Your task is to classify the news item as \"World\", \"Sports\", \"Tech\" or \"Business\".\\nU.S. Stocks Climb as Oil Prices Drop NEW YORK (Reuters) - The biggest one-day drop in oil prices since September 2001 helped send U.S. stock prices higher on Wednesday.I would classify this news item as \"Business\".', 'Your task is to classify the news item as \"World\", \"Sports\", \"Tech\" or \"Business\".\\nUN war crime tribunal calls first witness in Milosevic defense <b>...</b> The International Criminal Tribunal for Former Yugoslavia (ICTY) Tuesday called the first witness in the defense case of former Yugoslav President Slobodan Milosevic I would classify this news item as \"World\".', 'Your task is to classify the news item as \"World\", \"Sports\", \"Tech\" or \"Business\".\\nNetworks in a Duel of Technology Some television networks are racing to develop eye-catching score boxes. Fox, for example, now has fire coming out of its baseball scores.I would classify this news item as \"Tech\".', 'Your task is to classify the news item as \"World\", \"Sports\", \"Tech\" or \"Business\".\\nAustralia red-faced BUOYANT New Zealand captain Stephen Fleming was relieved to have enlivened a dormant summer after his Black Caps conjured a stunning four-wicket win against Australia in the opening I classify this news item as \"Sports\".']\n", - "['Your objective is to classify a news article into one of four themes: World, Sports, Business and Tech.\\nInjury fears allayed over Celtic #39;s McNamara Injury fears over Celtic skipper Jackie McNamara have subsided after a scan revealed he has not broken his ankle. It was initially feared that McNamara had broken his ankle after a clash against Barcelona #39;s Ludovic Giuly on Tuesday night.I would classify this news article into the theme: Sports.', \"Your objective is to classify a news article into one of four themes: World, Sports, Business and Tech.\\nNintendo's innards Wondering what your shiny new Nintendo DS looks like on the inside? Lik Sang saves you the voided warranty with a new photo gallery showing a detailed dissection of the new handheld game machine. Missing LinksI would classify this news article into the theme: Tech\", 'Your objective is to classify a news article into one of four themes: World, Sports, Business and Tech.\\nHackers Exploit Microsoft\\'s JPEG Flaw (AP) AP - In a harbinger of security threats to come, hackers have exploited a newly announced flaw in Microsoft Corp. programs and begun circulating malicious code hidden in images that use the popular JPEG format.I would classify this news article into the \"Tech\" theme. The article discusses a security flaw in Microsoft\\'s programs and how hackers are exploiting it, which is a topic related to technology.', 'Your objective is to classify a news article into one of four themes: World, Sports, Business and Tech.\\nNew bid to end talks deadlock Northern Ireland Secretary Paul Murphy and Irish foreign minister Brian Cowen will chair talks involving unionists and nationalists at Stormont aimed at ending the stand-off over proposed changes to devolution.I would classify this news article into the \"World\" theme, as it deals with international relations, politics, and a specific geographical region (Northern Ireland).', 'Your objective is to classify a news article into one of four themes: World, Sports, Business and Tech.\\nGuarding of Ukraine central election commission relieved KIEV, November 3 (Itar-Tass) - Water cannons and armoured personnel carriers were removed from the territory of the Ukrainian central election commission on Wednesday.I would classify this news article into the \"World\" theme, as it is focused on a political event in Ukraine.']\n", - "['In this task, you are given a news article. Your task is to classify the article to one out of the four topics \"World\", \"Sports\", \"Business\", \"Tech\" if the article\"s main topic is relevant to the world, sports, business, and technology, correspondingly. If you are not sure about the topic, choose the closest option.\\nJapan Expresses #39;serious Concern #39; About N Korean Missiles In <b>...</b> BEIJING (AFP) - Japanese negotiators told North Korea they had quot;serious concern quot; about its missile program in talks that extended for a second day in Beijing, a Japanese diplomat said.I would classify this article as \"World\". The article discusses international relations, specifically the talks between Japan and North Korea about North Korea\\'s missile program, which is a global affairs topic.', 'In this task, you are given a news article. Your task is to classify the article to one out of the four topics \"World\", \"Sports\", \"Business\", \"Tech\" if the article\"s main topic is relevant to the world, sports, business, and technology, correspondingly. If you are not sure about the topic, choose the closest option.\\nJuve Lead Is Cut AC Milan closed the gap on leaders Juventus to four points after receiving a helping hand from city-rivals Inter. Having already won their game at Chievo earlier in the day thanks to on-loan Chelsea striker I would classify this article as \"Sports\".', 'In this task, you are given a news article. Your task is to classify the article to one out of the four topics \"World\", \"Sports\", \"Business\", \"Tech\" if the article\"s main topic is relevant to the world, sports, business, and technology, correspondingly. If you are not sure about the topic, choose the closest option.\\nJapan Expresses #39;serious Concern #39; About N Korean Missiles In <b>...</b> BEIJING (AFP) - Japanese negotiators told North Korea they had quot;serious concern quot; about its missile program in talks that extended for a second day in Beijing, a Japanese diplomat said.I would classify this article as \"World\". The article appears to be discussing a meeting or talks between Japan and North Korea regarding North Korea\\'s missile program, which is a global political issue.', 'In this task, you are given a news article. Your task is to classify the article to one out of the four topics \"World\", \"Sports\", \"Business\", \"Tech\" if the article\"s main topic is relevant to the world, sports, business, and technology, correspondingly. If you are not sure about the topic, choose the closest option.\\nMartha Stewart Appeals Conviction Lawyers for Martha Stewart told a federal appeals court that she suffered a \"\"fundamentally unfair\"\" trial at the hands of prosecutors who incorrectly suggested she was accused of insider trading.I would classify this article as \"Business\". The article discusses Martha Stewart\\'s appeal of her conviction, which is related to a business/corporate issue (insider trading).', 'In this task, you are given a news article. Your task is to classify the article to one out of the four topics \"World\", \"Sports\", \"Business\", \"Tech\" if the article\"s main topic is relevant to the world, sports, business, and technology, correspondingly. If you are not sure about the topic, choose the closest option.\\nDynogen enters drug licensing deal Dynogen Pharmaceuticals Inc. of Waltham said it licensed a late-stage drug candidate from Mitsubishi Pharma Corp. Terms weren\\'t disclosed. The compound, pumosetrag, is thought to be helpful to patients suffering from a variety of gastrointestinal conditions, such as irritable bowel syndrome. Dynogen, a privately held firm, said it plans to start new clinical trials with pumosetrag next year. Mitsubishi, part ...I would classify this article as \"Business\". The main topic of the article is about Dynogen Pharmaceuticals Inc. licensing a drug candidate from Mitsubishi Pharma Corp., which is a business-related news.']\n", - "['Identify the central topic of the news article and place it into one of the following categories: World, Sports, Tech, or Business.\\nSun refreshes development tools line Refreshing its development tools arsenal, Sun Microsystems on Tuesday revealed plans for upcoming versions of Java Studio Creator and is unleashing upgrades to the NetBeans open source platform and Java Studio Enterprise.The central topic of the news article is: Tech', 'Identify the central topic of the news article and place it into one of the following categories: World, Sports, Tech, or Business.\\nDynogen enters drug licensing deal Dynogen Pharmaceuticals Inc. of Waltham said it licensed a late-stage drug candidate from Mitsubishi Pharma Corp. Terms weren\\'t disclosed. The compound, pumosetrag, is thought to be helpful to patients suffering from a variety of gastrointestinal conditions, such as irritable bowel syndrome. Dynogen, a privately held firm, said it plans to start new clinical trials with pumosetrag next year. Mitsubishi, part ...The central topic of the news article is \"Dynogen enters drug licensing deal with Mitsubishi Pharma Corp.\" and it falls under the category of **Business**.', 'Identify the central topic of the news article and place it into one of the following categories: World, Sports, Tech, or Business.\\nMac users face rare virus The malware disables Mac OS X\\'s built-in firewall, steals personal information and can destroy data.The central topic of the news article is \"Mac users face rare virus\" and it falls under the category of **Tech**.', 'Identify the central topic of the news article and place it into one of the following categories: World, Sports, Tech, or Business.\\nYahoo Jazzes Up Mobile Search Yahoo Jazzes Up Mobile Search\\\\\\\\Google SMS started serving search results to mobile users in the US just a few days back. Now Yahoo! follows it with even more comprehensive feature set. Google SMS is currently text based for the results; Yahoo! jazzes them up with maps, website icons, as well ...The central topic of the news article is \"Yahoo! Jazzes Up Mobile Search\" and falls under the category of **Tech**.', 'Identify the central topic of the news article and place it into one of the following categories: World, Sports, Tech, or Business.\\nUpdate 6: Dutch Steelmaker Buying ISG in \\\\$4.5B Deal Steel tycoon Lakshmi Mittal announced plans Monday to take over US steelmaker International Steel Group as part of a three-way merger to create a global metals mammoth.The central topic of the news article is: Acquisition of International Steel Group by a Dutch Steelmaker\\n\\nCategory: Business']\n", - "['Give the main topic of the news article and then choose from World, Sports, Tech and Business.\\nWTO Rules Against EU Protection of Goods (AP) AP - The European Union said Friday its system of protecting names for regionally produced foods such as Champagne and Parma ham was strengthened by an initial ruling from the World Trade Organization.The main topic of the news article is \"WTO Rules Against EU Protection of Goods\".\\n\\nI would categorize this article under \"World\".', 'Give the main topic of the news article and then choose from World, Sports, Tech and Business.\\nBentley hoping to avoid getting tripped up by Post A year ago, the Bentley Falcons ended C.W. Post\\'s two-year reign as Northeast-10 Conference football champions. Now the Pioneers have a chance to return the favor. Sitting atop the conference standings, C.W. Post (4-1, 4-0) hosts Bentley tomorrow and can inflict serious damage on any hopes the Falcons have of repeating.The main topic of this news article is \"Football: Bentley vs C.W. Post\".\\n\\nI choose: Sports', 'Give the main topic of the news article and then choose from World, Sports, Tech and Business.\\nHurricane Ivan Threatens Barbados, East Caribbean MIAMI (Reuters) - Powerful Hurricane Ivan, the latest in a busy storm season, rushed over the Atlantic Ocean on Monday and threatened the eastern Caribbean Windward Islands.The main topic of the news article is: \"Hurricane Ivan Threatens Barbados, East Caribbean\"\\n\\nI would categorize this article under: **World**', 'Give the main topic of the news article and then choose from World, Sports, Tech and Business.\\nFrance moves US plutonium south France sends plutonium from scrapped US nuclear warheads south under heavy guard to be reprocessed.The main topic of the news article is: France moves US plutonium\\n\\nCategory: World', 'Give the main topic of the news article and then choose from World, Sports, Tech and Business.\\nSuns Defeat Jazz 115-102 (AP) AP - Steve Nash had 20 points and 14 assists to lead the Phoenix Suns to their eighth straight win, 115-102 over the Utah Jazz on Tuesday night.Main topic of the news article: Suns defeat Jazz in basketball game\\n\\nCategory: Sports']\n", - "['Classify the topic of the following news as \"World\", \"Sports\", \"Tech\" or \"Business\".\\nCellphone That Detects Bad Breath Siemens Mobile, the German telecommunications company, has announced that it is working on a mobile phone that makes users aware when they have bad breath.I would classify the topic of this news as \"Tech\". The article is about a new technology development, specifically a mobile phone feature that detects and alerts users about bad breath, which is a technological innovation.', 'Classify the topic of the following news as \"World\", \"Sports\", \"Tech\" or \"Business\".\\nNetworks in a Duel of Technology Some television networks are racing to develop eye-catching score boxes. Fox, for example, now has fire coming out of its baseball scores.I would classify the topic of this news as \"Tech\"', 'Classify the topic of the following news as \"World\", \"Sports\", \"Tech\" or \"Business\".\\nEgypt Sees Move Towards Israel-Palestinian Peace JERUSALEM (Reuters) - Egypt\\'s official news agency said Tuesday significant progress had been made in international efforts to end Israeli-Palestinian violence, but both sides to the conflict termed talk of a deal premature.I would classify the topic of this news as \"World\".', 'Classify the topic of the following news as \"World\", \"Sports\", \"Tech\" or \"Business\".\\nDolby, Intel sound off for developers Companies plan tools for improving high-end audio for PCs with Intel inside.I would classify the topic of this news as \"Tech\".', 'Classify the topic of the following news as \"World\", \"Sports\", \"Tech\" or \"Business\".\\nAmericans playing it cool in the pool ATHENS Michael Phelps might receive his last gold medal of the Athens Olympics in a way no one would envision. I would classify this topic as \"Sports\".']\n", - "['Classify news articles according to their primary theme, selecting from one of the following categories: World, Sports, Business, or Tech.\\nSpartans outplayed by George Washington in first round of BB amp;T <b>...</b> Washington, DC - The No. 11 MSU basketball team lost 96-83 to George Washington Saturday afternoon in the first-round of the BB amp;T Classic.I would classify this news article as \"Sports\".', 'Classify news articles according to their primary theme, selecting from one of the following categories: World, Sports, Business, or Tech.\\nYukos faces \\\\$6.2bn tax deadline Russian oil giant Yukos says it is about to miss a deadline on a back tax payment of more than \\\\$6bn due from 2002.I would classify this news article as: Business', \"Classify news articles according to their primary theme, selecting from one of the following categories: World, Sports, Business, or Tech.\\nHackers Exploit Microsoft's JPEG Flaw (AP) AP - In a harbinger of security threats to come, hackers have exploited a newly announced flaw in Microsoft Corp. programs and begun circulating malicious code hidden in images that use the popular JPEG format.I would classify this news article as **Tech**.\", \"Classify news articles according to their primary theme, selecting from one of the following categories: World, Sports, Business, or Tech.\\nMinister offers apology for touchy remark Minister of Foreign Affairs Mark Chen () yesterday apologized for the words used to condemn a comment by his Singaporean counterpart who warned against Taiwan independence at a United Nations meeting last week.I would classify this news article as **World**. The article is about a minister's apology for a remark related to a United Nations meeting, which falls within the category of international relations and foreign affairs.\", 'Classify news articles according to their primary theme, selecting from one of the following categories: World, Sports, Business, or Tech.\\nBekele Succeeds Mentor Gebrselassie ATHENS (Reuters) - Ethiopian Kenenisa Bekele succeeded his mentor Haile Gebrselassie as the Olympic 10,000 meters champion Friday after an astonishing final lap in the longest track event.I would classify this news article as \"Sports\".']\n", - "['Choose a word from World, Sports, Business and Tech to categorize the given text.\\nMoody #39;s raises Dell #39;s senior unsecured debt rating NEW YORK, Aug 18 - Moody #39;s Investors Service said on Wednesday it raised the senior unsecured debt rating of Dell Inc. (DELL.O: Quote, Profile, Research) with a stable outlook citing the firm #39;s strong balance sheet and solid operating and financial ...I would categorize this text under \"Business\".', 'Choose a word from World, Sports, Business and Tech to categorize the given text.\\nAmericans playing it cool in the pool ATHENS Michael Phelps might receive his last gold medal of the Athens Olympics in a way no one would envision. I would categorize the given text under \"Sports\".', 'Choose a word from World, Sports, Business and Tech to categorize the given text.\\nSchwab ex-CEO to get almost \\\\$10M SAN FRANCISCO (CBS.MW) -- Charles Schwab Corp. is paying David Pottruck, ousted as chief executive officer in July, a lump-sum payment of \\\\$6.I would categorize the given text under \"Business\".', 'Choose a word from World, Sports, Business and Tech to categorize the given text.\\nHonor System Used for Flu Shots at U.S. Chain Stores By GRETCHEN PARKER BALTIMORE (AP) -- At the Walgreens flu shot clinic, the honor system is in place. If you tell nurses there you have a chronic heart problem, you can get a shot...I would categorize the given text under \"Tech\" because it mentions a specific retail store, Walgreens, and a technological concept, i.e., the \"honor system\", which is typically applied digitally in various applications, including online transactions and digital requests.', 'Choose a word from World, Sports, Business and Tech to categorize the given text.\\nAmerican Alvin Harrison Banned for Four Years Olympic 400 meter relay gold medallist Alvin Harrison has accepted a four-year ban for drugs violations, the United States Anti-Doping Agency said on Tuesday.I would categorize the given text under \"Sports\" as it is related to a sports personality (Alvin Harrison) and his ban due to drug violations in a specific sport (Olympic 400 meter relay).']\n", - "['Based on the main theme of given the news article, categorize it into World, Sports, Business, or Tech.\\nRosy outlook from Greenspan fails to sway many economists WASHINGTON : The upbeat outlook from Federal Reserve Chairman Alan Greenspan has failed to convince many economists, who argue that the United States is likely to see tepid growth at best in the near future.Based on the main theme of the article, I would categorize it into Business. The article is discussing the outlook for the US economy and the opinions of economists, which falls under the category of business news.', 'Based on the main theme of given the news article, categorize it into World, Sports, Business, or Tech.\\nIBM updates WebSphere Application Server software OCTOBER 06, 2004 (COMPUTERWORLD) - IBM today unveiled a new version of its WebSphere Application Server software, which is designed to protect applications from server downtime and boost enterprise efforts to build a service-oriented architecture (SOA).I would categorize this news article as: **Tech**', \"Based on the main theme of given the news article, categorize it into World, Sports, Business, or Tech.\\nSony keeps PSP production low, focuses on PS2 Low initial shipment figures for Sony #39;s forthcoming PlayStation Portable are down to a decision to focus on manufacturing the profitable PlayStation 2 hardware, according to a spokesperson for the Japanese giant.I would categorize this news article as **Tech**. The article discusses Sony's production decisions related to their gaming consoles, specifically the PlayStation Portable (PSP) and the PlayStation 2 (PS2), which are both technology-related topics.\", 'Based on the main theme of given the news article, categorize it into World, Sports, Business, or Tech.\\nJuve Lead Is Cut AC Milan closed the gap on leaders Juventus to four points after receiving a helping hand from city-rivals Inter. Having already won their game at Chievo earlier in the day thanks to on-loan Chelsea striker Based on the main theme of the article, I would categorize it as \"Sports\". The article discusses a soccer match and the standings of teams in the league, which is a clear indication that it belongs to the Sports category.', 'Based on the main theme of given the news article, categorize it into World, Sports, Business, or Tech.\\nDefiant Sharon sets out Gaza plan Israeli PM Ariel Sharon pushes ahead with an accelerated plan to withdraw from the Gaza Strip despite opposition.Based on the main theme of the news article, I would categorize it as **World**. The article is focused on a political event in the Middle East, specifically the plan of Israeli Prime Minister Ariel Sharon to withdraw from the Gaza Strip, which is a significant international event.']\n" - ] - } - ], + "outputs": [], "source": [ "df = run_experiment(config)" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -180,104 +158,44 @@ " \n", " \n", " \n", - " 0\n", - " Your job is to determine whether a news articl...\n", - " 1.00\n", - " \n", - " \n", - " 8\n", - " In this task, you are given a news article. Yo...\n", - " 0.90\n", - " \n", - " \n", - " 9\n", - " Identify the central topic of the news article...\n", - " 0.90\n", - " \n", - " \n", - " 1\n", - " You will be required to classify a news articl...\n", - " 0.85\n", - " \n", - " \n", - " 4\n", - " Categorize the given news article into one of ...\n", - " 0.85\n", - " \n", - " \n", - " 6\n", - " Your task is to classify the news item as \"Wor...\n", - " 0.85\n", - " \n", - " \n", - " 7\n", - " Your objective is to classify a news article i...\n", - " 0.85\n", - " \n", - " \n", " 3\n", - " Organize the given news article into its most ...\n", - " 0.75\n", - " \n", - " \n", - " 12\n", - " Classify news articles according to their prim...\n", - " 0.75\n", - " \n", - " \n", - " 13\n", - " Choose a word from World, Sports, Business and...\n", - " 0.75\n", + " You will be required to classify a news articl...\n", + " 0.70\n", " \n", " \n", " 2\n", - " You will be given a news article and asked to ...\n", - " 0.60\n", - " \n", - " \n", - " 5\n", - " Assign a topic category to the given news: Wor...\n", - " 0.60\n", + " Categorize the news article into one of four c...\n", + " 0.58\n", " \n", " \n", - " 10\n", - " Give the main topic of the news article and th...\n", - " 0.60\n", + " 4\n", + " Your task is to identify the primary topic of ...\n", + " 0.50\n", " \n", " \n", - " 14\n", + " 1\n", " Based on the main theme of given the news arti...\n", - " 0.55\n", + " 0.36\n", " \n", " \n", - " 11\n", + " 0\n", " Classify the topic of the following news as \"W...\n", - " 0.40\n", + " 0.30\n", " \n", " \n", "\n", "" ], "text/plain": [ - " prompt score\n", - "0 Your job is to determine whether a news articl... 1.00\n", - "8 In this task, you are given a news article. Yo... 0.90\n", - "9 Identify the central topic of the news article... 0.90\n", - "1 You will be required to classify a news articl... 0.85\n", - "4 Categorize the given news article into one of ... 0.85\n", - "6 Your task is to classify the news item as \"Wor... 0.85\n", - "7 Your objective is to classify a news article i... 0.85\n", - "3 Organize the given news article into its most ... 0.75\n", - "12 Classify news articles according to their prim... 0.75\n", - "13 Choose a word from World, Sports, Business and... 0.75\n", - "2 You will be given a news article and asked to ... 0.60\n", - "5 Assign a topic category to the given news: Wor... 0.60\n", - "10 Give the main topic of the news article and th... 0.60\n", - "14 Based on the main theme of given the news arti... 0.55\n", - "11 Classify the topic of the following news as \"W... 0.40" + " prompt score\n", + "3 You will be required to classify a news articl... 0.70\n", + "2 Categorize the news article into one of four c... 0.58\n", + "4 Your task is to identify the primary topic of ... 0.50\n", + "1 Based on the main theme of given the news arti... 0.36\n", + "0 Classify the topic of the following news as \"W... 0.30" ] }, - "execution_count": 8, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -288,29 +206,70 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ + "0.3\n", + "Classify the topic of the following news as \"World\", \"Sports\", \"Tech\" or \"Business\".\n", + "Inter Conquer Brussels Inter emerged as the winners over Anderlecht in Brussels to claim the first spot in the Champions League Group G. The nerazzurri took the lead nearly immediately when Adriano from the left served a low powerful \n", + "I would classify the topic of this news as \"Sports\".\n", + "Cellphone That Detects Bad Breath Siemens Mobile, the German telecommunications company, has announced that it is working on a mobile phone that makes users aware when they have bad breath.\n", + "I would classify the topic of this news as \"Tech\". The article is about a new mobile phone feature being developed by a telecommunications company, which falls under the category of technology news.\n", + "Moody #39;s raises Dell #39;s senior unsecured debt rating NEW YORK, Aug 18 - Moody #39;s Investors Service said on Wednesday it raised the senior unsecured debt rating of Dell Inc. (DELL.O: Quote, Profile, Research) with a stable outlook citing the firm #39;s strong balance sheet and solid operating and financial ...\n", + "I would classify this topic as \"Business\".\n", + "======\n", + "0.36\n", + "Based on the main theme of given the news article, categorize it into World, Sports, Business, or Tech.\n", + "Too Good to Be True Baseball's back in Washington. So with that award comes an acceptance letter to highlight those efforts that made this dream come to fruition.\n", + "I would categorize this article as \"Sports\". The article mentions baseball, which is a sport, and an award, which suggests that the article is highlighting a achievement in the sports world.\n", + "NASA Capsule Containing Solar Wind Particles Crashes (Update1) A space capsule containing NASA #39;s first sample from space in more than 30 years crashed into the Utah desert today after its parachutes failed to open.\n", + "I would categorize this news article as \"World\" since it involves a space-related incident and a NASA capsule crashing in the Utah desert, which has international implications and interest.\n", + "Baghdad violence kills at least 25 people and stoking fears that sectarian divisions over when to hold elections could unleash further bloodshed. In a second dawn attack in the capital on Friday, guerrillas killed at \n", + "I would categorize this news article as \"World\". The article is reporting on a violent event in Baghdad, Iraq, and discussing the potential consequences of sectarian divisions, which is a global news story.\n", + "======\n", + "0.58\n", + "Categorize the news article into one of four categories: World, Sports, Business, or Tech, based on its content.\n", + "Cellphone That Detects Bad Breath Siemens Mobile, the German telecommunications company, has announced that it is working on a mobile phone that makes users aware when they have bad breath.\n", + "I would categorize this news article into the \"Tech\" category, as it is about a new mobile phone feature being developed by Siemens Mobile, a telecommunications company.\n", + "Software piracy whistle-blowers get bigger rewards The BSA trade group ups its reward for information on U.K. companies that are using illegal software.\n", + "I would categorize this news article under \"Tech\". The article is about Software piracy and the BSA trade group offering rewards for information on U.K. companies that are using illegal software, which is a tech-related issue.\n", + "PeopleSoft: Oracle's Offer Is Inadequate (AP) AP - Oracle Corp.'s #36;21-per-share hostile takeover bid for PeopleSoft Inc. isn't good enough for shareholders to accept, PeopleSoft director A. George \"\"Skip\"\" Battle said Wednesday.\n", + "I would categorize this news article as \"Business\".\n", + "======\n", + "0.7\n", "You will be required to classify a news article as World, Sports, Business, or Tech based on its primary topic.\n", - "Unlikely Visionary Behind Private Race to Space SpaceShipOne visionary Peter Diamandis wasn't thinking about history as he stood in the Mojave desert and watched a small, shuttlecock-shaped craft glide back to Earth having nudged the edge of space. He just thought it looked beautiful.I would classify this news article as Tech. The primary topic is Peter Diamandis and his private space program, SpaceShipOne, which is a technological achievement in the field of space exploration.\n", - "You will be required to classify a news article as World, Sports, Business, or Tech based on its primary topic.\n", - "New fertility treatment 'closer' Women may be able to have unfertilised eggs routinely frozen during fertility treatment rather than just embryos after research.I would classify this news article as \"Health/Science\" (which could also fit under \"World\")\n", - "You will be required to classify a news article as World, Sports, Business, or Tech based on its primary topic.\n", - "Well, it worked for Bill So let me get this straight: Barry Bonds used steroids, but he didn #39;t inhale?***Mr. Schwarzenegger, meet your new lieutenant governor.I would classify this news article as Sports. The primary topic appears to be about an athlete, Barry Bonds, and his use of steroids.\n", - "You will be required to classify a news article as World, Sports, Business, or Tech based on its primary topic.\n", - "Mariners fire Melvin after two seasons NOTES He indicated to reporters Sunday that he still wanted to manage the Mariners, but Melvin might get another chance elsewhere. Bavasi said he called an undisclosed club to suggest Melvin be considered for a managerial vacancy.I would classify this news article as Sports.\n", - "You will be required to classify a news article as World, Sports, Business, or Tech based on its primary topic.\n", - "IBM updates WebSphere Application Server software OCTOBER 06, 2004 (COMPUTERWORLD) - IBM today unveiled a new version of its WebSphere Application Server software, which is designed to protect applications from server downtime and boost enterprise efforts to build a service-oriented architecture (SOA).I would classify this news article as Tech. The article is primarily about IBM updating its WebSphere Application Server software, which is a topic related to technology.\n", - "You will be required to classify a news article as World, Sports, Business, or Tech based on its primary topic.\n" + "Inflation remains well contained: US Fed US Federal Reserve said Wednesday that rate hikes can continue at a quot;measured pace quot; as quot;inflation and longer-term inflation expectations remain well contained quot;.\n", + "I would classify this news article as **Business**. The primary topic is the US Federal Reserve's statement on inflation and interest rates, which is a business-related news story.\n", + "Honor System Used for Flu Shots at U.S. Chain Stores By GRETCHEN PARKER BALTIMORE (AP) -- At the Walgreens flu shot clinic, the honor system is in place. If you tell nurses there you have a chronic heart problem, you can get a shot...\n", + "I would classify this news article as Business, as it primarily discusses the healthcare services offered by Walgreens, a retail chain store.\n", + "HP unveils low-cost corporate copier With streamlined multifunction device, tech giant aims to gain ground in business copier market.\\\n", + "I would classify this news article as Business. The primary topic is HP's announcement of a new product (a low-cost corporate copier) and its intended impact on the business copier market, making it a business-related article.\n", + "======\n", + "0.5\n", + "Your task is to identify the primary topic of the news artical and choose from World, Sports, Business and Tech.\n", + "Australia red-faced BUOYANT New Zealand captain Stephen Fleming was relieved to have enlivened a dormant summer after his Black Caps conjured a stunning four-wicket win against Australia in the opening \n", + "Based on the article, the primary topic is:\n", + "\n", + "* Sports\n", + "Inter Conquer Brussels Inter emerged as the winners over Anderlecht in Brussels to claim the first spot in the Champions League Group G. The nerazzurri took the lead nearly immediately when Adriano from the left served a low powerful \n", + "The primary topic of the article is:\n", + "\n", + "**Sports**\n", + "Cellphone That Detects Bad Breath Siemens Mobile, the German telecommunications company, has announced that it is working on a mobile phone that makes users aware when they have bad breath.\n", + "The primary topic of this news article is \"Tech\". Specifically, it's about a new smartphone feature developed by Siemens Mobile that detects bad breath.\n", + "======\n" ] } ], "source": [ - "print(df.loc[1, \"prompt\"])" + "for i in range(len(df )): \n", + " print(df.loc[i, \"score\"])\n", + " print(df.loc[i, \"prompt\"])\n", + " print(\"======\")" ] }, { diff --git a/promptolution/config.py b/promptolution/config.py index 12473fd..aa82bc9 100644 --- a/promptolution/config.py +++ b/promptolution/config.py @@ -33,6 +33,9 @@ class Config: meta_prompt (str): Prompt template for the meta LLM. Defaults to None. prepend_examplars (bool): rather to do examplar search and prepend few-shot examples. Defaults to False. n_examplars (int): how many examplars to prepend. Only used if prepend_examplars is True. Defaults to 5. + exemplar_selector (str): which exemplar selector to use. Defaults to None. + n_ds_samples_to_meta (int): how many examples to show of the ds to show to meta-llm + (not applicable to every optimizer) """ task_name: str = None @@ -55,6 +58,8 @@ class Config: meta_prompt: Optional[str] = None prepend_examplars: Optional[bool] = False n_examplars: Optional[int] = 5 + exemplar_selector: Optional[str] = None + n_ds_samples_to_meta: Optional[int] = 2 def __post_init__(self): """Validate the configuration after initialization.""" diff --git a/promptolution/exemplar_selectors/__init__.py b/promptolution/exemplar_selectors/__init__.py new file mode 100644 index 0000000..dc031e2 --- /dev/null +++ b/promptolution/exemplar_selectors/__init__.py @@ -0,0 +1,29 @@ +"""Module for exemplar selectors.""" + +from promptolution.exemplar_selectors.random_search_selector import RandomSearchSelector +from promptolution.exemplar_selectors.random_selector import RandomSelector +from promptolution.predictors.base_predictor import BasePredictor +from promptolution.tasks.base_task import BaseTask + + +def get_exemplar_selector(name: str, task: BaseTask, predictor: BasePredictor): + """Factory function to get an exemplar selector based on the given name. + + Args: + name (str): The name of the exemplar selector to instantiate. + task (BaseTask): The task object to be passed to the selector. + predictor (BasePredictor): The predictor object to be passed to the selector. + + Returns: + BaseExemplarSelector: An instance of the requested exemplar selector. + + Raises: + ValueError: If the requested selector name is not found in the SELECTOR_MAPPING. + """ + if name == "random": + return RandomSelector(task, predictor) + + if name == "random_search": + return RandomSearchSelector(task, predictor) + + raise ValueError(f"Unknown exemplar selector: {name}") diff --git a/promptolution/exemplar_selectors/base_exemplar_selector.py b/promptolution/exemplar_selectors/base_exemplar_selector.py index 82deb61..dd96e7b 100644 --- a/promptolution/exemplar_selectors/base_exemplar_selector.py +++ b/promptolution/exemplar_selectors/base_exemplar_selector.py @@ -1,6 +1,41 @@ -class BaseExemplarSelector: - def __init__(self): - pass +"""Base class for exemplar selectors.""" - def select_exemplars(self, prompt, n_examples: int = 5): - pass +from abc import ABC, abstractmethod +from typing import Any, List, Tuple + +from promptolution.predictors.base_predictor import BasePredictor +from promptolution.tasks.base_task import BaseTask + + +class BaseExemplarSelector(ABC): + """An abstract base class for exemplar selectors. + + This class defines the basic interface and common functionality + that all exemplar selectors should implement. + """ + + def __init__(self, task: BaseTask, predictor: BasePredictor): + """Initialize the BaseExemplarSelector. + + Args: + task (BaseTask): An object representing the task to be performed. + predictor (BasePredictor): An object capable of making predictions based on prompts. + """ + self.task = task + self.predictor = predictor + + @abstractmethod + def select_exemplars(self, prompt: str, n_examples: int = 5) -> str: + """Select exemplars based on the given prompt. + + Args: + prompt (str): The input prompt to base the exemplar selection on. + n_examples (int, optional): The number of exemplars to select. Defaults to 5. + + Returns: + str: A new prompt that includes the original prompt and the selected exemplars. + + Raises: + NotImplementedError: This method should be implemented by subclasses. + """ + raise NotImplementedError("This method should be implemented by subclasses.") diff --git a/promptolution/exemplar_selectors/random_search_selector.py b/promptolution/exemplar_selectors/random_search_selector.py new file mode 100644 index 0000000..c9c1b5a --- /dev/null +++ b/promptolution/exemplar_selectors/random_search_selector.py @@ -0,0 +1,39 @@ +"""Random search exemplar selector.""" + +from promptolution.exemplar_selectors.base_exemplar_selector import BaseExemplarSelector + + +class RandomSearchSelector(BaseExemplarSelector): + """A selector that uses random search to find the best set of exemplars. + + This class implements a strategy that generates multiple sets of random examples, + evaluates their performance, and selects the best performing set. + """ + + def select_exemplars(self, prompt, n_examples: int = 5, n_trials: int = 5): + """Select exemplars using a random search strategy. + + This method generates multiple sets of random examples, evaluates their performance + when combined with the original prompt, and returns the best performing set. + + Args: + prompt (str): The input prompt to base the exemplar selection on. + n_examples (int, optional): The number of exemplars to select in each trial. Defaults to 5. + n_trials (int, optional): The number of random trials to perform. Defaults to 5. + + Returns: + str: The best performing prompt, which includes the original prompt and the selected exemplars. + """ + best_score = 0 + best_prompt = prompt + + for _ in range(n_trials): + _, seq = self.task.evaluate(prompt, self.predictor, n_samples=n_examples, subsample=True, return_seq=True) + # evaluate prompts as few shots + prompt_with_examples = "\n".join([prompt] + seq) + score = self.task.evaluate(prompt_with_examples, self.predictor, subsample=True) + if score > best_score: + best_score = score + best_prompt = prompt_with_examples + + return best_prompt diff --git a/promptolution/exemplar_selectors/random_selector.py b/promptolution/exemplar_selectors/random_selector.py index 212ed05..4d8540c 100644 --- a/promptolution/exemplar_selectors/random_selector.py +++ b/promptolution/exemplar_selectors/random_selector.py @@ -1,19 +1,46 @@ +"""Random exemplar selector.""" + from promptolution.exemplar_selectors.base_exemplar_selector import BaseExemplarSelector +from promptolution.predictors.base_predictor import BasePredictor +from promptolution.tasks.base_task import BaseTask class RandomSelector(BaseExemplarSelector): - def __init__(self, task, predictor): - super().__init__() - self.task = task - self.predictor = predictor + """A selector that randomly selects correct exemplars. + + This class implements a strategy that generates random examples and selects + those that are evaluated as correct until the desired number of exemplars is reached. + """ + + def __init__(self, task: BaseTask, predictor: BasePredictor, desired_score: int = 1): + """Initialize the RandomSelector. + + Args: + task (BaseTask): An object representing the task to be performed. + predictor (BasePredictor): An object capable of making predictions based on prompts. + desired_score (int, optional): The desired score for the exemplars. Defaults to 1. + """ + super().__init__(task, predictor) + self.desired_score = desired_score def select_exemplars(self, prompt, n_examples: int = 5): - # use shape for evaluation st correct or incorrect can be identified + """Select exemplars using a random selection strategy. + + This method generates random examples and selects those that are evaluated as correct + (score == self.desired_score) until the desired number of exemplars is reached. + + Args: + prompt (str): The input prompt to base the exemplar selection on. + n_examples (int, optional): The number of exemplars to select. Defaults to 5. + + Returns: + str: A new prompt that includes the original prompt and the selected exemplars. + """ examples = [] while len(examples) < n_examples: score, seq = self.task.evaluate(prompt, self.predictor, n_samples=1, return_seq=True) - if score == 1: + if score == self.desired_score: examples.append(seq[0]) - prompt = "\n".join(examples + [prompt]) + prompt = "\n".join([prompt] + examples) return prompt diff --git a/promptolution/helpers.py b/promptolution/helpers.py index c1fdb46..6db64c7 100644 --- a/promptolution/helpers.py +++ b/promptolution/helpers.py @@ -6,7 +6,7 @@ import pandas as pd from promptolution.config import Config -from promptolution.exemplar_selectors.random_selector import RandomSelector +from promptolution.exemplar_selectors import get_exemplar_selector from promptolution.llms import get_llm from promptolution.optimizers import get_optimizer from promptolution.predictors import Classificator @@ -56,7 +56,7 @@ def run_optimization(config: Config): prompts = optimizer.optimize(n_steps=config.n_steps) if config.prepend_examplars: - selector = RandomSelector(task, predictor) + selector = get_exemplar_selector(config.exemplar_selector, task, predictor) prompts = [selector.select_exemplars(p, n_examples=config.n_examplars) for p in prompts] return prompts @@ -77,7 +77,7 @@ def run_evaluation(config: Config, prompts: List[str]): llm = get_llm(config.evaluation_llm, token=config.api_token) predictor = Classificator(llm, classes=task.classes) - scores = task.evaluate(prompts, predictor) + scores = task.evaluate(prompts, predictor, subsample=True, n_samples=50) df = pd.DataFrame(dict(prompt=prompts, score=scores)) df = df.sort_values("score", ascending=False) diff --git a/promptolution/optimizers/__init__.py b/promptolution/optimizers/__init__.py index 11d2956..ae4ed93 100644 --- a/promptolution/optimizers/__init__.py +++ b/promptolution/optimizers/__init__.py @@ -66,7 +66,7 @@ def get_optimizer( if config.optimizer == "opro": prompt_template = OPRO_TEMPLATE prompt_template = config.meta_prompt if config.meta_prompt else prompt_template - n_samples = kwargs.get("n_samples", config.n_samples if config is not None else None) + n_samples = kwargs.get("n_samples", config.n_ds_samples_to_meta if config is not None else None) return Opro(prompt_template=prompt_template, n_samples=n_samples, *args, **kwargs) raise ValueError(f"Unknown optimizer: {config.optimizer}") diff --git a/promptolution/optimizers/evoprompt_de.py b/promptolution/optimizers/evoprompt_de.py index 7772a03..8ae1ab3 100644 --- a/promptolution/optimizers/evoprompt_de.py +++ b/promptolution/optimizers/evoprompt_de.py @@ -80,7 +80,7 @@ def optimize(self, n_steps: int) -> List[str]: child_prompts = self.meta_llm.get_response(meta_prompts) child_prompts = [prompt.split("")[-1].split("")[0].strip() for prompt in child_prompts] - child_scores = self.task.evaluate(child_prompts, self.predictor) + child_scores = self.task.evaluate(child_prompts, self.predictor, subsample=True) for i in range(len(self.prompts)): if child_scores[i] > self.scores[i]: diff --git a/promptolution/optimizers/evoprompt_ga.py b/promptolution/optimizers/evoprompt_ga.py index 2393ef5..0c51a17 100644 --- a/promptolution/optimizers/evoprompt_ga.py +++ b/promptolution/optimizers/evoprompt_ga.py @@ -56,7 +56,7 @@ def optimize(self, n_steps: int) -> List[str]: List[str]: The optimized list of prompts after all steps. """ # get scores from task - self.scores = self.task.evaluate(self.prompts, self.predictor).tolist() + self.scores = self.task.evaluate(self.prompts, self.predictor, subsample=True).tolist() # sort prompts by score self.prompts = [prompt for _, prompt in sorted(zip(self.scores, self.prompts), reverse=True)] self.scores = sorted(self.scores, reverse=True) @@ -64,7 +64,7 @@ def optimize(self, n_steps: int) -> List[str]: for _ in range(n_steps): new_prompts = self._crossover(self.prompts, self.scores) prompts = self.prompts + new_prompts - scores = self.scores + self.task.evaluate(new_prompts, self.predictor).tolist() + scores = self.scores + self.task.evaluate(new_prompts, self.predictor, subsample=True).tolist() # sort scores and prompts self.prompts = [prompt for _, prompt in sorted(zip(scores, prompts), reverse=True)][: len(self.prompts)] diff --git a/promptolution/optimizers/opro.py b/promptolution/optimizers/opro.py index ddf504e..4dc66e0 100644 --- a/promptolution/optimizers/opro.py +++ b/promptolution/optimizers/opro.py @@ -78,7 +78,7 @@ def optimize(self, n_steps: int) -> List[str]: prompt = self.meta_llm.get_response([meta_prompt])[0] prompt = prompt.split("")[-1].split("")[0].strip() - score = self.task.evaluate(prompt, self.predictor) + score = self.task.evaluate(prompt, self.predictor, subsample=True) self.prompts.append(prompt) self.scores.append(score) diff --git a/promptolution/predictors/base_predictor.py b/promptolution/predictors/base_predictor.py index f256ae6..3535c62 100644 --- a/promptolution/predictors/base_predictor.py +++ b/promptolution/predictors/base_predictor.py @@ -43,16 +43,14 @@ def predict(self, prompts: List[str], xs: np.ndarray, return_seq: bool = False) Raises: NotImplementedError: If not implemented by a subclass. """ - if isinstance(prompts, str): prompts = [prompts] - inputs = [prompt + "\n" + x for prompt in prompts for x in xs] - outputs = self.llm.get_response(inputs) + outputs = self.llm.get_response([prompt + "\n" + x for prompt in prompts for x in xs]) preds = self._extract_preds(outputs, (len(prompts), len(xs))) if return_seq: - return preds, [i + "\n" + o for i, o in zip(inputs, outputs)] + return preds, [i + "\n" + o for i, o in zip(xs, outputs)] return preds diff --git a/promptolution/predictors/classificator.py b/promptolution/predictors/classificator.py index c4941e1..9679266 100644 --- a/promptolution/predictors/classificator.py +++ b/promptolution/predictors/classificator.py @@ -31,29 +31,6 @@ def __init__(self, llm, classes, *args, **kwargs): super().__init__(llm) self.classes = classes - # def predict( - # self, - # prompts: List[str], - # xs: np.ndarray, - # ) -> np.ndarray: - # """Predict classes for given prompts and input data. - - # This method generates predictions using the language model and then - # extracts the predicted class from the model's output. - - # Args: - # prompts (List[str]): The list of prompts to use for prediction. - # xs (np.ndarray): The input data array. - - # Returns: - # np.ndarray: A 2D array of predicted classes, with shape (len(prompts), len(xs)). - - # Note: - # The method concatenates each prompt with each input data point, - # passes it to the language model, and then extracts the first word - # in the response that matches a class in self.classes. - # """ - def _extract_preds(self, preds, shape): response = [] for pred in preds: diff --git a/promptolution/tasks/classification_tasks.py b/promptolution/tasks/classification_tasks.py index efa200f..f077b59 100644 --- a/promptolution/tasks/classification_tasks.py +++ b/promptolution/tasks/classification_tasks.py @@ -104,7 +104,7 @@ def evaluate( prompts: List[str], predictor: BasePredictor, n_samples: int = 20, - subsample: bool = True, + subsample: bool = False, return_seq: bool = False, ) -> np.ndarray: """Evaluate a set of prompts using a given predictor. @@ -113,7 +113,8 @@ def evaluate( prompts (List[str]): List of prompts to evaluate. predictor (BasePredictor): Predictor to use for evaluation. n_samples (int, optional): Number of samples to use if subsampling. Defaults to 20. - subsample (bool, optional): Whether to use subsampling. Defaults to True. + subsample (bool, optional): Whether to use subsampling. + If set to true, samples a different subset per call. Defaults to False. return_seq (bool, optional): rather to return the generating sequence Returns: From 99bb9bab29ad5a991c5dd5de79bf1420ac1e0ebc Mon Sep 17 00:00:00 2001 From: finitearth Date: Sun, 13 Oct 2024 20:17:26 +0200 Subject: [PATCH 3/8] increased version count --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2527b96..8635296 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "promptolution" -version = "0.2.0" +version = "1.0.0" description = "" authors = ["Tom Zehle, Moritz Schlager, Timo Heiß"] readme = "README.md" From fe4b68b87567ab53911e67dbfc96cd3560ef74c5 Mon Sep 17 00:00:00 2001 From: finitearth Date: Sun, 13 Oct 2024 20:30:13 +0200 Subject: [PATCH 4/8] fix typos --- notebooks/getting_started.ipynb | 221 +------------------------------- promptolution/config.py | 40 +++--- promptolution/helpers.py | 4 +- 3 files changed, 27 insertions(+), 238 deletions(-) diff --git a/notebooks/getting_started.ipynb b/notebooks/getting_started.ipynb index 95ac1a9..d785db5 100644 --- a/notebooks/getting_started.ipynb +++ b/notebooks/getting_started.ipynb @@ -64,18 +64,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "c:\\Users\\tzehl\\Documents\\programming\\promptolution\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], + "outputs": [], "source": [ "from promptolution.helpers import run_experiment\n", "from promptolution.config import Config" @@ -112,7 +103,7 @@ " evaluation_llm=\"meta-llama/Meta-Llama-3-8B-Instruct\",\n", " downstream_llm=\"meta-llama/Meta-Llama-3-8B-Instruct\",\n", " api_token=token,\n", - " prepend_examplars=True,\n", + " prepend_exemplars=True,\n", " exemplar_selector=\"random_search\",\n", ")" ] @@ -203,212 +194,6 @@ "source": [ "df" ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.3\n", - "Classify the topic of the following news as \"World\", \"Sports\", \"Tech\" or \"Business\".\n", - "Inter Conquer Brussels Inter emerged as the winners over Anderlecht in Brussels to claim the first spot in the Champions League Group G. The nerazzurri took the lead nearly immediately when Adriano from the left served a low powerful \n", - "I would classify the topic of this news as \"Sports\".\n", - "Cellphone That Detects Bad Breath Siemens Mobile, the German telecommunications company, has announced that it is working on a mobile phone that makes users aware when they have bad breath.\n", - "I would classify the topic of this news as \"Tech\". The article is about a new mobile phone feature being developed by a telecommunications company, which falls under the category of technology news.\n", - "Moody #39;s raises Dell #39;s senior unsecured debt rating NEW YORK, Aug 18 - Moody #39;s Investors Service said on Wednesday it raised the senior unsecured debt rating of Dell Inc. (DELL.O: Quote, Profile, Research) with a stable outlook citing the firm #39;s strong balance sheet and solid operating and financial ...\n", - "I would classify this topic as \"Business\".\n", - "======\n", - "0.36\n", - "Based on the main theme of given the news article, categorize it into World, Sports, Business, or Tech.\n", - "Too Good to Be True Baseball's back in Washington. So with that award comes an acceptance letter to highlight those efforts that made this dream come to fruition.\n", - "I would categorize this article as \"Sports\". The article mentions baseball, which is a sport, and an award, which suggests that the article is highlighting a achievement in the sports world.\n", - "NASA Capsule Containing Solar Wind Particles Crashes (Update1) A space capsule containing NASA #39;s first sample from space in more than 30 years crashed into the Utah desert today after its parachutes failed to open.\n", - "I would categorize this news article as \"World\" since it involves a space-related incident and a NASA capsule crashing in the Utah desert, which has international implications and interest.\n", - "Baghdad violence kills at least 25 people and stoking fears that sectarian divisions over when to hold elections could unleash further bloodshed. In a second dawn attack in the capital on Friday, guerrillas killed at \n", - "I would categorize this news article as \"World\". The article is reporting on a violent event in Baghdad, Iraq, and discussing the potential consequences of sectarian divisions, which is a global news story.\n", - "======\n", - "0.58\n", - "Categorize the news article into one of four categories: World, Sports, Business, or Tech, based on its content.\n", - "Cellphone That Detects Bad Breath Siemens Mobile, the German telecommunications company, has announced that it is working on a mobile phone that makes users aware when they have bad breath.\n", - "I would categorize this news article into the \"Tech\" category, as it is about a new mobile phone feature being developed by Siemens Mobile, a telecommunications company.\n", - "Software piracy whistle-blowers get bigger rewards The BSA trade group ups its reward for information on U.K. companies that are using illegal software.\n", - "I would categorize this news article under \"Tech\". The article is about Software piracy and the BSA trade group offering rewards for information on U.K. companies that are using illegal software, which is a tech-related issue.\n", - "PeopleSoft: Oracle's Offer Is Inadequate (AP) AP - Oracle Corp.'s #36;21-per-share hostile takeover bid for PeopleSoft Inc. isn't good enough for shareholders to accept, PeopleSoft director A. George \"\"Skip\"\" Battle said Wednesday.\n", - "I would categorize this news article as \"Business\".\n", - "======\n", - "0.7\n", - "You will be required to classify a news article as World, Sports, Business, or Tech based on its primary topic.\n", - "Inflation remains well contained: US Fed US Federal Reserve said Wednesday that rate hikes can continue at a quot;measured pace quot; as quot;inflation and longer-term inflation expectations remain well contained quot;.\n", - "I would classify this news article as **Business**. The primary topic is the US Federal Reserve's statement on inflation and interest rates, which is a business-related news story.\n", - "Honor System Used for Flu Shots at U.S. Chain Stores By GRETCHEN PARKER BALTIMORE (AP) -- At the Walgreens flu shot clinic, the honor system is in place. If you tell nurses there you have a chronic heart problem, you can get a shot...\n", - "I would classify this news article as Business, as it primarily discusses the healthcare services offered by Walgreens, a retail chain store.\n", - "HP unveils low-cost corporate copier With streamlined multifunction device, tech giant aims to gain ground in business copier market.\\\n", - "I would classify this news article as Business. The primary topic is HP's announcement of a new product (a low-cost corporate copier) and its intended impact on the business copier market, making it a business-related article.\n", - "======\n", - "0.5\n", - "Your task is to identify the primary topic of the news artical and choose from World, Sports, Business and Tech.\n", - "Australia red-faced BUOYANT New Zealand captain Stephen Fleming was relieved to have enlivened a dormant summer after his Black Caps conjured a stunning four-wicket win against Australia in the opening \n", - "Based on the article, the primary topic is:\n", - "\n", - "* Sports\n", - "Inter Conquer Brussels Inter emerged as the winners over Anderlecht in Brussels to claim the first spot in the Champions League Group G. The nerazzurri took the lead nearly immediately when Adriano from the left served a low powerful \n", - "The primary topic of the article is:\n", - "\n", - "**Sports**\n", - "Cellphone That Detects Bad Breath Siemens Mobile, the German telecommunications company, has announced that it is working on a mobile phone that makes users aware when they have bad breath.\n", - "The primary topic of this news article is \"Tech\". Specifically, it's about a new smartphone feature developed by Siemens Mobile that detects bad breath.\n", - "======\n" - ] - } - ], - "source": [ - "for i in range(len(df )): \n", - " print(df.loc[i, \"score\"])\n", - " print(df.loc[i, \"prompt\"])\n", - " print(\"======\")" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
promptscore
0Classify the news story into one of the follow...0.95
7You will be required to classify a news articl...0.90
11Classify the topic of the following news as \"W...0.90
12Classify news articles into categories (World,...0.90
4Classify the given news article into one of th...0.85
6Your job is to determine whether a news articl...0.85
13Categorize the provided news article according...0.85
1Categorize the news article into one of four c...0.80
2Your responsibility is to accurately categoriz...0.80
3Identify the primary theme of a news article a...0.80
8In this task, you are given a news article. Yo...0.80
14Accurately categorize news articles into World...0.80
5Accurately categorize the provided news articl...0.75
9Determine the theme of the news item. Choose f...0.75
10Determine the primary theme of the news articl...0.70
\n", - "
" - ], - "text/plain": [ - " prompt score\n", - "0 Classify the news story into one of the follow... 0.95\n", - "7 You will be required to classify a news articl... 0.90\n", - "11 Classify the topic of the following news as \"W... 0.90\n", - "12 Classify news articles into categories (World,... 0.90\n", - "4 Classify the given news article into one of th... 0.85\n", - "6 Your job is to determine whether a news articl... 0.85\n", - "13 Categorize the provided news article according... 0.85\n", - "1 Categorize the news article into one of four c... 0.80\n", - "2 Your responsibility is to accurately categoriz... 0.80\n", - "3 Identify the primary theme of a news article a... 0.80\n", - "8 In this task, you are given a news article. Yo... 0.80\n", - "14 Accurately categorize news articles into World... 0.80\n", - "5 Accurately categorize the provided news articl... 0.75\n", - "9 Determine the theme of the news item. Choose f... 0.75\n", - "10 Determine the primary theme of the news articl... 0.70" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df" - ] } ], "metadata": { diff --git a/promptolution/config.py b/promptolution/config.py index aa82bc9..d7a7c7d 100644 --- a/promptolution/config.py +++ b/promptolution/config.py @@ -13,13 +13,13 @@ class Config: either from a config file or from keyword arguments. Attributes: - task_name (str): Name of the task. - ds_path (str): Path to the dataset. - n_steps (int): Number of optimization steps. - optimizer (str): Name of the optimizer to use. - meta_llm (str): Name of the meta language model. - downstream_llm (str): Name of the downstream language model. - evaluation_llm (str): Name of the evaluation language model. + task_name (str): Name of the task. Should not be None if used. + ds_path (str): Path to the dataset. Should not be None if used. + n_steps (int): Number of optimization steps. Should not be None if used. + optimizer (str): Name of the optimizer to use. Should not be None if used. + meta_llm (str): Name of the meta language model. Should not be None if used. + downstream_llm (str): Name of the downstream language model. Should not be None if used. + evaluation_llm (str): Name of the evaluation language model. Should not be None if used. init_pop_size (int): Initial population size. Defaults to 10. logging_dir (str): Directory for logging. Defaults to "logs/run.csv". experiment_name (str): Name of the experiment. Defaults to "experiment". @@ -27,15 +27,19 @@ class Config: donor_random (bool): Whether to use random donor prompts for EvoPromptDE. Defaults to False. random_seed (int): Random seed for reproducibility. Defaults to 42. selection_mode (str): Selection mode for EvoPromptGA. Defaults to "random". - meta_bs (int): Batch size for local meta LLM. Defaults to None. - downstream_bs (int): Batch size for local downstream LLM. Defaults to None. - api_token (str): API token for different APIs, as implemented in LLM classes. Defaults to None. - meta_prompt (str): Prompt template for the meta LLM. Defaults to None. - prepend_examplars (bool): rather to do examplar search and prepend few-shot examples. Defaults to False. - n_examplars (int): how many examplars to prepend. Only used if prepend_examplars is True. Defaults to 5. - exemplar_selector (str): which exemplar selector to use. Defaults to None. - n_ds_samples_to_meta (int): how many examples to show of the ds to show to meta-llm - (not applicable to every optimizer) + meta_bs (int): Batch size for local meta LLM. Should not be None if llm is run locally. Defaults to None. + downstream_bs (int): Batch size for local downstream LLM. + Should not be None if llm is run locally Defaults to None. + api_token (str): API token for different APIs, as implemented in LLM classes. + Should not be None if APILLM is used. Defaults to None. + meta_prompt (str): Prompt template for the meta LLM. + If None is set, default meta_prompts from template.py will be used. Defaults to None. + prepend_exemplars (bool): rather to do exemplar search and prepend few-shot examples. Defaults to False. + n_exemplars (int): how many exemplars to prepend. Only used if prepend_exemplars is True. Defaults to 5. + exemplar_selector (str): which exemplar selector to use. Should not be None if preped_exemplars is True. + Defaults to None. + n_ds_samples_to_meta (int): how many examples to show of the ds to show to meta-llm + (not applicable to every optimizer) """ task_name: str = None @@ -56,8 +60,8 @@ class Config: downstream_bs: Optional[int] = None api_token: Optional[str] = None meta_prompt: Optional[str] = None - prepend_examplars: Optional[bool] = False - n_examplars: Optional[int] = 5 + prepend_exemplars: Optional[bool] = False + n_exemplars: Optional[int] = 5 exemplar_selector: Optional[str] = None n_ds_samples_to_meta: Optional[int] = 2 diff --git a/promptolution/helpers.py b/promptolution/helpers.py index 6db64c7..be9de9f 100644 --- a/promptolution/helpers.py +++ b/promptolution/helpers.py @@ -55,9 +55,9 @@ def run_optimization(config: Config): prompts = optimizer.optimize(n_steps=config.n_steps) - if config.prepend_examplars: + if config.prepend_exemplars: selector = get_exemplar_selector(config.exemplar_selector, task, predictor) - prompts = [selector.select_exemplars(p, n_examples=config.n_examplars) for p in prompts] + prompts = [selector.select_exemplars(p, n_examples=config.n_exemplars) for p in prompts] return prompts From 5f5298fcf9fcc0ad2a9900169e8134acfe593b2d Mon Sep 17 00:00:00 2001 From: Tom Zehle Date: Sun, 13 Oct 2024 23:05:52 +0200 Subject: [PATCH 5/8] Update promptolution/predictors/base_predictor.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Timo Heiß <87521684+timo282@users.noreply.github.com> --- promptolution/predictors/base_predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/promptolution/predictors/base_predictor.py b/promptolution/predictors/base_predictor.py index 3535c62..6a66596 100644 --- a/promptolution/predictors/base_predictor.py +++ b/promptolution/predictors/base_predictor.py @@ -35,7 +35,7 @@ def predict(self, prompts: List[str], xs: np.ndarray, return_seq: bool = False) Args: prompts (List[str]): List of prompts to use for prediction. xs (np.ndarray): Array of input data. - return_seq (bool, optional): rather to return the generating sequence + return_seq (bool, optional): whether to return the generating sequence Returns: np.ndarray: Array of predictions. From 71e7555315e1da5c1c54d91be43f2e1a4d035fa2 Mon Sep 17 00:00:00 2001 From: Tom Zehle Date: Sun, 13 Oct 2024 23:11:42 +0200 Subject: [PATCH 6/8] Update promptolution/tasks/classification_tasks.py MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Timo Heiß <87521684+timo282@users.noreply.github.com> --- promptolution/tasks/classification_tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/promptolution/tasks/classification_tasks.py b/promptolution/tasks/classification_tasks.py index f077b59..f37deec 100644 --- a/promptolution/tasks/classification_tasks.py +++ b/promptolution/tasks/classification_tasks.py @@ -115,7 +115,7 @@ def evaluate( n_samples (int, optional): Number of samples to use if subsampling. Defaults to 20. subsample (bool, optional): Whether to use subsampling. If set to true, samples a different subset per call. Defaults to False. - return_seq (bool, optional): rather to return the generating sequence + return_seq (bool, optional): whether to return the generating sequence Returns: np.ndarray: Array of accuracy scores for each prompt. From bf31cb15cbd032b8eb8a26afece2908c87061546 Mon Sep 17 00:00:00 2001 From: finitearth Date: Sun, 13 Oct 2024 23:17:50 +0200 Subject: [PATCH 7/8] resolve comments --- notebooks/getting_started.ipynb | 92 +++++++++++++------ promptolution/config.py | 20 ++-- promptolution/exemplar_selectors/__init__.py | 20 ++-- .../random_search_selector.py | 4 +- .../exemplar_selectors/random_selector.py | 2 +- promptolution/helpers.py | 3 +- promptolution/optimizers/base_optimizer.py | 10 +- promptolution/optimizers/evoprompt_de.py | 6 +- promptolution/optimizers/evoprompt_ga.py | 11 ++- promptolution/optimizers/opro.py | 6 +- promptolution/predictors/base_predictor.py | 14 ++- promptolution/predictors/classificator.py | 15 ++- 12 files changed, 143 insertions(+), 60 deletions(-) diff --git a/notebooks/getting_started.ipynb b/notebooks/getting_started.ipynb index d785db5..0c38f11 100644 --- a/notebooks/getting_started.ipynb +++ b/notebooks/getting_started.ipynb @@ -44,7 +44,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -64,9 +64,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\tzehl\\Documents\\programming\\promptolution\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "from promptolution.helpers import run_experiment\n", "from promptolution.config import Config" @@ -90,13 +99,13 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "config = Config(\n", - " task_name=\"agnews\",\n", - " ds_path=\"../data_sets/cls/agnews/\",\n", + " task_name=\"subj\",\n", + " ds_path=\"../data_sets/cls/subj/\",\n", " n_steps=8,\n", " optimizer=\"evopromptde\",\n", " meta_llm=\"meta-llama/Meta-Llama-3-8B-Instruct\",\n", @@ -105,12 +114,13 @@ " api_token=token,\n", " prepend_exemplars=True,\n", " exemplar_selector=\"random_search\",\n", + " n_exemplars=3,\n", ")" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -119,7 +129,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -149,28 +159,53 @@ " \n", " \n", " \n", + " 1\n", + " evaluate each sentence as either objective or ...\n", + " 0.80\n", + " \n", + " \n", + " 8\n", + " As a linguist, analyze a statement from a movi...\n", + " 0.80\n", + " \n", + " \n", " 3\n", - " You will be required to classify a news articl...\n", - " 0.70\n", + " identify whether the given sentence was expres...\n", + " 0.65\n", " \n", " \n", - " 2\n", - " Categorize the news article into one of four c...\n", - " 0.58\n", + " 5\n", + " Analyze the textual content of a given stateme...\n", + " 0.65\n", " \n", " \n", - " 4\n", - " Your task is to identify the primary topic of ...\n", + " 9\n", + " determine the classification of each sentence ...\n", + " 0.60\n", + " \n", + " \n", + " 0\n", + " evaluate each statement as either subjective o...\n", " 0.50\n", " \n", " \n", - " 1\n", - " Based on the main theme of given the news arti...\n", - " 0.36\n", + " 2\n", + " Classify the sentence according to its subject...\n", + " 0.40\n", " \n", " \n", - " 0\n", - " Classify the topic of the following news as \"W...\n", + " 6\n", + " As a classifier, interpret phrases in movie re...\n", + " 0.35\n", + " \n", + " \n", + " 7\n", + " and\\n\\nshae is about to return to bed when she...\n", + " 0.35\n", + " \n", + " \n", + " 4\n", + " Analyze reviews and label them as subjective o...\n", " 0.30\n", " \n", " \n", @@ -179,14 +214,19 @@ ], "text/plain": [ " prompt score\n", - "3 You will be required to classify a news articl... 0.70\n", - "2 Categorize the news article into one of four c... 0.58\n", - "4 Your task is to identify the primary topic of ... 0.50\n", - "1 Based on the main theme of given the news arti... 0.36\n", - "0 Classify the topic of the following news as \"W... 0.30" + "1 evaluate each sentence as either objective or ... 0.80\n", + "8 As a linguist, analyze a statement from a movi... 0.80\n", + "3 identify whether the given sentence was expres... 0.65\n", + "5 Analyze the textual content of a given stateme... 0.65\n", + "9 determine the classification of each sentence ... 0.60\n", + "0 evaluate each statement as either subjective o... 0.50\n", + "2 Classify the sentence according to its subject... 0.40\n", + "6 As a classifier, interpret phrases in movie re... 0.35\n", + "7 and\\n\\nshae is about to return to bed when she... 0.35\n", + "4 Analyze reviews and label them as subjective o... 0.30" ] }, - "execution_count": 14, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } diff --git a/promptolution/config.py b/promptolution/config.py index d7a7c7d..dac2d9a 100644 --- a/promptolution/config.py +++ b/promptolution/config.py @@ -2,7 +2,7 @@ import configparser from dataclasses import dataclass from pathlib import Path -from typing import Any, Dict, Optional +from typing import Any, Dict, Literal, Optional @dataclass @@ -33,13 +33,14 @@ class Config: api_token (str): API token for different APIs, as implemented in LLM classes. Should not be None if APILLM is used. Defaults to None. meta_prompt (str): Prompt template for the meta LLM. - If None is set, default meta_prompts from template.py will be used. Defaults to None. - prepend_exemplars (bool): rather to do exemplar search and prepend few-shot examples. Defaults to False. - n_exemplars (int): how many exemplars to prepend. Only used if prepend_exemplars is True. Defaults to 5. - exemplar_selector (str): which exemplar selector to use. Should not be None if preped_exemplars is True. - Defaults to None. - n_ds_samples_to_meta (int): how many examples to show of the ds to show to meta-llm - (not applicable to every optimizer) + If None is set, default meta_prompts from template.py will be used. Defaults to None. + prepend_exemplars (bool): rather to do exemplar search and prepend few-shot examples. Defaults to False. + n_exemplars (int): how many exemplars to prepend. Only used if prepend_exemplars is True. Defaults to 5. + exemplar_selector (str): which exemplar selector to use. Should not be None if preped_exemplars is True. + Defaults to None. + n_ds_samples_to_meta (int): how many examples to show of the ds to show to meta-llm + (not applicable to every optimizer) + n_eval_samples (int): how many examples to show to evaluation llm for evaluation. """ task_name: str = None @@ -55,7 +56,7 @@ class Config: include_task_desc: bool = True donor_random: bool = False random_seed: int = 42 - selection_mode: Optional[str] = "random" + selection_mode: Optional[Literal["random", "wheel", "tour"]] = "random" meta_bs: Optional[int] = None downstream_bs: Optional[int] = None api_token: Optional[str] = None @@ -64,6 +65,7 @@ class Config: n_exemplars: Optional[int] = 5 exemplar_selector: Optional[str] = None n_ds_samples_to_meta: Optional[int] = 2 + n_eval_samples: Optional[int] = 20 def __post_init__(self): """Validate the configuration after initialization.""" diff --git a/promptolution/exemplar_selectors/__init__.py b/promptolution/exemplar_selectors/__init__.py index dc031e2..f234373 100644 --- a/promptolution/exemplar_selectors/__init__.py +++ b/promptolution/exemplar_selectors/__init__.py @@ -1,12 +1,19 @@ """Module for exemplar selectors.""" +from typing import Literal + from promptolution.exemplar_selectors.random_search_selector import RandomSearchSelector from promptolution.exemplar_selectors.random_selector import RandomSelector from promptolution.predictors.base_predictor import BasePredictor from promptolution.tasks.base_task import BaseTask +SELECTOR_MAP = { + "random": RandomSelector, + "random_search": RandomSearchSelector, +} + -def get_exemplar_selector(name: str, task: BaseTask, predictor: BasePredictor): +def get_exemplar_selector(name: Literal["random", "random_search"], task: BaseTask, predictor: BasePredictor): """Factory function to get an exemplar selector based on the given name. Args: @@ -18,12 +25,9 @@ def get_exemplar_selector(name: str, task: BaseTask, predictor: BasePredictor): BaseExemplarSelector: An instance of the requested exemplar selector. Raises: - ValueError: If the requested selector name is not found in the SELECTOR_MAPPING. + ValueError: If the requested selector name is not found. """ - if name == "random": - return RandomSelector(task, predictor) - - if name == "random_search": - return RandomSearchSelector(task, predictor) + if name not in SELECTOR_MAP: + raise ValueError(f"Exemplar selector '{name}' not found. Available selectors: {list(SELECTOR_MAP.keys())}") - raise ValueError(f"Unknown exemplar selector: {name}") + return SELECTOR_MAP[name](task, predictor) diff --git a/promptolution/exemplar_selectors/random_search_selector.py b/promptolution/exemplar_selectors/random_search_selector.py index c9c1b5a..005fef8 100644 --- a/promptolution/exemplar_selectors/random_search_selector.py +++ b/promptolution/exemplar_selectors/random_search_selector.py @@ -29,8 +29,8 @@ def select_exemplars(self, prompt, n_examples: int = 5, n_trials: int = 5): for _ in range(n_trials): _, seq = self.task.evaluate(prompt, self.predictor, n_samples=n_examples, subsample=True, return_seq=True) - # evaluate prompts as few shots - prompt_with_examples = "\n".join([prompt] + seq) + prompt_with_examples = "\n\n".join([prompt] + seq) + "\n\n" + # evaluate prompts as few shot prompt score = self.task.evaluate(prompt_with_examples, self.predictor, subsample=True) if score > best_score: best_score = score diff --git a/promptolution/exemplar_selectors/random_selector.py b/promptolution/exemplar_selectors/random_selector.py index 4d8540c..5fe01ae 100644 --- a/promptolution/exemplar_selectors/random_selector.py +++ b/promptolution/exemplar_selectors/random_selector.py @@ -41,6 +41,6 @@ def select_exemplars(self, prompt, n_examples: int = 5): score, seq = self.task.evaluate(prompt, self.predictor, n_samples=1, return_seq=True) if score == self.desired_score: examples.append(seq[0]) - prompt = "\n".join([prompt] + examples) + prompt = "\n\n".join([prompt] + examples) + "\n\n" return prompt diff --git a/promptolution/helpers.py b/promptolution/helpers.py index be9de9f..9d776a9 100644 --- a/promptolution/helpers.py +++ b/promptolution/helpers.py @@ -51,6 +51,7 @@ def run_optimization(config: Config): initial_prompts=init_pop, task=task, predictor=predictor, + n_eval_samples=config.n_eval_samples, ) prompts = optimizer.optimize(n_steps=config.n_steps) @@ -77,7 +78,7 @@ def run_evaluation(config: Config, prompts: List[str]): llm = get_llm(config.evaluation_llm, token=config.api_token) predictor = Classificator(llm, classes=task.classes) - scores = task.evaluate(prompts, predictor, subsample=True, n_samples=50) + scores = task.evaluate(prompts, predictor, subsample=True, n_samples=config.n_eval_samples) df = pd.DataFrame(dict(prompt=prompts, score=scores)) df = df.sort_values("score", ascending=False) diff --git a/promptolution/optimizers/base_optimizer.py b/promptolution/optimizers/base_optimizer.py index effc329..2cac685 100644 --- a/promptolution/optimizers/base_optimizer.py +++ b/promptolution/optimizers/base_optimizer.py @@ -26,12 +26,20 @@ class BaseOptimizer(ABC): predictor (optional): Predictor for prompt evaluation. Defaults to None. """ - def __init__(self, initial_prompts: list[str], task: BaseTask, callbacks: list[Callable] = [], predictor=None): + def __init__( + self, + initial_prompts: list[str], + task: BaseTask, + callbacks: list[Callable] = [], + predictor=None, + n_eval_samples=20, + ): """Initialize the BaseOptimizer.""" self.prompts = initial_prompts self.task = task self.callbacks = callbacks self.predictor = predictor + self.n_eval_samples = n_eval_samples @abstractmethod def optimize(self, n_steps: int) -> List[str]: diff --git a/promptolution/optimizers/evoprompt_de.py b/promptolution/optimizers/evoprompt_de.py index 8ae1ab3..17d74b3 100644 --- a/promptolution/optimizers/evoprompt_de.py +++ b/promptolution/optimizers/evoprompt_de.py @@ -51,7 +51,7 @@ def optimize(self, n_steps: int) -> List[str]: Returns: List[str]: The optimized list of prompts after all steps. """ - self.scores = self.task.evaluate(self.prompts, self.predictor) + self.scores = self.task.evaluate(self.prompts, self.predictor, subsample=True, n_samples=self.n_eval_samples) self.prompts = [prompt for _, prompt in sorted(zip(self.scores, self.prompts), reverse=True)] self.scores = sorted(self.scores, reverse=True) @@ -80,7 +80,9 @@ def optimize(self, n_steps: int) -> List[str]: child_prompts = self.meta_llm.get_response(meta_prompts) child_prompts = [prompt.split("")[-1].split("")[0].strip() for prompt in child_prompts] - child_scores = self.task.evaluate(child_prompts, self.predictor, subsample=True) + child_scores = self.task.evaluate( + child_prompts, self.predictor, subsample=True, n_samples=self.n_eval_samples + ) for i in range(len(self.prompts)): if child_scores[i] > self.scores[i]: diff --git a/promptolution/optimizers/evoprompt_ga.py b/promptolution/optimizers/evoprompt_ga.py index 0c51a17..2ec789b 100644 --- a/promptolution/optimizers/evoprompt_ga.py +++ b/promptolution/optimizers/evoprompt_ga.py @@ -56,7 +56,9 @@ def optimize(self, n_steps: int) -> List[str]: List[str]: The optimized list of prompts after all steps. """ # get scores from task - self.scores = self.task.evaluate(self.prompts, self.predictor, subsample=True).tolist() + self.scores = self.task.evaluate( + self.prompts, self.predictor, subsample=True, n_samples=self.n_eval_samples + ).tolist() # sort prompts by score self.prompts = [prompt for _, prompt in sorted(zip(self.scores, self.prompts), reverse=True)] self.scores = sorted(self.scores, reverse=True) @@ -64,7 +66,12 @@ def optimize(self, n_steps: int) -> List[str]: for _ in range(n_steps): new_prompts = self._crossover(self.prompts, self.scores) prompts = self.prompts + new_prompts - scores = self.scores + self.task.evaluate(new_prompts, self.predictor, subsample=True).tolist() + scores = ( + self.scores + + self.task.evaluate( + new_prompts, self.predictor, subsample=True, n_samples=self.n_eval_samples + ).tolist() + ) # sort scores and prompts self.prompts = [prompt for _, prompt in sorted(zip(scores, prompts), reverse=True)][: len(self.prompts)] diff --git a/promptolution/optimizers/opro.py b/promptolution/optimizers/opro.py index 4dc66e0..b2fa645 100644 --- a/promptolution/optimizers/opro.py +++ b/promptolution/optimizers/opro.py @@ -38,7 +38,9 @@ def __init__(self, meta_llm: BaseLLM, n_samples: int = 2, prompt_template: str = super().__init__(**args) self.meta_prompt = self.meta_prompt.replace("", self.task.description) - self.scores = [self.task.evaluate(p, self.predictor) for p in self.prompts] + self.scores = [ + self.task.evaluate(p, self.predictor, subsample=True, n_samples=self.n_eval_samples) for p in self.prompts + ] def _sample_examples(self): """Sample examples from the task dataset with their label. @@ -78,7 +80,7 @@ def optimize(self, n_steps: int) -> List[str]: prompt = self.meta_llm.get_response([meta_prompt])[0] prompt = prompt.split("")[-1].split("")[0].strip() - score = self.task.evaluate(prompt, self.predictor, subsample=True) + score = self.task.evaluate(prompt, self.predictor, subsample=True, n_samples=self.n_eval_samples) self.prompts.append(prompt) self.scores.append(score) diff --git a/promptolution/predictors/base_predictor.py b/promptolution/predictors/base_predictor.py index 6a66596..e2be1f4 100644 --- a/promptolution/predictors/base_predictor.py +++ b/promptolution/predictors/base_predictor.py @@ -1,10 +1,12 @@ """Base module for predictors.""" from abc import abstractmethod -from typing import List +from typing import List, Tuple import numpy as np +from promptolution.llms.base_llm import BaseLLM + class BasePredictor: """Abstract base class for predictors in the promptolution library. @@ -20,7 +22,7 @@ class BasePredictor: to make predictions based on prompts and input data. """ - def __init__(self, llm): + def __init__(self, llm: BaseLLM): """Initialize the Classificator. Args: @@ -54,7 +56,13 @@ def predict(self, prompts: List[str], xs: np.ndarray, return_seq: bool = False) return preds - def _extract_preds(self, preds, shape): + def _extract_preds(self, preds: List[str], shape: Tuple[int, int]) -> np.ndarray: + """Extract class labels from the predictions, based on the list of valid class labels. + + Args: + preds: The raw predictions from the language model. + shape: The shape of the output array: (n_prompts, n_samples). + """ raise NotImplementedError diff --git a/promptolution/predictors/classificator.py b/promptolution/predictors/classificator.py index 9679266..f33bfc6 100644 --- a/promptolution/predictors/classificator.py +++ b/promptolution/predictors/classificator.py @@ -1,6 +1,6 @@ """Module for classification predictors.""" -from typing import List +from typing import List, Tuple import numpy as np @@ -11,7 +11,10 @@ class Classificator(BasePredictor): """A predictor class for classification tasks using language models. This class takes a language model and a list of classes, and provides a method - to predict classes for given prompts and input data. + to predict classes for given prompts and input data. The class labels are extracted + by matching the words in the prediction with the list of valid class labels. + The first occurrence of a valid class label in the prediction is used as the predicted class. + If no valid class label is found, the first class label in the list is used as the default prediction. Attributes: llm: The language model used for generating predictions. @@ -31,7 +34,13 @@ def __init__(self, llm, classes, *args, **kwargs): super().__init__(llm) self.classes = classes - def _extract_preds(self, preds, shape): + def _extract_preds(self, preds: List[str], shape: Tuple[int, int]) -> np.ndarray: + """Extract class labels from the predictions, based on the list of valid class labels. + + Args: + preds: The raw predictions from the language model. + shape: The shape of the output array: (n_prompts, n_samples). + """ response = [] for pred in preds: predicted_class = self.classes[0] # use first class as default pred From e2fed79c59ec5c3a277d6a591a9af6d32abd3668 Mon Sep 17 00:00:00 2001 From: finitearth Date: Sun, 13 Oct 2024 23:32:09 +0200 Subject: [PATCH 8/8] resolve comments --- promptolution/predictors/base_predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/promptolution/predictors/base_predictor.py b/promptolution/predictors/base_predictor.py index e2be1f4..eea7f74 100644 --- a/promptolution/predictors/base_predictor.py +++ b/promptolution/predictors/base_predictor.py @@ -23,7 +23,7 @@ class BasePredictor: """ def __init__(self, llm: BaseLLM): - """Initialize the Classificator. + """Initialize the BasePredictor. Args: llm: The language model to use for predictions.