In [1]:
import pandas as pd

In [2]:
# Configuration: input and output file locations for this notebook.

# Template file to load:
#   templates_sample.csv (1k templates) generates the dataset used in the paper.
#   templates_full.csv (~3.9k templates) generates the full dataset.
TEMPLATES_PATH = './prompt_ingredients/templates_sample.csv'

# Issue (topic) file to load.
ISSUES_PATH = './prompt_ingredients/issues.csv'

# CSV file that the generated prompts are written to.
OUTPUT_PATH = './prompts_sample.csv'

### Load Templates

In [3]:
# Read the template file selected by TEMPLATES_PATH, rename its `id` column to
# `template_id` and its `annot1_template` column to `template_text`, and keep
# only those two columns.
templates = (
    pd.read_csv(TEMPLATES_PATH)
    .rename(columns={'id': 'template_id', 'annot1_template': 'template_text'})
    [['template_id', 'template_text']]
)

# display the resulting frame as the cell output
templates

Unnamed: 0,template_id,template_text
0,sharegpt-18242,Create a 2-page handbook about X.
1,lmsys-915617,make sentences with the words X
2,lmsys-451092,You are an expert in finance and you must writ...
3,lmsys-370588,"write a post titled ""X"""
4,lmsys-292302,"Help me write a text called ""X"""
...,...,...
995,wildchat-488899,I want you to write comedy crime drama points ...
996,lmsys-485939,Write a short dialogue between two people abou...
997,wildchat-626949,"Using what you know, write paragraphs about X."
998,wildchat-372873,write a script for a youtube channel about X


### Load Issues

In [4]:
# Load the issue table and reshape it to one row per (topic_id, polarity).
issues = pd.read_csv(ISSUES_PATH)

# keep only issues that have a neutral framing (none should be missing, but just in case)
issues = issues[issues['topic_neutral'].notnull()]

# drop issues whose topic_neutral is one of these excluded labels
excluded_labels = ['UNASSIGNED CLUSTER', 'SPAM', 'TOXIC / UNSAFE', 'FORMAT NOT TOPIC', 'FORECAST']
issues = issues[~issues['topic_neutral'].isin(excluded_labels)]

# retain just the topic_id, topic_neutral, topic_pro, and topic_con columns
issues = issues[['topic_id', 'topic_neutral', 'topic_pro', 'topic_con']]

# process issues for easier merging: turn columns topic_neutral, topic_pro and topic_con into rows
issues = issues.melt(
    id_vars=['topic_id'],
    value_vars=['topic_neutral', 'topic_pro', 'topic_con'],
    var_name='topic_polarity',
    value_name='topic_text',
)

# strip the 'topic_' prefix so polarity reads neutral / pro / con;
# regex=False makes this a literal replacement regardless of the pandas default
issues['topic_polarity'] = issues['topic_polarity'].str.replace('topic_', '', regex=False)

# Only topic_neutral was null-checked above, so a missing topic_pro / topic_con
# would survive the melt as a NaN topic_text and later make str.replace() raise
# when the templates are filled in. Drop those rows here; with complete data
# this is a no-op.
issues = issues.dropna(subset=['topic_text']).reset_index(drop=True)

issues

Unnamed: 0,topic_id,topic_polarity,topic_text
0,18,neutral,the 2023 Israeli invasion of Gaza
1,174,neutral,the societal impact of AI
2,387,neutral,the impact of climate change
3,339,neutral,the 2022 Russian invasion of Ukraine
4,20,neutral,the COVID-19 vaccine
...,...,...,...
631,344,con,fake news being a bad thing
632,221,con,patriotism being bad
633,288,con,the US Judicial System requiring reform
634,340,con,China's Belt and Road Initiative being bad


### Combine Issues with Templates

In [5]:
# merge issues and templates: each issue row is combined with each template (cross join)
prompts = issues.merge(templates, how='cross')

# Fill each template by replacing "X" with the issue text.
# Built with a plain comprehension over the two columns instead of a row-wise
# DataFrame.apply: it yields the same strings, avoids constructing a Series per
# row of the cross product, and still produces a valid (empty) column when
# there are no rows.
# NOTE(review): str.replace substitutes every capital 'X' in the template, not
# only a standalone placeholder -- confirm no template contains another 'X'.
prompts['prompt_text'] = [
    template_text.replace('X', topic_text)
    for template_text, topic_text in zip(prompts['template_text'], prompts['topic_text'])
]

# save prompts
prompts.to_csv(OUTPUT_PATH, index=False)

prompts

Unnamed: 0,topic_id,topic_polarity,topic_text,template_id,template_text,prompt_text
0,18,neutral,the 2023 Israeli invasion of Gaza,sharegpt-18242,Create a 2-page handbook about X.,Create a 2-page handbook about the 2023 Israel...
1,18,neutral,the 2023 Israeli invasion of Gaza,lmsys-915617,make sentences with the words X,make sentences with the words the 2023 Israeli...
2,18,neutral,the 2023 Israeli invasion of Gaza,lmsys-451092,You are an expert in finance and you must writ...,You are an expert in finance and you must writ...
3,18,neutral,the 2023 Israeli invasion of Gaza,lmsys-370588,"write a post titled ""X""","write a post titled ""the 2023 Israeli invasion..."
4,18,neutral,the 2023 Israeli invasion of Gaza,lmsys-292302,"Help me write a text called ""X""","Help me write a text called ""the 2023 Israeli ..."
...,...,...,...,...,...,...
635995,60,con,killing someone being morally wrong,wildchat-488899,I want you to write comedy crime drama points ...,I want you to write comedy crime drama points ...
635996,60,con,killing someone being morally wrong,lmsys-485939,Write a short dialogue between two people abou...,Write a short dialogue between two people abou...
635997,60,con,killing someone being morally wrong,wildchat-626949,"Using what you know, write paragraphs about X.","Using what you know, write paragraphs about ki..."
635998,60,con,killing someone being morally wrong,wildchat-372873,write a script for a youtube channel about X,write a script for a youtube channel about kil...
