In [17]:
import pandas as pd

In [None]:
# Parameters: the input and output file locations used by this notebook.

# Template file to read.
TEMPLATES_PATH = './prompt_ingredients/templates_sample.csv'
# templates_sample.csv (1k templates) generates the dataset used in the paper.
# templates_full.csv (~3.9k templates) generates the full dataset.

# Issue file to read.
ISSUES_PATH = './prompt_ingredients/issues.csv'

# File the generated prompts are written to.
# NOTE(review): the name says "sample"; presumably it should be changed together
# with TEMPLATES_PATH when generating the full dataset -- confirm.
OUTPUT_PATH = './prompts_sample.csv'

### Load Templates

In [None]:
# Read the template file selected by TEMPLATES_PATH and reduce it to the two
# columns used further down:
#   id              -> template_id
#   annot1_template -> template_text
# Every other column in the file is discarded.
templates = (
    pd.read_csv(TEMPLATES_PATH)
    .rename(columns={'id': 'template_id', 'annot1_template': 'template_text'})
    .loc[:, ['template_id', 'template_text']]
)

templates

Unnamed: 0,prompt_id,template_text
0,sharegpt-19010,Topic : X
1,wildchat-280338,Write a story about X
2,lmsys-520892,Write me a satirical definition of X
3,lmsys-934476,Write of X
4,lmsys-68452,Can you write an article about X (minimum 500 ...
...,...,...
3911,lmsys-629131,A decadent story: X
3912,lmsys-626611,Write a short and persuasive commentary about X
3913,wildchat-636197,write a short essay on X
3914,wildchat-498193,could you create a script based on X.


### Load Issues

In [None]:
# Load the issues and reshape them to one row per (issue, framing) pair.
# Resulting columns: topic_id, topic_polarity ('neutral' / 'pro' / 'con'), topic_text.
framing_columns = ['topic_neutral', 'topic_pro', 'topic_con']

issues = pd.read_csv(ISSUES_PATH)

# keep only issues that have text for all three framings (there should not be
# any gaps, but just in case): a missing value would survive the melt below as
# NaN and make the later placeholder substitution fail with a TypeError
issues = issues.dropna(subset=framing_columns)

# drop issues where topic_neutral is UNASSIGNED CLUSTER or SPAM or TOXIC / UNSAFE or FORMAT NOT TOPIC or FORECAST
issues = issues[~issues['topic_neutral'].isin(['UNASSIGNED CLUSTER', 'SPAM', 'TOXIC / UNSAFE', 'FORMAT NOT TOPIC', 'FORECAST'])]

# retain just the topic_id and the three framing columns
issues = issues[['topic_id'] + framing_columns]

# process issues for easier merging: turn columns topic_neutral, topic_pro and topic_con into rows
issues = issues.melt(id_vars=['topic_id'], value_vars=framing_columns, var_name='topic_polarity', value_name='topic_text')

# strip the 'topic_' prefix so topic_polarity holds 'neutral' / 'pro' / 'con';
# regex=False makes this a literal replacement regardless of the pandas default
issues['topic_polarity'] = issues['topic_polarity'].str.replace('topic_', '', regex=False)

issues

Unnamed: 0,cluster_id,topic_polarity,topic_text
0,358,neutral,single-payer healthcare
1,351,neutral,Saudi Arabia's politics
2,362,neutral,helping the homeless
3,129,neutral,the societal impacts of transgender identity
4,280,neutral,prisons
5,358,pro,single-payer healthcare being a good idea
6,351,pro,Saudi Arabia's politics being good
7,362,pro,helping the homeless being a good thing
8,129,pro,the societal impacts of transgender identity b...
9,280,pro,prisons being a good thing


### Combine Issues with Templates

In [None]:
# Combine issues with templates: the cross join pairs every issue row with
# every template row, so the result has len(issues) * len(templates) rows.
prompts = issues.merge(templates, how='cross')

# Build the prompt by replacing "X" in the template with the issue text.
# A plain comprehension over the two columns avoids the slow row-wise
# DataFrame.apply(axis=1) and also works when the cross join is empty.
# NOTE(review): str.replace substitutes every capital "X" in the template, not
# only a standalone placeholder -- confirm no template contains another "X".
prompts['prompt_text'] = [
    template_text.replace('X', topic_text)
    for template_text, topic_text in zip(prompts['template_text'], prompts['topic_text'])
]

# save prompts (without the positional index)
prompts.to_csv(OUTPUT_PATH, index=False)

prompts

Unnamed: 0,cluster_id,topic_polarity,topic_text,prompt_id,template_text,prompt_text
0,358,neutral,single-payer healthcare,sharegpt-19010,Topic : X,Topic : single-payer healthcare
1,358,neutral,single-payer healthcare,wildchat-280338,Write a story about X,Write a story about single-payer healthcare
2,358,neutral,single-payer healthcare,lmsys-520892,Write me a satirical definition of X,Write me a satirical definition of single-paye...
3,358,neutral,single-payer healthcare,lmsys-934476,Write of X,Write of single-payer healthcare
4,358,neutral,single-payer healthcare,lmsys-68452,Can you write an article about X (minimum 500 ...,Can you write an article about single-payer he...
...,...,...,...,...,...,...
58735,280,con,prisons being a bad thing,lmsys-629131,A decadent story: X,A decadent story: prisons being a bad thing
58736,280,con,prisons being a bad thing,lmsys-626611,Write a short and persuasive commentary about X,Write a short and persuasive commentary about ...
58737,280,con,prisons being a bad thing,wildchat-636197,write a short essay on X,write a short essay on prisons being a bad thing
58738,280,con,prisons being a bad thing,wildchat-498193,could you create a script based on X.,could you create a script based on prisons bei...
