This notebook creates the prompts for ChatGPT. The prompts are designed to elicit ChatGPT's responses to the items of various questionnaires.

## Load Packages

In [1]:
import pandas as pd
import numpy as np
import pickle
import os

## Define Functions

### General Parameters

In [2]:
# Directory that holds the raw survey exports (one "<dataset>.csv" per questionnaire).
FOLDER = "../data/surveys/"

# Meta variables of interest: only participants who provided all of them are kept.
COLS_META = [
    "sex",
    "age",
    "currentCountry",
    "political_opinion",
    "race",
    "religion",
    "education",
    "income",
    "social_class",
]

### Functions

In [33]:
def extractSurveys(data, folder=None, cols_meta=None, out_folder="../data/processed/"):
    """Load one raw survey export, keep only complete cases, and save the result.

    Every column to the left of the "sex" column is treated as a questionnaire
    item. Rows with a missing value in any item column or in any required meta
    column are dropped, the index is reset, and the remaining rows are written
    to ``<out_folder>/<data>_cleaned.csv``.

    Parameters
    ----------
    data : str
        Dataset name; the file ``<folder>/<data>.csv`` is read.
    folder : str, optional
        Directory containing the raw CSV. Defaults to the notebook-level ``FOLDER``.
    cols_meta : list of str, optional
        Meta columns that must exist and be non-missing. Defaults to the
        notebook-level ``COLS_META``.
    out_folder : str, optional
        Directory for the cleaned CSV; it is created if it does not exist.

    Returns
    -------
    df_total : pandas.DataFrame
        Rows without missing values in the item and meta columns.
    cols_items : list of str
        Names of the item columns (NOT the meta columns).

    Raises
    ------
    KeyError
        If one or more of the required meta columns is absent from the file.
    ValueError
        If the file has no "sex" column, or no item column precedes it.
    """
    folder = FOLDER if folder is None else folder
    cols_meta = list(COLS_META if cols_meta is None else cols_meta)

    # low_memory=False makes pandas infer each dtype from the whole column,
    # which avoids the mixed-type DtypeWarning raised for the larger exports.
    df = pd.read_csv(os.path.join(folder, data + ".csv"), low_memory=False)
    columns = df.columns.tolist()

    # Keep printing the check result, but fail early with a readable message
    # instead of an opaque error further down.
    missing = [col for col in cols_meta if col not in columns]
    print(not missing)  # check if meta information is in dataset
    if missing:
        raise KeyError("Dataset '" + data + "' lacks meta columns: " + ", ".join(missing))

    # "sex" is the first meta column; everything to its left is a survey item.
    idx = columns.index("sex")
    cols_items = columns[:idx]
    if not cols_items:
        raise ValueError("Dataset '" + data + "' has no item columns before 'sex'")
    print(cols_items[-1])  # print last survey item

    # Complete cases only: no missing item answers and no missing meta information.
    df_total = df.dropna(subset=cols_items + cols_meta).reset_index(drop=True)

    if out_folder:
        os.makedirs(out_folder, exist_ok=True)
    df_total.to_csv(os.path.join(out_folder, data + "_cleaned.csv"), index=False)

    return df_total, cols_items

## Load item texts and create prompts

### Need for Cognition

In [136]:
# Need for Cognition: pick the dataset, then keep only participants with complete data.
d = "cognition"
# NOTE: despite its name, cols_meta_cognition receives the item column names
# (the second return value of extractSurveys), not the meta columns.
df_cognition, cols_meta_cognition = extractSurveys(data=d)

True
cognition_18


In [137]:
# Instruction prefix for the Need for Cognition items (agreement scale from 1 to 5).
PROMPT_TEXT = "For a scientific study, please indicate your level of agreement with the following statement. Use integer numbers from 1 to 5, with 1 meaning strongly disagree and 5 meaning strongly agree. Respond with a single number. The statement is: "

# Item wording comes from a semicolon-separated file; check it against the original questionnaire.
items = pd.read_csv("../data/items/" + d + "_items.csv", sep=";")

# One prompt per item: the instruction followed by the item text in double quotes.
prompts = ["".join([PROMPT_TEXT, '"', item_text, '"']) for item_text in items.item_text]

# Save the prompts to disk (nothing is displayed by this cell).
with open("../data/prompts/" + d + ".pkl", "wb") as prompt_file:
    pickle.dump(prompts, file=prompt_file)

### Closure

In [39]:
# Closure: pick the dataset, then keep only participants with complete data.
d = "closure"
# NOTE: despite its name, cols_meta_closure receives the item column names
# (the second return value of extractSurveys), not the meta columns.
df_closure, cols_meta_closure = extractSurveys(data=d)

True
closure_16


In [40]:
# Instruction prefix for the Closure items (agreement scale from 1 to 6).
PROMPT_TEXT = "For a scientific study, please indicate your level of agreement with the following statement. Use integer numbers from 1 to 6, with 1 meaning strongly disagree and 6 meaning strongly agree. Respond with a single number. The statement is: "

# Item wording comes from a semicolon-separated file; check it against the original questionnaire.
items = pd.read_csv("../data/items/" + d + "_items.csv", sep=";")

# One prompt per item: the instruction followed by the item text in double quotes.
prompts = ["".join([PROMPT_TEXT, '"', item_text, '"']) for item_text in items.item_text]

# Save the prompts to disk (nothing is displayed by this cell).
with open("../data/prompts/" + d + ".pkl", "wb") as prompt_file:
    pickle.dump(prompts, file=prompt_file)

In [41]:
#### Check created prompts
# Dataset whose stored prompts should be inspected.
d = "closure"

# Location of the item texts. Both names are kept as before, although this
# cell itself does not read from either of them.
path = path_items = "../data/items/" + d + "_items.csv"

# Load the pickled prompt list back from disk.
with open("../data/prompts/" + d + ".pkl", "rb") as prompt_file:
    prompts = pickle.load(prompt_file)

# Display the first prompt as a spot check.
prompts[0]

'For a scientific study, please indicate your level of agreement with the following statement. Use integer numbers from 1 to 6, with 1 meaning strongly disagree and 6 meaning strongly agree. Respond with a single number. The statement is: "In case of uncertainty, I prefer to make an immediate decision, whatever it may be."'

### BIG5

In [None]:
# BIG5: pick the dataset, then keep only participants with complete data.
d = "bigfive"
# NOTE: despite its name, cols_meta_bigfive receives the item column names
# (the second return value of extractSurveys), not the meta columns.
df_bigfive, cols_meta_bigfive = extractSurveys(data=d)

  df = pd.read_csv(path)


True
bigfive_44


In [None]:
# Instruction prefix for the BIG5 items (agreement scale from 1 to 5).
PROMPT_TEXT = "For a scientific study, please indicate your level of agreement with the following statement. Use integer numbers from 1 to 5, with 1 meaning strongly disagree and 5 meaning strongly agree. Respond with a single number. The statement is: "

# Item wording comes from a semicolon-separated file; check it against the original questionnaire.
items = pd.read_csv("../data/items/" + d + "_items.csv", sep=";")

# One prompt per item: the instruction followed by the item text in double quotes.
prompts = ["".join([PROMPT_TEXT, '"', item_text, '"']) for item_text in items.item_text]

# Save the prompts to disk (nothing is displayed by this cell).
with open("../data/prompts/" + d + ".pkl", "wb") as prompt_file:
    pickle.dump(prompts, file=prompt_file)

### RWA

In [35]:
# RWA: pick the dataset, then keep only participants with complete data.
d = "rwa"
# NOTE: despite its name, cols_meta_rwa receives the item column names
# (the second return value of extractSurveys), not the meta columns.
df_rwa, cols_meta_rwa = extractSurveys(data=d)

True
rwa_15


In [36]:
# Instruction prefix for the RWA items (agreement scale from 1 to 6).
PROMPT_TEXT = "For a scientific study, please indicate your level of agreement with the following statement. Use integer numbers from 1 to 6, with 1 meaning strongly disagree and 6 meaning strongly agree. Respond with a single number. The statement is: "

# Item wording comes from a semicolon-separated file; check it against the original questionnaire.
items = pd.read_csv("../data/items/" + d + "_items.csv", sep=";")

# One prompt per item: the instruction followed by the item text in double quotes.
prompts = ["".join([PROMPT_TEXT, '"', item_text, '"']) for item_text in items.item_text]

# Save the prompts to disk (nothing is displayed by this cell).
with open("../data/prompts/" + d + ".pkl", "wb") as prompt_file:
    pickle.dump(prompts, file=prompt_file)

### Systems & Feelings

In [20]:
# Systems & Feelings: pick the dataset, then keep only participants with complete data.
d = "systems_feelings"
# NOTE: despite its name, cols_meta_systems_feelings receives the item column names
# (the second return value of extractSurveys), not the meta columns.
df_systems_feelings, cols_meta_systems_feelings = extractSurveys(data=d)

  df = pd.read_csv(path)


True
systems_feelings_42


In [21]:
# Instruction prefix for the Systems & Feelings items (agreement scale from 1 to 4).
PROMPT_TEXT = "For a scientific study, please indicate your level of agreement with the following statement. Use integer numbers from 1 to 4, with 1 meaning strongly disagree and 4 meaning strongly agree. Respond with a single number. The statement is: "

# Item wording comes from a semicolon-separated file; check it against the original questionnaire.
items = pd.read_csv("../data/items/" + d + "_items.csv", sep=";")

# One prompt per item: the instruction followed by the item text in double quotes.
prompts = ["".join([PROMPT_TEXT, '"', item_text, '"']) for item_text in items.item_text]

# Save the prompts to disk (nothing is displayed by this cell).
with open("../data/prompts/" + d + ".pkl", "wb") as prompt_file:
    pickle.dump(prompts, file=prompt_file)

### Cognitive Style Measure

In [119]:
# Cognitive Style Measure: pick the dataset, then keep only participants with complete data.
d = "cogref"
# NOTE: despite its name, cols_meta_cogref receives the item column names
# (the second return value of extractSurveys), not the meta columns.
df_cogref, cols_meta_cogref = extractSurveys(data=d)

  df = pd.read_csv(path)


True
cogref_40


In [122]:
# Instruction prefix for the Cognitive Style Measure items
# (scale from 1 = definitely not true of me to 5 = definitely true of me).
PROMPT_TEXT = "For a scientific study, please indicate your level of agreement with the following statement. Use integer numbers from 1 to 5, with 1 meaning definitely not true of me and 5 meaning definitely true of me. Respond with a single number. The statement is: "

# Item wording comes from a semicolon-separated file; check it against the original questionnaire.
items = pd.read_csv("../data/items/" + d + "_items.csv", sep=";")

# One prompt per item: the instruction followed by the item text in double quotes.
prompts = ["".join([PROMPT_TEXT, '"', item_text, '"']) for item_text in items.item_text]

# Save the prompts to disk (nothing is displayed by this cell).
with open("../data/prompts/" + d + ".pkl", "wb") as prompt_file:
    pickle.dump(prompts, file=prompt_file)

In [20]:
# Spot check: load the stored Need for Cognition prompts back from disk.
# NOTE(review): the output recorded for this cell contains "3 meaning neutral",
# which the PROMPT_TEXT in the Need for Cognition cell does not. The pickle on
# disk was presumably written by a different version of that cell; re-run the
# notebook top to bottom to confirm which wording is intended.
d = "cognition"
with open("../data/prompts/" + d + ".pkl", "rb") as prompt_file:
    prompts = pickle.load(prompt_file)

# Display the first prompt.
prompts[0]

'For a scientific study, please indicate your level of agreement with the following statement. Use integer numbers from 1 to 5, with 1 meaning strongly disagree, 3 meaning neutral, and 5 meaning strongly agree. Respond with a single number. The statement is: "I would prefer complex to simple problems."'