In [1]:
# if needed, install and/or upgrade to the latest version of the OpenAI Python library
#%pip install --upgrade openai

In [1]:
# import the OpenAI Python library for calling the OpenAI API
import os
import openai
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import time
import requests

In [2]:
# Set your API key
openai.api_key = # Insert API key

# Example OpenAI Python library request
MODEL = "gpt-3.5-turbo"

In [3]:
# Create an ExcelWriter object and specify the file name
excel_file = 'Test Data - Revised Prompts.xlsx'
writer = pd.ExcelWriter(excel_file, engine='xlsxwriter')

# Test Data

## High Power

In [3]:
dtypes = {'#': str, 
          'Statement': str, 
          'Human 1': str,
          'Human 2': str
         }

# Put input file here
input_file = os.path.join("..", "data", "raw", "Copy of Power Recall Prime - ChatGPT Prompt Reliability ST MS V2 ST 23 June 2023 MS ST.xlsx" )

# Select columns to read
in_df = pd.read_excel(input_file, sheet_name='High Power', usecols="B:E", dtype=dtypes, skiprows=3)

# Fill any empty cells
in_df.fillna('', inplace=True)

In [4]:
in_df = in_df[0:20]

In [5]:
in_df

Unnamed: 0,#,Statement,Human 1,Human 2
0,1,A few years ago I was an instructor for teens....,Y,Y
1,2,A situation in which I was in power over other...,Y,Y
2,3,"At the job I used to have, the manager left me...",Y,Y
3,4,"For sometime my wife wanted her own car, but s...",Y,Y
4,5,"I am a professor, so I have power over the stu...",Y,Y
5,6,I felt that I had power over a person when I w...,Y,Y
6,7,I had power at a former job as I was in manage...,Y,Y
7,8,I was in a position to evaluate two employees....,Y,Y
8,9,"I am in a management role, this is part of my ...",Y,Y
9,10,I have a nephew and two nieces that I babysat ...,Y,Y


In [6]:
# add a new column called "CorrectText" to the dataframe
in_df['CorrectText'] = ""

In [10]:
prompt = """
Background on power recall primes
For this task, you will be asked to annotate a sample of essays resulting from studies using recall primes. Before describing the task, we explain what we mean by “recall prime.” 
A recall prime involves having study participants recall a time in which they either had power over another person (high power condition), or in which someone else had power over them (low power condition). The high and low power condition recall tasks should prime (i.e., activate) the concept of high and low power, respectively. A person high in power can make decisions that determine the outcomes of some target, either by providing directions during a task, directly assigning resources to the target, or by simply evaluating the target. A person low in power is one whose outcomes are determined by others, either by receiving directions during a task, being assigned resources, or by being evaluated. These two power conditions are compared with a neutral task (control condition) in which people recall having equal power to someone else, or the last time they went to the grocery store, or what they had for their last meal. 

High power
For each essay in the sample, follow these instructions:
1. Carefully read the text of the essay, paying close attention to details. 
2. Classify each essay as relevant (1) or irrelevant (0)
Essays should be coded as RELEVANT if the essay can be classified as a direct response to the following prompt: “Please recall a particular incident in which you had power over another individual or individuals. By power, we mean a situation in which you controlled the ability of another person or persons to get something they wanted, or were in a position to evaluate those individuals. Please describe this situation in which you had power— what happened, how you felt, etc.” For example, this would include essays that describe a situation in which the writer was the supervisor of another individual, had authority over another individual, and/or was able to control the behavior or outcome of another individual.
Essays should be coded as IRRELEVANT if the essay cannot be classified as a direct response to the same prompt described above. For example, this would include essays that describe a situation in which the writer did not have power over another individual or individuals, did not control the ability of another person or persons to get something they wanted, or were not in a position to evaluate another individual or individuals. In addition, this should also include essays in which participants simply copied a significant portion of the prompt above, essays that contain gibberish, and/or essays that are too short to analyze. 

The output you should give is 1 if the essay is relevant, or 0 if the essay is irrelevant or undeterminable.

"""

positive_example = """
I am a professor, so I have power over the students who take my classes. I control what assignments they do, what we do in class, whether excuses are allowed, and ultimately their final grades. I am uncomfortable wielding power most of the time. I enjoy casses the most in which the students act respectful and do not make me use my power over them in a controlling way.
"""

negative_example = """
A situation in which I controlled the other person's behavior in order to get something I wanted, or I were in a position to evaluate this person.
"""


# loop through the "Text"" column of the first 20 rows
for i in range(0,20):
    text = in_df.iloc[i]['Statement']
    #print(abstract)
    messages = [{
    "role": "system",
    "content": f"You are a helpful research assistant assessing whether a sample is relevant or irrelevant. Only return 1 or 0."
    },
    #{"role": "user", "content": f"{prompt}\nHere is the text: {positive_example}"},
    #{"role": "assistant", "content": "Yes"},
    #{"role": "user", "content": f"Good job!"},
    #{"role": "user", "content": f"{prompt}\nHere is the text: {negative_example}"},
    #{"role": "assistant", "content": "No"},
    #{"role": "user", "content": f"Good job!"},
    {"role": "user", "content": f"{prompt}\nHere is the text: {text}"}
    ]


    response = openai.ChatCompletion.create(
        model=MODEL,
        messages=messages,
        temperature=0,
    )

    print(response["choices"][0]["message"]["content"])
    print(response["usage"]["total_tokens"])
    in_df.loc[in_df.index[i], 'CorrectText'] = response["choices"][0]["message"]["content"]
    
    time.sleep(0.5)

1
677
1
664
1
697
1
694
1
672
1
818
1
641
1
700
1
627
1
665
1
640
0
598
1
614
0
598
0
636
1
626
0
670
0
629
0
651
1
624


In [11]:
in_df

Unnamed: 0,#,Statement,Human 1,Human 2,CorrectText
0,1,A few years ago I was an instructor for teens....,Y,Y,1
1,2,A situation in which I was in power over other...,Y,Y,1
2,3,"At the job I used to have, the manager left me...",Y,Y,1
3,4,"For sometime my wife wanted her own car, but s...",Y,Y,1
4,5,"I am a professor, so I have power over the stu...",Y,Y,1
5,6,I felt that I had power over a person when I w...,Y,Y,1
6,7,I had power at a former job as I was in manage...,Y,Y,1
7,8,I was in a position to evaluate two employees....,Y,Y,1
8,9,"I am in a management role, this is part of my ...",Y,Y,1
9,10,I have a nephew and two nieces that I babysat ...,Y,Y,1


In [14]:
in_df.to_excel(writer, sheet_name='High Power', index=False)

## Low Power

In [15]:

# Select columns to read
in_df = pd.read_excel(input_file, sheet_name='Low Power', usecols="B:E", dtype=dtypes, skiprows=3)

# Fill any empty cells
in_df.fillna('', inplace=True)

In [16]:
# add a new column called "CorrectText" to the dataframe
in_df['CorrectText'] = ""

In [17]:
prompt = """
Background on power recall primes
For this task, you will be asked to annotate a sample of essays resulting from studies using recall primes. Before describing the task, we explain what we mean by “recall prime.” 
A recall prime involves having study participants recall a time in which they either had power over another person (high power condition), or in which someone else had power over them (low power condition). The high and low power condition recall tasks should prime (i.e., activate) the concept of high and low power, respectively. A person high in power can make decisions that determine the outcomes of some target, either by providing directions during a task, directly assigning resources to the target, or by simply evaluating the target. A person low in power is one whose outcomes are determined by others, either by receiving directions during a task, being assigned resources, or by being evaluated. These two power conditions are compared with a neutral task (control condition) in which people recall having equal power to someone else, or the last time they went to the grocery store, or what they had for their last meal. 

Low power
For each essay in the sample, follow these instructions:
1. Carefully read the text of the essay, paying close attention to details. 
2. Classify each essay as relevant (1) or irrelevant (0)
Essays should be coded as RELEVANT if the essay can be classified as a direct response to the following prompt: “Please recall a particular incident in which someone else had power over you. By power, we mean a situation in which someone had control over your ability to get something you wanted, or was in a position to evaluate you. Please describe a situation in which you did not have  power—what happened, how you felt, etc.” For example, this would include essays that describe a situation in which the writer was being supervised by another individual, was subordinate to another individual, and/or their outcomes were controlled by another individual.
Essays should be coded as IRRELEVANT if the essay cannot be classified as a direct response to the same prompt described above. For example, this would include essays that describe a situation in which the writer was not controlled by another individual or individuals, their ability get something they wanted was not controlled by of another person or persons, or they were not evaluated by another individual or individuals. In addition, this should also include essays in which participants simply copied a significant portion of the prompt above, essays that contain gibberish, and/or essays that are too short to analyze. 

The output you should give is 1 if the essay is relevant, or 0 if the essay is irrelevant or undeterminable.
"""

# loop through the "Text"" column of the first 20 rows
for i in range(0,20):
    text = in_df.iloc[i]['Statement']
    #print(abstract)
    messages = [{
    "role": "system",
    "content": f"You are a helpful research assistant assessing whether a sample is relevant or irrelevant. Only return 1 or 0."
    },
    {"role": "user", "content": f"{prompt}\nHere is the text: {text}"}
    ]


    response = openai.ChatCompletion.create(
        model=MODEL,
        messages=messages,
        temperature=0,
    )

    print(response["choices"][0]["message"]["content"])
    print(response["usage"]["total_tokens"])
    in_df.loc[in_df.index[i], 'CorrectText'] = response["choices"][0]["message"]["content"]
    
    time.sleep(0.5)

1
611
1
622
1
653
1
656
1
687
1
703
1
681
1
646
1
611
1
672
1
705
0
662
1
778
0
593
0
636
0
625
1
674
0
607
0
613
0
657


In [18]:
in_df

Unnamed: 0,#,Statement,Human 1,Human 2,CorrectText
0,1.0,I was at work and I had a boss that instructed...,Y,Y,1.0
1,2.0,I was bullied for months when I was younger on...,Y,Y,1.0
2,3.0,I was in a position where I was eligible for a...,Y,Y,1.0
3,4.0,I was put on a marketing project at my job and...,Y,Y,1.0
4,5.0,I was told to wait in my bosses office because...,Y,Y,1.0
5,6.0,"In my most recent job, I have been forced into...",Y,Y,1.0
6,7.0,It was my end of the year performance evaluati...,Y,Y,1.0
7,8.0,Last June I was arrested and interrogated. The...,Y,Y,1.0
8,9.0,Last year i was in lie for a promotion so i wa...,Y,Y,1.0
9,10.0,my boss knew I wanted a reassignment and that ...,Y,Y,1.0


In [19]:
in_df.to_excel(writer, sheet_name='Low Power', index=False)

## Equal Power

In [20]:

# Select columns to read
in_df = pd.read_excel(input_file, sheet_name='Equal Power', usecols="B:E", dtype=dtypes, skiprows=3)

# Fill any empty cells
in_df.fillna('', inplace=True)

In [21]:
# add a new column called "CorrectText" to the dataframe
in_df['CorrectText'] = ""

In [23]:
prompt = """
Background on power recall primes
For this task, you will be asked to annotate a sample of essays resulting from studies using recall primes. Before describing the task, we explain what we mean by “recall prime.” 
A recall prime involves having study participants recall a time in which they either had power over another person (high power condition), or in which someone else had power over them (low power condition). The high and low power condition recall tasks should prime (i.e., activate) the concept of high and low power, respectively. A person high in power can make decisions that determine the outcomes of some target, either by providing directions during a task, directly assigning resources to the target, or by simply evaluating the target. A person low in power is one whose outcomes are determined by others, either by receiving directions during a task, being assigned resources, or by being evaluated. These two power conditions are compared with a neutral task (control condition) in which people recall having equal power to someone else, or the last time they went to the grocery store, or what they had for their last meal. 

Equal power

For each essay in the sample, follow these instructions:
1. Carefully read the text of the essay, paying close attention to details. 
2. Classify each essay as relevant (1) or irrelevant (0)
Essays should be coded as RELEVANT if the essay can be classified as a direct response to the following prompt: “Please recall a particular incident in which you had the same amount of power as another individual or individuals. By power, we mean a situation in which both of you had the same control over the other person or persons to get something they wanted, or both of you were in a position to evaluate each other. Please describe this situation in which you had equal power--what happened, how you felt, etc.” For example, this would include essays that describe a situation in which the writer and the other individual or individuals had the same control over each other, and/or their outcomes were controlled by each other to a similar extent. 
Essays should be coded as IRRELEVANT if the essay cannot be classified as a direct response to the same prompt described above. For example, this would include essays that describe a situation in which the writer and another individual or individuals did have unequal power, their ability to get something they wanted was unequal, or they were either evaluated by another individual or individuals or evaluated them. In addition, this should also include essays in which participants simply copied a significant portion of the prompt above, essays that contain gibberish, and/or essays that are too short to analyze. 

The output you should give is 1 if the essay is relevant, or 0 if the essay is irrelevant or undeterminable.
"""




# loop through the "Text"" column of the first 20 rows
for i in range(0,20):
    text = in_df.iloc[i]['Statement']
    #print(abstract)
    messages = [{
    "role": "system",
    "content": f"You are a helpful research assistant assessing whether a sample is relevant or irrelevant. Only return 1 or 0."
    },
    {"role": "user", "content": f"{prompt}\nHere is the text: {text}"}
    ]


    response = openai.ChatCompletion.create(
        model=MODEL,
        messages=messages,
        temperature=0,
    )

    print(response["choices"][0]["message"]["content"])
    print(response["usage"]["total_tokens"])
    in_df.loc[in_df.index[i], 'CorrectText'] = response["choices"][0]["message"]["content"]
    
    time.sleep(0.5)

1
669
1
632
1
654
1
710
1
662
1
712
1
680
1
663
1
664
1
681
1
663
1
675
1
676
1
675
0
652
0
625
1
642
0
607
0
621
0
647


In [24]:
in_df

Unnamed: 0,#,Statement,Human 1,Human 2,CorrectText
0,1.0,"Last year, I had to work on a group presentati...",Y,Y,1.0
1,2.0,I would say I always have equal power in my ma...,Y,Y,1.0
2,3.0,I was in a comittee and we all got a vote on w...,Y,Y,1.0
3,4.0,"In a design class in college, we were paired u...",Y,Y,1.0
4,5.0,I was working with a coworker who is of equal ...,Y,Y,1.0
5,6.0,"Once in college, my classmates and I were requ...",Y,Y,1.0
6,7.0,The other day I was on a hike in the woods wit...,Y,Y,1.0
7,8.0,Me and my wife went to the mall last week. W...,Y,Y,1.0
8,9.0,"On Thursdays at work, all of the staff members...",Y,Y,1.0
9,10.0,The situation that comes to mind would be a st...,Y,Y,1.0


In [25]:
in_df.to_excel(writer, sheet_name='Equal Power', index=False)

## Grocery Store

In [4]:

# Select columns to read
in_df = pd.read_excel(input_file, sheet_name='Grocery Store', usecols="B:E", dtype=dtypes, skiprows=3)

# Fill any empty cells
in_df.fillna('', inplace=True)

NameError: name 'input_file' is not defined

In [27]:
# add a new column called "CorrectText" to the dataframe
in_df['CorrectText'] = ""

In [28]:
prompt = """
Background on power recall primes
For this task, you will be asked to annotate a sample of essays resulting from studies using recall primes. Before describing the task, we explain what we mean by “recall prime.” 
A recall prime involves having study participants recall a time in which they either had power over another person (high power condition), or in which someone else had power over them (low power condition). The high and low power condition recall tasks should prime (i.e., activate) the concept of high and low power, respectively. A person high in power can make decisions that determine the outcomes of some target, either by providing directions during a task, directly assigning resources to the target, or by simply evaluating the target. A person low in power is one whose outcomes are determined by others, either by receiving directions during a task, being assigned resources, or by being evaluated. These two power conditions are compared with a neutral task (control condition) in which people recall having equal power to someone else, or the last time they went to the grocery store, or what they had for their last meal. 

Grocery store

For each essay in the sample, follow these instructions:
1. Carefully read the text of the essay, paying close attention to details. 
2. Classify each essay as relevant (1) or irrelevant (0)
Essays should be coded as RELEVANT if the essay can be classified as a direct response to the following prompt: “Please recall your last trip to the grocery store. Please describe this situation-- what happened, how you felt, etc.” For example, this would include essays that describe a situation in which the writer went to the grocery store.
Essays should be coded as IRRELEVANT if the essay cannot be classified as a direct response to the same prompt described above. For example, this would include essays that describe a situation in which the writer did not report on an experience in the grocery store. In addition, this should also include essays in which participants simply copied a significant portion of the prompt above, essays that contain gibberish, and/or essays that are too short to analyze. 

The output you should give is 1 if the essay is relevant, or 0 if the essay is irrelevant or undeterminable.
"""

# loop through the "Text"" column of the first 20 rows
for i in range(0,20):
    text = in_df.iloc[i]['Statement']
    #print(abstract)
    messages = [{
    "role": "system",
    "content": f"You are a helpful research assistant assessing whether a sample is relevant or irrelevant. Only return 1 or 0."
    },
    {"role": "user", "content": f"{prompt}\nHere is the text: {text}"}
    ]


    response = openai.ChatCompletion.create(
        model=MODEL,
        messages=messages,
        temperature=0,
    )

    print(response["choices"][0]["message"]["content"])
    print(response["usage"]["total_tokens"])
    in_df.loc[in_df.index[i], 'CorrectText'] = response["choices"][0]["message"]["content"]
    
    time.sleep(0.5)

1
552
1
584
1
617
1
583
1
608
1
573
1
562
1
578
1
597
1
551
0
507
0
502
0
505
0
506
0
538
0
502
0
510
1
542
0
559
0
534


In [29]:
in_df

Unnamed: 0,#,Statement,Human 1,Human 2,CorrectText
0,1.0,I went to the gorcery store with my boyfriend ...,Y,Y,1.0
1,2.0,I went to my local Safeway and purchased nearl...,Y,Y,1.0
2,3.0,The last time I went to the grocery store was ...,Y,Y,1.0
3,4.0,"I remember going through the checkout line, I ...",Y,Y,1.0
4,5.0,The last time I went to the grocery store it w...,Y,Y,1.0
5,6.0,I went to a new food store to buy coffee. I jo...,Y,Y,1.0
6,7.0,I went to the grocery sore a few days ago. It ...,Y,Y,1.0
7,8.0,The last time I went to a grocery store I was ...,Y,Y,1.0
8,9.0,When i went to the one of the grocery shop whi...,Y,Y,1.0
9,10.0,I hate going to the grocery store but the last...,Y,Y,1.0


In [30]:
in_df.to_excel(writer, sheet_name='Grocery', index=False)

## Last Meal

In [31]:

# Select columns to read
in_df = pd.read_excel(input_file, sheet_name='Last Meal', usecols="B:E", dtype=dtypes, skiprows=3)

# Fill any empty cells
in_df.fillna('', inplace=True)

In [32]:
# add a new column called "CorrectText" to the dataframe
in_df['CorrectText'] = ""

In [33]:
prompt = """
Background on power recall primes
For this task, you will be asked to annotate a sample of essays resulting from studies using recall primes. Before describing the task, we explain what we mean by “recall prime.” 
A recall prime involves having study participants recall a time in which they either had power over another person (high power condition), or in which someone else had power over them (low power condition). The high and low power condition recall tasks should prime (i.e., activate) the concept of high and low power, respectively. A person high in power can make decisions that determine the outcomes of some target, either by providing directions during a task, directly assigning resources to the target, or by simply evaluating the target. A person low in power is one whose outcomes are determined by others, either by receiving directions during a task, being assigned resources, or by being evaluated. These two power conditions are compared with a neutral task (control condition) in which people recall having equal power to someone else, or the last time they went to the grocery store, or what they had for their last meal. 


Last meal

For each essay in the sample, follow these instructions:
1. Carefully read the text of the essay, paying close attention to details. 
2. Classify each essay as relevant (1) or irrelevant (0)
Essays should be coded as RELEVANT if the essay can be classified as a direct response to the following prompt: “Please recall your last meal. Please describe this situation-- what happened, how you felt, etc.” For example, this would include essays that describe a situation in which the writer ate a meal.
Essays should be coded as IRRELEVANT if the essay cannot be classified as a direct response to the same prompt described above. For example, this would include essays that describe a situation in which the writer did not report on an experience eating a meal. In addition, this should also include essays in which participants simply copied a significant portion of the prompt above, essays that contain gibberish, and/or essays that are too short to analyze. 

The output you should give is 1 if the essay is relevant, or 0 if the essay is irrelevant or undeterminable.
"""


# loop through the "Text"" column of the first 20 rows
for i in range(0,20):
    text = in_df.iloc[i]['Statement']
    #print(abstract)
    messages = [{
    "role": "system",
    "content": f"You are a helpful research assistant assessing whether a sample is relevant or irrelevant. Only return 1 or 0."
    },
    {"role": "user", "content": f"{prompt}\nHere is the text: {text}"}
    ]


    response = openai.ChatCompletion.create(
        model=MODEL,
        messages=messages,
        temperature=0,
    )

    print(response["choices"][0]["message"]["content"])
    print(response["usage"]["total_tokens"])
    in_df.loc[in_df.index[i], 'CorrectText'] = response["choices"][0]["message"]["content"]
    
    time.sleep(0.5)

1
532
1
549
1
537
1
567
1
627
1
500
1
524
1
523
1
531
1
546
0
495
0
498
0
517
0
545
0
532
0
498
0
562
0
570
0
495
0
542


In [34]:
in_df

Unnamed: 0,#,Statement,Human 1,Human 2,CorrectText
0,1.0,I ate two untoasted pop tarts for breakfast al...,Y,Y,1.0
1,2.0,I ate 11 Stretch Island Fruit Leather strips f...,Y,Y,1.0
2,3.0,I have been trying the one meal a day diet. T...,Y,Y,1.0
3,4.0,The last meal I had was breakfast this morning...,Y,Y,1.0
4,5.0,I finally learn how to make Chinese fried dump...,Y,Y,1.0
5,6.0,Ham and egg sandwich with coffee.,Y,Y,1.0
6,7.0,The last meal that I had was breakfast. I ate ...,Y,Y,1.0
7,8.0,"Pasta, i had it for breakfast and it was made ...",Y,Y,1.0
8,9.0,"I had cereal, chobani mango yogurt, and a mix...",Y,Y,1.0
9,10.0,"This morning I was hungry, but I did not feel ...",Y,Y,1.0


In [35]:
in_df.to_excel(writer, sheet_name='Last Meal', index=False)

In [37]:

writer.close()

  writer.save()
  warn("Calling close() on already closed file.")


# Actual

In [5]:
dtypes = {'Year': str, 
          'Sample': str, 
          'Researcher': str,
          'SourceFile': str,
          'Condition': str,
          'Text': str, 
          'Age': str, 
          'Female': str,
          'Country': str,
         }

# Put input file here
input_file = os.path.join("..", "data", "raw", "Power Recall Prime Text Collection_v01.xlsx" )

# Select columns to read
in_df = pd.read_excel(input_file, sheet_name='Data', usecols="A:I", dtype=dtypes)

# Fill any empty cells
in_df.fillna('', inplace=True)

In [6]:
in_df

Unnamed: 0,Year,Sample,Researcher,SourceFile,Condition,Text,Age,Female,Country
0,2012,MTurk,Pitesa & Thau,p1_v3_s1_redo2,C (Grocery),My last trip to the grocery store was just yes...,48,1,USA
1,2012,MTurk,Pitesa & Thau,p1_v3_s1_redo2,C (Grocery),I went to the gorcery store with my boyfriend ...,20,1,USA
2,2012,MTurk,Pitesa & Thau,p1_v3_s1_redo2,C (Grocery),I went to my local Safeway and purchased nearl...,23,0,USA
3,2012,MTurk,Pitesa & Thau,p1_v3_s1_redo2,C (Grocery),The last time I went to the grocery store was ...,25,0,USA
4,2012,MTurk,Pitesa & Thau,p1_v3_s1_redo2,C (Grocery),"Yesterday, I went to Wal-Mart to shop for some...",24,0,USA
...,...,...,...,...,...,...,...,...,...
8395,2019,MTurk,"Schaerer, He, & Baily Wolf",Reframing_LP_as_Opportunity_-_Recall_-_Study_1...,LP,i plow ran me off the road in when is was snow...,48,1,USA
8396,2019,MTurk,"Schaerer, He, & Baily Wolf",Reframing_LP_as_Opportunity_-_Recall_-_Study_1...,LP,Recently I was trying to get a raise at work a...,28,0,USA
8397,2019,MTurk,"Schaerer, He, & Baily Wolf",Reframing_LP_as_Opportunity_-_Recall_-_Study_1...,LP,I wanted to do a report for my company that I ...,29,1,USA
8398,2019,MTurk,"Schaerer, He, & Baily Wolf",Reframing_LP_as_Opportunity_-_Recall_-_Study_1...,LP,"Back about 8 years ago, I was not working outs...",47,1,USA


In [7]:
# add a new column called "CorrectText" to the dataframe
in_df['CorrectText'] = ""

In [8]:
in_df['Condition'].unique()

array(['C (Grocery)', 'HP', 'LP', 'C (Last Meal)', 'C (Equal Power)'],
      dtype=object)

In [9]:
hp_df = in_df[in_df['Condition']=='HP']
lp_df = in_df[in_df['Condition']=='LP']
ep_df = in_df[in_df['Condition']=='C (Equal Power)']
gr_df = in_df[in_df['Condition']=='C (Grocery)']
lm_df = in_df[in_df['Condition']=='C (Last Meal)']

## Last Meal

In [71]:
len(lm_df)

865

In [73]:
lm_df = lm_df.iloc[:624]

In [74]:
lm_df

Unnamed: 0,Year,Sample,Researcher,SourceFile,Condition,Text,Age,Female,Country,CorrectText
4060,2020,MTurk,"du Plessis, Nguyen, Foulk, & Schaerer",Power__Trust_-_Cesarini_et_al_Trust_Game_-_Rol...,C (Last Meal),"I had a twix bar, a cookie, and a bag of durit...",33,0,USA,1
4061,2020,MTurk,"du Plessis, Nguyen, Foulk, & Schaerer",Power__Trust_-_Cesarini_et_al_Trust_Game_-_Rol...,C (Last Meal),The last meal I had was dinner last night. My ...,31,1,USA,1
4062,2020,MTurk,"du Plessis, Nguyen, Foulk, & Schaerer",Power__Trust_-_Cesarini_et_al_Trust_Game_-_Rol...,C (Last Meal),good one,29,0,USA,0
4063,2020,MTurk,"du Plessis, Nguyen, Foulk, & Schaerer",Power__Trust_-_Cesarini_et_al_Trust_Game_-_Rol...,C (Last Meal),I ate 11 Stretch Island Fruit Leather strips f...,23,1,USA,1
4064,2020,MTurk,"du Plessis, Nguyen, Foulk, & Schaerer",Power__Trust_-_Cesarini_et_al_Trust_Game_-_Rol...,C (Last Meal),This morning I ate two scrambled eggs and two ...,35,0,USA,1
...,...,...,...,...,...,...,...,...,...,...
6668,2020,MTurk,"Foulk, De Pater, Schaerer, du Plessis, Lee, & ...",Power+and+Paranoia+R&R+-+Psych+Recall+-+MTurk+...,C (Last Meal),My wife was feeling not well so i made a fruit...,40,0,USA,1
6669,2020,MTurk,"Foulk, De Pater, Schaerer, du Plessis, Lee, & ...",Power+and+Paranoia+R&R+-+Psych+Recall+-+MTurk+...,C (Last Meal),I had a frozen pizza last night for dinner. Al...,45,1,USA,1
6670,2020,MTurk,"Foulk, De Pater, Schaerer, du Plessis, Lee, & ...",Power+and+Paranoia+R&R+-+Psych+Recall+-+MTurk+...,C (Last Meal),We had a very nice stir fry that my husband co...,58,1,USA,1
6671,2020,MTurk,"Foulk, De Pater, Schaerer, du Plessis, Lee, & ...",Power+and+Paranoia+R&R+-+Psych+Recall+-+MTurk+...,C (Last Meal),"I had rice, beans, and meat.I had dinner with ...",35,0,USA,1


In [80]:
preamble = """
Background on power recall primes
For this task, you will be asked to annotate a sample of essays resulting from studies using recall primes. Before describing the task, we explain what we mean by “recall prime.” 
A recall prime involves having study participants recall a time in which they either had power over another person (high power condition), or in which someone else had power over them (low power condition). The high and low power condition recall tasks should prime (i.e., activate) the concept of high and low power, respectively. A person high in power can make decisions that determine the outcomes of some target, either by providing directions during a task, directly assigning resources to the target, or by simply evaluating the target. A person low in power is one whose outcomes are determined by others, either by receiving directions during a task, being assigned resources, or by being evaluated. These two power conditions are compared with a neutral task (control condition) in which people recall having equal power to someone else, or the last time they went to the grocery store, or what they had for their last meal. 
"""

prompt = """
Last meal

For each essay in the sample, follow these instructions:
1. Carefully read the text of the essay, paying close attention to details. 
2. Classify each essay as relevant (1) or irrelevant (0)
Essays should be coded as RELEVANT if the essay can be classified as a direct response to the following prompt: “Please recall your last meal. Please describe this situation-- what happened, how you felt, etc.” For example, this would include essays that describe a situation in which the writer ate a meal.
Essays should be coded as IRRELEVANT if the essay cannot be classified as a direct response to the same prompt described above. For example, this would include essays that describe a situation in which the writer did not report on an experience eating a meal. In addition, this should also include essays in which participants simply copied a significant portion of the prompt above, essays that contain gibberish, and/or essays that are too short to analyze. 

The output you should give is 1 if the essay is relevant, or 0 if the essay is irrelevant or undeterminable.
Only return 1 or 0. Do not return anything else.
"""

max_retries = 5  # Maximum number of retries
retry_delay = 10  # Delay in seconds between retries

# loop through the "Text"" column of the first 20 rows
for i in range(599, 624):
    text = lm_df.iloc[i]['Text']
    #print(abstract)
    messages = [{
    "role": "system",
    "content": f"{preamble}"
    },
    {"role": "user", "content": f"{prompt}\nHere is the text: {text}"}
    ]


    retries = 0
    while retries < max_retries:
        try:
            response = response = openai.ChatCompletion.create(
                model=MODEL,
                messages=messages,
                request_timeout=60,
                temperature=0,
            )
            
            print(f"Row {i}: {response['choices'][0]['message']['content']}")
            print(response["usage"]["total_tokens"])
            lm_df.loc[lm_df.index[i], 'CorrectText'] = response["choices"][0]["message"]["content"]
    
            time.sleep(0.75)
            retries = max_retries
            
        except requests.exceptions.RequestException as e:
            print(f"API Error occurred: {e}")
            retries += 1
            print(f"Retrying API call {retries}/{max_retries}...")
            time.sleep(retry_delay)
        except requests.exceptions.ReadTimeoutError as e:
            print(f"API Error occurred: {e}")
            retries += 1
            print(f"Retrying API call {retries}/{max_retries}...")
            time.sleep(retry_delay)
        except openai.error.APIError as e:
            #Handle API error here, e.g. retry or log
            print(f"OpenAI API returned an API Error: {e}")
            retries += 1
            print(f"Retrying API call {retries}/{max_retries}...")
            time.sleep(retry_delay)
        except openai.error.APIConnectionError as e:
            #Handle connection error here
            print(f"Failed to connect to OpenAI API: {e}")
            retries += 1
            print(f"Retrying API call {retries}/{max_retries}...")
            time.sleep(retry_delay)

Row 599: 1
617


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lm_df.loc[lm_df.index[i], 'CorrectText'] = response["choices"][0]["message"]["content"]


Row 600: 1
517
Row 601: 1
521
Row 602: 1
505
Row 603: 1
528
Row 604: 1
654
Row 605: 1
509
Row 606: 0
490
Row 607: 1
513
Row 608: 1
509
Row 609: 1
510
Row 610: 0
514
Row 611: 1
489
Row 612: 1
513
Row 613: 1
586
Row 614: 1
539
Row 615: 1
530
Row 616: 1
517
Row 617: 1
544
Row 618: 1
571
Row 619: 1
502
Row 620: 1
531
Row 621: 1
506
Row 622: 1
499
Row 623: 1
495


In [32]:
mask1 = lm_df['CorrectText'].str.contains("\(1\)")
lm_df.loc[mask1, 'CorrectText'] = '1'

In [38]:
mask1 = lm_df['CorrectText']==1
lm_df.loc[mask1, 'CorrectText'] = '1'

In [42]:
mask0 = (lm_df['CorrectText']!='1') & (lm_df['CorrectText']!='0')
lm_df.loc[mask0, 'CorrectText'] = '0'

In [81]:
lm_df['CorrectText'].value_counts()

1    500
0    124
Name: CorrectText, dtype: int64

In [82]:
# write the dataframe to an XLSX file
lm_df.to_excel('LastMeal_Tagged_PNAS.xlsx', index=False)

## Grocery

In [24]:
len(gr_df)

1440

In [31]:
preamble = """
Background on power recall primes
For this task, you will be asked to annotate a sample of essays resulting from studies using recall primes. Before describing the task, we explain what we mean by “recall prime.” 
A recall prime involves having study participants recall a time in which they either had power over another person (high power condition), or in which someone else had power over them (low power condition). The high and low power condition recall tasks should prime (i.e., activate) the concept of high and low power, respectively. A person high in power can make decisions that determine the outcomes of some target, either by providing directions during a task, directly assigning resources to the target, or by simply evaluating the target. A person low in power is one whose outcomes are determined by others, either by receiving directions during a task, being assigned resources, or by being evaluated. These two power conditions are compared with a neutral task (control condition) in which people recall having equal power to someone else, or the last time they went to the grocery store, or what they had for their last meal. 
"""

prompt = """
For each essay in the sample, follow these instructions:
1. Carefully read the text of the essay, paying close attention to details. 
2. Classify each essay as relevant (1) or irrelevant (0)
Essays should be coded as RELEVANT if the essay can be classified as a direct response to the following prompt: “Please recall your last trip to the grocery store. Please describe this situation-- what happened, how you felt, etc.” For example, this would include essays that describe a situation in which the writer went to the grocery store.
Essays should be coded as IRRELEVANT if the essay cannot be classified as a direct response to the same prompt described above. For example, this would include essays that describe a situation in which the writer did not report on an experience in the grocery store. In addition, this should also include essays in which participants simply copied a significant portion of the prompt above, essays that contain gibberish, and/or essays that are too short to analyze. 

The output you should give is 1 if the essay is relevant, or 0 if the essay is irrelevant or undeterminable.
Only return 1 or 0. Do not return anything else.
"""



max_retries = 5  # Maximum number of retries
retry_delay = 10  # Delay in seconds between retries

# loop through the "Text"" column of the first 20 rows
for i in range(1409,1440):
    text = gr_df.iloc[i]['Text']
    #print(abstract)
    messages = [{
    "role": "system",
    "content": f"{preamble}"
    },
    {"role": "user", "content": f"{prompt}\nHere is the text: {text}"}
    ]

    retries = 0
    while retries < max_retries:
        try:
            response = response = openai.ChatCompletion.create(
                model=MODEL,
                messages=messages,
                request_timeout=60,
                temperature=0,
            )
            
            print(f"Row {i}: {response['choices'][0]['message']['content']}")
            print(response["usage"]["total_tokens"])
            gr_df.loc[gr_df.index[i], 'CorrectText'] = response["choices"][0]["message"]["content"]
    
            time.sleep(0.75)
            retries = max_retries
            
        except requests.exceptions.RequestException as e:
            print(f"API Error occurred: {e}")
            retries += 1
            print(f"Retrying API call {retries}/{max_retries}...")
            time.sleep(retry_delay)
        except requests.exceptions.ReadTimeoutError as e:
            print(f"API Error occurred: {e}")
            retries += 1
            print(f"Retrying API call {retries}/{max_retries}...")
            time.sleep(retry_delay)
        except openai.error.APIError as e:
            #Handle API error here, e.g. retry or log
            print(f"OpenAI API returned an API Error: {e}")
            retries += 1
            print(f"Retrying API call {retries}/{max_retries}...")
            time.sleep(retry_delay)
        except openai.error.APIConnectionError as e:
            #Handle connection error here
            print(f"Failed to connect to OpenAI API: {e}")
            retries += 1
            print(f"Retrying API call {retries}/{max_retries}...")
            time.sleep(retry_delay)
    

Row 1409: 1
518


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gr_df.loc[gr_df.index[i], 'CorrectText'] = response["choices"][0]["message"]["content"]


Row 1410: 0
504
Row 1411: 1
567
Row 1412: 1
570
Row 1413: 1
529
Row 1414: 1
511
Row 1415: 1
545
Row 1416: 1
541
Row 1417: 1
540
Row 1418: 1
541
Row 1419: 1
622
Row 1420: 1
523
Row 1421: 1
630
Row 1422: 1
539
Row 1423: 1
543
Row 1424: 1
578
Row 1425: 1
515
Row 1426: 1
521
Row 1427: 1
560
Row 1428: 1
522
Row 1429: 1
536
Row 1430: 1
524
Row 1431: 1
529
Row 1432: 1
520
Row 1433: 1
543
Row 1434: 1
574
Row 1435: 1
601
Row 1436: 1
580
Row 1437: 1
560
Row 1438: 1
604
Row 1439: 1
643


In [55]:
mask1 = gr_df['CorrectText'].str.contains("\(1\)")
gr_df.loc[mask1, 'CorrectText'] = '1'

mask1 = gr_df['CorrectText'].str.contains("is relevant")
gr_df.loc[mask1, 'CorrectText'] = '1'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gr_df.loc[mask1, 'CorrectText'] = '1'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gr_df.loc[mask1, 'CorrectText'] = '1'


In [64]:
mask0 = (gr_df['CorrectText']!='1') & (gr_df['CorrectText']!='0')
gr_df.loc[mask0, 'CorrectText'] = '0'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gr_df.loc[mask0, 'CorrectText'] = '0'


In [65]:
gr_df['CorrectText'].value_counts()

1    1318
0     122
Name: CorrectText, dtype: int64

In [66]:
# write the dataframe to an XLSX file
gr_df.to_excel('Grocery_Tagged_PNAS.xlsx', index=False)

## Equal Power

In [33]:
len(ep_df)

434

In [39]:
preamble = """
Background on power recall primes
For this task, you will be asked to annotate a sample of essays resulting from studies using recall primes. Before describing the task, we explain what we mean by “recall prime.” 
A recall prime involves having study participants recall a time in which they either had power over another person (high power condition), or in which someone else had power over them (low power condition). The high and low power condition recall tasks should prime (i.e., activate) the concept of high and low power, respectively. A person high in power can make decisions that determine the outcomes of some target, either by providing directions during a task, directly assigning resources to the target, or by simply evaluating the target. A person low in power is one whose outcomes are determined by others, either by receiving directions during a task, being assigned resources, or by being evaluated. These two power conditions are compared with a neutral task (control condition) in which people recall having equal power to someone else, or the last time they went to the grocery store, or what they had for their last meal. 
"""

prompt = """
For each essay in the sample, follow these instructions:
1. Carefully read the text of the essay, paying close attention to details. 
2. Classify each essay as relevant (1) or irrelevant (0)
Essays should be coded as RELEVANT if the essay can be classified as a direct response to the following prompt: “Please recall a particular incident in which you had the same amount of power as another individual or individuals. By power, we mean a situation in which both of you had the same control over the other person or persons to get something they wanted, or both of you were in a position to evaluate each other. Please describe this situation in which you had equal power--what happened, how you felt, etc.” For example, this would include essays that describe a situation in which the writer and the other individual or individuals had the same control over each other, and/or their outcomes were controlled by each other to a similar extent. 
Essays should be coded as IRRELEVANT if the essay cannot be classified as a direct response to the same prompt described above. For example, this would include essays that describe a situation in which the writer and another individual or individuals did have unequal power, their ability to get something they wanted was unequal, or they were either evaluated by another individual or individuals or evaluated them. In addition, this should also include essays in which participants simply copied a significant portion of the prompt above, essays that contain gibberish, and/or essays that are too short to analyze. 

The output you should give is 1 if the essay is relevant, or 0 if the essay is irrelevant or undeterminable.
Only return 1 or 0. Do not return anything else.
"""

max_retries = 5  # Maximum number of retries
retry_delay = 10  # Delay in seconds between retries

# loop through the "Text"" column of the first 20 rows
for i in range(241,434):
    text = ep_df.iloc[i]['Text']
    #print(abstract)
    messages = [{
    "role": "system",
    "content": f"{preamble}"
    },
    {"role": "user", "content": f"{prompt}\nHere is the text: {text}"}
    ]

    retries = 0
    while retries < max_retries:
        try:
            response = response = openai.ChatCompletion.create(
                model=MODEL,
                messages=messages,
                request_timeout=60,
                temperature=0,
            )
            
            print(f"Row {i}: {response['choices'][0]['message']['content']}")
            print(response["usage"]["total_tokens"])
            ep_df.loc[ep_df.index[i], 'CorrectText'] = response["choices"][0]["message"]["content"]
    
            time.sleep(0.75)
            retries = max_retries
            
        except requests.exceptions.RequestException as e:
            print(f"API Error occurred: {e}")
            retries += 1
            print(f"Retrying API call {retries}/{max_retries}...")
            time.sleep(retry_delay)
        except openai.error.APIError as e:
            #Handle API error here, e.g. retry or log
            print(f"OpenAI API returned an API Error: {e}")
            retries += 1
            print(f"Retrying API call {retries}/{max_retries}...")
            time.sleep(retry_delay)
        except openai.error.APIConnectionError as e:
            #Handle connection error here
            print(f"Failed to connect to OpenAI API: {e}")
            retries += 1
            print(f"Retrying API call {retries}/{max_retries}...")
            time.sleep(retry_delay)
    

Row 241: 1
657


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ep_df.loc[ep_df.index[i], 'CorrectText'] = response["choices"][0]["message"]["content"]


Row 242: 0
624
Row 243: 1
629
Row 244: 1
686
Row 245: 1
675
Row 246: 1
688
Row 247: 1
671
Row 248: 1
733
Row 249: 1
626
Row 250: 1
662
Row 251: 1
644
Row 252: 1
673
Row 253: 1
704
Row 254: 1
728
Row 255: 1
686
Row 256: 1
745
Row 257: 1
653
Row 258: 1
733
Row 259: 1
670
Row 260: 0
734
Row 261: 1
649
Row 262: 1
691
Row 263: 1
660
Row 264: 1
671
Row 265: 1
662
Row 266: 1
650
Row 267: 1
641
Row 268: 1
654
Row 269: 1
654
Row 270: 1
656
Row 271: 0
593
Row 272: 0
612
Row 273: 0
648
Row 274: 1
641
Row 275: 0
598
Row 276: 1
678
Row 277: 1
675
Row 278: 1
779
Row 279: 1
688
Row 280: 1
693
Row 281: 0
659
Row 282: 1
715
Row 283: 1
641
Row 284: 1
631
Row 285: 1
659
Row 286: 1
699
Row 287: 1
673
Row 288: 0
624
Row 289: 1
641
Row 290: 1
757
Row 291: 1
630
Row 292: 1
644
Row 293: 0
654
Row 294: 1
731
Row 295: 0
688
Row 296: 0
601
Row 297: 0
713
Row 298: 1
664
Row 299: 1
643
Row 300: 1
671
Row 301: 1
659
Row 302: 1
647
Row 303: 1
628
Row 304: 1
633
Row 305: 0
625
Row 306: 1
655
Row 307: 0
625
Row 308: 1

In [60]:
ep_df['CorrectText'].value_counts()

1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       

In [107]:
mask1 = ep_df['CorrectText'].str.contains("\(1\)")
ep_df.loc[mask1, 'CorrectText'] = '1'

mask1 = ep_df['CorrectText'].str.contains("is relevant")
ep_df.loc[mask1, 'CorrectText'] = '1'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ep_df.loc[mask1, 'CorrectText'] = '1'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ep_df.loc[mask1, 'CorrectText'] = '1'


In [61]:
mask0 = (ep_df['CorrectText']!='1') & (ep_df['CorrectText']!='0')
ep_df.loc[mask0, 'CorrectText'] = '0'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ep_df.loc[mask0, 'CorrectText'] = '0'


In [62]:
# write the dataframe to an XLSX file
ep_df.to_excel('EqualPower_Tagged_PNAS.xlsx', index=False)

## Low Power

In [194]:
len(lp_df)

3460

In [195]:
lp_df

Unnamed: 0,Year,Sample,Researcher,SourceFile,Condition,Text,Age,Female,Country,CorrectText
220,2014,MTurk,Pitesa & Thau,Power Attribution - Study 1,LP,A situation in which someone else has power ov...,26,1,USA,
221,2014,MTurk,Pitesa & Thau,Power Attribution - Study 1,LP,A situation in which someone had control over ...,28,0,USA,
222,2014,MTurk,Pitesa & Thau,Power Attribution - Study 1,LP,A situation where I did not have power is when...,23,1,USA,
223,2014,MTurk,Pitesa & Thau,Power Attribution - Study 1,LP,About two years ago I met a lovely young woman...,29,0,USA,
224,2014,MTurk,Pitesa & Thau,Power Attribution - Study 1,LP,"At my work, when I was starting out my supervi...",33,0,USA,
...,...,...,...,...,...,...,...,...,...,...
8395,2019,MTurk,"Schaerer, He, & Baily Wolf",Reframing_LP_as_Opportunity_-_Recall_-_Study_1...,LP,i plow ran me off the road in when is was snow...,48,1,USA,
8396,2019,MTurk,"Schaerer, He, & Baily Wolf",Reframing_LP_as_Opportunity_-_Recall_-_Study_1...,LP,Recently I was trying to get a raise at work a...,28,0,USA,
8397,2019,MTurk,"Schaerer, He, & Baily Wolf",Reframing_LP_as_Opportunity_-_Recall_-_Study_1...,LP,I wanted to do a report for my company that I ...,29,1,USA,
8398,2019,MTurk,"Schaerer, He, & Baily Wolf",Reframing_LP_as_Opportunity_-_Recall_-_Study_1...,LP,"Back about 8 years ago, I was not working outs...",47,1,USA,


In [42]:
import socket

In [44]:
preamble = """
Background on power recall primes
For this task, you will be asked to annotate a sample of essays resulting from studies using recall primes. Before describing the task, we explain what we mean by “recall prime.” 
A recall prime involves having study participants recall a time in which they either had power over another person (high power condition), or in which someone else had power over them (low power condition). The high and low power condition recall tasks should prime (i.e., activate) the concept of high and low power, respectively. A person high in power can make decisions that determine the outcomes of some target, either by providing directions during a task, directly assigning resources to the target, or by simply evaluating the target. A person low in power is one whose outcomes are determined by others, either by receiving directions during a task, being assigned resources, or by being evaluated. These two power conditions are compared with a neutral task (control condition) in which people recall having equal power to someone else, or the last time they went to the grocery store, or what they had for their last meal. 
"""

prompt = """
For each essay in the sample, follow these instructions:
1. Carefully read the text of the essay, paying close attention to details. 
2. Classify each essay as relevant (1) or irrelevant (0)
Essays should be coded as RELEVANT if the essay can be classified as a direct response to the following prompt: “Please recall a particular incident in which someone else had power over you. By power, we mean a situation in which someone had control over your ability to get something you wanted, or was in a position to evaluate you. Please describe a situation in which you did not have  power—what happened, how you felt, etc.” For example, this would include essays that describe a situation in which the writer was being supervised by another individual, was subordinate to another individual, and/or their outcomes were controlled by another individual.
Essays should be coded as IRRELEVANT if the essay cannot be classified as a direct response to the same prompt described above. For example, this would include essays that describe a situation in which the writer was not controlled by another individual or individuals, their ability get something they wanted was not controlled by of another person or persons, or they were not evaluated by another individual or individuals. In addition, this should also include essays in which participants simply copied a significant portion of the prompt above, essays that contain gibberish, and/or essays that are too short to analyze. 

The output you should give is 1 if the essay is relevant, or 0 if the essay is irrelevant or undeterminable.
Only return 1 or 0. Do not return anything else.
"""

max_retries = 5  # Maximum number of retries
retry_delay = 15  # Delay in seconds between retries

# loop through the "Text"" column of the first 20 rows
for i in range(779,3460):
    text = lp_df.iloc[i]['Text']
    #print(abstract)
    messages = [{
    "role": "system",
    "content": f"{preamble} "
    },
    {"role": "user", "content": f"{prompt}\nHere is the text: {text}"}
    ]

    retries = 0
    while retries < max_retries:
        try:
            response = response = openai.ChatCompletion.create(
                model=MODEL,
                messages=messages,
                request_timeout=60,
                temperature=0,
            )
            
            print(f"Row {i}: {response['choices'][0]['message']['content']}")
            print(response["usage"]["total_tokens"])
            lp_df.loc[lp_df.index[i], 'CorrectText'] = response["choices"][0]["message"]["content"]
    
            time.sleep(0.75)
            retries = max_retries
            
        except requests.exceptions.RequestException as e:
            print(f"API Error occurred: {e}")
            retries += 1
            print(f"Retrying API call {retries}/{max_retries}...")
            time.sleep(retry_delay)
        except Exception as e:
            #Handle API error here, e.g. retry or log
            print(f"OpenAI API returned an API Error: {e}")
            retries += 1
            print(f"Retrying API call {retries}/{max_retries}...")
            time.sleep(retry_delay)
        except openai.error.APIError as e:
            #Handle API error here, e.g. retry or log
            print(f"OpenAI API returned an API Error: {e}")
            retries += 1
            print(f"Retrying API call {retries}/{max_retries}...")
            time.sleep(retry_delay)
        except openai.error.APIConnectionError as e:
            #Handle connection error here
            print(f"Failed to connect to OpenAI API: {e}")
            retries += 1
            print(f"Retrying API call {retries}/{max_retries}...")
            time.sleep(retry_delay)
        except openai.error.ServiceUnavailableError as e:
            #Handle connection error here
            print(f"Failed to connect to OpenAI API: {e}")
            retries += 1
            print(f"Retrying API call {retries}/{max_retries}...")
            time.sleep(retry_delay)
    

Row 779: 0
696


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lp_df.loc[lp_df.index[i], 'CorrectText'] = response["choices"][0]["message"]["content"]


Row 780: 1
608
Row 781: 1
653
Row 782: 1
826
Row 783: 1
686
Row 784: 1
612
Row 785: 1
632
Row 786: 1
607
Row 787: 1
712
Row 788: 1
632
Row 789: 1
730
Row 790: 1
723
Row 791: 1
627
Row 792: 1
692
Row 793: 1
622
Row 794: 1
647
Row 795: 1
656
Row 796: 1
650
Row 797: 1
794
Row 798: 1
735
Row 799: 1
823
Row 800: 1
699
Row 801: 1
608
Row 802: 1
744
Row 803: 1
685
Row 804: 1
624
Row 805: 1
644
Row 806: 1
629
Row 807: 1
654
Row 808: 1
622
Row 809: 1
625
Row 810: 1
632
Row 811: 1
630
Row 812: 1
646
Row 813: 1
691
Row 814: 1
608
Row 815: 1
709
Row 816: 1
669
Row 817: 1
654
Row 818: 1
695
Row 819: 1
606
Row 820: 1
663
Row 821: 1
645
Row 822: 1
622
Row 823: 1
674
Row 824: 1
693
Row 825: 1
682
Row 826: 1
674
Row 827: 1
635
Row 828: 1
669
Row 829: 1
629
Row 830: 1
593
Row 831: 1
672
Row 832: 1
593
Row 833: 1
614
Row 834: 1
714
Row 835: 0
620
Row 836: 1
693
Row 837: 1
636
Row 838: 1
627
Row 839: 1
640
Row 840: 1
651
Row 841: 1
707
Row 842: 1
688
Row 843: 1
647
Row 844: 1
607
Row 845: 1
615
Row 846: 1

Row 1306: 1
740
Row 1307: 1
593
Row 1308: 1
640
Row 1309: 1
635
Row 1310: 1
623
Row 1311: 1
809
Row 1312: 1
644
Row 1313: 1
666
Row 1314: 1
643
Row 1315: 1
1019
Row 1316: 1
732
Row 1317: 1
669
Row 1318: 1
688
Row 1319: 0
639
Row 1320: 1
622
Row 1321: 1
742
Row 1322: 1
615
Row 1323: 1
624
Row 1324: 1
648
Row 1325: 0
720
Row 1326: 1
617
Row 1327: 1
627
Row 1328: 1
667
Row 1329: 1
696
Row 1330: 1
747
Row 1331: 1
622
Row 1332: 1
632
Row 1333: 1
648
Row 1334: 1
618
Row 1335: 1
702
Row 1336: 1
655
Row 1337: 1
652
Row 1338: 1
629
Row 1339: 1
626
Row 1340: 1
664
Row 1341: 1
629
Row 1342: 1
618
Row 1343: 1
658
Row 1344: 1
590
Row 1345: 1
626
Row 1346: 1
698
Row 1347: 1
681
Row 1348: 1
607
Row 1349: 1
624
Row 1350: 1
629
Row 1351: 1
735
Row 1352: 1
655
Row 1353: 1
649
Row 1354: 1
683
Row 1355: 1
630
Row 1356: 1
675
Row 1357: 1
658
Row 1358: 1
695
Row 1359: 1
714
Row 1360: 1
685
Row 1361: 1
638
Row 1362: 1
672
Row 1363: 1
629
Row 1364: 1
780
Row 1365: 1
641
Row 1366: 1
646
Row 1367: 1
644
Row 136

Row 1798: 1
653
Row 1799: 1
669
Row 1800: 1
618
Row 1801: 1
645
Row 1802: 1
656
Row 1803: 1
626
Row 1804: 1
644
Row 1805: 1
701
Row 1806: 1
618
Row 1807: 1
664
Row 1808: 1
653
Row 1809: 0
584
Row 1810: 1
644
Row 1811: 1
616
Row 1812: 0
604
Row 1813: 1
605
Row 1814: 1
739
Row 1815: 0
723
Row 1816: 1
642
Row 1817: 1
636
Row 1818: 1
617
Row 1819: 1
601
Row 1820: 0
612
Row 1821: 1
702
Row 1822: 1
623
Row 1823: 1
618
Row 1824: 1
627
Row 1825: 1
669
Row 1826: 1
674
Row 1827: 1
631
Row 1828: 1
676
Row 1829: 1
652
Row 1830: 1
692
Row 1831: 0
696
Row 1832: 1
655
Row 1833: 0
599
Row 1834: 1
610
Row 1835: 1
633
Row 1836: 1
716
Row 1837: 0
586
Row 1838: 1
686
Row 1839: 1
656
Row 1840: 1
578
Row 1841: 0
795
Row 1842: 1
682
Row 1843: 1
628
Row 1844: 1
618
Row 1845: 1
692
Row 1846: 1
605
Row 1847: 1
622
Row 1848: 1
610
Row 1849: 1
695
Row 1850: 1
674
Row 1851: 1
690
Row 1852: 1
600
Row 1853: 1
690
Row 1854: 0
583
Row 1855: 1
688
Row 1856: 1
698
Row 1857: 1
689
Row 1858: 1
678
Row 1859: 1
620
Row 1860

Row 2276: 1
602
Row 2277: 1
738
Row 2278: 1
624
Row 2279: 1
630
Row 2280: 1
690
Row 2281: 1
656
Row 2282: 1
668
Row 2283: 1
670
Row 2284: 1
612
Row 2285: 1
636
Row 2286: 1
757
Row 2287: 1
620
Row 2288: 1
641
Row 2289: 1
629
Row 2290: 1
627
Row 2291: 1
653
Row 2292: 1
626
Row 2293: 1
640
Row 2294: 1
583
Row 2295: 1
627
Row 2296: 1
656
Row 2297: 1
622
Row 2298: 1
592
Row 2299: 1
662
Row 2300: 1
648
Row 2301: 1
638
Row 2302: 1
642
Row 2303: 1
669
Row 2304: 1
632
Row 2305: 1
666
Row 2306: 1
651
Row 2307: 1
646
Row 2308: 1
613
Row 2309: 1
650
Row 2310: 1
601
Row 2311: 1
616
Row 2312: 1
601
Row 2313: 1
647
Row 2314: 1
692
Row 2315: 1
609
Row 2316: 0
581
Row 2317: 0
577
Row 2318: 1
585
Row 2319: 1
617
Row 2320: 0
577
Row 2321: to get something you wanted or was in a position to evaluate you. Please describe a situation in which you did not have power - what happened, how you felt, etc.
634
Row 2322: 1
598
Row 2323: 0
711
Row 2324: 1
586
Row 2325: 0
603
Row 2326: 1
708
Row 2327: 0
596
Row 2328

Row 2768: 1
626
Row 2769: 1
630
Row 2770: 1
639
Row 2771: 1
646
Row 2772: 1
663
Row 2773: 1
589
Row 2774: 1
710
Row 2775: 1
675
Row 2776: 1
647
Row 2777: 1
654
Row 2778: 1
616
Row 2779: 1
747
Row 2780: 1
684
Row 2781: 1
787
Row 2782: 1
849
Row 2783: 1
726
Row 2784: 1
664
Row 2785: 1
654
Row 2786: 1
670
Row 2787: 1
670
Row 2788: 1
633
Row 2789: 1
601
Row 2790: 1
648
Row 2791: 1
633
Row 2792: 1
652
Row 2793: 1
656
Row 2794: 1
715
Row 2795: 1
706
Row 2796: 1
622
Row 2797: 1
609
Row 2798: 1
620
Row 2799: 1
668
Row 2800: 1
618
Row 2801: 1
694
Row 2802: 1
804
Row 2803: 1
621
Row 2804: 1
653
Row 2805: 0
600
Row 2806: 1
632
Row 2807: 1
706
Row 2808: 1
641
Row 2809: 1
611
Row 2810: 1
617
Row 2811: 1
611
Row 2812: 1
630
Row 2813: 1
627
Row 2814: 1
692
Row 2815: 1
610
Row 2816: 1
658
Row 2817: 1
673
Row 2818: 1
631
Row 2819: 1
767
Row 2820: 1
633
Row 2821: 1
685
Row 2822: 1
637
Row 2823: 1
653
Row 2824: 1
685
Row 2825: 1
660
Row 2826: 1
616
Row 2827: 1
694
Row 2828: 1
630
Row 2829: 1
711
Row 2830

Row 3165: 1
629
Row 3166: 1
627
Row 3167: 0
621
Row 3168: 0
587
Row 3169: 0
615
Row 3170: 1
640
Row 3171: 0
601
Row 3172: 1
608
Row 3173: 1
583
Row 3174: 0
598
Row 3175: 0
602
Row 3176: 0
585
Row 3177: 0
645
Row 3178: 1
613
Row 3179: 0
653
Row 3180: 1
694
Row 3181: 1
809
Row 3182: 1
761
Row 3183: 0
624
Row 3184: 0
630
Row 3185: 0
616
Row 3186: 0
607
Row 3187: 1
625
Row 3188: 0
625
Row 3189: 0
598
Row 3190: 0
603
Row 3191: 1
639
Row 3192: 1
599
Row 3193: 1
612
Row 3194: 1
673
Row 3195: 1
659
Row 3196: 1
622
Row 3197: 1
602
Row 3198: 1
622
Row 3199: 0
611
Row 3200: 0
645
Row 3201: 0
658
Row 3202: 0
590
Row 3203: 1
690
Row 3204: 0
575
Row 3205: 1
622
Row 3206: 1
643
Row 3207: 1
620
Row 3208: 1
662
Row 3209: 0
631
Row 3210: 1
639
Row 3211: 1
666
Row 3212: 1
652
Row 3213: 1
654
Row 3214: 1
684
Row 3215: 1
735
Row 3216: is out of my control. My boss had the power to assign the project and evaluate our performance. I felt frustrated and demotivated because I knew that my colleague had more ex

In [78]:
mask0 = (lp_df['CorrectText'].str.contains("classified as irrelevant")) | (lp_df['CorrectText'].str.contains("coded as irrelevant"))
lp_df.loc[mask0, 'CorrectText'] = '0'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lp_df.loc[mask0, 'CorrectText'] = '0'


In [84]:
mask0 = (lp_df['CorrectText'].str.contains("classified as IRRELEVANT")) | (lp_df['CorrectText'].str.contains("coded as IRRELEVANT"))
lp_df.loc[mask0, 'CorrectText'] = '0'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lp_df.loc[mask0, 'CorrectText'] = '0'


In [91]:
mask0 = (lp_df['CorrectText'].str.contains("essay is irrelevant")) | (lp_df['CorrectText'].str.contains("the output should be 0"))
lp_df.loc[mask0, 'CorrectText'] = '0'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lp_df.loc[mask0, 'CorrectText'] = '0'


In [80]:
mask1 = (lp_df['CorrectText'].str.contains("classified as relevant")) | (lp_df['CorrectText'].str.contains("coded as relevant"))
lp_df.loc[mask1, 'CorrectText'] = '1'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lp_df.loc[mask1, 'CorrectText'] = '1'


In [82]:
mask1 = (lp_df['CorrectText'].str.contains("classified as RELEVANT")) | (lp_df['CorrectText'].str.contains("coded as RELEVANT"))
lp_df.loc[mask1, 'CorrectText'] = '1'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lp_df.loc[mask1, 'CorrectText'] = '1'


In [90]:
mask1 = (lp_df['CorrectText'].str.contains("essay is relevant")) | (lp_df['CorrectText'].str.contains("the output should be 1"))
lp_df.loc[mask1, 'CorrectText'] = '1'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lp_df.loc[mask1, 'CorrectText'] = '1'


In [93]:
mask1 = (lp_df['CorrectText'].str.contains("coded as 1 \(relevant\)")) 
lp_df.loc[mask1, 'CorrectText'] = '1'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lp_df.loc[mask1, 'CorrectText'] = '1'


In [57]:
mask0 = (lp_df['CorrectText']!='1') & (lp_df['CorrectText']!='0')
lp_df.loc[mask0, 'CorrectText'] = '0'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  lp_df.loc[mask0, 'CorrectText'] = '0'


In [58]:
lp_df['CorrectText'].value_counts()

1    3010
0     450
Name: CorrectText, dtype: int64

In [59]:
# write the dataframe to an XLSX file
lp_df.to_excel('LowPower_Tagged_PNAS.xlsx', index=False)

## High Power

In [8]:
len(hp_df)

2442

In [61]:
hp_df

Unnamed: 0,Year,Sample,Researcher,SourceFile,Condition,Text,Age,Female,Country,CorrectText
122,2012,MTurk,Pitesa & Thau,p1_v3_s1_redo2,HP,A few years ago I was an instructor for teens;...,23,1,USA,Yes
123,2012,MTurk,Pitesa & Thau,p1_v3_s1_redo2,HP,A few years ago I was working as a manager at ...,25,0,USA,Yes
124,2012,MTurk,Pitesa & Thau,p1_v3_s1_redo2,HP,A few years ago on teh job I was assigned the ...,23,1,USA,Yes
125,2012,MTurk,Pitesa & Thau,p1_v3_s1_redo2,HP,"A few years back, before I ventured off to my ...",23,1,USA,Yes
126,2012,MTurk,Pitesa & Thau,p1_v3_s1_redo2,HP,A situation in which i had power over other in...,28,1,USA,Yes
...,...,...,...,...,...,...,...,...,...,...
8088,2019,MTurk,"Schaerer, He, & Baily Wolf",Reframing_LP_as_Opportunity_-_Recall_-_Study_1...,HP,One example of an incident which I felt very p...,34,0,USA,Yes
8089,2019,MTurk,"Schaerer, He, & Baily Wolf",Reframing_LP_as_Opportunity_-_Recall_-_Study_1...,HP,I was put in charge of a project that was a hi...,46,0,USA,Yes
8090,2019,MTurk,"Schaerer, He, & Baily Wolf",Reframing_LP_as_Opportunity_-_Recall_-_Study_1...,HP,I was in charge of putting together a team for...,48,0,USA,Yes
8091,2019,MTurk,"Schaerer, He, & Baily Wolf",Reframing_LP_as_Opportunity_-_Recall_-_Study_1...,HP,I was the president of a local non profit frat...,44,0,USA,Yes


In [49]:
preamble = """
Background on power recall primes
For this task, you will be asked to annotate a sample of essays resulting from studies using recall primes. Before describing the task, we explain what we mean by “recall prime.” 
A recall prime involves having study participants recall a time in which they either had power over another person (high power condition), or in which someone else had power over them (low power condition). The high and low power condition recall tasks should prime (i.e., activate) the concept of high and low power, respectively. A person high in power can make decisions that determine the outcomes of some target, either by providing directions during a task, directly assigning resources to the target, or by simply evaluating the target. A person low in power is one whose outcomes are determined by others, either by receiving directions during a task, being assigned resources, or by being evaluated. These two power conditions are compared with a neutral task (control condition) in which people recall having equal power to someone else, or the last time they went to the grocery store, or what they had for their last meal. 
"""

prompt = """
For each essay in the sample, follow these instructions:
1. Carefully read the text of the essay, paying close attention to details. 
2. Classify each essay as relevant (1) or irrelevant (0)
Essays should be coded as RELEVANT if the essay can be classified as a direct response to the following prompt: “Please recall a particular incident in which you had power over another individual or individuals. By power, we mean a situation in which you controlled the ability of another person or persons to get something they wanted, or were in a position to evaluate those individuals. Please describe this situation in which you had power— what happened, how you felt, etc.” For example, this would include essays that describe a situation in which the writer was the supervisor of another individual, had authority over another individual, and/or was able to control the behavior or outcome of another individual.
Essays should be coded as IRRELEVANT if the essay cannot be classified as a direct response to the same prompt described above. For example, this would include essays that describe a situation in which the writer did not have power over another individual or individuals, did not control the ability of another person or persons to get something they wanted, or were not in a position to evaluate another individual or individuals. In addition, this should also include essays in which participants simply copied a significant portion of the prompt above, essays that contain gibberish, and/or essays that are too short to analyze. 

The output you should give is 1 if the essay is relevant, or 0 if the essay is irrelevant or undeterminable.
Only return 1 or 0. Do not return anything else.
"""

max_retries = 5  # Maximum number of retries
retry_delay = 15  # Delay in seconds between retries

# loop through the "Text"" column of the first 20 rows
for i in range(1328,2442):
    text = hp_df.iloc[i]['Text']
    #print(abstract)
    messages = [{
    "role": "system",
    "content": f"{preamble}"
    },
    {"role": "user", "content": f"{prompt}\nHere is the text: {text}"}
    ]

    retries = 0
    while retries < max_retries:
        try:
            response = response = openai.ChatCompletion.create(
                model=MODEL,
                messages=messages,
                temperature=0,
            )
            
            print(f"Row {i}: {response['choices'][0]['message']['content']}")
            print(response["usage"]["total_tokens"])
            hp_df.loc[hp_df.index[i], 'CorrectText'] = response["choices"][0]["message"]["content"]
    
            time.sleep(0.75)
            retries = max_retries
            
        except requests.exceptions.RequestException as e:
            print(f"API Error occurred: {e}")
            retries += 1
            print(f"Retrying API call {retries}/{max_retries}...")
            time.sleep(retry_delay)
        except openai.error.APIError as e:
            #Handle API error here, e.g. retry or log
            print(f"OpenAI API returned an API Error: {e}")
            retries += 1
            print(f"Retrying API call {retries}/{max_retries}...")
            time.sleep(retry_delay)
        except openai.error.APIConnectionError as e:
            #Handle connection error here
            print(f"Failed to connect to OpenAI API: {e}")
            retries += 1
            print(f"Retrying API call {retries}/{max_retries}...")
            time.sleep(retry_delay)
        except openai.error.ServiceUnavailableError as e:
            #Handle connection error here
            print(f"Failed to connect to OpenAI API: {e}")
            retries += 1
            print(f"Retrying API call {retries}/{max_retries}...")
            time.sleep(retry_delay)
    

Row 1328: 1
644


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hp_df.loc[hp_df.index[i], 'CorrectText'] = response["choices"][0]["message"]["content"]


Row 1329: 0
589
Row 1330: 1
681
Row 1331: 1
674
Row 1332: 1
631
Row 1333: 1
657
Row 1334: 1
649
Row 1335: 1
678
Row 1336: 1
633
Row 1337: 1
769
Row 1338: 1
639
Row 1339: 1
697
Row 1340: 1
627
Row 1341: 1
681
Row 1342: 0
610
Row 1343: 1
656
Row 1344: 1
694
Row 1345: 1
664
Row 1346: 1
669
Row 1347: 1
779
Row 1348: to get something they wanted, or were in a position to evaluate those individuals. Please describe this situation in which you had power— what happened, how you felt, etc.

Essay 1:
I remember a time when I was the manager of a team at work. I had the power to assign tasks and evaluate the performance of my team members. One incident that stands out is when I had to make a decision about promoting one of my team members. It was a tough decision, but ultimately I had the power to choose who would get the promotion. I felt a sense of responsibility and authority in that situation.

Output: 1

Essay 2:
I recall a time when I went to the grocery store last week. I was just a regula

Row 1694: 1
697
Row 1695: 1
666
Row 1696: 1
661
Row 1697: 1
646
Row 1698: 1
655
Row 1699: 1
721
Row 1700: 1
636
Row 1701: 1
619
Row 1702: 1
703
Row 1703: 1
707
Row 1704: 1
736
Row 1705: 1
617
Row 1706: 1
638
Row 1707: 1
643
Row 1708: 1
631
Row 1709: 1
679
Row 1710: 1
687
Row 1711: 1
661
Row 1712: 0
602
Row 1713: 1
689
Row 1714: 1
758
Row 1715: 1
711
Row 1716: 1
632
Row 1717: 1
707
Row 1718: 1
616
Row 1719: 1
606
Row 1720: 1
675
Row 1721: 1
631
Row 1722: 1
667
Row 1723: 1
686
Row 1724: 1
729
Row 1725: 1
684
Row 1726: 1
645
Row 1727: 1
632
Row 1728: 1
693
Row 1729: 1
636
Row 1730: 1
627
Row 1731: 1
625
Row 1732: 1
704
Row 1733: 1
740
Row 1734: 1
648
Row 1735: 1
633
Row 1736: 1
669
Row 1737: 1
709
Row 1738: 1
671
Row 1739: 1
624
Row 1740: 1
722
Row 1741: 1
649
Row 1742: 1
644
Row 1743: 1
637
Row 1744: 1
901
Row 1745: 1
650
Row 1746: 1
645
Row 1747: 1
616
Row 1748: 1
661
Row 1749: 1
855
Row 1750: 1
654
Row 1751: 1
639
Row 1752: 1
628
Row 1753: 1
608
Row 1754: 1
657
Row 1755: 1
657
Row 1756

Row 2181: 1
626
Row 2182: 0
600
Row 2183: 0
641
Row 2184: 0
598
Row 2185: 1
638
Row 2186: 1
662
Row 2187: 1
681
Row 2188: 1
739
Row 2189: 1
638
Row 2190: 1
688
Row 2191: 0
623
Row 2192: 1
625
Row 2193: 0
629
Row 2194: 1
658
Row 2195: 0
646
Row 2196: 1
700
Row 2197: 1
676
Row 2198: 0
586
Row 2199: 1
648
Row 2200: 1
704
Row 2201: 1
698
Row 2202: 0
608
Row 2203: 1
615
Row 2204: 0
616
Row 2205: 1
645
Row 2206: 1
599
Row 2207: 1
650
Row 2208: 0
587
Row 2209: 0
595
Row 2210: 1
669
Row 2211: 1
632
Row 2212: 0
645
Row 2213: 0
617
Row 2214: 0
598
Row 2215: 0
644
Row 2216: 1
617
Row 2217: 1
596
Row 2218: 1
718
Row 2219: 1
600
Row 2220: 1
631
Row 2221: 1
634
Row 2222: 1
612
Row 2223: 0
628
Row 2224: 1
690
Row 2225: 0
608
Row 2226: 0
601
Row 2227: 0
627
Row 2228: 1
635
Row 2229: 1
624
Row 2230: 1
592
Row 2231: 1
665
Row 2232: 1
601
Row 2233: 1
646
Row 2234: 1
638
Row 2235: 1
689
Row 2236: 1
648
Row 2237: 1
655
Row 2238: 1
629
Row 2239: controlled the ability of another person or persons to get som

In [55]:
hp_df['CorrectText'].value_counts()

1    2163
0     279
Name: CorrectText, dtype: int64

In [51]:
mask1 = (hp_df['CorrectText'].str.contains("classified as relevant")) | (hp_df['CorrectText'].str.contains("coded as relevant"))
hp_df.loc[mask1, 'CorrectText'] = '1'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hp_df.loc[mask1, 'CorrectText'] = '1'


In [53]:
mask0 = (hp_df['CorrectText']!='1') & (hp_df['CorrectText']!='0')
hp_df.loc[mask0, 'CorrectText'] = '0'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hp_df.loc[mask0, 'CorrectText'] = '0'


In [54]:
# write the dataframe to an XLSX file
hp_df.to_excel('HighPower_Tagged_PNAS.xlsx', index=False)

# All

In [84]:
all_df = pd.concat([hp_df, lp_df, ep_df, gr_df, lm_df])

In [108]:
all_df.reset_index(inplace=True)

In [146]:
old_df.to_excel('Power Recall Prime Text Collection Tagged PNAS.xlsx', index=False)

# Combined

In [131]:
old_df = pd.read_excel('Power Recall Prime Text Collection Tagged.xlsx', sheet_name='Sheet1', dtype=dtypes)

In [132]:
old_df['CorrectText'] = old_df['CorrectText'].replace({'Yes': 1, 'No': 0})

In [122]:
all_df.rename(columns={'CorrectText': 'Relevant'}, inplace=True)

In [133]:
old_df.rename(columns={'CorrectText': 'Relevant'}, inplace=True)

In [135]:
old_df.rename(columns={'Relevant': 'Relevant_Original'}, inplace=True)

In [143]:
old_df['Relevant_PNAS'] = all_df['Relevant']

In [142]:
mask0 = (old_df['Relevant_Original']!=1) & (old_df['Relevant_Original']!=0)
old_df.loc[mask0, 'Relevant_Original'] = '0'

In [144]:
columns_to_convert = ['Relevant_Original', 'Relevant_PNAS']
old_df[columns_to_convert] = old_df[columns_to_convert].astype(int)

In [145]:
old_df

Unnamed: 0,Year,Sample,Researcher,SourceFile,Condition,Text,Age,Female,Country,Relevant_Original,Relevant_PNAS
0,2012,MTurk,Pitesa & Thau,p1_v3_s1_redo2,HP,A few years ago I was an instructor for teens;...,23,1,USA,1,1
1,2012,MTurk,Pitesa & Thau,p1_v3_s1_redo2,HP,A few years ago I was working as a manager at ...,25,0,USA,1,1
2,2012,MTurk,Pitesa & Thau,p1_v3_s1_redo2,HP,A few years ago on teh job I was assigned the ...,23,1,USA,1,1
3,2012,MTurk,Pitesa & Thau,p1_v3_s1_redo2,HP,"A few years back, before I ventured off to my ...",23,1,USA,1,1
4,2012,MTurk,Pitesa & Thau,p1_v3_s1_redo2,HP,A situation in which i had power over other in...,28,1,USA,1,1
...,...,...,...,...,...,...,...,...,...,...,...
8395,2020,MTurk,"Foulk, De Pater, Schaerer, du Plessis, Lee, & ...",Power+and+Paranoia+R&R+-+Psych+Recall+-+MTurk+...,C (Last Meal),My wife was feeling not well so i made a fruit...,40,0,USA,0,1
8396,2020,MTurk,"Foulk, De Pater, Schaerer, du Plessis, Lee, & ...",Power+and+Paranoia+R&R+-+Psych+Recall+-+MTurk+...,C (Last Meal),I had a frozen pizza last night for dinner. Al...,45,1,USA,1,1
8397,2020,MTurk,"Foulk, De Pater, Schaerer, du Plessis, Lee, & ...",Power+and+Paranoia+R&R+-+Psych+Recall+-+MTurk+...,C (Last Meal),We had a very nice stir fry that my husband co...,58,1,USA,0,1
8398,2020,MTurk,"Foulk, De Pater, Schaerer, du Plessis, Lee, & ...",Power+and+Paranoia+R&R+-+Psych+Recall+-+MTurk+...,C (Last Meal),"I had rice, beans, and meat.I had dinner with ...",35,0,USA,1,1
