In [68]:
import dotenv
import os
import re
import os
import glob
import pandas as pd
from pathlib import Path
from openai import OpenAI
import json

# Pull settings from a local .env file into the process environment.
dotenv.load_dotenv()

OPENAI_KEY = os.getenv("OPENAI_API_KEY")

# Question/answer tables, one per coding level.
q_and_a_behavior_df = pd.read_csv("behavior_level.csv")
q_and_a_experiment_df = pd.read_csv("experimental_level.csv")

# Master database; add a column holding the first "<capital letter>_<digits>"
# code found in each Paper_Exp_ID value (NaN where there is none).
master_db_df = pd.read_csv("MasterDB_Full.csv")
paper_exp_codes = master_db_df["Paper_Exp_ID"].str.extract(r"([A-Z]\_\d+)")
master_db_df["Paper_Exp_ID_better"] = paper_exp_codes

In [113]:
def index_articles_by_code(paths, pattern=r"([A-Z]\_\d+)"):
    """Group file paths by the ID code found in each file name.

    Args:
        paths: Iterable of file paths (strings).
        pattern: Regular expression whose first capture group is the code.

    Returns:
        dict mapping each code to the list of paths carrying it, in the
        order the paths were given. Paths whose file name has no code are
        reported with a printed message and left out.
    """
    grouped = {}
    for path in paths:
        # Search the file name only: a parent directory that happens to look
        # like a code (e.g. ".../P_99/articles/") must not claim every file.
        match = re.search(pattern, os.path.basename(path))
        if match:
            grouped.setdefault(match.group(1), []).append(path)
        else:
            print("No match found.")
    return grouped


current_directory = os.getcwd()

relative_path = "articles/"

folder_path = os.path.join(current_directory, relative_path)

markdown_pattern = "*.mmd"
# glob returns files in arbitrary order; sort so that the parts of a
# multi-file article are always concatenated in the same order.
markdown_files = sorted(glob.glob(os.path.join(folder_path, markdown_pattern)))

pattern_id = r"([A-Z]\_\d+)"

# code -> list of .mmd files belonging to that code
file_dict = index_articles_by_code(markdown_files, pattern_id)


def read_files_to_string(docs: list, encoding: str = "utf-8"):
    """Read several text files and return their contents joined together.

    Args:
        docs: Paths of the files to read; contents are joined in this order
            with nothing inserted between them.
        encoding: Text encoding used to decode every file. Given explicitly
            so the result does not depend on the platform's locale default.

    Returns:
        A single string; empty when ``docs`` is empty.
    """
    return "".join(Path(d).read_text(encoding=encoding) for d in docs)


In [181]:
# Load the criteria definitions and discard the rows labelled
# 0, 1, 2, 9, 15 and 20 in a single chained expression.
inclusion_criteria = pd.read_csv("./raw_questions/Inclusion_Criteria.csv").drop(
    index=[0, 1, 2, 9, 15, 20]
)
inclusion_criteria


Unnamed: 0,Number,Step,Dimension,Description,Scale,Type,Location
3,1,Search,title,Title of the publication as captured by the se...,Free text,string,Pre-filled (R)
4,1,Search,authors,Authors of the publication as captured by the ...,Free text,string,Pre-filled (R)
5,1,Search,abstract,Abstract of the publication as captured by the...,Free text,string,Pre-filled (R)
6,1,Search,journal,Journal of the publication as captured by the ...,Free text,string,Pre-filled (R)
7,1,Search,year,Year of publication as captured by the search ...,Number,number,Pre-filled (R)
8,1,Search,doi,DOI as captured by the search engine; any empt...,Free text,string,Pre-filled (R)
10,1,Search,title_nudge,"Is ""nudge"", ""nudging"", or ""choice architecture...","[""Yes"", ""No""]",enum,Pre-filled (R)
11,1,Search,abstract_nudge,"Is ""nudge"", ""nudging"", or ""choice architecture...","[""Yes"", ""No""]",enum,Pre-filled (R)
12,1,Search,neither_nudge,"Is ""nudge"", ""nudging"", or ""choice architecture...","[""Yes"", ""No""]",enum,Pre-filled (R)
13,1,Search,index_paper,Assign an index number to the remaining public...,Number,number,Pre-filled (R)


In [165]:
# Build the function-calling schema from the inclusion-criteria table: every
# row becomes one required property named after its "Dimension".
result_dict = {"properties": {}}
required = []

for _, row in inclusion_criteria.iterrows():
    dimension = row["Dimension"]
    is_enum = row["Type"] == "enum"

    # Enum criteria are declared as strings restricted to the allowed values;
    # every other type is passed through as written in the table.
    prop = {
        "description": row["Description"],
        "type": "string" if is_enum else row["Type"],
    }

    # A missing Scale is read by pandas as NaN (never None), so test with
    # pd.notna before parsing the JSON list of allowed values.
    if is_enum and pd.notna(row["Scale"]):
        prop["enum"] = json.loads(row["Scale"])

    result_dict["properties"][dimension] = prop
    required.append(dimension)


tools = [
    {
        "type": "function",
        "function": {
            "name": "article_criteria",
            "description": "A list of questions to ask to determine if an article meets the inclusion criteria for the meta-analysis.",
            "parameters": {
                "type": "object",
                "properties": result_dict["properties"],
                "required": required,
            },
        },
    }
]

# Print the resulting dictionary
print(result_dict)

{'properties': {'title': {'description': 'Title of the publication as captured by the search engine', 'type': 'string'}, 'authors': {'description': 'Authors of the publication as captured by the search engine', 'type': 'string'}, 'abstract': {'description': 'Abstract of the publication as captured by the search engine', 'type': 'string'}, 'journal': {'description': 'Journal of the publication as captured by the search engine', 'type': 'string'}, 'year': {'description': 'Year of publication as captured by the search engine (2018-2022)', 'type': 'number'}, 'doi': {'description': 'DOI as captured by the search engine; any empty DOIs filled out by the internal team', 'type': 'string'}, 'title_nudge': {'description': 'Is "nudge", "nudging", or "choice architecture" in the title? (It should be in the title or abstract per the search.)', 'type': 'string', 'enum': ['Yes', 'No']}, 'abstract_nudge': {'description': 'Is "nudge", "nudging", or "choice architecture" in the abstract? (It should be i

In [166]:
# API client shared by the cells below. No key is passed explicitly --
# presumably it is picked up from the OPENAI_API_KEY environment variable
# loaded through dotenv earlier; confirm against the openai package docs.
client = OpenAI()


def run_article_inclusion(article_id, model="gpt-4-1106-preview"):
    """Send one article to the chat model and have it fill in the criteria.

    The article text is read from the files registered in ``file_dict`` under
    the key ``"A_<article_id>"``, and the module-level ``tools`` schema is
    offered to the model through the module-level ``client``.

    Args:
        article_id: Integer article number used to build the ``file_dict`` key.
        model: Name of the chat model to call.

    Returns:
        The completion object returned by ``client.chat.completions.create``.

    Raises:
        KeyError: If ``file_dict`` has no entry for this article.
    """
    article_text = read_files_to_string(file_dict["A_%d" % article_id])
    # Strip the placeholder tag before the text is sent.
    article_text = article_text.replace("MISSING_PAGE_EMPTY", "")

    messages = [
        {
            "role": "system",
            "content": "You are a researcher looking for articles to include in a meta-analysis. You have a set of inclusion criteria that you will use to determine if an article is relevant to your meta-analysis. Ignore MISSING_PAGE_EMPTY tag in the text and never give that as an answer.",
        },
        {"role": "user", "content": article_text},
    ]

    # Force a call to the criteria function. With tool_choice="auto" the model
    # may answer in plain text instead, leaving message.tool_calls empty for
    # callers that index into it.
    forced_tool = {
        "type": "function",
        "function": {"name": tools[0]["function"]["name"]},
    }
    completion = client.chat.completions.create(
        model=model, messages=messages, tools=tools, tool_choice=forced_tool
    )

    return completion

In [167]:
def get_completion_cost(comp, prompt_price_per_1k=0.01, completion_price_per_1k=0.03):
    """Estimate the price of one completion from its token usage.

    Args:
        comp: Object exposing ``usage.prompt_tokens`` and
            ``usage.completion_tokens``.
        prompt_price_per_1k: Price charged per 1000 prompt tokens.
        completion_price_per_1k: Price charged per 1000 completion tokens.

    Returns:
        The combined cost as a float, in the same currency unit as the prices.
    """
    return ((comp.usage.completion_tokens / 1000) * completion_price_per_1k) + (
        (comp.usage.prompt_tokens / 1000) * prompt_price_per_1k
    )

In [177]:
# Load the coded-papers table that the following cells group by
# "index_paper" and "Status".
inclusion_criteria_papers = pd.read_csv('./inclusion_db.csv')


In [178]:
# For each index_paper, count the non-null cells in every column; a count
# above 1 means that paper occupies more than one row of the table.
inclusion_criteria_papers.groupby('index_paper').count()

Unnamed: 0_level_0,Status,Problem (if any),first_coder,second_coder,third_coder,date_coding,title,authors,abstract,journal,year,doi,engine,old_index_pub,english_language,paper_link,author_email,retraction_criteria,impact_factor,already_run,count_studies,count_experiments,step2notes,pass_step2,index_experiment,experiment_gist,nudge_criteria,randomization_type,experiment_design,code_this,rollout_time,rct_criteria,step3notes,pass_step3,itt_encouragement,rollout_waves,randomization_method,assignment_level,randomization_stratification,unit_randomization,unit_analysis,step4notes,domain_WIP,geography,geography_notes,samplesize_criteria,condition_count,materials_criteria,prereg_criteria,data_criteria,step5notes,pass_step5,reality_criteria,outcomes_count,psychometric_count,stated_intent_count,hyp_count,hyp_continuous_count,hyp_binary_count,hyp_multilevel_count,hyp_multilevel_totallevels,behavior_real_count,behavior_continuous_count,behavior_binary_count,behavior_multilevel_count,behavior_multilevel_totallevels,step6notes,pass_step6,demand_criteria,selection_criteria,itt_criteria,artifacts_criteria,fidelity_criteria,step7notes,pass_step7,pass_overall,coder_consensus,batch
index_paper,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1
1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,1,1,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,2,2,2,2,2,2,2,2,2,1,1,2,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
3,1,1,1,1,0,1,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,1,0,1,1,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1
4,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,1,1,1,1,1,1,0,0,1,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
5,7,0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,0,7,7,7,7,7,7,7,7,0,0,0,0,7,0,1,0,0,0,0,0,0,0,0,7,7,0,7,7,7,7,7,7,7,7,7,0,0,7,0,0,0,0,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
230,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1
231,2,0,2,2,2,2,2,2,2,2,2,2,2,0,2,2,2,2,0,2,2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,0,2,2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
232,2,0,2,2,2,2,2,2,2,2,2,2,2,0,2,2,2,2,0,2,2,2,0,2,2,2,2,2,2,1,2,2,2,2,2,2,2,2,2,2,2,1,2,2,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2
233,1,0,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,0,1,1,1,0,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,0,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1


In [169]:
# Only keep papers that occupy exactly one row of the table
inclusion_criteria_papers = inclusion_criteria_papers.groupby("index_paper").filter(
    lambda x: len(x) == 1
)

# Sample up to 3 papers from each group (i.e. each status)
num_samples_per_group = 3
# Fixed seed so that re-running the notebook draws the same papers
sample_seed = 0
grouped_df = inclusion_criteria_papers.groupby("Status", group_keys=False)


def dynamic_sample(group, random_state=sample_seed):
    """Draw up to ``num_samples_per_group`` rows from one status group.

    Args:
        group: DataFrame holding the rows of a single group.
        random_state: Seed handed to ``DataFrame.sample`` so the draw is
            repeatable.

    Returns:
        The sampled rows; the whole group when it has fewer rows than
        ``num_samples_per_group``.
    """
    return group.sample(
        min(num_samples_per_group, len(group)), random_state=random_state
    )


sampled_df = grouped_df.apply(dynamic_sample).reset_index(drop=True)

# Separate copy that will receive the model-coded rows
results_df = sampled_df.copy()
sampled_df



Unnamed: 0,Status,Problem (if any),first_coder,second_coder,third_coder,date_coding,title,authors,abstract,journal,year,doi,engine,old_index_pub,index_paper,english_language,paper_link,author_email,retraction_criteria,impact_factor,already_run,count_studies,count_experiments,step2notes,pass_step2,index_experiment,experiment_gist,nudge_criteria,randomization_type,experiment_design,code_this,rollout_time,rct_criteria,step3notes,pass_step3,itt_encouragement,rollout_waves,randomization_method,assignment_level,randomization_stratification,unit_randomization,unit_analysis,step4notes,domain_WIP,geography,geography_notes,samplesize_criteria,condition_count,materials_criteria,prereg_criteria,data_criteria,step5notes,pass_step5,reality_criteria,outcomes_count,psychometric_count,stated_intent_count,hyp_count,hyp_continuous_count,hyp_binary_count,hyp_multilevel_count,hyp_multilevel_totallevels,behavior_real_count,behavior_continuous_count,behavior_binary_count,behavior_multilevel_count,behavior_multilevel_totallevels,step6notes,pass_step6,demand_criteria,selection_criteria,itt_criteria,artifacts_criteria,fidelity_criteria,step7notes,pass_step7,pass_overall,coder_consensus,batch
0,Full failed,,Anoushka K,Linnea G.,-,10/23/22,MySmartE â€“ An eco-feedback and gaming platfo...,"Kim, Huijeong; Ham, Sangwoo; Promann, Marlen; ...","In this paper, we present a first-time cloud-b...",Building and Environment,2022,10.1016/j.buildenv.2022.109252,ScienceDirect,41.0,36,Y,https://drive.google.com/file/d/1FNQLs5kbovX29...,pkarava@purdue.edu,Y,,Y,2.0,1.0,One of the studies was a post experiment inter...,Y,1,Testing the effects of a MySmartE (designed to...,Y,,,,,N,,,,,,,,,,,Environment/Energy,United States of America,,N,1,Y,N,N,Data does not pass sample size criteria at the...,N,Y,1,,,0.0,,,,,1,1.0,0.0,0.0,0.0,Behavior = change in temperature indicating re...,N,Y,Y,Y,Y,Y,Not really run as an RCT in the way we mean he...,N,N,Consensus,-
1,Full failed,,Anoushka K,Linnea G.,-,11/6/22,The influence of risk perception on energy eff...,"Rockstuhl, Sebastian; Wenninger, Simon; Wiethe...",Energy efficiency investments are typically ba...,Energy Policy,2022,10.1016/j.enpol.2022.113033,ScienceDirect,60.0,51,Y,https://drive.google.com/file/d/1GGZMx0wjSSOnI...,simon.wenninger@fim-rc.de,Y,,Y,1.0,1.0,-,Y,1,Choice experiment to see if certain nudges can...,Y,,,,,Y,,,,,,,,,,,Environment/Energy,Germany,,N,3,Y,N,N,"Materials are not very complete, they show an ...",N,N,1,,,1.0,,,,,0,0.0,0.0,0.0,0.0,"Hypothetical - online choice experiment, simul...",N,-,N,-,-,-,"""The invitations to the choice experiment were...",-,N,Consensus,-
2,Full failed,,Anoushka K,Linnea G.,-,10/8/22,Disaster selfies: How ethical reminders can mi...,"Akin, Tugce;Schuckmann, Julia",Joyful selfies taken at disaster sites create ...,International Journal of Consumer Studies,2022,10.1111/ijcs.12843,PsychInfo,17.0,17,Y,https://drive.google.com/file/d/1-3INQAZ7dIdVN...,julia.vonschuckmann@tum.de,Y,,Y,1.0,1.0,-,Y,1,Using ethical reminders to see if individuals ...,Y,,,,,Y,,,,,,,,,,,Prosocial,Not reported,Not reported,Y,2,Y,Y,N,Geography not reported -- authors are from uni...,Y,N,1,,,1.0,,,,,0,0.0,0.0,0.0,0.0,Measures: liking morally ambiguous content (ie...,N,-,-,-,-,-,-,-,N,Consensus,-
3,Passed,,Anoushka K,Linnea G.,-,10/15/22,Enhancing the adoption of digital public servi...,"Hyytinen, Ari; Tuimala, Jarno; Hammar, Markus",Governments increasingly digitalize the provis...,Government Information Quarterly,2022,10.1016/j.ecolecon.2021.107271,ScienceDirect,26.0,26,Y,https://drive.google.com/file/d/1c6Y8_6bDmlX3T...,ari.hyytinen@hanken.fi,Y,,Y,1.0,1.0,,Y,1,Field experiment using nudges to prompt use of...,Y,Between-subjects only,"Parallel Design (A, B)",Code all,No,Y,,Y,ITT or Encouragement Design,No,Simple,Individuals,N,taxpayers,taxpayers,,Policymaking,Finland,,Y,10,Y,N,N,"Control = 20k (no letter at all), 9 treatments...",Y,Y,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0.0,1.0,0.0,0.0,Behavior = the outcome variable is a binary in...,Y,Y,Y,Y,Y,Y,They note using covariance test to check balan...,Y,Y,Consensus,2
4,Passed,,Linnea G.,Anoushka K.,-,2/5/23,Nudging energy efficiency audits: Evidence fro...,"Gillingham, Kenneth; Tsvetanov, Tsvetan",This paper uses a randomized field experiment ...,Journal of Environmental Economics and Management,2018,10.1016/j.jeem.2018.06.009,ScienceDirect,,208,Y,https://drive.google.com/file/d/1Rgvj6HPjWK-uN...,kenneth.gillingham@yale.edu,Y,,Y,1.0,1.0,,Y,1,experiment to help households who scheduled a ...,Y,Between-subjects only,"Parallel Design (A, B)",Code all,No,Y,,Y,ITT or Encouragement Design,"Yes, code as collapsed conditions",Simple,Individuals,Y,households,households,"""Each of these weekly lists can be thought of ...",Environment/Energy,United States of America,,Y,2,Y,N,N,Conditions:\n1. Control\n2. Treatment - person...,Y,Y,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,0.0,2.0,0.0,0.0,Measures:\n1. % of households following throug...,Y,Y,Y,Y,Y,Y,Balance checks look OK.,Y,Y,Consensus,11
5,Passed,,Anna L,Anoushka K.,Linnea G.,1/18/23,Field experiment of smartphone-based energy ef...,"Kim, Hyun Bae; Iwamatsu, Toshiya; Nishio, Ken-...",This study aims to reveal the effectiveness of...,Energy and Buildings,2020,10.1016/j.enbuild.2020.110151,ScienceDirect,,125,Y,https://drive.google.com/file/d/16MUml943jzZJQ...,hyunbae.kim27@gmail.com,Y,,Y,5.0,1.0,"Authors say it's an RCT with ""2 sub-RCTs"" but ...",Y,1,impact of push notifications about energy effi...,Y,Between-subjects only,"Parallel Design (A, B)",Code all,No,Y,,Y,ITT or Encouragement Design,No,Simple,Individuals,N,household,household,,Environment/Energy,Japan,,Y,2,Y,N,N,At the highest level --> use this\n1. Control ...,Y,Y,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,1.0,0.0,0.0,0.0,Outcome\n1. daily electricity consumption of h...,Y,Y,Y,Y,Y,Y,No apparent issues,Y,Y,Consensus,13
6,Quick failed,fails bc outcome is perception,Linnea G.,,,4/5/23,Public acceptability of nudging and taxing to ...,"Reynolds, J. P.; Archer, S.; Pilling, M.; Kenn...",There is growing evidence for the effectivenes...,Social Science & Medicine,2019,,ScienceDirect,,182,Y,https://drive.google.com/file/d/1dWMCBZ4O-kZmL...,tm388@medschl.cam.ac.uk,Y,,Y,,,,,,,,,,,,,,,,,,,,,,,Health,United Kingdom,,,-,,N,-,,,-,0,,,,,,,,-,,,,,,,,,,,,,,N,,-
7,Quick failed,fails sample size,Anna L,,,1/31/23,Motivating non-ratepaying households with feed...,"Crago, Christine L.; Spraggon, John M.; Hunter...",Behavioral interventions have been shown to be...,Energy Policy,2020,10.1016/j.enpol.2020.111764,ScienceDirect,,133,Y,https://drive.google.com/file/d/1Z4hiAoNnSQYfw...,ccrago@resecon.umass.edu,Y,,Y,1.0,1.0,,Y,1,tested the impact of feedback on electricity u...,Y,,,,,Y,,Y,,,,,,household,household,,Environment/Energy,United States of America,,N,3,Y,N,N,Condition 1 = control; Condition 2 = feedback ...,N,-,0,,,,,,,,-,,,,,,,,,,,,,,N,,-
8,Quick failed,Sample size,Anna L,,,2/3/23,Impact of nudge strategies on nutrition educat...,"Grady, Alice;Barnes, Courtney;Lum, Melanie;Jon...",Objective: To assess the impact of incorporati...,Journal of Nutrition Education and Behavior,2020,10.1016/j.jneb.2020.11.017,PsychInfo,,83,Y,https://drive.google.com/file/d/16wl-zi7PSsHqF...,Alice.Grady@health. nsw.gov.au,Y,,Y,1.0,1.0,-,Y,1,tested impact of incorporating nudges in a nut...,Y,,,,,Y,,,,,,,,Early childhood education and care center,Early childhood education and care center,,Education,Australia,,N,2,N,N,N,Condition 1 = generic email invitation (contro...,N,Y,0,,,,,,,,-,,,,,,,,,,,,,,N,Consensus,-
9,Quick failed -- CLEANED,fails sample and reality,Linnea G.,Anoushka K.,,4/5/23,Deciding to invest responsibly: Choice archite...,"Hoffmann, Robert;Cam, Marie-Anne;Camilleri, Ad...",We report results from a framed field experime...,Journal of Behavioral and Experimental Economics,2019,10.1016/j.socec.2019.04.005,PsychInfo,,160,Y,https://drive.google.com/file/d/1bMP_X3-n8wXmA...,robert.hoffmann@rmit.edu.au,Y,,Y,1.0,1.0,,Y,1,framed field experiment simulating investment ...,Y,Between-subjects only,"Factorial Design (A, B, A + B, Nothing)",Code all,No,Y,3x3 factorial design on default (no x static x...,Y,Not ITT,No,Simple,Individuals,N,individuals,individuals,,Finance,Australia,,N,9,Y,N,N,"9 conditions (3x3), 459 participnts\nSupplemen...",N,N,2,,,2.0,,2.0,,,-,,,,,"They ""examine responses at the level of the in...",N,,,,,,,,N,,-


In [157]:
# Function to merge similar rows based on 'index_paper'
def merge_similar_rows(group):
    """Collapse all rows of one paper into a single row.

    Args:
        group: DataFrame with the rows sharing one ``index_paper`` value.

    Returns:
        ``group`` itself when it has a single row. Otherwise a one-row
        DataFrame where each column holds:
        - the first value, for ``index_paper`` or when every value is missing;
        - the single distinct non-missing value, when there is exactly one;
        - the list of distinct values (as returned by ``Series.unique``),
          when the rows disagree.
    """
    # If there's only one row, no need to merge
    if len(group) == 1:
        return group

    new_row = {}
    for column in group.columns:
        values = group[column]
        present = values.dropna()
        distinct_count = present.nunique()

        if column == "index_paper" or distinct_count == 0:
            # Nothing to reconcile: keep the key, or keep the missing marker
            # as a scalar rather than wrapping it in a list.
            new_row[column] = values.iloc[0]
        elif distinct_count == 1:
            # Take the value from the non-missing rows, so a leading NaN
            # cannot hide the one real value.
            new_row[column] = present.iloc[0]
        else:
            new_row[column] = values.unique().tolist()
    return pd.DataFrame([new_row])


# Collapse each paper's rows with merge_similar_rows, then stack the
# per-paper frames under a fresh 0..n-1 index.
per_paper_frames = []
for _, paper_rows in inclusion_criteria_papers.groupby("index_paper"):
    per_paper_frames.append(merge_similar_rows(paper_rows))

merged_df = pd.concat(per_paper_frames, ignore_index=True)

# Narrow the view down to paper 85
is_paper_85 = merged_df["index_paper"] == 85
merged_df = merged_df.loc[is_paper_85]
merged_df

Unnamed: 0,Status,Problem (if any),first_coder,second_coder,third_coder,date_coding,title,authors,abstract,journal,year,doi,engine,old_index_pub,index_paper,english_language,paper_link,author_email,retraction_criteria,impact_factor,already_run,count_studies,count_experiments,step2notes,pass_step2,index_experiment,experiment_gist,nudge_criteria,randomization_type,experiment_design,code_this,rollout_time,rct_criteria,step3notes,pass_step3,itt_encouragement,rollout_waves,randomization_method,assignment_level,randomization_stratification,unit_randomization,unit_analysis,step4notes,domain_WIP,geography,geography_notes,samplesize_criteria,condition_count,materials_criteria,prereg_criteria,data_criteria,step5notes,pass_step5,reality_criteria,outcomes_count,psychometric_count,stated_intent_count,hyp_count,hyp_continuous_count,hyp_binary_count,hyp_multilevel_count,hyp_multilevel_totallevels,behavior_real_count,behavior_continuous_count,behavior_binary_count,behavior_multilevel_count,behavior_multilevel_totallevels,step6notes,pass_step6,demand_criteria,selection_criteria,itt_criteria,artifacts_criteria,fidelity_criteria,step7notes,pass_step7,pass_overall,coder_consensus,batch


In [170]:
# Have the model code every sampled paper, then place each model-coded row
# directly under the human-coded row of the same paper. The table is rebuilt
# from sampled_df on every run, so re-running this cell never stacks a second
# set of GPT rows onto results_df.
gpt_rows = {}  # output position -> one model-coded row

for position, (_, row) in enumerate(sampled_df.iterrows()):
    paper_id = row["index_paper"]
    print("Paper ID: %d, Status: %s" % (paper_id, row["Status"]))

    article_completion = run_article_inclusion(paper_id)
    cost = get_completion_cost(article_completion)
    # Report the spend before parsing, so it is shown even if parsing fails
    print("Cost: $%.2f" % cost)

    # tool_calls is empty when the model answered in plain text
    tool_calls = article_completion.choices[0].message.tool_calls
    if not tool_calls:
        print("No tool call returned for paper %d; skipping." % paper_id)
        continue
    try:
        json_data = json.loads(tool_calls[0].function.arguments)
    except json.JSONDecodeError as err:
        print("Invalid JSON arguments for paper %d (%s); skipping." % (paper_id, err))
        continue

    json_data["index_paper"] = paper_id
    json_data["first_coder"] = "GPT"
    json_data["second_coder"] = "GPT"
    json_data["third_coder"] = "GPT"

    # Columns the model did not answer are filled with "N/A"
    gpt_rows[2 * position + 1] = {
        col: json_data.get(col, "N/A") for col in sampled_df.columns
    }

# Human rows take the even positions and model rows the odd ones; sorting by
# position interleaves them in a single pass.
human_df = sampled_df.copy()
human_df.index = range(0, 2 * len(human_df), 2)
gpt_df = pd.DataFrame(
    list(gpt_rows.values()), index=list(gpt_rows.keys()), columns=sampled_df.columns
)
results_df = pd.concat([human_df, gpt_df]).sort_index().reset_index(drop=True)

Paper ID: 36, Status: Full failed
Cost: $0.28
Paper ID: 51, Status: Full failed
Cost: $0.21
Paper ID: 17, Status: Full failed
Cost: $0.20
Paper ID: 26, Status: Passed
Cost: $0.27
Paper ID: 208, Status: Passed
Cost: $0.26
Paper ID: 125, Status: Passed
Cost: $0.14
Paper ID: 182, Status: Quick failed
Cost: $0.14
Paper ID: 133, Status: Quick failed
Cost: $0.14
Paper ID: 83, Status: Quick failed
Cost: $0.12
Paper ID: 160, Status: Quick failed -- CLEANED
Cost: $0.17
Paper ID: 201, Status: Quick failed -- CLEANED
Cost: $0.13
Paper ID: 166, Status: Quick failed -- CLEANED
Cost: $0.16


In [171]:
# Save the combined human/GPT table to disk, leaving out the row index.
results_df.to_csv("inclusion_criteria_results.csv", index=False)

In [175]:
# Columns of results_df with no matching "Dimension" in the criteria table,
# i.e. fields that the tool schema built from that table does not ask for.
set(results_df.columns) - set(inclusion_criteria.Dimension.values)

{'Problem (if any)',
 'Status',
 'already_run',
 'assignment_level',
 'batch',
 'code_this',
 'date_coding',
 'engine',
 'first_coder',
 'geography_notes',
 'hyp_binary_count',
 'hyp_continuous_count',
 'hyp_count',
 'hyp_multilevel_count',
 'hyp_multilevel_totallevels',
 'impact_factor',
 'index_experiment',
 'itt_encouragement',
 'old_index_pub',
 'paper_link',
 'pass_step2',
 'pass_step6',
 'pass_step7',
 'psychometric_count',
 'randomization_method',
 'randomization_stratification',
 'rollout_time',
 'rollout_waves',
 'second_coder',
 'stated_intent_count',
 'step6notes',
 'step7notes',
 'third_coder',
 'unit_analysis',
 'unit_randomization'}