In [17]:
import dotenv
import os
import re
import os
import glob
import pandas as pd
import numpy as np
from pathlib import Path
from openai import OpenAI
import json

# Load environment configuration through dotenv before anything reads it.
dotenv.load_dotenv()
OPENAI_KEY = os.environ.get("OPENAI_API_KEY")

# The client is constructed without explicit arguments.
client = OpenAI()

# Behavior-level and experiment-level question tables.
q_and_a_behavior_df = pd.read_csv("behavior_level.csv")
q_and_a_experiment_df = pd.read_csv("experimental_level.csv")

# Master database, plus a column holding the "<capital letter>_<digits>" code
# extracted from Paper_Exp_ID.
master_db_df = pd.read_csv("MasterDB_Full.csv")
master_db_df["Paper_Exp_ID_better"] = master_db_df["Paper_Exp_ID"].str.extract(
    r"([A-Z]\_\d+)"
)

In [18]:
def group_markdown_files(paths, id_pattern=r"([A-Z]\_\d+)"):
    """Group file paths by the article code found in each file name.

    The pattern is applied to the base name only. Matching against the whole
    path would let a parent directory that happens to look like a code decide
    the code of every file underneath it.

    Args:
        paths: Iterable of file paths.
        id_pattern: Regular expression whose first capture group is the code.

    Returns:
        dict mapping each code to the list of matching paths. Paths are
        visited in sorted order so the lists are deterministic regardless of
        the order in which the file system returned them.
    """
    grouped = {}
    for path in sorted(paths):
        found = re.search(id_pattern, os.path.basename(path))
        if found:
            grouped.setdefault(found.group(1), []).append(path)
        else:
            # Name the offending file so it can actually be tracked down.
            print("No match found.", path)
    return grouped


current_directory = os.getcwd()

relative_path = "articles/"

folder_path = os.path.join(current_directory, relative_path)

markdown_pattern = "*.mmd"
# glob gives no ordering guarantee; sort so reruns see the same sequence.
markdown_files = sorted(glob.glob(os.path.join(folder_path, markdown_pattern)))

pattern_id = r"([A-Z]\_\d+)"

# Article code -> list of markdown files belonging to that article.
file_dict = group_markdown_files(markdown_files, pattern_id)


def read_files_to_string(docs: list, encoding: str = "utf-8") -> str:
    """Concatenate the contents of several text files into one string.

    Args:
        docs: Paths of the files to read, in the order they should appear.
        encoding: Text encoding used to decode every file. It is given
            explicitly so the result does not depend on the platform's
            default locale encoding.

    Returns:
        The file contents joined back to back with no separator; an empty
        string when ``docs`` is empty.
    """
    # join() builds the result in one pass instead of repeated string +=.
    return "".join(Path(d).read_text(encoding=encoding) for d in docs)


In [19]:
# Read the raw inclusion-criteria table and discard the rows labelled
# 0, 1, 2, 9, 15 and 20 in a single expression.
inclusion_criteria = pd.read_csv('./raw_questions/Inclusion_Criteria.csv').drop(
    index=[0, 1, 2, 9, 15, 20]
)
inclusion_criteria


Unnamed: 0,Number,Step,Dimension,Description,Scale,Type,Who,Rating
3,1,Search,title,Title of the publication as captured by the se...,,string,Pre-filled (R),
4,1,Search,authors,Authors of the publication as captured by the ...,,string,Pre-filled (R),
5,1,Search,abstract,Abstract of the publication as captured by the...,,string,Pre-filled (R),
6,1,Search,journal,Journal of the publication as captured by the ...,,string,Pre-filled (R),
7,1,Search,year,Year of publication as captured by the search ...,,number,Pre-filled (R),
8,1,Search,doi,DOI as captured by the search engine; any empt...,,string,Pre-filled (R),
10,1,Search,title_nudge,"Is ""nudge"", ""nudging"", or ""choice architecture...","[""Yes"", ""No""]",enum,Pre-filled (R),
11,1,Search,abstract_nudge,"Is ""nudge"", ""nudging"", or ""choice architecture...","[""Yes"", ""No""]",enum,Pre-filled (R),
12,1,Search,neither_nudge,"Is ""nudge"", ""nudging"", or ""choice architecture...","[""Yes"", ""No""]",enum,Pre-filled (R),
13,1,Search,index_paper,Assign an index number to the remaining public...,,number,Pre-filled (R),


In [20]:
# Build the function-calling schema passed to the chat model: one property per
# row of `inclusion_criteria`, every one of them marked as required.
result_dict = {"properties": {}}
required = []

for index, row in inclusion_criteria.iterrows():
    dimension = row["Dimension"]
    is_enum = row["Type"] == "enum"

    # Enum criteria are expressed as strings restricted to a value list.
    prop = {
        "description": row["Description"],
        "type": "string" if is_enum else row["Type"],
    }

    # An empty Scale cell is read by pandas as NaN, which is not None and
    # which json.loads cannot parse, so test for missing values with pd.notna.
    if is_enum and pd.notna(row["Scale"]):
        prop["enum"] = json.loads(row["Scale"])

    result_dict["properties"][dimension] = prop
    required.append(dimension)


# The schema is assembled directly from the generated properties rather than
# by overwriting a placeholder definition.
tools = [
    {
        "type": "function",
        "function": {
            "name": "article_criteria",
            "description": "A list of questions to ask to determine if an article meets the inclusion criteria for the meta-analysis.",
            "parameters": {
                "type": "object",
                "properties": result_dict["properties"],
                "required": required,
            },
        },
    }
]
# Print the resulting dictionary
print(result_dict)

{'properties': {'title': {'description': 'Title of the publication as captured by the search engine', 'type': 'string'}, 'authors': {'description': 'Authors of the publication as captured by the search engine', 'type': 'string'}, 'abstract': {'description': 'Abstract of the publication as captured by the search engine', 'type': 'string'}, 'journal': {'description': 'Journal of the publication as captured by the search engine', 'type': 'string'}, 'year': {'description': 'Year of publication as captured by the search engine (2018-2022)', 'type': 'number'}, 'doi': {'description': 'DOI as captured by the search engine; any empty DOIs filled out by the internal team', 'type': 'string'}, 'title_nudge': {'description': 'Is "nudge", "nudging", or "choice architecture" in the title? (It should be in the title or abstract per the search.)', 'type': 'string', 'enum': ['Yes', 'No']}, 'abstract_nudge': {'description': 'Is "nudge", "nudging", or "choice architecture" in the abstract? (It should be i

In [21]:



def run_article_inclusion(article_id):
    """Ask the chat model to fill in the inclusion criteria for one article.

    The article text is read from the files registered in ``file_dict`` under
    the key ``"A_<article_id>"``, with every ``MISSING_PAGE_EMPTY`` tag
    stripped out before it is sent.

    Args:
        article_id: Numeric id of the article; formatted with ``%d``.

    Returns:
        The completion object returned by ``client.chat.completions.create``.

    Raises:
        KeyError: If no files are registered for the article.
    """
    article_key = "A_%d" % article_id
    if article_key not in file_dict:
        raise KeyError("No markdown files registered for article %s" % article_key)

    # The tag is a plain literal, so str.replace is enough (no regex needed).
    article_text = read_files_to_string(file_dict[article_key]).replace(
        "MISSING_PAGE_EMPTY", ""
    )

    messages = [
        {
            "role": "system",
            "content": "You are a researcher looking for articles to include in a meta-analysis. You have a set of inclusion criteria that you will use to determine if an article is relevant to your meta-analysis. Ignore MISSING_PAGE_EMPTY tag in the text and never give that as an answer.",
        },
        {"role": "user", "content": article_text},
    ]

    # Callers read `tool_calls[0]` unconditionally. With "auto" the model may
    # answer in plain text and leave tool_calls empty, so require the call.
    forced_tool = {
        "type": "function",
        "function": {"name": tools[0]["function"]["name"]},
    }
    completion = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=messages,
        tools=tools,
        tool_choice=forced_tool,
    )

    return completion

In [22]:
def get_completion_cost(comp, prompt_price_per_1k=0.01, completion_price_per_1k=0.03):
    """Estimate the cost of one completion from its token usage.

    Args:
        comp: Object exposing ``usage.prompt_tokens`` and
            ``usage.completion_tokens``.
        prompt_price_per_1k: Price charged per 1000 prompt tokens.
        completion_price_per_1k: Price charged per 1000 completion tokens.

    Returns:
        The cost as a float, in the same currency unit as the two prices.
        The defaults reproduce the rates this notebook previously hard-coded;
        confirm them against current pricing if the model changes.
    """
    usage = comp.usage
    return ((usage.completion_tokens / 1000) * completion_price_per_1k) + (
        (usage.prompt_tokens / 1000) * prompt_price_per_1k
    )

In [23]:
# Load the inclusion database; later cells group it by `index_paper` and
# filter it on `Status`.
inclusion_criteria_papers = pd.read_csv('./inclusion_db.csv')


In [24]:
# For each paper, count the non-null entries in every column.
inclusion_criteria_papers.groupby('index_paper').count()

Unnamed: 0_level_0,Status,Problem (if any),first_coder,second_coder,third_coder,date_coding,title,authors,abstract,journal,...,demand_criteria,selection_criteria,itt_criteria,artifacts_criteria,fidelity_criteria,step7notes,pass_step7,pass_overall,coder_consensus,batch
index_paper,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1,0,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
2,2,0,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2
3,1,1,1,1,0,1,1,1,1,1,...,0,0,0,0,0,0,0,1,0,1
4,1,0,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
5,7,0,7,7,7,7,7,7,7,7,...,7,7,7,7,7,7,7,7,7,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
230,1,0,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,0,1,1,1
231,2,0,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2
232,2,0,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2
233,1,0,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1


In [25]:
# Keep only the papers that occupy exactly one row of the inclusion database.
inclusion_criteria_papers = inclusion_criteria_papers.groupby("index_paper").filter(
    lambda x: len(x) == 1
)

# Restrict to the two statuses being compared: "Passed" and "Full failed".
filtered_df = inclusion_criteria_papers[
    inclusion_criteria_papers["Status"].isin(["Passed", "Full failed"])
]

# Sample up to 3 papers from each status. The seed is fixed so that rerunning
# the notebook draws the same papers.
num_samples_per_group = 3
sample_random_state = 0
grouped_df = filtered_df.groupby("Status", group_keys=False)


def dynamic_sample(group):
    """Return up to ``num_samples_per_group`` rows drawn from ``group``.

    Groups smaller than the requested sample size are returned in full
    (in shuffled order) instead of raising.
    """
    return group.sample(
        min(num_samples_per_group, len(group)), random_state=sample_random_state
    )


# Iterate the groups explicitly instead of using groupby.apply: each group
# keeps its "Status" column on every pandas version, which later cells need.
sampled_groups = [dynamic_sample(group) for _, group in grouped_df]
sampled_df = (
    pd.concat(sampled_groups) if sampled_groups else filtered_df.iloc[:0]
).reset_index(drop=True)

results_df = sampled_df.copy()
sampled_df


Unnamed: 0,Status,Problem (if any),first_coder,second_coder,third_coder,date_coding,title,authors,abstract,journal,...,demand_criteria,selection_criteria,itt_criteria,artifacts_criteria,fidelity_criteria,step7notes,pass_step7,pass_overall,coder_consensus,batch
0,Full failed,,Anoushka K,Linnea G.,-,1/31/23,Can nudges be transparent and yet effective?,"Bruns, Hendrik;Kantorowicz-Reznichenko, Elena;...",Nudges receive growing attention as an effecti...,Journal of Economic Psychology,...,-,-,-,-,-,-,-,N,Consensus,-
1,Full failed,,Anoushka K,Linnea G.,-,10/28/22,Nudging food service users to choose fruit- an...,"Yi, Sunghwan;Kanetkar, Vinay;Brauer, Paula",Although nudging has been found to promote the...,Appetite,...,Y,Y,Y,Y,N,"""Among sixty weekdays, 8 days had to be remove...",N,N,Consensus,-
2,Full failed,,Anoushka K,Linnea G.,-,10/2/22,Behavioral Nudges as Patient Decision Support ...,"Horne, Benjamin D.; Muhlestein, Joseph B.; Lap...",Background\n Medication adherence is generall...,American Heart Journal,...,-,-,-,-,-,-,-,N,Consensus,-
3,Passed,,Anoushka K,Linnea G.,-,11/13/22,Behavioural approaches and conservation messag...,"Walsh, Patrick J.","New Zealandâ€™s iconic bird species, the kiwi,...",Global Ecology and Conservation,...,Y,Y,Y,Y,Y,"""Due to some data entry, coding, and related e...",Y,Y,Consensus,4
4,Passed,,Linnea G.,Anoushka K.,-,5/15/23,Nudging green food: The effects of a hedonic c...,"Reinholdsson, Tommy;HedesstrÃ¶m, Martin;EjelÃ¶...",Meat consumption is associated with both publi...,Journal of Consumer Behaviour,...,Y,Y,Y,Y,Y,It's possible that customers could have visite...,,Y,Consensus,14
5,Passed,,Palashi S.,Anoushka K,Linnea G.,2/5/23,Testing for crowd out in social nudges: Eviden...,"Brandon, Alec;List, John A.;Metcalfe, Robert D...",This study considers the response of household...,PNAS Proceedings of the National Academy of Sc...,...,Y,Y,Y,Y,Y,Some PER participants got not only a call but ...,Y,Y,Consensus,10


In [157]:
# Function to merge similar rows based on 'index_paper'
def merge_similar_rows(group):
    """Collapse all rows of one paper into a single-row DataFrame.

    Args:
        group: DataFrame holding the rows that share one ``index_paper``.

    Returns:
        ``group`` itself when it has at most one row. Otherwise a new one-row
        DataFrame where each column holds:
          * the first value, for ``index_paper``;
          * NaN, when every entry in the column is missing;
          * the single distinct non-missing value, when there is exactly one
            (missing entries in other rows do not hide it);
          * the list of distinct values, when the rows disagree.
    """
    # Nothing to merge for an empty or single-row group.
    if len(group) <= 1:
        return group

    new_row = {}
    for column in group.columns:
        values = group[column]
        if column == "index_paper":  # identical within a group by construction
            new_row[column] = values.iloc[0]
            continue

        # Count distinct values on non-missing entries only. Series.nunique()
        # skips NaN, so [NaN, "x"] counted as one value and the first entry
        # (NaN) was kept instead of "x".
        present = values.dropna().unique()
        if len(present) == 0:
            new_row[column] = np.nan
        elif len(present) == 1:
            new_row[column] = present[0]
        else:
            new_row[column] = values.unique().tolist()
    return pd.DataFrame([new_row])


# groupby 'index_paper' and apply the merge function to each group
# One (possibly merged) frame per paper, stacked under a fresh 0..n-1 index.
per_paper_frames = [
    merge_similar_rows(paper_rows)
    for _paper_id, paper_rows in inclusion_criteria_papers.groupby("index_paper")
]
merged_df = pd.concat(per_paper_frames).reset_index(drop=True)

# Narrow the view down to paper 85.
merged_df = merged_df[merged_df["index_paper"] == 85]
merged_df

Unnamed: 0,Status,Problem (if any),first_coder,second_coder,third_coder,date_coding,title,authors,abstract,journal,year,doi,engine,old_index_pub,index_paper,english_language,paper_link,author_email,retraction_criteria,impact_factor,already_run,count_studies,count_experiments,step2notes,pass_step2,index_experiment,experiment_gist,nudge_criteria,randomization_type,experiment_design,code_this,rollout_time,rct_criteria,step3notes,pass_step3,itt_encouragement,rollout_waves,randomization_method,assignment_level,randomization_stratification,unit_randomization,unit_analysis,step4notes,domain_WIP,geography,geography_notes,samplesize_criteria,condition_count,materials_criteria,prereg_criteria,data_criteria,step5notes,pass_step5,reality_criteria,outcomes_count,psychometric_count,stated_intent_count,hyp_count,hyp_continuous_count,hyp_binary_count,hyp_multilevel_count,hyp_multilevel_totallevels,behavior_real_count,behavior_continuous_count,behavior_binary_count,behavior_multilevel_count,behavior_multilevel_totallevels,step6notes,pass_step6,demand_criteria,selection_criteria,itt_criteria,artifacts_criteria,fidelity_criteria,step7notes,pass_step7,pass_overall,coder_consensus,batch


In [37]:
# Run the inclusion prompt for paper 198, price the call, then decode the
# arguments of the first tool call in the first choice.
article_completion = run_article_inclusion(198)
cost = get_completion_cost(article_completion)
first_tool_call = article_completion.choices[0].message.tool_calls[0]
json_data = json.loads(first_tool_call.function.arguments)
json_data

{'title': 'Can nudges be made transparent and yet remain effective?',
 'authors': 'Authors not provided',
 'abstract': "The ethics of 'nudging'—using behavioral insights to subtly steer people's choices—has been hotly debated. Particularly controversial is the covert nature of many nudges, as critics argue it violates individual autonomy. Meanwhile, proponents claim transparency could inhibit nudge efficacy. We tested whether disclosing nudges’ intent—to increase contributions to climate protection—impacts their effectiveness. A laboratory experiment with real monetary trade-offs and contributions toward actual carbon license retirement demonstrates that transparent nudges can remain effective. This contributes to the discussion on ethical policy-making involving nudges, suggesting they can be openly deployed without compromising their utility.",
 'journal': 'Journal not provided',
 'year': 2020,
 'doi': '',
 'title_nudge': 'Yes',
 'abstract_nudge': 'Yes',
 'neither_nudge': 'No',
 'ind

In [48]:
# Compare, paper by paper and column by column, the model's answers with the
# human-coded ones. Records are collected in a list and turned into a frame
# once at the end: DataFrame.append no longer exists in pandas 2, and growing
# a frame inside a loop is quadratic.
comparison_records = []

for index, row in sampled_df.iterrows():
    print("Paper ID: %d, Status: %s" % (row["index_paper"], row["Status"]))
    article_completion = run_article_inclusion(row["index_paper"])
    cost = get_completion_cost(article_completion)
    json_data = json.loads(
        article_completion.choices[0].message.tool_calls[0].function.arguments
    )
    json_data["index_paper"] = row["index_paper"]
    json_data["first_coder"] = "GPT"
    json_data["second_coder"] = "GPT"
    json_data["third_coder"] = "GPT"

    print("Cost: $%.2f" % cost)

    for col in sampled_df.columns:
        if col not in json_data:
            continue

        # `row` already is this paper's record. Re-filtering sampled_df and
        # reading label 0 only worked for the first paper and raised KeyError
        # for every later one.
        human_answer = row[col]

        # Human coders use Y/N where the schema's enums use Yes/No.
        if human_answer == "Y":
            human_answer = "Yes"
        elif human_answer == "N":
            human_answer = "No"

        comparison_records.append(
            {
                "paper_id": row["index_paper"],
                "dimension": col,
                "gpt": json_data.get(col, "N/A"),
                "human": human_answer,
            }
        )

result_df = pd.DataFrame(
    comparison_records, columns=["paper_id", "dimension", "gpt", "human"]
)

result_df

Paper ID: 198, Status: Full failed
Cost: $0.21
Paper ID: 40, Status: Full failed


In [26]:
# Function that puts gpt answer in a row below the human answer
def process_sampled_df(sampled_df, results_df):
    """
    Query the model for every sampled paper and insert its answers into the results.

    For each row of ``sampled_df`` the model's answers are placed in a new row
    directly below the first row of ``results_df`` that has the same
    ``index_paper``. Columns the model did not answer are filled with "N/A",
    and the three coder columns are set to "GPT".

    Args:
        sampled_df (pandas.DataFrame): Papers to process; must provide the
            ``index_paper`` and ``Status`` columns.
        results_df (pandas.DataFrame): Frame to extend; must provide an
            ``index_paper`` column. The caller's object is not modified.

    Returns:
        pandas.DataFrame: A new frame containing the original rows plus one
        model row per sampled paper, with a fresh 0..n-1 index.

    Raises:
        IndexError: If a sampled paper has no row in ``results_df``.
    """
    for index, row in sampled_df.iterrows():
        print("Paper ID: %d, Status: %s" % (row["index_paper"], row["Status"]))
        article_completion = run_article_inclusion(row["index_paper"])
        cost = get_completion_cost(article_completion)
        json_data = json.loads(
            article_completion.choices[0].message.tool_calls[0].function.arguments
        )
        json_data["index_paper"] = row["index_paper"]
        json_data["first_coder"] = "GPT"
        json_data["second_coder"] = "GPT"
        json_data["third_coder"] = "GPT"

        print("Cost: $%.2f" % cost)

        # Build the model's row in one step instead of enlarging an empty
        # frame cell by cell.
        new_row_df = pd.DataFrame(
            [{col: json_data.get(col, "N/A") for col in results_df.columns}],
            columns=results_df.columns,
            dtype=object,
        )

        # Locate the human row by position, not by index label: the slices
        # below use iloc, and labels only equal positions on a default index.
        is_same_paper = (results_df["index_paper"] == json_data["index_paper"]).to_numpy()
        matching_positions = np.flatnonzero(is_same_paper)
        if len(matching_positions) == 0:
            raise IndexError(
                "Paper %s is not present in results_df" % json_data["index_paper"]
            )
        insert_after_pos = matching_positions[0]

        df1 = results_df.iloc[: insert_after_pos + 1]
        df2 = results_df.iloc[insert_after_pos + 1 :]

        results_df = pd.concat([df1, new_row_df, df2], ignore_index=True)

    return results_df

Paper ID: 198, Status: Full failed
Cost: $0.20
Paper ID: 40, Status: Full failed
Cost: $0.23
Paper ID: 6, Status: Full failed
Cost: $0.15
Paper ID: 66, Status: Passed
Cost: $0.19
Paper ID: 230, Status: Passed
Cost: $0.20
Paper ID: 186, Status: Passed
Cost: $0.17


In [30]:
# Display the current contents of results_df.
results_df

Unnamed: 0,index_paper,title,description
0,1,Some human title 1,Some human description 1
1,2,Some human title 2,Some human description 2


In [175]:
# Columns of results_df that have no matching entry in the `Dimension`
# column of the inclusion criteria.
set(results_df.columns) - set(inclusion_criteria.Dimension.values)

{'Problem (if any)',
 'Status',
 'already_run',
 'assignment_level',
 'batch',
 'code_this',
 'date_coding',
 'engine',
 'first_coder',
 'geography_notes',
 'hyp_binary_count',
 'hyp_continuous_count',
 'hyp_count',
 'hyp_multilevel_count',
 'hyp_multilevel_totallevels',
 'impact_factor',
 'index_experiment',
 'itt_encouragement',
 'old_index_pub',
 'paper_link',
 'pass_step2',
 'pass_step6',
 'pass_step7',
 'psychometric_count',
 'randomization_method',
 'randomization_stratification',
 'rollout_time',
 'rollout_waves',
 'second_coder',
 'stated_intent_count',
 'step6notes',
 'step7notes',
 'third_coder',
 'unit_analysis',
 'unit_randomization'}

In [14]:
from sklearn.metrics.pairwise import cosine_similarity


def calculate_cosine_similarity(text1, text2):
    """Return the cosine similarity between the embeddings of two texts.

    Both texts are embedded with the "text-embedding-ada-002" model in a
    single request, then compared with ``cosine_similarity``.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        The similarity score, taken from the 1x1 matrix that
        ``cosine_similarity`` returns.
    """
    # One batched request replaces two separate round-trips.
    response = client.embeddings.create(
        model="text-embedding-ada-002",
        input=[text1, text2],
        encoding_format="float",
    )

    # Each returned item records which input it belongs to; order by that
    # field rather than relying on the order of the response list.
    first, second = sorted(response.data, key=lambda item: item.index)

    # cosine_similarity expects 2-D inputs, hence the (1, n) reshape.
    word1_embedding = np.array(first.embedding).reshape(1, -1)
    word2_embedding = np.array(second.embedding).reshape(1, -1)

    similarity = cosine_similarity(word1_embedding, word2_embedding)[0][0]

    return similarity


# Two descriptions that differ only in "know" versus "determine".
description_with_know = "A list of questions to ask to know if an article meets the inclusion criteria for the meta-analysis."
description_with_determine = "A list of questions to ask to determine if an article meets the inclusion criteria for the meta-analysis."
calculate_cosine_similarity(description_with_know, description_with_determine)

0.9973966680913141

In [None]:
# NOTE(review): `r` is not defined anywhere in the visible notebook and this
# cell has never been executed; it looks like leftover scratch input.
# Confirm and delete the cell.
r

list