In [2]:
import dotenv
import os
import re
import os
import glob
import pandas as pd
import numpy as np
import openai
import json

dotenv.load_dotenv()

OPENAI_KEY = os.environ.get("OPENAI_API_KEY")

q_and_a_behavior_df = pd.read_csv('behavior_level.csv')
q_and_a_experiment_df = pd.read_csv('experimental_level.csv')

master_db_df = pd.read_csv('MasterDB_Full.csv')
master_db_df['Paper_Exp_ID_better'] = master_db_df.Paper_Exp_ID.str.extract(r'([A-Z]\_\d+)')

In [101]:
article_ids_list = [
    "A_114",
    "A_116",
    "A_137",
    "A_152",
    "A_168",
    "A_181",
    "A_197",
    "A_19",
    "A_228",
    "A_30",
    "A_31",
    "A_39",
    "A_55",
    "A_62",
    "A_76",
    "A_87",
]

file_dict = {}

folder_path = "../server/files/sample_paper"
markdown_pattern = "*.mmd"
markdown_files = glob.glob(os.path.join(folder_path, markdown_pattern))

pattern_id = r'([A-Z]\_\d+)'

for markdown_file in markdown_files:
    match = re.search(pattern_id, markdown_file)
    if match:
        code = match.group(1)
        if code in file_dict:
            file_dict[code].append(markdown_file)
        else:
            file_dict[code] = [markdown_file]
    else:
        print("No match found.")


In [102]:
file_dict['A_19']

['../server/files/sample_paper/A_19_2022_DoHonestyNudges.mmd']

### Creates a vector database from the files for the given article id

In [103]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import MarkdownHeaderTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from pathlib import Path


def read_files_to_vector(docs: list):
    text = ""
    for d in docs:
        text += Path(d).read_text()

    headers_to_split_on = [
        ("#", "Header 1"),
        ("##", "Header 2"),
        ("###", "Header 3"),
        ("####", "Header 4"),
    ]

    markdown_splitter = MarkdownHeaderTextSplitter(
        headers_to_split_on=headers_to_split_on
    )
    md_header_splits = markdown_splitter.split_text(text)

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=100000, chunk_overlap=200)

    # splits = text_splitter.split_documents(text)

    splits = text_splitter.create_documents([text])

    print("number of documents on split:", len(splits))

    embedding = OpenAIEmbeddings(openai_api_key=OPENAI_KEY)

    vectordb = Chroma.from_documents(documents=splits, embedding=embedding)

    return vectordb

### Article class where the vectordb is saved and you can query the database

In [104]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.callbacks import get_openai_callback


class ArticleQA:
    def __init__(self, vector_db, model="gpt-4-1106-preview"):
        self.model = model
        self.vector_db = vector_db

    def query_context(self, question, answer_format):
        # Build prompt
        template = (
            """Use the following pieces of context to answer the question at the end according to the format of the answer provided. 
        If you don't know the answer, just say that you don't know, don't try to make up an answer. Keep the answer as concise as possible. 
        {context}
        Question: {question}, Answer format: """
            + answer_format
            + """,
        Answer:"""
        )

        QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

        llm = ChatOpenAI(model_name=self.model, temperature=0)

        qa_chain = RetrievalQA.from_chain_type(
            llm,
            retriever=self.vector_db.as_retriever(
                search_type="mmr", search_kwargs={"k": 5, "fetch_k": 10}
            ),
            chain_type="stuff",
            return_source_documents=True,
            chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
        )

        answer_dic = {}

        with get_openai_callback() as cb:
            result = qa_chain({"query": question})
            answer_dic["gpt_answer"] = result
            answer_dic["prompt_tokens"] = cb.prompt_tokens
            answer_dic["completion_tokens"] = cb.completion_tokens
            answer_dic["total_cost"] = cb.total_cost

        return answer_dic

    def query_condition(self, condition, question, answer_format):
        # Build prompt
        template = (
            """Use the following pieces of context to answer the question at the end according to the format of the answer provided. 
        If you don't know the answer, just say that you don't know, don't try to make up an answer. Keep the answer as concise as possible. 
        {context}
        You named one of the experimental condition of in this article """
            + condition
            + """. Can you answer the following question about this condition? 
        Question: {question}, Answer format: """
            + answer_format
            + """,
        Answer:"""
        )

        QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

        llm = ChatOpenAI(model_name=self.model, temperature=0)

        qa_chain = RetrievalQA.from_chain_type(
            llm,
            retriever=self.vector_db.as_retriever(
                search_type="mmr", search_kwargs={"k": 5, "fetch_k": 10}
            ),
            chain_type="stuff",
            return_source_documents=True,
            chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
        )

        answer_dic = {}

        with get_openai_callback() as cb:
            result = qa_chain({"query": question})
            answer_dic["gpt_answer"] = result
            answer_dic["prompt_tokens"] = cb.prompt_tokens
            answer_dic["completion_tokens"] = cb.completion_tokens
            answer_dic["total_cost"] = cb.total_cost

        return answer_dic

    def query_behavior(self, behavior, question, answer_format):
        # Build prompt
        template = (
            """Use the following pieces of context to answer the question at the end according to the format of the answer provided. 
        If you don't know the answer, just say that you don't know, don't try to make up an answer. Keep the answer as concise as possible. 
        {context}
        You named one of the behavioral outcomes in this article """
            + behavior
            + """. Can you answer the following question about this behavior? \n
        Question: {question}, Answer format: """
            + answer_format
            + """,
        Answer:"""
        )

        QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

        # QA_CHAIN_PROMPT = PromptTemplate(
        #     template=template, input_variables=["context", "question", "answer_format"]
        # )

        llm = ChatOpenAI(model_name=self.model, temperature=0)

        qa_chain = RetrievalQA.from_chain_type(
            llm,
            retriever=self.vector_db.as_retriever(
                search_type="mmr", search_kwargs={"k": 5, "fetch_k": 10}
            ),
            chain_type="stuff",
            return_source_documents=True,
            chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
        )

        answer_dic = {}

        with get_openai_callback() as cb:
            result = qa_chain({"query": question})
            answer_dic["gpt_answer"] = result
            answer_dic["prompt_tokens"] = cb.prompt_tokens
            answer_dic["completion_tokens"] = cb.completion_tokens
            answer_dic["total_cost"] = cb.total_cost

        return answer_dic

In [105]:
def get_condition_behavior_human_answers(master_db, article_id):
    condition_keyword_list = [
        "condition_financial",
        "condition_educational",
        "condition_forbid",
        "condition_channel_snailmail",
        "condition_channel_email",
        "condition_channel_paperlive",
        "condition_channel_inperson",
        "condition_channel_mobileapp",
        "condition_channel_online",
        "condition_channel_sms",
        "condition_channel_phone",
        "condition_channel_nothing",
    ]

    behavior_keywords_list = [
        "behavior_priority",
        "behavior_focal",
        "behavior_focal_estimate",
        "behavior_metric",
        "behavior_selfreport",
        "financial_reqt",
        "travel_reqt",
        "planning_reqt",
        "location_reqt",
        "timing_reqt",
        "prosociality",
        "delay",
        "authority",
    ]

    to_process = master_db[master_db["Paper_Exp_ID_better"] == article_id]

    first_row_entries_behavior = to_process.groupby("behavior_description").first()[
        behavior_keywords_list
    ]

    first_row_entries_condition = to_process.groupby("condition_name").first()[
        condition_keyword_list
    ]

    return first_row_entries_behavior, first_row_entries_condition

In [106]:
def get_article_gpt_condition_score(master_db, file_dict, paper_exp_id):
    article = ArticleQA(read_files_to_vector(file_dict[paper_exp_id]))

    behavior_human, condition_human = get_condition_behavior_human_answers(
        master_db, paper_exp_id
    )

    results_df = pd.DataFrame(
        columns=["Paper_Exp_ID", "Condition", "Variable", "Human Answer", "GPT Answer"]
    )

    for condition_name in list(condition_human.index):
        print("Condition:", condition_name)
        for col in list(condition_human.columns):
            description_format = q_and_a_experiment_df[
                q_and_a_experiment_df["Variable name"] == col
            ].values[0]
            human_answer = condition_human.loc[condition_name, col]
            behavior_question = description_format[2]
            behavior_answer_format = description_format[3]

            gpt_answer = article.query_condition(
                condition_name, behavior_question, behavior_answer_format
            )
            gpt_result = gpt_answer["gpt_answer"]["result"]

            results_df = results_df.append(
                {
                    "Paper_Exp_ID": paper_exp_id,
                    "Condition": condition_name,
                    "Variable": col,
                    "Human Answer": human_answer,
                    "GPT Answer": gpt_result,
                    "GPT Cost": gpt_answer["total_cost"],
                    "GPT Prompt Tokens": gpt_answer["prompt_tokens"],
                    "GPT Completion Tokens": gpt_answer["completion_tokens"],
                },
                ignore_index=True,
            )

    return results_df

In [107]:
def get_article_gpt_behavior_score(master_db, file_dict, paper_exp_id):
    article = ArticleQA(read_files_to_vector(file_dict[paper_exp_id]))

    behavior_human, condition_human = get_condition_behavior_human_answers(
        master_db, paper_exp_id
    )

    results_df = pd.DataFrame(
        columns=["Paper_Exp_ID", "Behavior", "Variable", "Human Answer", "GPT Answer"]
    )

    for behavior_name in list(behavior_human.index):
        print("Behavior:", behavior_name)
        for col in list(behavior_human.columns):
            description_format = q_and_a_behavior_df[
                q_and_a_behavior_df["Variable name"] == col
            ].values[0]
            human_answer = behavior_human.loc[behavior_name, col]
            behavior_question = description_format[2]
            behavior_answer_format = description_format[3]

            gpt_answer = article.query_behavior(
                behavior_name, behavior_question, behavior_answer_format
            )
            gpt_result = gpt_answer["gpt_answer"]["result"]

            results_df = results_df.append(
                {
                    "Paper_Exp_ID": paper_exp_id,
                    "Behavior": behavior_name,
                    "Variable": col,
                    "Human Answer": human_answer,
                    "GPT Answer": gpt_result,
                    "GPT Cost": gpt_answer["total_cost"],
                    "GPT Prompt Tokens": gpt_answer["prompt_tokens"],
                    "GPT Completion Tokens": gpt_answer["completion_tokens"],
                },
                ignore_index=True,
            )

    return results_df

In [108]:
A_55 = get_article_gpt_behavior_score(master_db_df, file_dict, "A_55")

1
Behavior: change in address
Behavior: lodgment
Behavior: myGov registration
Behavior: number of days to lodgement
Behavior: number of inbound calls
Behavior: use of MyTax


In [110]:
A_55[A_55['Variable'] == 'delay']

Unnamed: 0,Paper_Exp_ID,Behavior,Variable,Human Answer,GPT Answer,GPT Completion Tokens,GPT Cost,GPT Prompt Tokens
11,A_55,change in address,delay,Unclear,Unclear,2.0,0.02458,2452.0
24,A_55,lodgment,delay,Unclear,Unclear,2.0,0.02457,2451.0
37,A_55,myGov registration,delay,Unclear,Unclear,2.0,0.02458,2452.0
50,A_55,number of days to lodgement,delay,Unclear,No,1.0,0.02458,2455.0
63,A_55,number of inbound calls,delay,Unclear,No,1.0,0.02456,2453.0
76,A_55,use of MyTax,delay,Unclear,Unclear,2.0,0.02459,2453.0


In [16]:
A_55_c = get_article_gpt_condition_score(master_db_df, file_dict, "A_55")

Condition: BAU letter
Condition: No letter
Condition: Welcome letter


In [17]:
A_62 = get_article_gpt_behavior_score(master_db_df, file_dict, "A_62")

Behavior: Amount automatically saved at t<U+202F>=<U+202F>13 weeks
Behavior: Amount automatically saved at t<U+202F>=<U+202F>38 weeks
Behavior: Amount automatically saved at t<U+202F>=<U+202F>4 weeks
Behavior: Automatic transaction at t<U+202F>=<U+202F>13 weeks
Behavior: Automatic transaction at t<U+202F>=<U+202F>38 weeks
Behavior: Automatic transaction at t<U+202F>=<U+202F>4 weeks
Behavior: Total buffer savings at t<U+202F>=<U+202F>13 weeks
Behavior: Total buffer savings at t<U+202F>=<U+202F>38 weeks
Behavior: Total buffer savings at t<U+202F>=<U+202F>4 weeks
Behavior: click rate - email opt out
Behavior: click rate - personalized website


In [18]:
A_62_c = get_article_gpt_condition_score(master_db_df, file_dict, "A_62")

Condition: control
Condition: social norm


In [19]:
A_87 = get_article_gpt_behavior_score(master_db_df, file_dict, "A_87")

Behavior: Appointment link click
Behavior: Days to vaccination
Behavior: Vaccination rate within 30 days


In [20]:
A_87_c = get_article_gpt_condition_score(master_db_df, file_dict, "A_87")

Condition: Arguments
Condition: Control (no reminders)
Condition: Control (reminders)
Condition: Incentives
Condition: Information
Condition: Social Impact


In [21]:
A_30 = get_article_gpt_behavior_score(master_db_df, file_dict, "A_30")

Behavior: change in BMI, baseline to 12 mo
Behavior: change in BMI, baseline to 24 mo
Behavior: change in HDL cholesterol, baseline to 12 mo
Behavior: change in HDL cholesterol, baseline to 24 mo
Behavior: change in LDL cholesterol, baseline to 12 mo
Behavior: change in LDL cholesterol, baseline to 24 mo
Behavior: change in avg % of green labeled items purchased, baseline to 12 mo
Behavior: change in avg % of green labeled items purchased, baseline to 24 mo
Behavior: change in avg % of red labeled items purchased, baseline to 12 mo
Behavior: change in avg % of red labeled items purchased, baseline to 24 mo
Behavior: change in avg % of yellow labeled items purchased, baseline to 12 mo
Behavior: change in avg % of yellow labeled items purchased, baseline to 24 mo
Behavior: change in avg A1c, baseline to 12 mo
Behavior: change in avg A1c, baseline to 24 mo
Behavior: change in calories purchased per day, baseline to 12 mo
Behavior: change in calories purchased per day, baseline to 24 mo
Be

In [22]:
A_30_c = get_article_gpt_condition_score(master_db_df, file_dict, "A_30")

Condition: control
Condition: treatment


In [40]:
gpt_4_turbo_condition = pd.concat([A_55_c, A_62_c, A_87_c, A_30_c], ignore_index=True)
gpt_4_turbo_condition["Model"] = "gpt-4-1106-preview"

gpt_4_turbo_behavior = pd.concat([A_55, A_62, A_87, A_30], ignore_index=True)
gpt_4_turbo_behavior["Model"] = "gpt-4-1106-preview"

In [35]:
gpt_4_behavior = pd.read_csv("gpt_beavhior.csv").drop(columns=["Unnamed: 0"])
gpt_4_behavior["Model"] = "gpt-4"

gpt_4_condition = pd.read_csv("gpt_condition.csv").drop(columns=["Unnamed: 0"])
gpt_4_condition["Model"] = "gpt-4"

In [46]:
pd.concat([gpt_4_behavior, gpt_4_turbo_behavior], ignore_index=True).to_csv(
    "gpt_behavior.csv", index=False
)


In [47]:
pd.concat([gpt_4_condition, gpt_4_turbo_condition], ignore_index=True).to_csv(
    "gpt_condition.csv", index=False
)

In [3]:
pd.read_csv('gpt_condition.csv')

Unnamed: 0,Paper_Exp_ID,Condition,Variable,Human Answer,GPT Answer,GPT Cost,Model
0,A_55,BAU letter,condition_financial,N,N,0.06849,gpt-4
1,A_55,BAU letter,condition_educational,N,N,0.04371,gpt-4
2,A_55,BAU letter,condition_forbid,N,N,0.04713,gpt-4
3,A_55,BAU letter,condition_channel_snailmail,Y,N,0.04023,gpt-4
4,A_55,BAU letter,condition_channel_email,N,Y,0.03222,gpt-4
...,...,...,...,...,...,...,...
307,A_30,treatment,condition_channel_mobileapp,N,N,0.04801,gpt-4-1106-preview
308,A_30,treatment,condition_channel_online,N,Y,0.04870,gpt-4-1106-preview
309,A_30,treatment,condition_channel_sms,N,N,0.03240,gpt-4-1106-preview
310,A_30,treatment,condition_channel_phone,N,N,0.03423,gpt-4-1106-preview


In [76]:
pd.concat([gpt_4_behavior, gpt_4_turbo_behavior], ignore_index=True).groupby('Model').sum()

Unnamed: 0_level_0,GPT Cost
Model,Unnamed: 1_level_1
gpt-4,30.07068
gpt-4-1106-preview,30.0731


In [None]:
behavior_description_2 = """
Name the behavioral outcome in a few words. If the authors have a clear, short description of each \
behavior in the paper itself, copy their words here instead of paraphrasing. If their description \
is not clear enough on its own, or is very verbose, please paraphrase here. \
Give a JSON list of each behavioral outcome.
"""

A_19 = ArticleQA(read_files_to_vector(['sample_paper/A_19_2022_DoHonestyNudges.mmd']))
answer = A_19.query_context(behavior_description_2, 'JSON format')

In [None]:
answer


{'gpt_answer': {'query': '\nGive each experiment condition a one or two word name to describe it.  Where possible, use the label the research authors give it. Look at tables/figures to see their naming conventions for the conditions and use those if they exist. Give a JSON list with each experiment condition in it.\n',
  'result': '["Business as Usual", "Good Practice", "Novel Model"]',
  'source_documents': [Document(page_content='1. The sample doesn\'t capture the digitally excluded, or people not inclined to complete online surveys.\n2. Just because people are they would do something in an online experiment when playing with "house money" doesn\'t mean they will in real life. We therefore interpret play percentages as an upper bound of real behaviour, and focus primarily on differences between arms.\n3. Our sample size was chosen to provide adequate statistical power for our main outcomes of interest, and so we recommend interpreting comparisons for subgroups with caution.  \nAppend

In [None]:
# Human answer

[
    "Claimed amount",
    "Claim Settlement Diff",
    "Session Cancellation or not",
    "Claim rejection or not",
    "Event description length",
]

# GPT 3.5 16k answer
[
    "Reduced claimed amount",
    "Reduced claim-settlement difference",
    "Increased session cancelation",
    "Reduced claim rejection",
    "Increased event description length",
]
# GPT 4 answer
[
    {
        "Behavioral Outcome": "Claimed Amount",
        "Description": "The amount claimed for settlement by the customer.",
    },
    {
        "Behavioral Outcome": "Claim-Settlement Difference",
        "Description": "The difference between the claimed amount and the final settlement amount issued by the provider.",
    },
    {
        "Behavioral Outcome": "Session Cancelation",
        "Description": "If the customer cancels filling the claim.",
    },
    {
        "Behavioral Outcome": "Claim Rejection",
        "Description": "If the customer’s claims are rejected.",
    },
    {
        "Behavioral Outcome": "Event Description Length",
        "Description": "The number of characters used to describe damages.",
    },
]



In [None]:
behavior_description_2 = """
Name the behavioral outcome in a few words. If the authors have a clear, short description of each \
behavior in the paper itself, copy their words here instead of paraphrasing. If their description \
is not clear enough on its own, or is very verbose, please paraphrase here. \
Give a JSON list of each behavioral outcome.
"""

expirment_condition = """
Give each experiment condition a one or two word name to describe it.  Where possible, use the label the research authors give it. Look at tables/figures to see their naming conventions for the conditions and use those if they exist. Give a JSON list with each experiment condition in it.
"""

P_157 = ArticleQA(read_files_to_vector(["sample_paper/P_157_2023_HowDoSlot_BIT.mmd"]))
answer = P_157.query_context(expirment_condition, "JSON format")

In [None]:
answer['gpt_answer']['result']

'["Business as Usual", "Good Practice", "Novel Model"]'

In [None]:
# GPT 3.5 16k behavioral answer
[
    "Most of the slot game advert features tested did not affect behavior on average, nor the perceived chances of winning.",
    "Listing T&Cs that do not apply more saliently than those that do apply marginally reduced comprehension of the other T&Cs.",
    "Features that emphasized the game had low risk to potential reward or the 'ease of winning' increased total amount staked.",
    "Features that emphasized the game 'fun' reduced stakes.",
    "Features had a differential impact on individuals with high Problem Gambling Severity Index (short-form PGSI) scores and older individuals.",
]
# GPT 3.5 16k condition experiment answer
[
    "Business as usual",
    "Low risk to potential reward",
    "Ease of winning",
    "Fun-framing",
    "Good practice",
]

# GPT 4 behavioral answer
[
    "Comprehension of gambling odds",
    "Lower-risk gambling guidelines",
    "DSS gambling legging testing and implementation",
    "Evaluation of the 'take time to think' safer gambling message",
]
# GPT 4 condition experiment answer
[
    "Business as Usual", 
    "Good Practice", 
    "Novel Model"
]

In [63]:
q_and_a_behavior_df["unit"] = "behavior"
q_and_a_experiment_df["unit"] = "condition"

analysis = pd.concat([q_and_a_behavior_df, q_and_a_experiment_df], ignore_index=True).drop(
    columns=["Step"]
).rename(
    columns={
        "Variable name": "variable",
        "Explanation": "question",
        "Coding scheme": "coding",
    }
)#.to_csv("units_of_analysis.csv", index=False)

In [69]:
analysis['variable'] = analysis['variable'].apply(lambda x: x.replace('behavior_', '')).apply(lambda x: x.replace('condition_', ''))

In [71]:
analysis.to_csv("units_of_analysis.csv", index=False)

In [72]:
analysis

Unnamed: 0,variable,question,coding,unit
0,description,Describe the behavioral outcome in a few words...,Free text,behavior
1,priority,"Is the behavioral outcome labeled as primary, ...",primary/secondary/none,behavior
2,focal,Select the ONE behavior that is focal for the ...,focal/not,behavior
3,focal_estimate,If you had to estimate which behavior was foca...,"Exact, Estimate",behavior
4,metric,Is the each behavioral variable categorical/bi...,categorical/continuous,behavior
5,selfreport,Is the behavior captured by self-reported data...,Y/N,behavior
6,hypothesis_gist,"In your own words, what is the hypothesis with...",Free text,behavior
7,hypothesis_authors,Please copy a segment of the text that contain...,Quotation,behavior
8,direction,Look at the hypothesis. In which direction doe...,"Up, Down, Unclear",behavior
9,frequency,Does doing the behavior well mean doing it reg...,"No; Yes, annually; Yes, monthly; Yes, weekly; ...",behavior
