In [1]:
import os
import re
import sys
import fitz
import json
import yaml
import warnings
import pandas as pd
from datetime import datetime as dt
from openai import OpenAI
warnings.filterwarnings("ignore")

In [2]:
# Notebook configuration: build the working directories and make them importable.
# module_dir = os.path.dirname(os.path.realpath(__file__))
# All directories below are built relative to './', i.e. the current working directory.
root_dir_name = os.path.dirname('./')

input_data_dir_name = "input_data"
output_dir_name = "sample_result_submission"
program_dir_name = "ingestion_program"
submission_dir_name = "sample_code_submission"

# Input data directory to read training and test data from
input_dir = os.path.join(root_dir_name, input_data_dir_name)
# Output data directory to write predictions to
output_dir = os.path.join(root_dir_name, output_dir_name)
# Program directory
program_dir = os.path.join(root_dir_name, program_dir_name)
# Directory to read submitted submissions from
submission_dir = os.path.join(root_dir_name, submission_dir_name)

# Extend the import search path with every project directory; this must run
# before the `constants` import below.
sys.path.append(input_dir)
sys.path.append(output_dir)
sys.path.append(program_dir)
sys.path.append(submission_dir)

# NOTE(review): `constants` is presumably located in one of the directories
# appended above; which one is not visible from this notebook - confirm.
from constants import API_KEY

In [3]:
def clean(paper):
    """Clean the text fields of a paper dict in place and return the same dict.

    Args:
        paper: dict holding the raw "title", "paper" and "checklist" strings.

    Returns:
        The same dict object, with each of the three fields replaced by the
        output of its dedicated cleaning function.
    """
    field_cleaners = (
        ("title", clean_title),
        ("paper", clean_paper),
        ("checklist", clean_checklist),
    )
    for field, cleaner in field_cleaners:
        paper[field] = cleaner(paper[field])
    return paper

def clean_title(text):
    """Collapse a multi-line title into a single clean line.

    Args:
        text: raw title text, possibly wrapped over several lines.

    Returns:
        The title on one line: words hyphenated across a line break are
        re-joined, remaining line breaks become single spaces, and leading /
        trailing whitespace is removed.
    """
    # De-hyphenate first: once the newlines are gone this pattern can no
    # longer match (the original order made this step a no-op).
    text = re.sub(r'-\s*\n\s*', '', text)
    # Remaining line breaks separate words, so replace them with a space
    # instead of gluing the neighbouring words together.
    text = re.sub(r'\s*\n\s*', ' ', text)
    return text.strip()

def clean_paper(text):
    """Normalize the text of the paper body.

    Applies, in order: a set of regex substitutions (line-leading digits,
    hyphen + line break, line breaks following a letter), a set of literal
    replacements (curly/escaped apostrophes, "- "), and finally a line-by-line
    re-flow in which lines with fewer than six words are kept on their own
    line while longer lines are joined with a trailing space.

    Args:
        text: raw paper text.

    Returns:
        The cleaned text, stripped of leading and trailing whitespace.
    """
    regex_steps = (
        (r'\n\d+', ' '),
        (r'\-\s*\n', ''),
        (r'([a-zA-Z]\.\d+)\n', r'\1 '),
        (r'([a-zA-Z])\n', r'\1 '),
    )
    for pattern, replacement in regex_steps:
        text = re.sub(pattern, replacement, text)

    for old, new in (("’", "'"), ("\\'", "'"), ("- ", "")):
        text = text.replace(old, new)

    pieces = []
    for raw_line in text.split('\n'):
        stripped = raw_line.strip()
        if len(stripped.split()) >= 6:
            # Long line: joined to what follows with a single space.
            pieces.append(stripped + ' ')
        else:
            # Short line: isolated on its own line.
            pieces.append('\n' + stripped + '\n')
    return ''.join(pieces).strip()

def clean_checklist(text):
    """Flatten the checklist section into a single whitespace-normalized line.

    Runs the same kind of regex and literal substitutions as the paper
    cleaner, plus one that turns "two spaces, any character, space" into a
    line break, then collapses every newline and whitespace run into one
    space.

    Args:
        text: raw checklist text.

    Returns:
        The cleaned checklist as one stripped line.
    """
    leading_steps = (
        (r'\n\d+', ' '),
        (r'\-\s*\n', ''),
        (r'  . ', '\n'),
        (r'([a-zA-Z]\.\d+)\n', r'\1 '),
        (r'([a-zA-Z])\n', r'\1 '),
    )
    for pattern, replacement in leading_steps:
        text = re.sub(pattern, replacement, text)

    text = text.replace("’", "'").replace("\\'", "'").replace("- ", "")

    for pattern in (r'\n+', r'\s+'):
        text = re.sub(pattern, ' ', text)
    return text.strip()

def clean_guidelines(text):
    """Remove every occurrence of the known checklist section titles.

    Args:
        text: guidelines text extracted for one checklist question.

    Returns:
        The text with each listed title deleted (nothing is inserted in its
        place, so surrounding whitespace is left as is).
    """
    section_headings = (
        "Limitations",
        "Theory Assumptions and Proofs",
        "Experimental Result Reproducibility",
        "Open access to data and code",
        "Experimental Setting/Details",
        "Experiment Statistical Significance",
        "Experiments Compute Resources",
        "Code Of Ethics",
        "Broader Impacts",
        "Safeguards",
        "Licenses for existing assets",
        "New Assets",
        "Crowdsourcing and Research with Human Subjects",
        "Institutional Review Board (IRB) Approvals or Equivalent for Research with Human Subjects",
    )
    cleaned = text
    for heading in section_headings:
        # Splitting on the heading and re-joining deletes all its occurrences.
        cleaned = ''.join(cleaned.split(heading))
    return cleaned

def get_paper_chunks(paper_text):
    """Split the raw PDF text into title, paper body and checklist.

    The text is cut at the first occurrence of "NeurIPS Paper Checklist":
    everything before it is the paper, everything from it onwards is the
    checklist. The title is the text before "Anonymous Author" when that
    marker is present, otherwise the first two lines of the paper joined
    together.

    Args:
        paper_text: full text extracted from the PDF.

    Returns:
        dict with the keys "title", "paper" and "checklist".

    Raises:
        ValueError: if the checklist heading is not found.
        Exception: for any other failure, with a message naming the section
            that was being extracted.
    """
    # Bound before the try block so the generic handler below can always
    # inspect it, even when the failure happens before the paper is sliced.
    paper = None

    try:
        # Identify main paper and appendices
        paper_end_index = paper_text.find("NeurIPS Paper Checklist")

        if paper_end_index == -1:
            raise ValueError("[-] Error: NeurIPS Paper Checklist not found")

        paper = paper_text[:paper_end_index]

        # Identify checklist section
        checklist = paper_text[paper_end_index:]

        # Identify title
        title_end_index = paper.find("Anonymous Author")
        if title_end_index == -1:
            title = ''.join(paper.split("\n")[:2])
        else:
            title = paper[:title_end_index]

        return {
            "title": title,
            "paper": paper,
            "checklist": checklist
        }
    except ValueError:
        # Propagate the "checklist not found" error unchanged.
        raise
    except Exception as e:
        raise Exception(f"[-] Error occurred while extracting paper chunks in the {'paper' if not paper else 'checklist'} section: {e}") from e


In [4]:
def get_pdf_text(pdf_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_file: file name of the PDF, resolved against `submission_dir`.

    Returns:
        One string made of each page's `get_text()` output, in page order,
        with nothing inserted between pages.
    """
    full_path = os.path.join(submission_dir, pdf_file)
    with fitz.open(full_path) as document:
        page_texts = [page.get_text() for page in document]
    return "".join(page_texts)

def load_yaml(yaml_file):
    """Load and validate the ground-truth scores stored in a YAML file.

    Args:
        yaml_file: file name of the YAML file, resolved against
            `submission_dir`.

    Returns:
        dict mapping question number (int, 1-15) to score (0, 0.5 or 1).

    Raises:
        ValueError: if the file is empty, is not a mapping, or contains a key
            or value outside the allowed sets.
    """
    yaml_file_path = os.path.join(submission_dir, yaml_file)

    with open(yaml_file_path, 'r') as file:
        yaml_data = yaml.safe_load(file)

    # safe_load returns whatever the document's top-level node is, so a list
    # or scalar must be rejected here rather than crash on `.items()` below.
    if not yaml_data or not isinstance(yaml_data, dict):
        raise ValueError("[-] The YAML file is empty or invalid.")

    for key, value in yaml_data.items():
        # Check if key/question is an integer between 1 and 15
        if not isinstance(key, int) or not 1 <= key <= 15:
            raise ValueError("[-] Invalid key: Keys must be integers between 1 and 15.")

        # Check if value is 0, 0.5, or 1
        if value not in [0, 0.5, 1]:
            raise ValueError("[-] Invalid value: Values must be 0, 0.5, or 1.")

    return yaml_data


In [5]:
def parse_checklist(checklist):
    """Extract answer, justification and guidelines for each checklist question.

    For every known question the cleaned checklist text is searched for a
    "Question: ... Answer: [...] Justification: ... Guidelines: ..." group.

    Args:
        checklist: cleaned checklist text (a single string).

    Returns:
        pandas.DataFrame with one row per question (in the order of the
        question list, default integer index) and the columns Question,
        Question_Title, Answer, Justification, Guidelines, Review and Score.
        Review and Score are left empty (NaN, object dtype) for later filling.
        Questions that cannot be located get "Not Found" in Answer,
        Justification and Guidelines.

    Raises:
        ValueError: if anything fails while extracting the fields.
    """

    checklist_questions = [
        "Do the main claims made in the abstract and introduction accurately reflect the paper's contributions and scope?",
        "Does the paper discuss the limitations of the work performed by the authors?",
        "For each theoretical result, does the paper provide the full set of assumptions and a complete (and correct) proof?",
        "Does the paper fully disclose all the information needed to reproduce the main experimental results of the paper to the extent that it affects the main claims and/or conclusions of the paper (regardless of whether the code and data are provided or not)?",
        "Does the paper provide open access to the data and code, with sufficient instructions to faithfully reproduce the main experimental results, as described in supplemental material?",
        "Does the paper specify all the training and test details (e.g., data splits, hyperparameters, how they were chosen, type of optimizer, etc.) necessary to understand the results?",
        "Does the paper report error bars suitably and correctly defined or other appropriate information about the statistical significance of the experiments?",
        "For each experiment, does the paper provide sufficient information on the computer resources (type of compute workers, memory, time of execution) needed to reproduce the experiments?",
        "Does the research conducted in the paper conform, in every respect, with the NeurIPS Code of Ethics https://neurips.cc/public/EthicsGuidelines?",
        "Does the paper discuss both potential positive societal impacts and negative societal impacts of the work performed?",
        "Does the paper describe safeguards that have been put in place for responsible release of data or models that have a high risk for misuse (e.g., pretrained language models, image generators, or scraped datasets)?",
        "Are the creators or original owners of assets (e.g., code, data, models), used in the paper, properly credited and are the license and terms of use explicitly mentioned and properly respected?",
        "Are new assets introduced in the paper well documented and is the documentation provided alongside the assets?",
        "For crowdsourcing experiments and research with human subjects, does the paper include the full text of instructions given to participants and screenshots, if applicable, as well as details about compensation (if any)?",
        "Does the paper describe potential risks incurred by study participants, whether such risks were disclosed to the subjects, and whether Institutional Review Board (IRB) approvals (or an equivalent approval/review based on the requirements of your country or institution) were obtained?"
    ]

    checklist_question_titles = [
        "Claims",
        "Limitations",
        "Theoritical assumptions and proofs",
        "Experiments reproducibility",
        "Code and data accessibility",
        "Experimental settings/details",
        "Error bars",
        "Compute resources",
        "NeurIPS code of ethics",
        "Impacts",
        "Safeguards",
        "Credits",
        "Documentation",
        "Human subjects",
        "Risks"
    ]

    columns = ['Question', 'Question_Title', 'Answer', 'Justification', 'Guidelines', 'Review', 'Score']
    try:
        # Collect plain records and build the frame once, instead of growing
        # it with pd.concat on every iteration.
        records = []
        for question_index, question in enumerate(checklist_questions):
            question_regex = re.escape(question)
            pattern = re.compile(rf"Question:\s+{question_regex}(?:.*?Answer:\s+\[(.*?)\].*?Justification:\s+(.*?))(?:Guidelines:\s+(.*?))(?=Question:|\Z)", re.DOTALL)

            mtch = pattern.search(checklist)
            if mtch:
                answer = mtch.group(1).strip()
                # Empty captures are stored as None rather than "".
                justification = mtch.group(2).strip() or None
                guidelines = mtch.group(3).strip() or None
                if guidelines:
                    guidelines = clean_guidelines(guidelines)

                # A justification made only of digits is discarded.
                if justification is not None and justification.isdigit():
                    justification = None

            else:
                answer, justification, guidelines = "Not Found", "Not Found", "Not Found"

            records.append({
                'Question': question,
                'Question_Title': checklist_question_titles[question_index],
                'Answer': answer,
                'Justification': justification,
                'Guidelines': guidelines,
            })

        checklist_df = pd.DataFrame(records, columns=columns)
        # Review/Score have no values yet; keep them as object columns so
        # that review strings can be assigned into them later.
        return checklist_df.astype({'Review': object, 'Score': object})

    except Exception as e:
        raise ValueError(f"[-] Error in extracting answers and justifications: {e}")

In [6]:
def process_paper():
    """Load, clean and parse the submitted paper found in `submission_dir`.

    Steps: pick a PDF from the submission directory, look for a YAML file
    with the same base name (ground-truth scores), extract the PDF text,
    split it into title / paper / checklist and clean each part, load the
    ground truth when available, and parse the checklist into a DataFrame.

    Returns:
        dict with the keys "title", "paper", "checklist", "ground_truth"
        (dict of scores, or None when no YAML file is present) and
        "checklist_df".

    Raises:
        ValueError: if the submission directory contains no PDF file.
    """

    # -----
    # Load PDF from submissions dir
    # -----
    print("[*] Loading PDF paper")
    # get all files from submissions dir; sorted because os.listdir returns
    # entries in arbitrary order and the first PDF found is the one used
    files = sorted(os.listdir(submission_dir))
    pdf_file = next((file for file in files if file.endswith('.pdf')), None)

    if not pdf_file:
        raise ValueError("[-] No PDF file found in the submission directory!")
    print(f"[+] PDF file: {pdf_file}")

    # Strip only the final extension to build the expected YAML name.
    ground_truth_file = f"{os.path.splitext(pdf_file)[0]}.yaml"
    if ground_truth_file not in files:
        print(f"[!] Ground Truth YAML file not found!. This may happen if your YAML file is not named as: {ground_truth_file}")
        # Mark the ground truth as unavailable so it is not loaded below.
        ground_truth_file = None
    else:
        print(f"[+] YAML file: {ground_truth_file}")

    print("[✔]")

    # -----
    # Load text from PDF
    # -----
    print("[*] Loading and converting PDF to Text")
    paper_text = get_pdf_text(pdf_file)
    print("[✔]")

    # -----
    # Get paper chunks
    # -----
    print("[*] Breaking down paper into chunks and cleaning text")
    paper = clean(get_paper_chunks(paper_text))
    print("[✔]")

    # -----
    # Load Ground Truth Scores
    # -----
    if ground_truth_file:
        print("[*] Loading Ground Truth YAML")
        paper["ground_truth"] = load_yaml(ground_truth_file)
        print("[✔]")
    else:
        paper["ground_truth"] = None

    # -----
    # Parse Checklist
    # -----
    print("[*] Parsing checklist from text")
    paper["checklist_df"] = parse_checklist(paper["checklist"])
    print("[✔]")
    return paper


In [7]:
# Run the whole load / clean / parse pipeline once; the resulting `paper` dict
# is the input of every prompt experiment cell below.
paper = process_paper()

[*] Loading PDF paper
[+] PDF file: genuine_MetaAlbum.pdf
[+] YAML file: genuine_MetaAlbum.yaml
[✔]
[*] Loading and converting PDF to Text
[✔]
[*] Breaking down paper into chunks and cleaning text
[✔]
[*] Loading Ground Truth YAML
[✔]
[*] Parsing checklist from text
[✔]


In [8]:
def _as_prompt_text(value):
    """Return `value` as a string for prompt substitution; None and NaN become ""."""
    if value is None or (isinstance(value, float) and value != value):
        return ""
    return str(value)


def _fill_prompt(template, values):
    """Replace every `{name}` placeholder of `template` in a single pass.

    A single pass guarantees that text inserted for one placeholder (e.g. the
    paper body) is never re-scanned for the other placeholders.
    """
    placeholder = re.compile(r"\{(" + "|".join(re.escape(name) for name in values) + r")\}")
    return placeholder.sub(lambda found: values[found.group(1)], template)


def _extract_score(review):
    """Split an LLM review into `(score, review_without_score_line)`.

    Accepts "Score: x", "**Score**: x" and "**Score:** x". When no score is
    present, returns the sentinel 99 and the review unchanged.
    """
    score_label = r"(?:\*\*)?Score(?:\*\*)?:(?:\*\*)?"
    found = re.search(score_label + r"\s*([0-9]+(?:\.[0-9]+)?)", review)
    if not found:
        return 99, review
    return found.group(1), re.sub(score_label + r".*(\n|$)", "", review)


def get_LLM_feedback(paper, checklist_df, ground_truth, question_n, paper_prompt):
    """Ask the LLM to review one checklist question and record the result.

    The row of `checklist_df` whose index + 1 equals `question_n` is used to
    fill the `{q}`, `{a}`, `{j}`, `{g}` and `{paper}` placeholders of
    `paper_prompt`. The model's reply is split into a score and a review
    text, both of which are written to the 'Score' and 'Review' columns of
    that row (in place) and printed.

    Args:
        paper: cleaned paper text substituted for `{paper}`.
        checklist_df: DataFrame produced by the checklist parser; modified in
            place.
        ground_truth: dict of scores keyed by question number, or None. When
            given, a question missing from it is skipped.
        question_n: 1-based number of the question to review.
        paper_prompt: prompt template containing the placeholders.

    Returns:
        None.
    """

    client = OpenAI(
        api_key=API_KEY,
    )

    model = "gpt-4-turbo-preview"
    max_tokens = 1000
    temperature = 1
    top_p = 1
    n = 1

    for index, row in checklist_df.iterrows():

        question_number = index + 1
        if question_number != question_n:
            continue

        if ground_truth is not None and question_number not in ground_truth:
            print(f"[!] Skipping Question # {question_number}")
            continue

        print(f"Question: {row['Question']}")
        print(f"Answer: {row['Answer']}")
        print(f"Justification: {row['Justification']}")

        # Justification / Guidelines may be None when the checklist capture
        # was empty, so every value is coerced to text before substitution.
        filled_prompt = _fill_prompt(paper_prompt, {
            "paper": _as_prompt_text(paper),
            "q": _as_prompt_text(row["Question"]),
            "a": _as_prompt_text(row["Answer"]),
            "j": _as_prompt_text(row["Justification"]),
            "g": _as_prompt_text(row["Guidelines"]),
        })

        messages = [{
            "role": "user",
            "content": filled_prompt
        }]
        chat_completion = client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            n=n
        )

        # Guard against an empty reply so the score parsing cannot fail.
        gpt_review = chat_completion.choices[0].message.content or ""

        # score is the matched string (e.g. "0.5") or the int sentinel 99.
        score, text = _extract_score(gpt_review)

        checklist_df.loc[index, 'Review'] = text
        checklist_df.loc[index, 'Score'] = score
        print(f"[+] Question # {question_number}")

        print(f"\nScore:\n{score}")
        print(f"\n\nReview:\n{text}")

In [None]:
# Prompt experiment: asks for detailed, actionable feedback and a final
# `Score: score_value` line.
paper_prompt = "You are provided with a paper to be submitted to the NeurIPS conference. You are assisting the authors in preparing their answer to one “Quality Control Question”. Please examine carefully the “Proposed Author's Answer” and the “Proposed Author's Justification to the Answer” and identify any discrepancies with the actual ”Paper Content”, for this specific question, taking into account the “Guidelines Provided to Authors”. Afterwards, provide detailed, actionable feedback, based on the “Guidelines Provided to Authors”, aiming to improve the paper quality. Conclude your review with a score in a separate line: 1 if there are no significant concerns. 0.5 there is ground for improvement. 0 If critical issues must be addressed, that could lead to paper rejection. Make sure that score is shown in a new line in this format `Score: score_value` and there is no content after the score.\n\nQuality Control Question: {q}\nProposed Author's Answer: {a}\nProposed Author's Justification to the Answer: {j}\nGuidelines Provided to Authors: {g}\nPaper Content: {paper}"

# Evaluate the prompt on checklist question 1.
get_LLM_feedback(
    paper["paper"], 
    paper["checklist_df"], 
    paper["ground_truth"],
    1,
    paper_prompt
)


In [None]:
# Prompt experiment: asks for short, to-the-point feedback, given only if
# there is room for improvement.
paper_prompt = "You are provided with a paper to be submitted to the NeurIPS conference. You are assisting the authors in preparing their answer to one “Quality Control Question”. Please examine carefully the “Proposed Author's Answer” and the “Proposed Author's Justification to the Answer” and identify any discrepancies with the actual ”Paper Content”, for this specific question, taking into account the “Guidelines Provided to Authors”. Afterwards, provide short, to the point, actionable feedback, based on the “Guidelines Provided to Authors” if there is room for improvement, aiming to improve the paper quality. Conclude your review with a score in a separate line: 1 if there are no significant concerns. 0.5 there is ground for improvement. 0 If critical issues must be addressed, that could lead to paper rejection. Make sure that score is shown in a new line in this format `Score: score_value` and there is no content after the score.\n\nQuality Control Question: {q}\nProposed Author's Answer: {a}\nProposed Author's Justification to the Answer: {j}\nGuidelines Provided to Authors: {g}\nPaper Content: {paper}"
# Evaluate the prompt on checklist question 1.
get_LLM_feedback(
    paper["paper"], 
    paper["checklist_df"], 
    paper["ground_truth"],
    1,
    paper_prompt
)


In [None]:
# Prompt experiment: frames the model as a conference reviewer and asks for
# feedback only when a discrepancy is found.
paper_prompt = "You are computer science conference reviewer. You are provided with a paper and you are assisting the authors in evaluating their answer to a “Quality Control Question”. Please examine carefully the “Proposed Author's Answer” and the “Proposed Author's Justification to the Answer” and identify any discrepancies with the actual ”Paper Content”, for this specific question, taking into account the “Guidelines Provided to Authors”. Afterwards, provide short, to the point, actionable feedback only if there is a discrepency. Conclude your review with a score in a separate line: 1 if there are no significant concerns. 0.5 there is ground for improvement. 0 If critical issues must be addressed, that could lead to paper rejection. Make sure that score is shown in a new line in this format `Score: score_value` and there is no content after the score.\n\nQuality Control Question: {q}\nProposed Author's Answer: {a}\nProposed Author's Justification to the Answer: {j}\nGuidelines Provided to Authors: {g}\nPaper Content: {paper}"
# Evaluate the prompt on checklist question 1.
get_LLM_feedback(
    paper["paper"], 
    paper["checklist_df"], 
    paper["ground_truth"],
    1,
    paper_prompt
)


In [None]:
# Prompt experiment: reworded instructions asking for concise, actionable
# feedback followed by a `Score: score_value` line.
paper_prompt = "You are tasked with reviewing a paper set to be submitted to the NeurIPS conference. Your role involves assisting the authors by evaluating their response to a specific 'Quality Control Question'. Carefully review the 'Proposed Author's Answer' and the 'Proposed Author's Justification to the Answer'. Check for any inconsistencies with the 'Paper Content' in light of the 'Guidelines Provided to Authors'. After your evaluation, provide concise, actionable feedback aimed at enhancing the paper's quality based on the guidelines. Conclude your review with a score on a new line as follows, based on your assessment: 1 for no significant concerns, 0.5 for areas needing improvement, and 0 for critical issues that could lead to rejection. Ensure the score is displayed in this format Score: score_value. There should be no content after the score.\n\nQuality Control Question: {q}\nProposed Author's Answer: {a}\nProposed Author's Justification to the Answer: {j}\nGuidelines Provided to Authors: {g}\nPaper Content: {paper}"

# Evaluate the prompt on checklist question 1.
get_LLM_feedback(
    paper["paper"], 
    paper["checklist_df"], 
    paper["ground_truth"],
    1,
    paper_prompt
)

In [None]:
# Prompt experiment: multi-line prompt with an extra NOTE restricting when
# the 0.5 score may be given.
paper_prompt = """
You are tasked with reviewing a paper set to be submitted to the NeurIPS conference. 
Your role is to evaluate the authors response to a specific 'Quality Control Question'. 
Carefully review the 'Proposed Author's Answer' and the 'Proposed Author's Justification to the Answer'. 
Check for any inconsistencies with the 'Paper Content' in light of the 'Guidelines Provided to Authors'. 
After your evaluation, provide concise, actionable feedback aimed at enhancing the paper's quality based on the guidelines. 
Conclude your review with a score on a new line as follows, based on your assessment: 
1 for no significant concerns, 
0.5 for areas needing improvement, 
and 0 for critical issues that could lead to rejection. 
Ensure the score is displayed in this format Score: score_value. There should be no content after the score.
NOTE: only give score 0.5 when the current state of the paper is not acceptable.

Quality Control Question: {q}
Proposed Author's Answer: {a}
Proposed Author's Justification to the Answer: {j}
Guidelines Provided to Authors: {g}
Paper Content: {paper}
"""
# Evaluate the prompt on checklist question 1.
get_LLM_feedback(
    paper["paper"], 
    paper["checklist_df"], 
    paper["ground_truth"],
    1,
    paper_prompt
)

### Prompt 4

In [9]:
# Prompt experiment: step-by-step instructions (summarize the cited text,
# list discrepancies, give itemized feedback) with an explicit score rubric.
paper_prompt = """
You are provided with a “Paper Content” to be submitted to the NeurIPS conference. You are assisting the authors in preparing their answer to one “Quality Control Question”. Please examine carefully the “Proposed Author's Answer” and the “Proposed Author's Justification to the Answer”. Then proceed step by step: If a section is cited in the "Proposed Author's Justification for the Answer," summarize the relevant text referenced from the "Paper Content"; otherwise, please fetch by yourself relevant justification text. Identify and list any discrepancies between the author's response to the "Quality Control Question" (including both the "Proposed Author's Answer" and the "Proposed Author's Justification for the Answer") and the "Paper Content."Provide itemized, actionable feedback, based on the “Guidelines Provided to Authors”, aiming to improve the paper quality. Concentrate on a few of the most significant improvements that can be made, and write in terse technical English.

Conclude your review with a score in a separate line: 
1: The paper is acceptable without changes.
0.5: Improvements are recommended to enhance the likelihood of acceptance, though no fatal flaws exist.
0: Critical issues must be resolved, as they could almost certainly cause rejection if unaddressed.
Make sure that score is shown in a new line in this format `Score: score_value` and there is no content after the score.
Quality Control Question: {q}
Proposed Author's Answer: {a}
Proposed Author's Justification to the Answer: {j}
Guidelines Provided to Authors: {g}
Paper Content: {paper}

Let's think step by step. The score should be formatted as "Score: 0", "Score: 0.5" or "Score: 1" in a separate final line.

"""
# Evaluate the prompt on checklist question 1.
get_LLM_feedback(
    paper["paper"], 
    paper["checklist_df"], 
    paper["ground_truth"],
    1,
    paper_prompt
)

Question: Do the main claims made in the abstract and introduction accurately reflect the paper's contributions and scope?
Answer: Yes
Justification: The abstract and introduction clearly state the main contributions and scope of the paper
[+] Question # 1

Score:
0.5


Review:
After analyzing the provided materials, including the "Proposed Author's Answer" and "Proposed Author's Justification to the Answer" in conjunction with the "Paper Content" and the "Guidelines Provided to Authors," some discrepancies and areas for improvement have been identified. Below, these concerns are addressed with actionable feedback aimed at enhancing the submission quality and alignment with the conference standards.

1. **Clarity and specificity in claims:**  
   The "Proposed Author's Justification to the Answer" states that the abstract and introduction clearly state the main contributions and scope of the paper. While this might be true at a high level, the provided excerpts from the paper content i

In [10]:
# Prompt experiment: step-by-step instructions (summarize the cited text,
# list discrepancies, give itemized feedback) with an explicit score rubric.
paper_prompt = """
You are provided with a “Paper Content” to be submitted to the NeurIPS conference. You are assisting the authors in preparing their answer to one “Quality Control Question”. Please examine carefully the “Proposed Author's Answer” and the “Proposed Author's Justification to the Answer”. Then proceed step by step: If a section is cited in the "Proposed Author's Justification for the Answer," summarize the relevant text referenced from the "Paper Content"; otherwise, please fetch by yourself relevant justification text. Identify and list any discrepancies between the author's response to the "Quality Control Question" (including both the "Proposed Author's Answer" and the "Proposed Author's Justification for the Answer") and the "Paper Content."Provide itemized, actionable feedback, based on the “Guidelines Provided to Authors”, aiming to improve the paper quality. Concentrate on a few of the most significant improvements that can be made, and write in terse technical English.

Conclude your review with a score in a separate line: 
1: The paper is acceptable without changes.
0.5: Improvements are recommended to enhance the likelihood of acceptance, though no fatal flaws exist.
0: Critical issues must be resolved, as they could almost certainly cause rejection if unaddressed.
Make sure that score is shown in a new line in this format `Score: score_value` and there is no content after the score.
Quality Control Question: {q}
Proposed Author's Answer: {a}
Proposed Author's Justification to the Answer: {j}
Guidelines Provided to Authors: {g}
Paper Content: {paper}

Let's think step by step. The score should be formatted as "Score: 0", "Score: 0.5" or "Score: 1" in a separate final line.

"""
# Evaluate the prompt on checklist question 3.
get_LLM_feedback(
    paper["paper"], 
    paper["checklist_df"], 
    paper["ground_truth"],
    3,
    paper_prompt
)

Question: For each theoretical result, does the paper provide the full set of assumptions and a complete (and correct) proof?
Answer: N/A
Justification: This does not apply to our research work because our research is not theoretical rather it deals with a meta-dataset creation and experiments executed using this meta-dataset
[+] Question # 3

Score:
0.5


Review:
### Discrepancies Found:
1. **Theoretical Results**: The proposed author's justification claims that the paper does not include theoretical results since it focuses on meta-dataset creation and experimental evaluation. However, the guidelines clearly stipulate that any paper, regardless of its primary focus, that includes theorems, formulas, and proofs, must adhere to specific formatting and presentation standards, including numbering, cross-referencing, and stating assumptions. This discrepancy indicates a misunderstanding or misinterpretation of what constitutes "theoretical results" within the scope of the guidelines.

###

# Prompt 3.5

In [13]:
# Prompt experiment: shorter field labels (Question / Answer / Justification /
# Guidelines / Paper) with the same three-level score rubric.
paper_prompt = """
You are provided with a “Paper” to be submitted to the NeurIPS conference. You are assisting the authors in preparing their “Answer” to one checklist “Question”. Please examine carefully the proposed author's “Answer” and the proposed author's “Justification” provided, and identify any discrepancies with the actual ”Paper” content, for this specific “Question”, taking into account the “Guidelines” provided to authors. Afterwards, provide itemized, actionable feedback, based on the “Guidelines”, aiming to improve the paper quality. Concentrate on a few of the most significant improvements that can be made, and write in terse technical English.
Conclude your review with a score in a separate line: 
1: The paper is acceptable without changes.
0.5: Improvements are recommended to enhance the likelihood of acceptance, though no fatal flaws exist.
0: Critical issues must be resolved, as they could almost certainly cause rejection if unaddressed.
Make sure that score is shown in a new line in this format “Score: score_value” and there is no content after the score.
Question: {q}
Answer: {a}
Justification: {j}
Guidelines: {g}
Paper: {paper}

"""
# Evaluate the prompt on checklist question 1.
get_LLM_feedback(
    paper["paper"], 
    paper["checklist_df"], 
    paper["ground_truth"],
    1,
    paper_prompt
)

Question: Do the main claims made in the abstract and introduction accurately reflect the paper's contributions and scope?
Answer: Yes
Justification: The abstract and introduction clearly state the main contributions and scope of the paper
[+] Question # 1

Score:
0.5


Review:
The author's answer to the checklist question regarding the accuracy of the claims made in the abstract and introduction with respect to the paper's contributions and scope is a simple affirmation. However, the justification provided does not specifically address how the claims in the abstract and introduction match the theoretical and experimental results within the paper or discuss the generalizability of these results to other settings. The guidelines emphasize the importance of explicitly stating the paper's contributions, including any important assumptions and limitations, and ensuring that the claims made are reflective of the results obtained and their potential applicability beyond the experimental setu

In [14]:
# Prompt experiment: shorter field labels (Question / Answer / Justification /
# Guidelines / Paper) with the same three-level score rubric.
paper_prompt = """
You are provided with a “Paper” to be submitted to the NeurIPS conference. You are assisting the authors in preparing their “Answer” to one checklist “Question”. Please examine carefully the proposed author's “Answer” and the proposed author's “Justification” provided, and identify any discrepancies with the actual ”Paper” content, for this specific “Question”, taking into account the “Guidelines” provided to authors. Afterwards, provide itemized, actionable feedback, based on the “Guidelines”, aiming to improve the paper quality. Concentrate on a few of the most significant improvements that can be made, and write in terse technical English.
Conclude your review with a score in a separate line: 
1: The paper is acceptable without changes.
0.5: Improvements are recommended to enhance the likelihood of acceptance, though no fatal flaws exist.
0: Critical issues must be resolved, as they could almost certainly cause rejection if unaddressed.
Make sure that score is shown in a new line in this format “Score: score_value” and there is no content after the score.
Question: {q}
Answer: {a}
Justification: {j}
Guidelines: {g}
Paper: {paper}

"""
# Evaluate the prompt on checklist question 3.
get_LLM_feedback(
    paper["paper"], 
    paper["checklist_df"], 
    paper["ground_truth"],
    3,
    paper_prompt
)

Question: For each theoretical result, does the paper provide the full set of assumptions and a complete (and correct) proof?
Answer: N/A
Justification: This does not apply to our research work because our research is not theoretical rather it deals with a meta-dataset creation and experiments executed using this meta-dataset
[+] Question # 3

Score:
0.5


Review:
**Feedback on the Proposed Author's Answer and Justification:**

The provided "Paper" content and the authors' response seem misaligned with the checklist question "For each theoretical result, does the paper provide the full set of assumptions and a complete (and correct) proof?" The response indicates that this is not applicable because their work is on meta-dataset creation and experimental execution, which is primarily empirical in nature and does not directly engage with theoretical proofs or assumptions in the manner that a more mathematically inclined paper might.

The essential improvements necessary to enhance paper 