In [1]:
import pandas as pd

# Split the evaluated questions into exemplars and the rest:
#   1. load the evaluation sheet,
#   2. keep questions whose "Average" reaches MIN_AVERAGE,
#   3. pick, for every grade column, the question with the highest grade,
#   4. write the picked questions and all remaining questions to CSV.
file_path = "Parsed_Evaluated_Questions_fixed.xlsx"
MIN_AVERAGE = 2.5  # minimum "Average" for a question to count as high-scoring

# Open the workbook in a context manager so the file handle is released as
# soon as the sheet has been read (the bare ExcelFile was never closed).
with pd.ExcelFile(file_path) as xls:
    df = pd.read_excel(xls, sheet_name="Sheet1")

# Per-criterion grade columns. "Average" does not contain "Grade", so no
# separate exclusion is required; the isinstance guard skips non-string labels.
criteria_columns = [col for col in df.columns if isinstance(col, str) and "Grade" in col]

# Filter high-scoring questions (average score >= MIN_AVERAGE)
high_scoring_df = df[df["Average"] >= MIN_AVERAGE]

# Collect the index label of the top question for each criterion.
# idxmax() raises on an empty frame, so handle "nothing qualifies" explicitly.
# dict.fromkeys de-duplicates while keeping first-seen (criterion) order.
if high_scoring_df.empty:
    top_indices = []
else:
    top_indices = list(
        dict.fromkeys(high_scoring_df[criterion].idxmax() for criterion in criteria_columns)
    )

# Select all winners in a single .loc call: this keeps the original column
# dtypes (row-by-row concat of transposed Series upcasts everything to object)
# and avoids re-copying the frame on every loop iteration.
# drop_duplicates() additionally removes rows with identical content.
high_scoring_questions = high_scoring_df.loc[top_indices].drop_duplicates()

# Everything that was not selected above ends up in the "low-scoring" set
# (this includes high-average questions that did not top any criterion).
low_scoring_df = df.drop(high_scoring_questions.index)

# Save the extracted datasets as CSV files
high_scoring_questions.to_csv("High_Scoring_Questions.csv", index=False)
low_scoring_df.to_csv("Low_Scoring_Questions.csv", index=False)

print("Extraction complete. The datasets have been saved as CSV files.")


Extraction complete. The datasets have been saved as CSV files.


In [6]:
# Preview the first rows of the questions selected as high-scoring exemplars.
high_scoring_questions.head()

Unnamed: 0,Question,Choices,Answer,Relevance Grade,Relevance Reason,Curriculum Fit Grade,Curriculum Fit Reason,Accuracy Grade,Accuracy Reason,Clarity Grade,...,Fairness Reason,Avoiding Plagiarism Grade,Avoiding Plagiarism Reason,Novelty Grade,Novelty Reason,DOK Level Grade,DOK Level Reason,Bloom's Taxonomy Grade,Bloom's Taxonomy Reason,Average
8,What happens to the particles in the water whe...,"['The particles move faster and spread out.',\...",1,3,The question aligns well with learning outcome...,3,This question is appropriate for middle school...,3,The question is accurate; boiling involves par...,3,...,The question ensures fairness and accessibilit...,2,The question is fairly standard and doesn't sh...,1,The question lacks novelty or a fresh perspect...,2.0,"The question reflects a basic recall level, wh...",2,The question tests the 'Understand' level of B...,2.555556
10,Select the invertebrate.,"['a butterfly' 'a dog' 'a bird'],",1,3,The question correctly targets the identificat...,3,The question is directly relevant to elementar...,2,The answer key points to the wrong choice ('1'...,3,...,The question is fair and accessible to a diver...,3,"The content appears fresh and unique, not a re...",2,The question presents a common topic without i...,2.0,"The question aligns with the Recall level, whi...",2,"This question assesses the Remember level, foc...",2.611111
43,Select the mammals.,['a blue jay' 'a goldfish' 'a house cat' 'a ho...,1,3,The question aligns with learning outcomes rel...,3,Identifying mammals fits well within elementar...,2,"The content is mostly accurate; however, the i...",3,...,The question is fair and accessible to diverse...,3,The question appears unique and not a rephrase...,3,The question skillfully uses common animals to...,2.0,"The question reflects recall, a lower DOK leve...",2,The question aligns with the 'Remember' level ...,2.555556
102,Select the elementary substance.,['Aluminum' 'calcium hydroxide''silicon dioxid...,0,3,The question aligns with standard learning out...,3,It is suitable for a high school science curri...,3,The content is accurate. Aluminum is indeed an...,2,...,"The question is fair and accessible, assuming ...",3,The question appears unique and doesn't seem t...,2,While the question introduces important concep...,3.0,The question reflects the appropriate Depth of...,3,"The question requires students to ""Remember"" a...",2.666667


In [7]:
# Preview the first rows of the remaining (non-selected) questions.
low_scoring_df.head()

Unnamed: 0,Question,Choices,Answer,Relevance Grade,Relevance Reason,Curriculum Fit Grade,Curriculum Fit Reason,Accuracy Grade,Accuracy Reason,Clarity Grade,...,Fairness Reason,Avoiding Plagiarism Grade,Avoiding Plagiarism Reason,Novelty Grade,Novelty Reason,DOK Level Grade,DOK Level Reason,Bloom's Taxonomy Grade,Bloom's Taxonomy Reason,Average
0,Based on the information provided,['When the salt is dissolved in water.' 'When ...,1,1,"The question lacks sufficient context, making ...",0,Without specific subject or grade level contex...,1,"The answer choices lack context, causing uncer...",0,...,"Without context, it's challenging to ensure fa...",3,"The question seems unique, though verifying it...",0,"Due to missing context, there is no opportunit...",0.0,Insufficient context makes it impossible to de...,0,Without knowing the question's intent or conte...,1.333333
1,Which of these questions can be investigated w...,['What is the average height of a tomato plant...,2,2,The question partially aligns with investigati...,2,The question is suitable for a middle-grade sc...,2,"The question is mostly accurate, but the phras...",2,...,The question is accessible and fair to all lea...,2,"The question is somewhat generic, raising conc...",1,The question lacks novelty or a fresh perspect...,2.0,The question reflects the 'Strategic Thinking'...,2,The question aligns with the 'Analyze' level o...,2.388889
2,What is the inherited trait that this rabbit has?,['The rabbit has white fur.' 'The rabbit has a...,0,2,The question is somewhat relevant to the genet...,2,The question can fit a biology curriculum disc...,1,The question's wording is somewhat ambiguous. ...,2,...,"While the question is generally fair, the lack...",2,"There is no evidence of direct copying, but th...",0,This question lacks any novel or unique approa...,1.0,The question targets a low Depth of Knowledge ...,1,The question aligns with the 'Remember' level ...,2.0
3,What happens to the sugar?\n Choices: \n['It i...,['It is a physical change.' 'It is a chemical ...,1,2,The question is moderately aligned with common...,2,The question fits within science curricula tha...,1,The question is overly simplistic and ambiguou...,2,...,The lack of context could make it confusing fo...,3,The question appears to be original and not a ...,1,The question lacks novelty or a fresh perspect...,1.0,"The question lacks depth, as it does not engag...",1,"The question aligns with the ""Remember"" level ...",2.055556
4,Select the plant.,"['a cat' 'a rose bush' 'a tomato plant'],",1,2,While the question is related to identifying a...,2,The question is suitable for early elementary ...,3,The information is accurate. A rose bush is in...,3,...,The question is fair and accessible to diverse...,2,The question could be a common pedagogical exa...,1,"It lacks novelty, as identifying basic plants ...",1.0,The question is at a recall level of Depth of ...,2,The question aligns with the 'Remember' level ...,2.444444


In [13]:
# Build a few-shot prompt from the evaluated questions:
#   1. score every question with a weighted average of its criterion grades,
#   2. mark questions as passed/failed (critical criteria + overall threshold),
#   3. use the best passed questions as positive examples and a few failed
#      questions as negative examples,
#   4. write the resulting prompt to a text file.
file_path = "Parsed_Evaluated_Questions_fixed.xlsx"  # Update if necessary
data = pd.read_excel(file_path)

# Declared weights for each criterion (they do not need to sum to 1; they are
# normalized below).
weights = {
    "Relevance Grade": 0.15,
    "Curriculum Fit Grade": 0.12,
    "Accuracy Grade": 0.12,
    "Clarity Grade": 0.10,
    "Single Correct Answer Grade": 0.15,
    "DOK Level Grade": 0.10,
    "Bloom's Taxonomy Grade": 0.08,
    "Bias-Free (Answer Choices) Grade": 0.04,
    "Grammar and Syntax Grade": 0.03,
    "Avoiding Common Errors Grade": 0.02,
    "Readability Grade": 0.03,
    "Plausibility Grade": 0.03,
    "Balance (Answer Choices) Grade": 0.03,
    "Bias-Free Content Grade": 0.03,
    "Formatting Grade": 0.02,
    "Fairness Grade": 0.03,
    "Avoiding Plagiarism Grade": 0.02,
    "Novelty Grade": 0.02
}

# Fail fast with a readable message if the sheet lacks an expected grade column.
missing_columns = [col for col in weights if col not in data.columns]
if missing_columns:
    raise KeyError(f"Missing grade columns in {file_path}: {missing_columns}")

# Identify critical criteria (declared weights >= 0.10).
# This must happen BEFORE normalization: the declared weights sum to more than
# 1, so after normalization a declared 0.10 drops below 0.10 and those criteria
# would silently stop being treated as critical.
critical_weight = 0.10
critical_criteria = [k for k, v in weights.items() if v >= critical_weight]

# Normalize weights to ensure they sum to 1
total_weight = sum(weights.values())
weights = {k: v / total_weight for k, v in weights.items()}

# Calculate weighted average (a missing grade makes the whole average NaN,
# which then fails the threshold comparison below).
data['Weighted Average'] = sum(data[col] * weights[col] for col in weights.keys())

# Define passing criteria
passing_threshold = 1.5
high_performer_threshold = 2.5

# A question fails if any critical criterion is below the passing threshold.
# Vectorized comparison instead of a row-wise apply; NaN compares as False in
# both forms, so missing grades do not count as a critical failure.
data['Fails Critical Criteria'] = data[critical_criteria].lt(passing_threshold).any(axis=1)
data['Passed'] = (~data['Fails Critical Criteria']) & (data['Weighted Average'] >= passing_threshold)

# Extract high-performing questions (passed and weighted average > 2.5)
high_scoring_questions = data[data["Passed"] & (data["Weighted Average"] > high_performer_threshold)]

# Select the top 5 highest-performing questions based on Weighted Average
top_5_high_performers = high_scoring_questions.nlargest(5, "Weighted Average")

# Free-text justification columns, one per criterion. Defined here so the cell
# runs on a fresh kernel instead of relying on a variable from another cell.
# NOTE(review): assumes every justification column name ends with "Reason" - confirm.
reason_columns = [col for col in data.columns if isinstance(col, str) and col.endswith("Reason")]


def format_example(row, reasons_label):
    """Render one question as a prompt example.

    Parameters
    ----------
    row : pandas.Series
        A row of ``data``; must provide "Question", "Choices", "Answer" and
        the columns listed in ``reason_columns``.
    reasons_label : str
        Heading used for the concatenated reasons (e.g. "Explanation").

    Returns
    -------
    str
        The formatted example block, starting and ending with a newline.
    """
    # Join all non-missing reason texts into a single space-separated string.
    reasons = " ".join(str(row[col]) for col in reason_columns if pd.notna(row[col]))
    return (
        f"\n**Question:** {row['Question']}"
        f"\n**Choices:** {row['Choices']}"
        f"\n**Answer:** {row['Answer']}"
        f"\n**{reasons_label}:** {reasons}\n"
    )


# Generate a new few-shot prompt with the top 5 high-performing questions
few_shot_prompt = """### Few-Shot Learning Prompt for Question Improvement

The goal of this task is to improve low-scoring questions by using high-performing questions as a guide. Given metadata about a low-scoring question, regenerate a new question that aligns with the high-scoring examples.

#### Example High-Performing Questions:
"""

# Include the top 5 high-performing questions as examples with concatenated reasons
for _, row in top_5_high_performers.iterrows():
    few_shot_prompt += format_example(row, "Explanation")

few_shot_prompt += """\n
#### Example Low-Scoring Questions (Avoid these types of questions):
"""

# Select up to three failed questions (first in sheet order) as examples to
# avoid; head() already copes with fewer than three being available.
sample_low_scoring_rows = data[~data["Passed"]].head(3)

for _, row in sample_low_scoring_rows.iterrows():
    few_shot_prompt += format_example(row, "Issues")

few_shot_prompt += """\n
#### Now, generate a new question based on the metadata and the high-scoring examples above while avoiding the mistakes seen in the low-scoring examples:
"""

# Save the updated prompt to a text file
prompt_file_path = "Few_Shot_Prompt.txt"
with open(prompt_file_path, "w", encoding="utf-8") as f:
    f.write(few_shot_prompt)

# Display the path of the written prompt file as the cell output
prompt_file_path

'Few_Shot_Prompt.txt'