# Salesforce/instructblip-vicuna-7b

In [None]:
import torch
from PIL import Image
from transformers import InstructBlipProcessor, InstructBlipForConditionalGeneration
from collections import defaultdict
import pandas as pd
import re

# Load the InstructBLIP processor and model from the same checkpoint.
# The model is requested with torch.float16 weights and low_cpu_mem_usage=True,
# then moved to the first CUDA device.
checkpoint = "Salesforce/instructblip-vicuna-7b"
processor = InstructBlipProcessor.from_pretrained(checkpoint)
model = InstructBlipForConditionalGeneration.from_pretrained(
    checkpoint,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)
model = model.to("cuda:0")

# ----------------------------- COUNT ---------------------------

# Brightness-Count

In [None]:
# Load the dataset
data = pd.read_csv("Noisy-Denoised_QuestionPairs[new].csv")

# Score the model on "count" questions perturbed by `misspell_word`, using the
# images stored under the Brightness folder.

# Per-noise-type tallies: {"correct": int, "total": int}
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# The prompt asks for a single option letter, so a small generation budget
# suffices (was 1000).
MAX_ANSWER_TOKENS = 32

# NOTE(review): whether the decoded text echoes the prompt appears to depend
# on the installed transformers version — confirm. To work either way, accept
# a leading option letter (no echo) or the letter right after "Answer:" (echo).
# The trailing \b stops free-text replies such as "Dog" being read as "D".
LEADING_LETTER_RE = re.compile(r"\s*\(?([A-D])\b")
ANSWER_LETTER_RE = re.compile(r"Answer:\s*\(?([A-D])\b")

selected = data[
    (data["category"] == "count")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected.iterrows():
    # Decode the pixels inside the context manager so the file handle is
    # released before the next iteration.
    image_path = f"Weighted AVG Denoised [TopK]/Brightness/{row['path']}"
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {letter: row[letter] for letter in "ABCD"}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Prepare inputs and generate the model output
    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")
    output = model.generate(**inputs, max_new_tokens=MAX_ANSWER_TOKENS)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted option letter (A, B, C, or D)
    match = LEADING_LETTER_RE.match(full_answer) or ANSWER_LETTER_RE.search(full_answer)
    if match:
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): index 2 presumably picks the letter out of a value
    # formatted like "['A']" — confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy once, after every selected row has been scored
# (previously this ran inside the loop and printed on every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Per-noise-type breakdown: raw counts first, then the percentage.
for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

# Aggregate the tallies over every noise type recorded above.
total_correct = sum(t["correct"] for t in noise_type_accuracy.values())
total_predictions = sum(t["total"] for t in noise_type_accuracy.values())

# Overall accuracy (0 when nothing was scored, to avoid dividing by zero)
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Contrast-Count

In [None]:
# Score the model on "count" questions perturbed by `misspell_word`, using the
# images stored under the Contrast folder.

# Per-noise-type tallies: {"correct": int, "total": int}
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# The prompt asks for a single option letter, so a small generation budget
# suffices (was 1000).
MAX_ANSWER_TOKENS = 32

# NOTE(review): whether the decoded text echoes the prompt appears to depend
# on the installed transformers version — confirm. To work either way, accept
# a leading option letter (no echo) or the letter right after "Answer:" (echo).
# The trailing \b stops free-text replies such as "Dog" being read as "D".
LEADING_LETTER_RE = re.compile(r"\s*\(?([A-D])\b")
ANSWER_LETTER_RE = re.compile(r"Answer:\s*\(?([A-D])\b")

selected = data[
    (data["category"] == "count")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected.iterrows():
    # Decode the pixels inside the context manager so the file handle is
    # released before the next iteration.
    image_path = f"Weighted AVG Denoised [TopK]/Contrast/{row['path']}"
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {letter: row[letter] for letter in "ABCD"}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Prepare inputs and generate the model output
    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")
    output = model.generate(**inputs, max_new_tokens=MAX_ANSWER_TOKENS)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted option letter (A, B, C, or D)
    match = LEADING_LETTER_RE.match(full_answer) or ANSWER_LETTER_RE.search(full_answer)
    if match:
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): index 2 presumably picks the letter out of a value
    # formatted like "['A']" — confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy once, after every selected row has been scored
# (previously this ran inside the loop and printed on every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Per-noise-type breakdown: raw counts first, then the percentage.
for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

# Aggregate the tallies over every noise type recorded above.
total_correct = sum(t["correct"] for t in noise_type_accuracy.values())
total_predictions = sum(t["total"] for t in noise_type_accuracy.values())

# Overall accuracy (0 when nothing was scored, to avoid dividing by zero)
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Defocus-blur-Count

In [None]:
# Score the model on "count" questions perturbed by `misspell_word`, using the
# images stored under the Defocus-blur folder.

# Per-noise-type tallies: {"correct": int, "total": int}
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# The prompt asks for a single option letter, so a small generation budget
# suffices (was 1000).
MAX_ANSWER_TOKENS = 32

# NOTE(review): whether the decoded text echoes the prompt appears to depend
# on the installed transformers version — confirm. To work either way, accept
# a leading option letter (no echo) or the letter right after "Answer:" (echo).
# The trailing \b stops free-text replies such as "Dog" being read as "D".
LEADING_LETTER_RE = re.compile(r"\s*\(?([A-D])\b")
ANSWER_LETTER_RE = re.compile(r"Answer:\s*\(?([A-D])\b")

selected = data[
    (data["category"] == "count")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected.iterrows():
    # Decode the pixels inside the context manager so the file handle is
    # released before the next iteration.
    image_path = f"Weighted AVG Denoised [TopK]/Defocus-blur/{row['path']}"
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {letter: row[letter] for letter in "ABCD"}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Prepare inputs and generate the model output
    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")
    output = model.generate(**inputs, max_new_tokens=MAX_ANSWER_TOKENS)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted option letter (A, B, C, or D)
    match = LEADING_LETTER_RE.match(full_answer) or ANSWER_LETTER_RE.search(full_answer)
    if match:
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): index 2 presumably picks the letter out of a value
    # formatted like "['A']" — confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy once, after every selected row has been scored
# (previously this ran inside the loop and printed on every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Per-noise-type breakdown: raw counts first, then the percentage.
for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

# Aggregate the tallies over every noise type recorded above.
total_correct = sum(t["correct"] for t in noise_type_accuracy.values())
total_predictions = sum(t["total"] for t in noise_type_accuracy.values())

# Overall accuracy (0 when nothing was scored, to avoid dividing by zero)
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Elastic-Count

In [None]:
# Score the model on "count" questions perturbed by `misspell_word`, using the
# images stored under the Elastic folder.

# Per-noise-type tallies: {"correct": int, "total": int}
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# The prompt asks for a single option letter, so a small generation budget
# suffices (was 1000).
MAX_ANSWER_TOKENS = 32

# NOTE(review): whether the decoded text echoes the prompt appears to depend
# on the installed transformers version — confirm. To work either way, accept
# a leading option letter (no echo) or the letter right after "Answer:" (echo).
# The trailing \b stops free-text replies such as "Dog" being read as "D".
LEADING_LETTER_RE = re.compile(r"\s*\(?([A-D])\b")
ANSWER_LETTER_RE = re.compile(r"Answer:\s*\(?([A-D])\b")

selected = data[
    (data["category"] == "count")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected.iterrows():
    # Decode the pixels inside the context manager so the file handle is
    # released before the next iteration.
    image_path = f"Weighted AVG Denoised [TopK]/Elastic/{row['path']}"
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {letter: row[letter] for letter in "ABCD"}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Prepare inputs and generate the model output
    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")
    output = model.generate(**inputs, max_new_tokens=MAX_ANSWER_TOKENS)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted option letter (A, B, C, or D)
    match = LEADING_LETTER_RE.match(full_answer) or ANSWER_LETTER_RE.search(full_answer)
    if match:
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): index 2 presumably picks the letter out of a value
    # formatted like "['A']" — confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy once, after every selected row has been scored
# (previously this ran inside the loop and printed on every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Per-noise-type breakdown: raw counts first, then the percentage.
for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

# Aggregate the tallies over every noise type recorded above.
total_correct = sum(t["correct"] for t in noise_type_accuracy.values())
total_predictions = sum(t["total"] for t in noise_type_accuracy.values())

# Overall accuracy (0 when nothing was scored, to avoid dividing by zero)
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Fog-Count

In [None]:
# Score the model on "count" questions perturbed by `misspell_word`, using the
# images stored under the Fog folder.

# Per-noise-type tallies: {"correct": int, "total": int}
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# The prompt asks for a single option letter, so a small generation budget
# suffices (was 1000).
MAX_ANSWER_TOKENS = 32

# NOTE(review): whether the decoded text echoes the prompt appears to depend
# on the installed transformers version — confirm. To work either way, accept
# a leading option letter (no echo) or the letter right after "Answer:" (echo).
# The trailing \b stops free-text replies such as "Dog" being read as "D".
LEADING_LETTER_RE = re.compile(r"\s*\(?([A-D])\b")
ANSWER_LETTER_RE = re.compile(r"Answer:\s*\(?([A-D])\b")

selected = data[
    (data["category"] == "count")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected.iterrows():
    # Decode the pixels inside the context manager so the file handle is
    # released before the next iteration.
    image_path = f"Weighted AVG Denoised [TopK]/Fog/{row['path']}"
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {letter: row[letter] for letter in "ABCD"}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Prepare inputs and generate the model output
    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")
    output = model.generate(**inputs, max_new_tokens=MAX_ANSWER_TOKENS)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted option letter (A, B, C, or D)
    match = LEADING_LETTER_RE.match(full_answer) or ANSWER_LETTER_RE.search(full_answer)
    if match:
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): index 2 presumably picks the letter out of a value
    # formatted like "['A']" — confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy once, after every selected row has been scored
# (previously this ran inside the loop and printed on every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Per-noise-type breakdown: raw counts first, then the percentage.
for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

# Aggregate the tallies over every noise type recorded above.
total_correct = sum(t["correct"] for t in noise_type_accuracy.values())
total_predictions = sum(t["total"] for t in noise_type_accuracy.values())

# Overall accuracy (0 when nothing was scored, to avoid dividing by zero)
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Frost-Count

In [None]:
# Score the model on "count" questions perturbed by `misspell_word`, using the
# images stored under the Frost folder.

# Per-noise-type tallies: {"correct": int, "total": int}
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# The prompt asks for a single option letter, so a small generation budget
# suffices (was 1000).
MAX_ANSWER_TOKENS = 32

# NOTE(review): whether the decoded text echoes the prompt appears to depend
# on the installed transformers version — confirm. To work either way, accept
# a leading option letter (no echo) or the letter right after "Answer:" (echo).
# The trailing \b stops free-text replies such as "Dog" being read as "D".
LEADING_LETTER_RE = re.compile(r"\s*\(?([A-D])\b")
ANSWER_LETTER_RE = re.compile(r"Answer:\s*\(?([A-D])\b")

selected = data[
    (data["category"] == "count")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected.iterrows():
    # Decode the pixels inside the context manager so the file handle is
    # released before the next iteration.
    image_path = f"Weighted AVG Denoised [TopK]/Frost/{row['path']}"
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {letter: row[letter] for letter in "ABCD"}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Prepare inputs and generate the model output
    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")
    output = model.generate(**inputs, max_new_tokens=MAX_ANSWER_TOKENS)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted option letter (A, B, C, or D)
    match = LEADING_LETTER_RE.match(full_answer) or ANSWER_LETTER_RE.search(full_answer)
    if match:
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): index 2 presumably picks the letter out of a value
    # formatted like "['A']" — confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy once, after every selected row has been scored
# (previously this ran inside the loop and printed on every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Per-noise-type breakdown: raw counts first, then the percentage.
for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

# Aggregate the tallies over every noise type recorded above.
total_correct = sum(t["correct"] for t in noise_type_accuracy.values())
total_predictions = sum(t["total"] for t in noise_type_accuracy.values())

# Overall accuracy (0 when nothing was scored, to avoid dividing by zero)
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Gaussian-noise-Count

In [None]:
# Score the model on "count" questions perturbed by `misspell_word`, using the
# images stored under the Gaussian-noise folder.

# Per-noise-type tallies: {"correct": int, "total": int}
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# The prompt asks for a single option letter, so a small generation budget
# suffices (was 1000).
MAX_ANSWER_TOKENS = 32

# NOTE(review): whether the decoded text echoes the prompt appears to depend
# on the installed transformers version — confirm. To work either way, accept
# a leading option letter (no echo) or the letter right after "Answer:" (echo).
# The trailing \b stops free-text replies such as "Dog" being read as "D".
LEADING_LETTER_RE = re.compile(r"\s*\(?([A-D])\b")
ANSWER_LETTER_RE = re.compile(r"Answer:\s*\(?([A-D])\b")

selected = data[
    (data["category"] == "count")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected.iterrows():
    # Decode the pixels inside the context manager so the file handle is
    # released before the next iteration.
    image_path = f"Weighted AVG Denoised [TopK]/Gaussian-noise/{row['path']}"
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {letter: row[letter] for letter in "ABCD"}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Prepare inputs and generate the model output
    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")
    output = model.generate(**inputs, max_new_tokens=MAX_ANSWER_TOKENS)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted option letter (A, B, C, or D)
    match = LEADING_LETTER_RE.match(full_answer) or ANSWER_LETTER_RE.search(full_answer)
    if match:
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): index 2 presumably picks the letter out of a value
    # formatted like "['A']" — confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy once, after every selected row has been scored
# (previously this ran inside the loop and printed on every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Per-noise-type breakdown: raw counts first, then the percentage.
for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

# Aggregate the tallies over every noise type recorded above.
total_correct = sum(t["correct"] for t in noise_type_accuracy.values())
total_predictions = sum(t["total"] for t in noise_type_accuracy.values())

# Overall accuracy (0 when nothing was scored, to avoid dividing by zero)
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Impulse-noise-Count

In [None]:
# Score the model on "count" questions perturbed by `misspell_word`, using the
# images stored under the Impulse-noise folder.

# Per-noise-type tallies: {"correct": int, "total": int}
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# The prompt asks for a single option letter, so a small generation budget
# suffices (was 1000).
MAX_ANSWER_TOKENS = 32

# NOTE(review): whether the decoded text echoes the prompt appears to depend
# on the installed transformers version — confirm. To work either way, accept
# a leading option letter (no echo) or the letter right after "Answer:" (echo).
# The trailing \b stops free-text replies such as "Dog" being read as "D".
LEADING_LETTER_RE = re.compile(r"\s*\(?([A-D])\b")
ANSWER_LETTER_RE = re.compile(r"Answer:\s*\(?([A-D])\b")

selected = data[
    (data["category"] == "count")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected.iterrows():
    # Decode the pixels inside the context manager so the file handle is
    # released before the next iteration.
    image_path = f"Weighted AVG Denoised [TopK]/Impulse-noise/{row['path']}"
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {letter: row[letter] for letter in "ABCD"}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Prepare inputs and generate the model output
    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")
    output = model.generate(**inputs, max_new_tokens=MAX_ANSWER_TOKENS)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted option letter (A, B, C, or D)
    match = LEADING_LETTER_RE.match(full_answer) or ANSWER_LETTER_RE.search(full_answer)
    if match:
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): index 2 presumably picks the letter out of a value
    # formatted like "['A']" — confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy once, after every selected row has been scored
# (previously this ran inside the loop and printed on every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Per-noise-type breakdown: raw counts first, then the percentage.
for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

# Aggregate the tallies over every noise type recorded above.
total_correct = sum(t["correct"] for t in noise_type_accuracy.values())
total_predictions = sum(t["total"] for t in noise_type_accuracy.values())

# Overall accuracy (0 when nothing was scored, to avoid dividing by zero)
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# JPEG-compression-Count

In [None]:
# Score the model on "count" questions perturbed by `misspell_word`, using the
# images stored under the JPEG-compression folder.

# Per-noise-type tallies: {"correct": int, "total": int}
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# The prompt asks for a single option letter, so a small generation budget
# suffices (was 1000).
MAX_ANSWER_TOKENS = 32

# NOTE(review): whether the decoded text echoes the prompt appears to depend
# on the installed transformers version — confirm. To work either way, accept
# a leading option letter (no echo) or the letter right after "Answer:" (echo).
# The trailing \b stops free-text replies such as "Dog" being read as "D".
LEADING_LETTER_RE = re.compile(r"\s*\(?([A-D])\b")
ANSWER_LETTER_RE = re.compile(r"Answer:\s*\(?([A-D])\b")

selected = data[
    (data["category"] == "count")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected.iterrows():
    # Decode the pixels inside the context manager so the file handle is
    # released before the next iteration.
    image_path = f"Weighted AVG Denoised [TopK]/JPEG-compression/{row['path']}"
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {letter: row[letter] for letter in "ABCD"}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Prepare inputs and generate the model output
    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")
    output = model.generate(**inputs, max_new_tokens=MAX_ANSWER_TOKENS)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted option letter (A, B, C, or D)
    match = LEADING_LETTER_RE.match(full_answer) or ANSWER_LETTER_RE.search(full_answer)
    if match:
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): index 2 presumably picks the letter out of a value
    # formatted like "['A']" — confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy once, after every selected row has been scored
# (previously this ran inside the loop and printed on every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Roll the per-noise-type tallies in `noise_type_accuracy` up into a single
# overall accuracy figure, echoing each bucket along the way.
total_correct = total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket reports 0 instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this bucket into the running totals.
    total_correct += correct
    total_predictions += total

print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Pixelate-Count

In [None]:
# Score the model on "count" questions perturbed by `misspell_word`, using the
# images stored under "Weighted AVG Denoised [TopK]/Pixelate/".
# `noise_type_accuracy` maps each noise type to its correct / total tallies.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Only "count" questions with the misspell_word perturbation are scored.
    if row["category"] != "count" or row['modified_question_function_name'] != 'misspell_word':
        continue

    # Build the multiple-choice prompt from the question and its four options.
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Image.open keeps the underlying file open; the context manager releases
    # the handle as soon as the processor has turned the pixels into tensors.
    image_path = f"Weighted AVG Denoised [TopK]/Pixelate/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate and decode the model's reply.
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Pull the option letter that follows "Answer:"; anything else is "Unknown".
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # The gold letter is the third character of the `answer` field
    # (presumably stored like "['A']" -- TODO confirm against the CSV).
    actual_answer = [row["answer"][2]]

    # Update the tallies for this noise type.
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after every row has been scored
# (previously this ran inside the row loop and re-printed on every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Roll the per-noise-type tallies in `noise_type_accuracy` up into a single
# overall accuracy figure, echoing each bucket along the way.
total_correct = total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket reports 0 instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this bucket into the running totals.
    total_correct += correct
    total_predictions += total

print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Rain-Count

In [None]:
# Score the model on "count" questions perturbed by `misspell_word`, using the
# images stored under "Weighted AVG Denoised [TopK]/Rain/".
# `noise_type_accuracy` maps each noise type to its correct / total tallies.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Only "count" questions with the misspell_word perturbation are scored.
    if row["category"] != "count" or row['modified_question_function_name'] != 'misspell_word':
        continue

    # Build the multiple-choice prompt from the question and its four options.
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Image.open keeps the underlying file open; the context manager releases
    # the handle as soon as the processor has turned the pixels into tensors.
    image_path = f"Weighted AVG Denoised [TopK]/Rain/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate and decode the model's reply.
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Pull the option letter that follows "Answer:"; anything else is "Unknown".
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # The gold letter is the third character of the `answer` field
    # (presumably stored like "['A']" -- TODO confirm against the CSV).
    actual_answer = [row["answer"][2]]

    # Update the tallies for this noise type.
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after every row has been scored
# (previously this ran inside the row loop and re-printed on every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Roll the per-noise-type tallies in `noise_type_accuracy` up into a single
# overall accuracy figure, echoing each bucket along the way.
total_correct = total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket reports 0 instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this bucket into the running totals.
    total_correct += correct
    total_predictions += total

print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Saturation-Count

In [None]:
# Score the model on "count" questions perturbed by `misspell_word`, using the
# images stored under "Weighted AVG Denoised [TopK]/Saturation/".
# `noise_type_accuracy` maps each noise type to its correct / total tallies.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Only "count" questions with the misspell_word perturbation are scored.
    if row["category"] != "count" or row['modified_question_function_name'] != 'misspell_word':
        continue

    # Build the multiple-choice prompt from the question and its four options.
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Image.open keeps the underlying file open; the context manager releases
    # the handle as soon as the processor has turned the pixels into tensors.
    image_path = f"Weighted AVG Denoised [TopK]/Saturation/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate and decode the model's reply.
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Pull the option letter that follows "Answer:"; anything else is "Unknown".
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # The gold letter is the third character of the `answer` field
    # (presumably stored like "['A']" -- TODO confirm against the CSV).
    actual_answer = [row["answer"][2]]

    # Update the tallies for this noise type.
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after every row has been scored
# (previously this ran inside the row loop and re-printed on every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Roll the per-noise-type tallies in `noise_type_accuracy` up into a single
# overall accuracy figure, echoing each bucket along the way.
total_correct = total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket reports 0 instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this bucket into the running totals.
    total_correct += correct
    total_predictions += total

print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Shot-noise-Count

In [None]:
# Score the model on "count" questions perturbed by `misspell_word`, using the
# images stored under "Weighted AVG Denoised [TopK]/Shot-noise/".
# `noise_type_accuracy` maps each noise type to its correct / total tallies.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Only "count" questions with the misspell_word perturbation are scored.
    if row["category"] != "count" or row['modified_question_function_name'] != 'misspell_word':
        continue

    # Build the multiple-choice prompt from the question and its four options.
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Image.open keeps the underlying file open; the context manager releases
    # the handle as soon as the processor has turned the pixels into tensors.
    image_path = f"Weighted AVG Denoised [TopK]/Shot-noise/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate and decode the model's reply.
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Pull the option letter that follows "Answer:"; anything else is "Unknown".
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # The gold letter is the third character of the `answer` field
    # (presumably stored like "['A']" -- TODO confirm against the CSV).
    actual_answer = [row["answer"][2]]

    # Update the tallies for this noise type.
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after every row has been scored
# (previously this ran inside the row loop and re-printed on every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Roll the per-noise-type tallies in `noise_type_accuracy` up into a single
# overall accuracy figure, echoing each bucket along the way.
total_correct = total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket reports 0 instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this bucket into the running totals.
    total_correct += correct
    total_predictions += total

print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Snow-Count

In [None]:
# Score the model on "count" questions perturbed by `misspell_word`, using the
# images stored under "Weighted AVG Denoised [TopK]/Snow/".
# `noise_type_accuracy` maps each noise type to its correct / total tallies.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Only "count" questions with the misspell_word perturbation are scored.
    if row["category"] != "count" or row['modified_question_function_name'] != 'misspell_word':
        continue

    # Build the multiple-choice prompt from the question and its four options.
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Image.open keeps the underlying file open; the context manager releases
    # the handle as soon as the processor has turned the pixels into tensors.
    image_path = f"Weighted AVG Denoised [TopK]/Snow/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate and decode the model's reply.
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Pull the option letter that follows "Answer:"; anything else is "Unknown".
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # The gold letter is the third character of the `answer` field
    # (presumably stored like "['A']" -- TODO confirm against the CSV).
    actual_answer = [row["answer"][2]]

    # Update the tallies for this noise type.
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after every row has been scored
# (previously this ran inside the row loop and re-printed on every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Roll the per-noise-type tallies in `noise_type_accuracy` up into a single
# overall accuracy figure, echoing each bucket along the way.
total_correct = total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket reports 0 instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this bucket into the running totals.
    total_correct += correct
    total_predictions += total

print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Spatter-Count

In [None]:
# Score the model on "count" questions perturbed by `misspell_word`, using the
# images stored under "Weighted AVG Denoised [TopK]/Spatter/".
# `noise_type_accuracy` maps each noise type to its correct / total tallies.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Only "count" questions with the misspell_word perturbation are scored.
    if row["category"] != "count" or row['modified_question_function_name'] != 'misspell_word':
        continue

    # Build the multiple-choice prompt from the question and its four options.
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Image.open keeps the underlying file open; the context manager releases
    # the handle as soon as the processor has turned the pixels into tensors.
    image_path = f"Weighted AVG Denoised [TopK]/Spatter/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate and decode the model's reply.
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Pull the option letter that follows "Answer:"; anything else is "Unknown".
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # The gold letter is the third character of the `answer` field
    # (presumably stored like "['A']" -- TODO confirm against the CSV).
    actual_answer = [row["answer"][2]]

    # Update the tallies for this noise type.
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after every row has been scored
# (previously this ran inside the row loop and re-printed on every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Roll the per-noise-type tallies in `noise_type_accuracy` up into a single
# overall accuracy figure, echoing each bucket along the way.
total_correct = total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket reports 0 instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this bucket into the running totals.
    total_correct += correct
    total_predictions += total

print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Speckle-Count

In [None]:
# Score the model on "count" questions perturbed by `misspell_word`, using the
# images stored under "Weighted AVG Denoised [TopK]/Speckle-noise/".
# `noise_type_accuracy` maps each noise type to its correct / total tallies.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Only "count" questions with the misspell_word perturbation are scored.
    if row["category"] != "count" or row['modified_question_function_name'] != 'misspell_word':
        continue

    # Build the multiple-choice prompt from the question and its four options.
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Image.open keeps the underlying file open; the context manager releases
    # the handle as soon as the processor has turned the pixels into tensors.
    image_path = f"Weighted AVG Denoised [TopK]/Speckle-noise/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate and decode the model's reply.
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Pull the option letter that follows "Answer:"; anything else is "Unknown".
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # The gold letter is the third character of the `answer` field
    # (presumably stored like "['A']" -- TODO confirm against the CSV).
    actual_answer = [row["answer"][2]]

    # Update the tallies for this noise type.
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after every row has been scored
# (previously this ran inside the row loop and re-printed on every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Roll the per-noise-type tallies in `noise_type_accuracy` up into a single
# overall accuracy figure, echoing each bucket along the way.
total_correct = total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket reports 0 instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this bucket into the running totals.
    total_correct += correct
    total_predictions += total

print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Zoom-blur-Count

In [None]:
# Score the model on "count" questions perturbed by `misspell_word`, using the
# images stored under "Weighted AVG Denoised [TopK]/Zoom-Blur/".
# `noise_type_accuracy` maps each noise type to its correct / total tallies.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Only "count" questions with the misspell_word perturbation are scored.
    if row["category"] != "count" or row['modified_question_function_name'] != 'misspell_word':
        continue

    # Build the multiple-choice prompt from the question and its four options.
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Image.open keeps the underlying file open; the context manager releases
    # the handle as soon as the processor has turned the pixels into tensors.
    image_path = f"Weighted AVG Denoised [TopK]/Zoom-Blur/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate and decode the model's reply.
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Pull the option letter that follows "Answer:"; anything else is "Unknown".
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # The gold letter is the third character of the `answer` field
    # (presumably stored like "['A']" -- TODO confirm against the CSV).
    actual_answer = [row["answer"][2]]

    # Update the tallies for this noise type.
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after every row has been scored
# (previously this ran inside the row loop and re-printed on every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Roll the per-noise-type tallies in `noise_type_accuracy` up into a single
# overall accuracy figure, echoing each bucket along the way.
total_correct = total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket reports 0 instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this bucket into the running totals.
    total_correct += correct
    total_predictions += total

print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


## Motion-blur Count

In [None]:
# Score InstructBLIP on the denoised Motion-blur images, restricted to "count"
# questions perturbed by `misspell_word`; results accumulate per noise type.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Compiled once: captures the option letter that follows "Answer:" in the decoded text.
answer_pattern = re.compile(r"Answer:\s*([A-D])")

# Iterate over rows where category is "count"
for index, row in data.iterrows():
    # Skip every row outside the category / perturbation evaluated in this cell.
    if not (row["category"] == "count" and row['modified_question_function_name'] == 'misspell_word'):
        continue

    # Load the corresponding image. The context manager releases the file handle
    # right away; converting to RGB follows the InstructBLIP usage example.
    image_path = f"Weighted AVG Denoised [TopK]/Motion-blur/{row['path']}"
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"],
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Prepare inputs
    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted answer (option letter); "Unknown" marks a failed parse
    # and therefore always counts as an incorrect prediction.
    if match := answer_pattern.search(full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Both sides are wrapped in single-element lists before being compared.
    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the character at index 2 of the answer string; presumably
    # the column is stored like "['A']" - confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Calculate and print accuracy per noise type. Kept outside the row loop so the
# summary is printed once, after every matching row has been scored.
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the counters held in `noise_type_accuracy`: one report per noise
# type, followed by the accuracy aggregated over all of them.
total_correct = 0
total_predictions = 0

for noise_type in noise_type_accuracy:
    counts = noise_type_accuracy[noise_type]
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Guard against an empty bucket so the percentage never divides by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Overall accuracy across every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# ----------------------------- Order ---------------------------

# Brightness-Order

In [None]:
# Score InstructBLIP on the denoised Brightness images, restricted to "order"
# questions perturbed by `misspell_word`; results accumulate per noise type.

# Load the dataset
data = pd.read_csv("Noisy-Denoised_QuestionPairs[new].csv")

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Compiled once: captures the option letter that follows "Answer:" in the decoded text.
answer_pattern = re.compile(r"Answer:\s*([A-D])")

# Iterate over rows where category is "order"
for index, row in data.iterrows():
    # Skip every row outside the category / perturbation evaluated in this cell.
    if not (row["category"] == "order" and row['modified_question_function_name'] == 'misspell_word'):
        continue

    # Load the corresponding image. The context manager releases the file handle
    # right away; converting to RGB follows the InstructBLIP usage example.
    image_path = f"Weighted AVG Denoised [TopK]/Brightness/{row['path']}"
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"],
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Prepare inputs
    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted answer (option letter); "Unknown" marks a failed parse
    # and therefore always counts as an incorrect prediction.
    if match := answer_pattern.search(full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Both sides are wrapped in single-element lists before being compared.
    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the character at index 2 of the answer string; presumably
    # the column is stored like "['A']" - confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Calculate and print accuracy per noise type. Kept outside the row loop so the
# summary is printed once, after every matching row has been scored.
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the counters held in `noise_type_accuracy`: one report per noise
# type, followed by the accuracy aggregated over all of them.
total_correct = 0
total_predictions = 0

for noise_type in noise_type_accuracy:
    counts = noise_type_accuracy[noise_type]
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Guard against an empty bucket so the percentage never divides by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Overall accuracy across every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Contrast-Order

In [None]:
# Score InstructBLIP on the denoised Contrast images, restricted to "order"
# questions perturbed by `misspell_word`; results accumulate per noise type.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Compiled once: captures the option letter that follows "Answer:" in the decoded text.
answer_pattern = re.compile(r"Answer:\s*([A-D])")

# Iterate over rows where category is "order"
for index, row in data.iterrows():
    # Skip every row outside the category / perturbation evaluated in this cell.
    if not (row["category"] == "order" and row['modified_question_function_name'] == 'misspell_word'):
        continue

    # Load the corresponding image. The context manager releases the file handle
    # right away; converting to RGB follows the InstructBLIP usage example.
    image_path = f"Weighted AVG Denoised [TopK]/Contrast/{row['path']}"
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"],
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Prepare inputs
    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted answer (option letter); "Unknown" marks a failed parse
    # and therefore always counts as an incorrect prediction.
    if match := answer_pattern.search(full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Both sides are wrapped in single-element lists before being compared.
    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the character at index 2 of the answer string; presumably
    # the column is stored like "['A']" - confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Calculate and print accuracy per noise type. Kept outside the row loop so the
# summary is printed once, after every matching row has been scored.
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the counters held in `noise_type_accuracy`: one report per noise
# type, followed by the accuracy aggregated over all of them.
total_correct = 0
total_predictions = 0

for noise_type in noise_type_accuracy:
    counts = noise_type_accuracy[noise_type]
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Guard against an empty bucket so the percentage never divides by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Overall accuracy across every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Defocus-blur-Order

In [None]:
# Score InstructBLIP on the denoised Defocus-blur images, restricted to "order"
# questions perturbed by `misspell_word`; results accumulate per noise type.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Compiled once: captures the option letter that follows "Answer:" in the decoded text.
answer_pattern = re.compile(r"Answer:\s*([A-D])")

# Iterate over rows where category is "order"
for index, row in data.iterrows():
    # Skip every row outside the category / perturbation evaluated in this cell.
    if not (row["category"] == "order" and row['modified_question_function_name'] == 'misspell_word'):
        continue

    # Load the corresponding image. The context manager releases the file handle
    # right away; converting to RGB follows the InstructBLIP usage example.
    image_path = f"Weighted AVG Denoised [TopK]/Defocus-blur/{row['path']}"
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"],
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Prepare inputs
    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted answer (option letter); "Unknown" marks a failed parse
    # and therefore always counts as an incorrect prediction.
    if match := answer_pattern.search(full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Both sides are wrapped in single-element lists before being compared.
    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the character at index 2 of the answer string; presumably
    # the column is stored like "['A']" - confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Calculate and print accuracy per noise type. Kept outside the row loop so the
# summary is printed once, after every matching row has been scored.
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the counters held in `noise_type_accuracy`: one report per noise
# type, followed by the accuracy aggregated over all of them.
total_correct = 0
total_predictions = 0

for noise_type in noise_type_accuracy:
    counts = noise_type_accuracy[noise_type]
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Guard against an empty bucket so the percentage never divides by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Overall accuracy across every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Elastic-Order

In [None]:
# Score InstructBLIP on the denoised Elastic images, restricted to "order"
# questions perturbed by `misspell_word`; results accumulate per noise type.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Compiled once: captures the option letter that follows "Answer:" in the decoded text.
answer_pattern = re.compile(r"Answer:\s*([A-D])")

# Iterate over rows where category is "order"
for index, row in data.iterrows():
    # Skip every row outside the category / perturbation evaluated in this cell.
    if not (row["category"] == "order" and row['modified_question_function_name'] == 'misspell_word'):
        continue

    # Load the corresponding image. The context manager releases the file handle
    # right away; converting to RGB follows the InstructBLIP usage example.
    image_path = f"Weighted AVG Denoised [TopK]/Elastic/{row['path']}"
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"],
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Prepare inputs
    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted answer (option letter); "Unknown" marks a failed parse
    # and therefore always counts as an incorrect prediction.
    if match := answer_pattern.search(full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Both sides are wrapped in single-element lists before being compared.
    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the character at index 2 of the answer string; presumably
    # the column is stored like "['A']" - confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Calculate and print accuracy per noise type. Kept outside the row loop so the
# summary is printed once, after every matching row has been scored.
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the counters held in `noise_type_accuracy`: one report per noise
# type, followed by the accuracy aggregated over all of them.
total_correct = 0
total_predictions = 0

for noise_type in noise_type_accuracy:
    counts = noise_type_accuracy[noise_type]
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Guard against an empty bucket so the percentage never divides by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Overall accuracy across every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Fog-Order

In [None]:
# Score InstructBLIP on the denoised Fog images, restricted to "order"
# questions perturbed by `misspell_word`; results accumulate per noise type.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Compiled once: captures the option letter that follows "Answer:" in the decoded text.
answer_pattern = re.compile(r"Answer:\s*([A-D])")

# Iterate over rows where category is "order"
for index, row in data.iterrows():
    # Skip every row outside the category / perturbation evaluated in this cell.
    if not (row["category"] == "order" and row['modified_question_function_name'] == 'misspell_word'):
        continue

    # Load the corresponding image. The context manager releases the file handle
    # right away; converting to RGB follows the InstructBLIP usage example.
    image_path = f"Weighted AVG Denoised [TopK]/Fog/{row['path']}"
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"],
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Prepare inputs
    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted answer (option letter); "Unknown" marks a failed parse
    # and therefore always counts as an incorrect prediction.
    if match := answer_pattern.search(full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Both sides are wrapped in single-element lists before being compared.
    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the character at index 2 of the answer string; presumably
    # the column is stored like "['A']" - confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Calculate and print accuracy per noise type. Kept outside the row loop so the
# summary is printed once, after every matching row has been scored.
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the counters held in `noise_type_accuracy`: one report per noise
# type, followed by the accuracy aggregated over all of them.
total_correct = 0
total_predictions = 0

for noise_type in noise_type_accuracy:
    counts = noise_type_accuracy[noise_type]
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Guard against an empty bucket so the percentage never divides by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Overall accuracy across every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Frost-Order

In [None]:
# Score InstructBLIP on the denoised Frost images, restricted to "order"
# questions perturbed by `misspell_word`; results accumulate per noise type.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Compiled once: captures the option letter that follows "Answer:" in the decoded text.
answer_pattern = re.compile(r"Answer:\s*([A-D])")

# Iterate over rows where category is "order"
for index, row in data.iterrows():
    # Skip every row outside the category / perturbation evaluated in this cell.
    if not (row["category"] == "order" and row['modified_question_function_name'] == 'misspell_word'):
        continue

    # Load the corresponding image. The context manager releases the file handle
    # right away; converting to RGB follows the InstructBLIP usage example.
    image_path = f"Weighted AVG Denoised [TopK]/Frost/{row['path']}"
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"],
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Prepare inputs
    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted answer (option letter); "Unknown" marks a failed parse
    # and therefore always counts as an incorrect prediction.
    if match := answer_pattern.search(full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Both sides are wrapped in single-element lists before being compared.
    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the character at index 2 of the answer string; presumably
    # the column is stored like "['A']" - confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Calculate and print accuracy per noise type. Kept outside the row loop so the
# summary is printed once, after every matching row has been scored.
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the counters held in `noise_type_accuracy`: one report per noise
# type, followed by the accuracy aggregated over all of them.
total_correct = 0
total_predictions = 0

for noise_type in noise_type_accuracy:
    counts = noise_type_accuracy[noise_type]
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Guard against an empty bucket so the percentage never divides by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Overall accuracy across every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Gaussian-noise-Order

In [None]:
# Score InstructBLIP on the denoised Gaussian-noise images, restricted to "order"
# questions perturbed by `misspell_word`; results accumulate per noise type.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Compiled once: captures the option letter that follows "Answer:" in the decoded text.
answer_pattern = re.compile(r"Answer:\s*([A-D])")

# Iterate over rows where category is "order"
for index, row in data.iterrows():
    # Skip every row outside the category / perturbation evaluated in this cell.
    if not (row["category"] == "order" and row['modified_question_function_name'] == 'misspell_word'):
        continue

    # Load the corresponding image. The context manager releases the file handle
    # right away; converting to RGB follows the InstructBLIP usage example.
    image_path = f"Weighted AVG Denoised [TopK]/Gaussian-noise/{row['path']}"
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"],
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Prepare inputs
    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted answer (option letter); "Unknown" marks a failed parse
    # and therefore always counts as an incorrect prediction.
    if match := answer_pattern.search(full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Both sides are wrapped in single-element lists before being compared.
    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the character at index 2 of the answer string; presumably
    # the column is stored like "['A']" - confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Calculate and print accuracy per noise type. Kept outside the row loop so the
# summary is printed once, after every matching row has been scored.
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the counters held in `noise_type_accuracy`: one report per noise
# type, followed by the accuracy aggregated over all of them.
total_correct = 0
total_predictions = 0

for noise_type in noise_type_accuracy:
    counts = noise_type_accuracy[noise_type]
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Guard against an empty bucket so the percentage never divides by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Overall accuracy across every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Impulse-Noise-Order

In [None]:
# Score InstructBLIP on the denoised Impulse-noise images, restricted to "order"
# questions perturbed by `misspell_word`; results accumulate per noise type.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Compiled once: captures the option letter that follows "Answer:" in the decoded text.
answer_pattern = re.compile(r"Answer:\s*([A-D])")

# Iterate over rows where category is "order"
for index, row in data.iterrows():
    # Skip every row outside the category / perturbation evaluated in this cell.
    if not (row["category"] == "order" and row['modified_question_function_name'] == 'misspell_word'):
        continue

    # Load the corresponding image. The context manager releases the file handle
    # right away; converting to RGB follows the InstructBLIP usage example.
    image_path = f"Weighted AVG Denoised [TopK]/Impulse-noise/{row['path']}"
    with Image.open(image_path) as img:
        image = img.convert("RGB")

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"],
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Prepare inputs
    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted answer (option letter); "Unknown" marks a failed parse
    # and therefore always counts as an incorrect prediction.
    if match := answer_pattern.search(full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Both sides are wrapped in single-element lists before being compared.
    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the character at index 2 of the answer string; presumably
    # the column is stored like "['A']" - confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Calculate and print accuracy per noise type. Kept outside the row loop so the
# summary is printed once, after every matching row has been scored.
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the tallies gathered by the preceding evaluation cell: one report
# per question-noise type, followed by the accuracy pooled over all of them.
total_correct = 0
total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket is reported as 0% rather than dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this bucket into the pooled counters.
    total_correct += correct
    total_predictions += total

# Pooled accuracy across every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# JPEG-compression-Order

In [None]:
# Evaluate InstructBLIP on the JPEG-compression denoised images, restricted to
# "order" questions whose text was perturbed by `misspell_word`.
# Uses `data`, `processor` and `model` from earlier cells and leaves the
# tallies in `noise_type_accuracy` for the summary cell that follows.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Guard clause: skip every row outside the evaluated subset.
    if (
        row["category"] != "order"
        or row["modified_question_function_name"] != "misspell_word"
    ):
        continue

    # Copy the pixels out inside a context manager so the file handle is
    # released immediately instead of lingering until garbage collection.
    image_path = f"Weighted AVG Denoised [TopK]/JPEG-compression/{row['path']}"
    with Image.open(image_path) as image_file:
        image = image_file.copy()

    # Build the multiple-choice prompt.
    question_type = row["category"]
    question = row["question"]
    options = {letter: row[letter] for letter in ("A", "B", "C", "D")}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # NOTE(review): only a single option letter is parsed from the output, so
    # a much smaller max_new_tokens would probably suffice — confirm first.
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Take the option letter that follows "Answer:"; anything else is recorded
    # as "Unknown", which can never equal a one-letter answer.
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Kept as one-element lists so the log line below keeps its format.
    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # Presumably the answer column is stored like "['A']", which would make
    # index 2 the option letter — TODO confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Score the prediction under its question-noise type.
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Print the per-noise-type accuracy once, after all rows are processed.
# (This loop used to sit inside the row loop, so it re-printed the running
# totals for every CSV row, including rows that were skipped.)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the tallies gathered by the preceding evaluation cell: one report
# per question-noise type, followed by the accuracy pooled over all of them.
total_correct = 0
total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket is reported as 0% rather than dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this bucket into the pooled counters.
    total_correct += correct
    total_predictions += total

# Pooled accuracy across every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Pixelate-Order

In [None]:
# Evaluate InstructBLIP on the Pixelate denoised images, restricted to
# "order" questions whose text was perturbed by `misspell_word`.
# Uses `data`, `processor` and `model` from earlier cells and leaves the
# tallies in `noise_type_accuracy` for the summary cell that follows.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Guard clause: skip every row outside the evaluated subset.
    if (
        row["category"] != "order"
        or row["modified_question_function_name"] != "misspell_word"
    ):
        continue

    # Copy the pixels out inside a context manager so the file handle is
    # released immediately instead of lingering until garbage collection.
    image_path = f"Weighted AVG Denoised [TopK]/Pixelate/{row['path']}"
    with Image.open(image_path) as image_file:
        image = image_file.copy()

    # Build the multiple-choice prompt.
    question_type = row["category"]
    question = row["question"]
    options = {letter: row[letter] for letter in ("A", "B", "C", "D")}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # NOTE(review): only a single option letter is parsed from the output, so
    # a much smaller max_new_tokens would probably suffice — confirm first.
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Take the option letter that follows "Answer:"; anything else is recorded
    # as "Unknown", which can never equal a one-letter answer.
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Kept as one-element lists so the log line below keeps its format.
    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # Presumably the answer column is stored like "['A']", which would make
    # index 2 the option letter — TODO confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Score the prediction under its question-noise type.
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Print the per-noise-type accuracy once, after all rows are processed.
# (This loop used to sit inside the row loop, so it re-printed the running
# totals for every CSV row, including rows that were skipped.)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the tallies gathered by the preceding evaluation cell: one report
# per question-noise type, followed by the accuracy pooled over all of them.
total_correct = 0
total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket is reported as 0% rather than dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this bucket into the pooled counters.
    total_correct += correct
    total_predictions += total

# Pooled accuracy across every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Rain-Order

In [None]:
# Evaluate InstructBLIP on the Rain denoised images, restricted to
# "order" questions whose text was perturbed by `misspell_word`.
# Uses `data`, `processor` and `model` from earlier cells and leaves the
# tallies in `noise_type_accuracy` for the summary cell that follows.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Guard clause: skip every row outside the evaluated subset.
    if (
        row["category"] != "order"
        or row["modified_question_function_name"] != "misspell_word"
    ):
        continue

    # Copy the pixels out inside a context manager so the file handle is
    # released immediately instead of lingering until garbage collection.
    image_path = f"Weighted AVG Denoised [TopK]/Rain/{row['path']}"
    with Image.open(image_path) as image_file:
        image = image_file.copy()

    # Build the multiple-choice prompt.
    question_type = row["category"]
    question = row["question"]
    options = {letter: row[letter] for letter in ("A", "B", "C", "D")}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # NOTE(review): only a single option letter is parsed from the output, so
    # a much smaller max_new_tokens would probably suffice — confirm first.
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Take the option letter that follows "Answer:"; anything else is recorded
    # as "Unknown", which can never equal a one-letter answer.
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Kept as one-element lists so the log line below keeps its format.
    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # Presumably the answer column is stored like "['A']", which would make
    # index 2 the option letter — TODO confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Score the prediction under its question-noise type.
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Print the per-noise-type accuracy once, after all rows are processed.
# (This loop used to sit inside the row loop, so it re-printed the running
# totals for every CSV row, including rows that were skipped.)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the tallies gathered by the preceding evaluation cell: one report
# per question-noise type, followed by the accuracy pooled over all of them.
total_correct = 0
total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket is reported as 0% rather than dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this bucket into the pooled counters.
    total_correct += correct
    total_predictions += total

# Pooled accuracy across every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Saturation-Order

In [None]:
# Evaluate InstructBLIP on the Saturation denoised images, restricted to
# "order" questions whose text was perturbed by `misspell_word`.
# Uses `data`, `processor` and `model` from earlier cells and leaves the
# tallies in `noise_type_accuracy` for the summary cell that follows.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Guard clause: skip every row outside the evaluated subset.
    if (
        row["category"] != "order"
        or row["modified_question_function_name"] != "misspell_word"
    ):
        continue

    # Copy the pixels out inside a context manager so the file handle is
    # released immediately instead of lingering until garbage collection.
    image_path = f"Weighted AVG Denoised [TopK]/Saturation/{row['path']}"
    with Image.open(image_path) as image_file:
        image = image_file.copy()

    # Build the multiple-choice prompt.
    question_type = row["category"]
    question = row["question"]
    options = {letter: row[letter] for letter in ("A", "B", "C", "D")}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # NOTE(review): only a single option letter is parsed from the output, so
    # a much smaller max_new_tokens would probably suffice — confirm first.
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Take the option letter that follows "Answer:"; anything else is recorded
    # as "Unknown", which can never equal a one-letter answer.
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Kept as one-element lists so the log line below keeps its format.
    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # Presumably the answer column is stored like "['A']", which would make
    # index 2 the option letter — TODO confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Score the prediction under its question-noise type.
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Print the per-noise-type accuracy once, after all rows are processed.
# (This loop used to sit inside the row loop, so it re-printed the running
# totals for every CSV row, including rows that were skipped.)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the tallies gathered by the preceding evaluation cell: one report
# per question-noise type, followed by the accuracy pooled over all of them.
total_correct = 0
total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket is reported as 0% rather than dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this bucket into the pooled counters.
    total_correct += correct
    total_predictions += total

# Pooled accuracy across every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Shot-noise-Order

In [None]:
# Evaluate InstructBLIP on the Shot-noise denoised images, restricted to
# "order" questions whose text was perturbed by `misspell_word`.
# Uses `data`, `processor` and `model` from earlier cells and leaves the
# tallies in `noise_type_accuracy` for the summary cell that follows.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Guard clause: skip every row outside the evaluated subset.
    if (
        row["category"] != "order"
        or row["modified_question_function_name"] != "misspell_word"
    ):
        continue

    # Copy the pixels out inside a context manager so the file handle is
    # released immediately instead of lingering until garbage collection.
    image_path = f"Weighted AVG Denoised [TopK]/Shot-noise/{row['path']}"
    with Image.open(image_path) as image_file:
        image = image_file.copy()

    # Build the multiple-choice prompt.
    question_type = row["category"]
    question = row["question"]
    options = {letter: row[letter] for letter in ("A", "B", "C", "D")}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # NOTE(review): only a single option letter is parsed from the output, so
    # a much smaller max_new_tokens would probably suffice — confirm first.
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Take the option letter that follows "Answer:"; anything else is recorded
    # as "Unknown", which can never equal a one-letter answer.
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Kept as one-element lists so the log line below keeps its format.
    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # Presumably the answer column is stored like "['A']", which would make
    # index 2 the option letter — TODO confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Score the prediction under its question-noise type.
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Print the per-noise-type accuracy once, after all rows are processed.
# (This loop used to sit inside the row loop, so it re-printed the running
# totals for every CSV row, including rows that were skipped.)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the tallies gathered by the preceding evaluation cell: one report
# per question-noise type, followed by the accuracy pooled over all of them.
total_correct = 0
total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket is reported as 0% rather than dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this bucket into the pooled counters.
    total_correct += correct
    total_predictions += total

# Pooled accuracy across every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Snow-Order

In [None]:
# Evaluate InstructBLIP on the Snow denoised images, restricted to
# "order" questions whose text was perturbed by `misspell_word`.
# Uses `data`, `processor` and `model` from earlier cells and leaves the
# tallies in `noise_type_accuracy` for the summary cell that follows.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Guard clause: skip every row outside the evaluated subset.
    if (
        row["category"] != "order"
        or row["modified_question_function_name"] != "misspell_word"
    ):
        continue

    # Copy the pixels out inside a context manager so the file handle is
    # released immediately instead of lingering until garbage collection.
    image_path = f"Weighted AVG Denoised [TopK]/Snow/{row['path']}"
    with Image.open(image_path) as image_file:
        image = image_file.copy()

    # Build the multiple-choice prompt.
    question_type = row["category"]
    question = row["question"]
    options = {letter: row[letter] for letter in ("A", "B", "C", "D")}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # NOTE(review): only a single option letter is parsed from the output, so
    # a much smaller max_new_tokens would probably suffice — confirm first.
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Take the option letter that follows "Answer:"; anything else is recorded
    # as "Unknown", which can never equal a one-letter answer.
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Kept as one-element lists so the log line below keeps its format.
    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # Presumably the answer column is stored like "['A']", which would make
    # index 2 the option letter — TODO confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Score the prediction under its question-noise type.
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Print the per-noise-type accuracy once, after all rows are processed.
# (This loop used to sit inside the row loop, so it re-printed the running
# totals for every CSV row, including rows that were skipped.)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the tallies gathered by the preceding evaluation cell: one report
# per question-noise type, followed by the accuracy pooled over all of them.
total_correct = 0
total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket is reported as 0% rather than dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this bucket into the pooled counters.
    total_correct += correct
    total_predictions += total

# Pooled accuracy across every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Spatter-Order

In [None]:
# Evaluate InstructBLIP on the Spatter denoised images, restricted to
# "order" questions whose text was perturbed by `misspell_word`.
# Uses `data`, `processor` and `model` from earlier cells and leaves the
# tallies in `noise_type_accuracy` for the summary cell that follows.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Guard clause: skip every row outside the evaluated subset.
    if (
        row["category"] != "order"
        or row["modified_question_function_name"] != "misspell_word"
    ):
        continue

    # Copy the pixels out inside a context manager so the file handle is
    # released immediately instead of lingering until garbage collection.
    image_path = f"Weighted AVG Denoised [TopK]/Spatter/{row['path']}"
    with Image.open(image_path) as image_file:
        image = image_file.copy()

    # Build the multiple-choice prompt.
    question_type = row["category"]
    question = row["question"]
    options = {letter: row[letter] for letter in ("A", "B", "C", "D")}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # NOTE(review): only a single option letter is parsed from the output, so
    # a much smaller max_new_tokens would probably suffice — confirm first.
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Take the option letter that follows "Answer:"; anything else is recorded
    # as "Unknown", which can never equal a one-letter answer.
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Kept as one-element lists so the log line below keeps its format.
    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # Presumably the answer column is stored like "['A']", which would make
    # index 2 the option letter — TODO confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Score the prediction under its question-noise type.
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Print the per-noise-type accuracy once, after all rows are processed.
# (This loop used to sit inside the row loop, so it re-printed the running
# totals for every CSV row, including rows that were skipped.)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the tallies gathered by the preceding evaluation cell: one report
# per question-noise type, followed by the accuracy pooled over all of them.
total_correct = 0
total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket is reported as 0% rather than dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this bucket into the pooled counters.
    total_correct += correct
    total_predictions += total

# Pooled accuracy across every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Speckle-Order

In [None]:
# Evaluate InstructBLIP on the Speckle-noise denoised images, restricted to
# "order" questions whose text was perturbed by `misspell_word`.
# Uses `data`, `processor` and `model` from earlier cells and leaves the
# tallies in `noise_type_accuracy` for the summary cell that follows.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Guard clause: skip every row outside the evaluated subset.
    if (
        row["category"] != "order"
        or row["modified_question_function_name"] != "misspell_word"
    ):
        continue

    # Copy the pixels out inside a context manager so the file handle is
    # released immediately instead of lingering until garbage collection.
    image_path = f"Weighted AVG Denoised [TopK]/Speckle-noise/{row['path']}"
    with Image.open(image_path) as image_file:
        image = image_file.copy()

    # Build the multiple-choice prompt.
    question_type = row["category"]
    question = row["question"]
    options = {letter: row[letter] for letter in ("A", "B", "C", "D")}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # NOTE(review): only a single option letter is parsed from the output, so
    # a much smaller max_new_tokens would probably suffice — confirm first.
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Take the option letter that follows "Answer:"; anything else is recorded
    # as "Unknown", which can never equal a one-letter answer.
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Kept as one-element lists so the log line below keeps its format.
    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # Presumably the answer column is stored like "['A']", which would make
    # index 2 the option letter — TODO confirm against the CSV.
    actual_answer = [row["answer"][2]]

    # Score the prediction under its question-noise type.
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Print the per-noise-type accuracy once, after all rows are processed.
# (This loop used to sit inside the row loop, so it re-printed the running
# totals for every CSV row, including rows that were skipped.)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Roll the per-noise-type tallies held in `noise_type_accuracy` up into one
# overall accuracy figure, printing each bucket along the way.
total_correct = 0
total_predictions = 0

for noise_name, tally in noise_type_accuracy.items():
    n_correct, n_total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_name}, Correct Predictions: {n_correct}, Total: {n_total}")

    # An empty bucket reports 0 instead of dividing by zero
    if n_total > 0:
        pct = (n_correct / n_total) * 100
    else:
        pct = 0
    print(f"Accuracy for noise type '{noise_name}': {pct:.2f}%")

    # Accumulate the grand totals
    total_correct += n_correct
    total_predictions += n_total

# Overall accuracy across every noise type seen
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Zoom-blur-Order

In [None]:
# Score InstructBLIP on "order" questions perturbed by `misspell_word`, using
# the Zoom-Blur images from the "Weighted AVG Denoised [TopK]" folder.
# Uses `data`, `processor`, `model`, `Image`, `re` and `defaultdict` from
# earlier cells; the tallies are left in `noise_type_accuracy`.

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Compiled once outside the loop; captures the option letter after "Answer:"
answer_pattern = re.compile(r"Answer:\s*([A-D])")

# Keep only rows where category is "order" and the question was misspelled
selected_rows = data[
    (data["category"] == "order")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected_rows.iterrows():
    # Prepare the prompt template
    prompt_text = (
        f"The following are multiple choice questions about {row['category']}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {row['question']}\n"
        "Options:\n"
        f"A. {row['A']}\n"
        f"B. {row['B']}\n"
        f"C. {row['C']}\n"
        f"D. {row['D']}\n"
        "Answer:"
    )

    # Load the corresponding image; the context manager releases the file
    # handle once the processor has turned the image into tensors
    image_path = f"Weighted AVG Denoised [TopK]/Zoom-Blur/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted answer (option letter); "Unknown" never matches
    # a real answer, so unparsable outputs are counted as wrong
    if match := answer_pattern.search(full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Wrapped in a one-item list so the log line keeps its existing format
    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the third character of the stored answer; presumably
    # the column holds strings like "['A']" - confirm against the CSV
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after every selected row is scored
# (previously this loop sat inside the row loop and re-printed on every row)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Print one line pair per noise type from `noise_type_accuracy`, then the
# overall accuracy aggregated over all of them.
for noise_name, tally in noise_type_accuracy.items():
    n_correct = tally["correct"]
    n_total = tally["total"]
    print(f"Noise Type : {noise_name}, Correct Predictions: {n_correct}, Total: {n_total}")

    # An empty bucket reports 0 instead of dividing by zero
    pct = 0 if not n_total > 0 else (n_correct / n_total) * 100
    print(f"Accuracy for noise type '{noise_name}': {pct:.2f}%")

# Grand totals across every noise type (both are 0 when nothing was scored)
total_correct = sum(tally["correct"] for tally in noise_type_accuracy.values())
total_predictions = sum(tally["total"] for tally in noise_type_accuracy.values())

# Calculate overall accuracy
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


## Motion-blur Order

In [None]:
# Score InstructBLIP on "order" questions perturbed by `misspell_word`, using
# the Motion-blur images from the "Weighted AVG Denoised [TopK]" folder.
# Uses `data`, `processor`, `model`, `Image`, `re` and `defaultdict` from
# earlier cells; the tallies are left in `noise_type_accuracy`.

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Compiled once outside the loop; captures the option letter after "Answer:"
answer_pattern = re.compile(r"Answer:\s*([A-D])")

# Keep only rows where category is "order" and the question was misspelled
selected_rows = data[
    (data["category"] == "order")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected_rows.iterrows():
    # Prepare the prompt template
    prompt_text = (
        f"The following are multiple choice questions about {row['category']}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {row['question']}\n"
        "Options:\n"
        f"A. {row['A']}\n"
        f"B. {row['B']}\n"
        f"C. {row['C']}\n"
        f"D. {row['D']}\n"
        "Answer:"
    )

    # Load the corresponding image; the context manager releases the file
    # handle once the processor has turned the image into tensors
    image_path = f"Weighted AVG Denoised [TopK]/Motion-blur/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted answer (option letter); "Unknown" never matches
    # a real answer, so unparsable outputs are counted as wrong
    if match := answer_pattern.search(full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Wrapped in a one-item list so the log line keeps its existing format
    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the third character of the stored answer; presumably
    # the column holds strings like "['A']" - confirm against the CSV
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after every selected row is scored
# (previously this loop sat inside the row loop and re-printed on every row)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Roll the per-noise-type tallies held in `noise_type_accuracy` up into one
# overall accuracy figure, printing each bucket along the way.
total_correct = 0
total_predictions = 0

for noise_name, tally in noise_type_accuracy.items():
    n_correct, n_total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_name}, Correct Predictions: {n_correct}, Total: {n_total}")

    # An empty bucket reports 0 instead of dividing by zero
    if n_total > 0:
        pct = (n_correct / n_total) * 100
    else:
        pct = 0
    print(f"Accuracy for noise type '{noise_name}': {pct:.2f}%")

    # Accumulate the grand totals
    total_correct += n_correct
    total_predictions += n_total

# Overall accuracy across every noise type seen
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# ----------------------------- Trick ---------------------------

# Brightness-Trick

In [None]:
# Score InstructBLIP on "trick" questions perturbed by `misspell_word`, using
# the Brightness images from the "Weighted AVG Denoised [TopK]" folder.
# Uses `processor`, `model`, `Image`, `pd`, `re` and `defaultdict` from
# earlier cells; the tallies are left in `noise_type_accuracy`.

# Load the dataset
data = pd.read_csv("Noisy-Denoised_QuestionPairs[new].csv")

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Compiled once outside the loop; captures the option letter after "Answer:"
answer_pattern = re.compile(r"Answer:\s*([A-D])")

# Keep only rows where category is "trick" and the question was misspelled
selected_rows = data[
    (data["category"] == "trick")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected_rows.iterrows():
    # Prepare the prompt template
    prompt_text = (
        f"The following are multiple choice questions about {row['category']}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {row['question']}\n"
        "Options:\n"
        f"A. {row['A']}\n"
        f"B. {row['B']}\n"
        f"C. {row['C']}\n"
        f"D. {row['D']}\n"
        "Answer:"
    )

    # Load the corresponding image; the context manager releases the file
    # handle once the processor has turned the image into tensors
    image_path = f"Weighted AVG Denoised [TopK]/Brightness/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted answer (option letter); "Unknown" never matches
    # a real answer, so unparsable outputs are counted as wrong
    if match := answer_pattern.search(full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Wrapped in a one-item list so the log line keeps its existing format
    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the third character of the stored answer; presumably
    # the column holds strings like "['A']" - confirm against the CSV
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after every selected row is scored
# (previously this loop sat inside the row loop and re-printed on every row)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Print one line pair per noise type from `noise_type_accuracy`, then the
# overall accuracy aggregated over all of them.
for noise_name, tally in noise_type_accuracy.items():
    n_correct = tally["correct"]
    n_total = tally["total"]
    print(f"Noise Type : {noise_name}, Correct Predictions: {n_correct}, Total: {n_total}")

    # An empty bucket reports 0 instead of dividing by zero
    pct = 0 if not n_total > 0 else (n_correct / n_total) * 100
    print(f"Accuracy for noise type '{noise_name}': {pct:.2f}%")

# Grand totals across every noise type (both are 0 when nothing was scored)
total_correct = sum(tally["correct"] for tally in noise_type_accuracy.values())
total_predictions = sum(tally["total"] for tally in noise_type_accuracy.values())

# Calculate overall accuracy
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Contrast-Trick

In [None]:
# Score InstructBLIP on "trick" questions perturbed by `misspell_word`, using
# the Contrast images from the "Weighted AVG Denoised [TopK]" folder.
# Uses `data`, `processor`, `model`, `Image`, `re` and `defaultdict` from
# earlier cells; the tallies are left in `noise_type_accuracy`.

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Compiled once outside the loop; captures the option letter after "Answer:"
answer_pattern = re.compile(r"Answer:\s*([A-D])")

# Keep only rows where category is "trick" and the question was misspelled
selected_rows = data[
    (data["category"] == "trick")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected_rows.iterrows():
    # Prepare the prompt template
    prompt_text = (
        f"The following are multiple choice questions about {row['category']}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {row['question']}\n"
        "Options:\n"
        f"A. {row['A']}\n"
        f"B. {row['B']}\n"
        f"C. {row['C']}\n"
        f"D. {row['D']}\n"
        "Answer:"
    )

    # Load the corresponding image; the context manager releases the file
    # handle once the processor has turned the image into tensors
    image_path = f"Weighted AVG Denoised [TopK]/Contrast/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted answer (option letter); "Unknown" never matches
    # a real answer, so unparsable outputs are counted as wrong
    if match := answer_pattern.search(full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Wrapped in a one-item list so the log line keeps its existing format
    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the third character of the stored answer; presumably
    # the column holds strings like "['A']" - confirm against the CSV
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after every selected row is scored
# (previously this loop sat inside the row loop and re-printed on every row)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Roll the per-noise-type tallies held in `noise_type_accuracy` up into one
# overall accuracy figure, printing each bucket along the way.
total_correct = 0
total_predictions = 0

for noise_name, tally in noise_type_accuracy.items():
    n_correct, n_total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_name}, Correct Predictions: {n_correct}, Total: {n_total}")

    # An empty bucket reports 0 instead of dividing by zero
    if n_total > 0:
        pct = (n_correct / n_total) * 100
    else:
        pct = 0
    print(f"Accuracy for noise type '{noise_name}': {pct:.2f}%")

    # Accumulate the grand totals
    total_correct += n_correct
    total_predictions += n_total

# Overall accuracy across every noise type seen
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Defocus-blur-Trick

In [None]:
# Score InstructBLIP on "trick" questions perturbed by `misspell_word`, using
# the Defocus-blur images from the "Weighted AVG Denoised [TopK]" folder.
# Uses `data`, `processor`, `model`, `Image`, `re` and `defaultdict` from
# earlier cells; the tallies are left in `noise_type_accuracy`.

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Compiled once outside the loop; captures the option letter after "Answer:"
answer_pattern = re.compile(r"Answer:\s*([A-D])")

# Keep only rows where category is "trick" and the question was misspelled
selected_rows = data[
    (data["category"] == "trick")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected_rows.iterrows():
    # Prepare the prompt template
    prompt_text = (
        f"The following are multiple choice questions about {row['category']}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {row['question']}\n"
        "Options:\n"
        f"A. {row['A']}\n"
        f"B. {row['B']}\n"
        f"C. {row['C']}\n"
        f"D. {row['D']}\n"
        "Answer:"
    )

    # Load the corresponding image; the context manager releases the file
    # handle once the processor has turned the image into tensors
    image_path = f"Weighted AVG Denoised [TopK]/Defocus-blur/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted answer (option letter); "Unknown" never matches
    # a real answer, so unparsable outputs are counted as wrong
    if match := answer_pattern.search(full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Wrapped in a one-item list so the log line keeps its existing format
    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the third character of the stored answer; presumably
    # the column holds strings like "['A']" - confirm against the CSV
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after every selected row is scored
# (previously this loop sat inside the row loop and re-printed on every row)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Print one line pair per noise type from `noise_type_accuracy`, then the
# overall accuracy aggregated over all of them.
for noise_name, tally in noise_type_accuracy.items():
    n_correct = tally["correct"]
    n_total = tally["total"]
    print(f"Noise Type : {noise_name}, Correct Predictions: {n_correct}, Total: {n_total}")

    # An empty bucket reports 0 instead of dividing by zero
    pct = 0 if not n_total > 0 else (n_correct / n_total) * 100
    print(f"Accuracy for noise type '{noise_name}': {pct:.2f}%")

# Grand totals across every noise type (both are 0 when nothing was scored)
total_correct = sum(tally["correct"] for tally in noise_type_accuracy.values())
total_predictions = sum(tally["total"] for tally in noise_type_accuracy.values())

# Calculate overall accuracy
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Elastic-Trick

In [None]:
# Score InstructBLIP on "trick" questions perturbed by `misspell_word`, using
# the Elastic images from the "Weighted AVG Denoised [TopK]" folder.
# Uses `data`, `processor`, `model`, `Image`, `re` and `defaultdict` from
# earlier cells; the tallies are left in `noise_type_accuracy`.

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Compiled once outside the loop; captures the option letter after "Answer:"
answer_pattern = re.compile(r"Answer:\s*([A-D])")

# Keep only rows where category is "trick" and the question was misspelled
selected_rows = data[
    (data["category"] == "trick")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected_rows.iterrows():
    # Prepare the prompt template
    prompt_text = (
        f"The following are multiple choice questions about {row['category']}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {row['question']}\n"
        "Options:\n"
        f"A. {row['A']}\n"
        f"B. {row['B']}\n"
        f"C. {row['C']}\n"
        f"D. {row['D']}\n"
        "Answer:"
    )

    # Load the corresponding image; the context manager releases the file
    # handle once the processor has turned the image into tensors
    image_path = f"Weighted AVG Denoised [TopK]/Elastic/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted answer (option letter); "Unknown" never matches
    # a real answer, so unparsable outputs are counted as wrong
    if match := answer_pattern.search(full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Wrapped in a one-item list so the log line keeps its existing format
    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the third character of the stored answer; presumably
    # the column holds strings like "['A']" - confirm against the CSV
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after every selected row is scored
# (previously this loop sat inside the row loop and re-printed on every row)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Roll the per-noise-type tallies held in `noise_type_accuracy` up into one
# overall accuracy figure, printing each bucket along the way.
total_correct = 0
total_predictions = 0

for noise_name, tally in noise_type_accuracy.items():
    n_correct, n_total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_name}, Correct Predictions: {n_correct}, Total: {n_total}")

    # An empty bucket reports 0 instead of dividing by zero
    if n_total > 0:
        pct = (n_correct / n_total) * 100
    else:
        pct = 0
    print(f"Accuracy for noise type '{noise_name}': {pct:.2f}%")

    # Accumulate the grand totals
    total_correct += n_correct
    total_predictions += n_total

# Overall accuracy across every noise type seen
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Fog-Trick

In [None]:
# Score InstructBLIP on "trick" questions perturbed by `misspell_word`, using
# the Fog images from the "Weighted AVG Denoised [TopK]" folder.
# Uses `data`, `processor`, `model`, `Image`, `re` and `defaultdict` from
# earlier cells; the tallies are left in `noise_type_accuracy`.

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Compiled once outside the loop; captures the option letter after "Answer:"
answer_pattern = re.compile(r"Answer:\s*([A-D])")

# Keep only rows where category is "trick" and the question was misspelled
selected_rows = data[
    (data["category"] == "trick")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected_rows.iterrows():
    # Prepare the prompt template
    prompt_text = (
        f"The following are multiple choice questions about {row['category']}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {row['question']}\n"
        "Options:\n"
        f"A. {row['A']}\n"
        f"B. {row['B']}\n"
        f"C. {row['C']}\n"
        f"D. {row['D']}\n"
        "Answer:"
    )

    # Load the corresponding image; the context manager releases the file
    # handle once the processor has turned the image into tensors
    image_path = f"Weighted AVG Denoised [TopK]/Fog/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted answer (option letter); "Unknown" never matches
    # a real answer, so unparsable outputs are counted as wrong
    if match := answer_pattern.search(full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Wrapped in a one-item list so the log line keeps its existing format
    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the third character of the stored answer; presumably
    # the column holds strings like "['A']" - confirm against the CSV
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after every selected row is scored
# (previously this loop sat inside the row loop and re-printed on every row)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Print one line pair per noise type from `noise_type_accuracy`, then the
# overall accuracy aggregated over all of them.
for noise_name, tally in noise_type_accuracy.items():
    n_correct = tally["correct"]
    n_total = tally["total"]
    print(f"Noise Type : {noise_name}, Correct Predictions: {n_correct}, Total: {n_total}")

    # An empty bucket reports 0 instead of dividing by zero
    pct = 0 if not n_total > 0 else (n_correct / n_total) * 100
    print(f"Accuracy for noise type '{noise_name}': {pct:.2f}%")

# Grand totals across every noise type (both are 0 when nothing was scored)
total_correct = sum(tally["correct"] for tally in noise_type_accuracy.values())
total_predictions = sum(tally["total"] for tally in noise_type_accuracy.values())

# Calculate overall accuracy
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Frost-Trick

In [None]:
# Score InstructBLIP on "trick" questions perturbed by `misspell_word`, using
# the Frost images from the "Weighted AVG Denoised [TopK]" folder.
# Uses `data`, `processor`, `model`, `Image`, `re` and `defaultdict` from
# earlier cells; the tallies are left in `noise_type_accuracy`.

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Compiled once outside the loop; captures the option letter after "Answer:"
answer_pattern = re.compile(r"Answer:\s*([A-D])")

# Keep only rows where category is "trick" and the question was misspelled
selected_rows = data[
    (data["category"] == "trick")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected_rows.iterrows():
    # Prepare the prompt template
    prompt_text = (
        f"The following are multiple choice questions about {row['category']}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {row['question']}\n"
        "Options:\n"
        f"A. {row['A']}\n"
        f"B. {row['B']}\n"
        f"C. {row['C']}\n"
        f"D. {row['D']}\n"
        "Answer:"
    )

    # Load the corresponding image; the context manager releases the file
    # handle once the processor has turned the image into tensors
    image_path = f"Weighted AVG Denoised [TopK]/Frost/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the predicted answer (option letter); "Unknown" never matches
    # a real answer, so unparsable outputs are counted as wrong
    if match := answer_pattern.search(full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Wrapped in a one-item list so the log line keeps its existing format
    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the third character of the stored answer; presumably
    # the column holds strings like "['A']" - confirm against the CSV
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after every selected row is scored
# (previously this loop sat inside the row loop and re-printed on every row)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Initialize counters for overall accuracy
total_correct = 0
total_predictions = 0

# Calculate and print accuracy per noise type
for noise_type, counts in noise_type_accuracy.items():
    correct = counts["correct"]
    total = counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")
    accuracy = (correct / total) * 100 if total > 0 else 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")
    
    # Update overall counters
    total_correct += correct
    total_predictions += total

# Calculate overall accuracy
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
overall_accuracy = (total_correct / total_predictions) * 100 if total_predictions > 0 else 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Gaussian-noise-Trick

In [None]:
# Score the model on the images under "Weighted AVG Denoised [TopK]/Gaussian-noise/",
# restricted to rows whose category is "trick" and whose question was modified
# by 'misspell_word'. Tallies are stored in `noise_type_accuracy`, keyed by
# noise type, as {"correct": int, "total": int}.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Skip every row outside the evaluated subset
    if not (row["category"] == "trick" and row['modified_question_function_name'] == 'misspell_word'):
        continue

    # Build the multiple-choice prompt from the question and its four options
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Open the image in a context manager so its file handle is released as
    # soon as the processor has converted it to tensors (one handle per row
    # would otherwise stay open for the whole run)
    image_path = f"Weighted AVG Denoised [TopK]/Gaussian-noise/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate and decode the model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the option letter that follows "Answer:"; anything else is
    # recorded as "Unknown" and therefore counted as incorrect
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Both sides are wrapped in one-element lists; the prediction is printed
    # in that form below, so the wrapper is kept to leave the log unchanged
    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the character at index 2 of the stored answer —
    # presumably the column holds text such as "['A']"; confirm against the CSV
    actual_answer = [row["answer"][2]]

    # Update the tally for this noise type
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy once, after all selected rows have been scored (previously
# this loop sat inside the row loop and printed for every CSV row)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Running totals across every noise type present in `noise_type_accuracy`
total_correct = 0
total_predictions = 0

# Report each noise type's tally, then fold it into the running totals
for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty tally is reported as 0 rather than dividing by zero
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    total_correct += correct
    total_predictions += total

# Overall accuracy over all tallied predictions (0 when nothing was tallied)
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Impulse-Noise-Count

In [None]:
# Score the model on the images under "Weighted AVG Denoised [TopK]/Impulse-noise/",
# restricted to rows whose category is "trick" and whose question was modified
# by 'misspell_word'. Tallies are stored in `noise_type_accuracy`, keyed by
# noise type, as {"correct": int, "total": int}.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Skip every row outside the evaluated subset
    if not (row["category"] == "trick" and row['modified_question_function_name'] == 'misspell_word'):
        continue

    # Build the multiple-choice prompt from the question and its four options
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Open the image in a context manager so its file handle is released as
    # soon as the processor has converted it to tensors (one handle per row
    # would otherwise stay open for the whole run)
    image_path = f"Weighted AVG Denoised [TopK]/Impulse-noise/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate and decode the model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the option letter that follows "Answer:"; anything else is
    # recorded as "Unknown" and therefore counted as incorrect
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Both sides are wrapped in one-element lists; the prediction is printed
    # in that form below, so the wrapper is kept to leave the log unchanged
    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the character at index 2 of the stored answer —
    # presumably the column holds text such as "['A']"; confirm against the CSV
    actual_answer = [row["answer"][2]]

    # Update the tally for this noise type
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy once, after all selected rows have been scored (previously
# this loop sat inside the row loop and printed for every CSV row)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Running totals across every noise type present in `noise_type_accuracy`
total_correct = 0
total_predictions = 0

# Report each noise type's tally, then fold it into the running totals
for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty tally is reported as 0 rather than dividing by zero
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    total_correct += correct
    total_predictions += total

# Overall accuracy over all tallied predictions (0 when nothing was tallied)
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# JPEG-compression-Count

In [None]:
# Score the model on the images under "Weighted AVG Denoised [TopK]/JPEG-compression/",
# restricted to rows whose category is "trick" and whose question was modified
# by 'misspell_word'. Tallies are stored in `noise_type_accuracy`, keyed by
# noise type, as {"correct": int, "total": int}.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Skip every row outside the evaluated subset
    if not (row["category"] == "trick" and row['modified_question_function_name'] == 'misspell_word'):
        continue

    # Build the multiple-choice prompt from the question and its four options
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Open the image in a context manager so its file handle is released as
    # soon as the processor has converted it to tensors (one handle per row
    # would otherwise stay open for the whole run)
    image_path = f"Weighted AVG Denoised [TopK]/JPEG-compression/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate and decode the model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the option letter that follows "Answer:"; anything else is
    # recorded as "Unknown" and therefore counted as incorrect
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Both sides are wrapped in one-element lists; the prediction is printed
    # in that form below, so the wrapper is kept to leave the log unchanged
    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the character at index 2 of the stored answer —
    # presumably the column holds text such as "['A']"; confirm against the CSV
    actual_answer = [row["answer"][2]]

    # Update the tally for this noise type
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy once, after all selected rows have been scored (previously
# this loop sat inside the row loop and printed for every CSV row)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Running totals across every noise type present in `noise_type_accuracy`
total_correct = 0
total_predictions = 0

# Report each noise type's tally, then fold it into the running totals
for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty tally is reported as 0 rather than dividing by zero
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    total_correct += correct
    total_predictions += total

# Overall accuracy over all tallied predictions (0 when nothing was tallied)
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Pixelate-Count

In [None]:
# Score the model on the images under "Weighted AVG Denoised [TopK]/Pixelate/",
# restricted to rows whose category is "trick" and whose question was modified
# by 'misspell_word'. Tallies are stored in `noise_type_accuracy`, keyed by
# noise type, as {"correct": int, "total": int}.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Skip every row outside the evaluated subset
    if not (row["category"] == "trick" and row['modified_question_function_name'] == 'misspell_word'):
        continue

    # Build the multiple-choice prompt from the question and its four options
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Open the image in a context manager so its file handle is released as
    # soon as the processor has converted it to tensors (one handle per row
    # would otherwise stay open for the whole run)
    image_path = f"Weighted AVG Denoised [TopK]/Pixelate/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate and decode the model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the option letter that follows "Answer:"; anything else is
    # recorded as "Unknown" and therefore counted as incorrect
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Both sides are wrapped in one-element lists; the prediction is printed
    # in that form below, so the wrapper is kept to leave the log unchanged
    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the character at index 2 of the stored answer —
    # presumably the column holds text such as "['A']"; confirm against the CSV
    actual_answer = [row["answer"][2]]

    # Update the tally for this noise type
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy once, after all selected rows have been scored (previously
# this loop sat inside the row loop and printed for every CSV row)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Running totals across every noise type present in `noise_type_accuracy`
total_correct = 0
total_predictions = 0

# Report each noise type's tally, then fold it into the running totals
for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty tally is reported as 0 rather than dividing by zero
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    total_correct += correct
    total_predictions += total

# Overall accuracy over all tallied predictions (0 when nothing was tallied)
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Rain-Count

In [None]:
# Score the model on the images under "Weighted AVG Denoised [TopK]/Rain/",
# restricted to rows whose category is "trick" and whose question was modified
# by 'misspell_word'. Tallies are stored in `noise_type_accuracy`, keyed by
# noise type, as {"correct": int, "total": int}.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Skip every row outside the evaluated subset
    if not (row["category"] == "trick" and row['modified_question_function_name'] == 'misspell_word'):
        continue

    # Build the multiple-choice prompt from the question and its four options
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Open the image in a context manager so its file handle is released as
    # soon as the processor has converted it to tensors (one handle per row
    # would otherwise stay open for the whole run)
    image_path = f"Weighted AVG Denoised [TopK]/Rain/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate and decode the model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the option letter that follows "Answer:"; anything else is
    # recorded as "Unknown" and therefore counted as incorrect
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Both sides are wrapped in one-element lists; the prediction is printed
    # in that form below, so the wrapper is kept to leave the log unchanged
    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the character at index 2 of the stored answer —
    # presumably the column holds text such as "['A']"; confirm against the CSV
    actual_answer = [row["answer"][2]]

    # Update the tally for this noise type
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy once, after all selected rows have been scored (previously
# this loop sat inside the row loop and printed for every CSV row)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Running totals across every noise type present in `noise_type_accuracy`
total_correct = 0
total_predictions = 0

# Report each noise type's tally, then fold it into the running totals
for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty tally is reported as 0 rather than dividing by zero
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    total_correct += correct
    total_predictions += total

# Overall accuracy over all tallied predictions (0 when nothing was tallied)
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Saturation-Count

In [None]:
# Score the model on the images under "Weighted AVG Denoised [TopK]/Saturation/",
# restricted to rows whose category is "trick" and whose question was modified
# by 'misspell_word'. Tallies are stored in `noise_type_accuracy`, keyed by
# noise type, as {"correct": int, "total": int}.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Skip every row outside the evaluated subset
    if not (row["category"] == "trick" and row['modified_question_function_name'] == 'misspell_word'):
        continue

    # Build the multiple-choice prompt from the question and its four options
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Open the image in a context manager so its file handle is released as
    # soon as the processor has converted it to tensors (one handle per row
    # would otherwise stay open for the whole run)
    image_path = f"Weighted AVG Denoised [TopK]/Saturation/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate and decode the model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the option letter that follows "Answer:"; anything else is
    # recorded as "Unknown" and therefore counted as incorrect
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Both sides are wrapped in one-element lists; the prediction is printed
    # in that form below, so the wrapper is kept to leave the log unchanged
    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the character at index 2 of the stored answer —
    # presumably the column holds text such as "['A']"; confirm against the CSV
    actual_answer = [row["answer"][2]]

    # Update the tally for this noise type
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy once, after all selected rows have been scored (previously
# this loop sat inside the row loop and printed for every CSV row)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Running totals across every noise type present in `noise_type_accuracy`
total_correct = 0
total_predictions = 0

# Report each noise type's tally, then fold it into the running totals
for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty tally is reported as 0 rather than dividing by zero
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    total_correct += correct
    total_predictions += total

# Overall accuracy over all tallied predictions (0 when nothing was tallied)
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Shot-noise-Count

In [None]:
# Score the model on the images under "Weighted AVG Denoised [TopK]/Shot-noise/",
# restricted to rows whose category is "trick" and whose question was modified
# by 'misspell_word'. Tallies are stored in `noise_type_accuracy`, keyed by
# noise type, as {"correct": int, "total": int}.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Skip every row outside the evaluated subset
    if not (row["category"] == "trick" and row['modified_question_function_name'] == 'misspell_word'):
        continue

    # Build the multiple-choice prompt from the question and its four options
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Open the image in a context manager so its file handle is released as
    # soon as the processor has converted it to tensors (one handle per row
    # would otherwise stay open for the whole run)
    image_path = f"Weighted AVG Denoised [TopK]/Shot-noise/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate and decode the model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the option letter that follows "Answer:"; anything else is
    # recorded as "Unknown" and therefore counted as incorrect
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Both sides are wrapped in one-element lists; the prediction is printed
    # in that form below, so the wrapper is kept to leave the log unchanged
    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the character at index 2 of the stored answer —
    # presumably the column holds text such as "['A']"; confirm against the CSV
    actual_answer = [row["answer"][2]]

    # Update the tally for this noise type
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy once, after all selected rows have been scored (previously
# this loop sat inside the row loop and printed for every CSV row)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Running totals across every noise type present in `noise_type_accuracy`
total_correct = 0
total_predictions = 0

# Report each noise type's tally, then fold it into the running totals
for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty tally is reported as 0 rather than dividing by zero
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    total_correct += correct
    total_predictions += total

# Overall accuracy over all tallied predictions (0 when nothing was tallied)
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Snow-Count

In [None]:
# Score the model on the images under "Weighted AVG Denoised [TopK]/Snow/",
# restricted to rows whose category is "trick" and whose question was modified
# by 'misspell_word'. Tallies are stored in `noise_type_accuracy`, keyed by
# noise type, as {"correct": int, "total": int}.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Skip every row outside the evaluated subset
    if not (row["category"] == "trick" and row['modified_question_function_name'] == 'misspell_word'):
        continue

    # Build the multiple-choice prompt from the question and its four options
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Open the image in a context manager so its file handle is released as
    # soon as the processor has converted it to tensors (one handle per row
    # would otherwise stay open for the whole run)
    image_path = f"Weighted AVG Denoised [TopK]/Snow/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate and decode the model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the option letter that follows "Answer:"; anything else is
    # recorded as "Unknown" and therefore counted as incorrect
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Both sides are wrapped in one-element lists; the prediction is printed
    # in that form below, so the wrapper is kept to leave the log unchanged
    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the character at index 2 of the stored answer —
    # presumably the column holds text such as "['A']"; confirm against the CSV
    actual_answer = [row["answer"][2]]

    # Update the tally for this noise type
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy once, after all selected rows have been scored (previously
# this loop sat inside the row loop and printed for every CSV row)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Running totals across every noise type present in `noise_type_accuracy`
total_correct = 0
total_predictions = 0

# Report each noise type's tally, then fold it into the running totals
for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty tally is reported as 0 rather than dividing by zero
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    total_correct += correct
    total_predictions += total

# Overall accuracy over all tallied predictions (0 when nothing was tallied)
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Spatter-Count

In [None]:
# Score the model on the images under "Weighted AVG Denoised [TopK]/Spatter/",
# restricted to rows whose category is "trick" and whose question was modified
# by 'misspell_word'. Tallies are stored in `noise_type_accuracy`, keyed by
# noise type, as {"correct": int, "total": int}.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for index, row in data.iterrows():
    # Skip every row outside the evaluated subset
    if not (row["category"] == "trick" and row['modified_question_function_name'] == 'misspell_word'):
        continue

    # Build the multiple-choice prompt from the question and its four options
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Open the image in a context manager so its file handle is released as
    # soon as the processor has converted it to tensors (one handle per row
    # would otherwise stay open for the whole run)
    image_path = f"Weighted AVG Denoised [TopK]/Spatter/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate and decode the model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the option letter that follows "Answer:"; anything else is
    # recorded as "Unknown" and therefore counted as incorrect
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Both sides are wrapped in one-element lists; the prediction is printed
    # in that form below, so the wrapper is kept to leave the log unchanged
    predicted_answer = [predicted_answer]

    noise_type = row["modified_question_function_name"]
    # NOTE(review): takes the character at index 2 of the stored answer —
    # presumably the column holds text such as "['A']"; confirm against the CSV
    actual_answer = [row["answer"][2]]

    # Update the tally for this noise type
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy once, after all selected rows have been scored (previously
# this loop sat inside the row loop and printed for every CSV row)
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the tallies held in noise_type_accuracy: one report per noise type,
# followed by the accuracy pooled over all of them.
total_correct = 0
total_predictions = 0

for noise_type in noise_type_accuracy:
    tally = noise_type_accuracy[noise_type]
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # A noise type with no scored rows reports 0 instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the pooled counters.
    total_correct += correct
    total_predictions += total

# Pooled accuracy over every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Speckle-Count

In [None]:
# Score the model on rows whose category is "trick" and whose
# modified_question_function_name is 'misspell_word', using the Speckle-noise
# denoised images, and tally correct/total per noise type.
# NOTE(review): this cell appears under a "Speckle-Count" heading but filters
# category == "trick" — confirm which category is intended.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Select the rows of interest once instead of testing every row inside the loop.
selected_rows = data[
    (data["category"] == "trick")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected_rows.iterrows():
    # Path of the denoised image that belongs to this question
    image_path = f"Weighted AVG Denoised [TopK]/Speckle-noise/{row['path']}"

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"],
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Open the image in a context manager so its file handle is released as soon
    # as the processor has turned the pixels into tensors.
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the option letter that follows "Answer:"; anything else is
    # recorded as "Unknown" and therefore counted as incorrect.
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # Index 2 of the stored answer is taken as the option letter — presumably the
    # column holds strings shaped like "['A']"; verify against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after all selected rows are scored
# (previously this loop sat inside the row loop and re-printed for every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the tallies held in noise_type_accuracy: one report per noise type,
# followed by the accuracy pooled over all of them.
total_correct = 0
total_predictions = 0

for noise_type in noise_type_accuracy:
    tally = noise_type_accuracy[noise_type]
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # A noise type with no scored rows reports 0 instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the pooled counters.
    total_correct += correct
    total_predictions += total

# Pooled accuracy over every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Zoom-blur-Count

In [None]:
# Score the model on rows whose category is "trick" and whose
# modified_question_function_name is 'misspell_word', using the Zoom-Blur
# denoised images, and tally correct/total per noise type.
# NOTE(review): this cell appears under a "Zoom-blur-Count" heading but filters
# category == "trick" — confirm which category is intended.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Select the rows of interest once instead of testing every row inside the loop.
selected_rows = data[
    (data["category"] == "trick")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected_rows.iterrows():
    # Path of the denoised image that belongs to this question
    image_path = f"Weighted AVG Denoised [TopK]/Zoom-Blur/{row['path']}"

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"],
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Open the image in a context manager so its file handle is released as soon
    # as the processor has turned the pixels into tensors.
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the option letter that follows "Answer:"; anything else is
    # recorded as "Unknown" and therefore counted as incorrect.
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # Index 2 of the stored answer is taken as the option letter — presumably the
    # column holds strings shaped like "['A']"; verify against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after all selected rows are scored
# (previously this loop sat inside the row loop and re-printed for every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the tallies held in noise_type_accuracy: one report per noise type,
# followed by the accuracy pooled over all of them.
total_correct = 0
total_predictions = 0

for noise_type in noise_type_accuracy:
    tally = noise_type_accuracy[noise_type]
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # A noise type with no scored rows reports 0 instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the pooled counters.
    total_correct += correct
    total_predictions += total

# Pooled accuracy over every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


## Motion-blur Count

In [None]:
# Score the model on rows whose category is "trick" and whose
# modified_question_function_name is 'misspell_word', using the Motion-blur
# denoised images, and tally correct/total per noise type.
# NOTE(review): this cell appears under a "Motion-blur Count" heading but filters
# category == "trick" — confirm which category is intended.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Select the rows of interest once instead of testing every row inside the loop.
selected_rows = data[
    (data["category"] == "trick")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected_rows.iterrows():
    # Path of the denoised image that belongs to this question
    image_path = f"Weighted AVG Denoised [TopK]/Motion-blur/{row['path']}"

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"],
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Open the image in a context manager so its file handle is released as soon
    # as the processor has turned the pixels into tensors.
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the option letter that follows "Answer:"; anything else is
    # recorded as "Unknown" and therefore counted as incorrect.
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # Index 2 of the stored answer is taken as the option letter — presumably the
    # column holds strings shaped like "['A']"; verify against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after all selected rows are scored
# (previously this loop sat inside the row loop and re-printed for every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the tallies held in noise_type_accuracy: one report per noise type,
# followed by the accuracy pooled over all of them.
total_correct = 0
total_predictions = 0

for noise_type in noise_type_accuracy:
    tally = noise_type_accuracy[noise_type]
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # A noise type with no scored rows reports 0 instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the pooled counters.
    total_correct += correct
    total_predictions += total

# Pooled accuracy over every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# ----------------------------- VCR ---------------------------

# Brightness-VCR

In [None]:
# Load the dataset
data = pd.read_csv("Noisy-Denoised_QuestionPairs[new].csv")

# Score the model on rows whose category is "vcr" and whose
# modified_question_function_name is 'misspell_word', using the Brightness
# denoised images, and tally correct/total per noise type.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Select the rows of interest once instead of testing every row inside the loop.
selected_rows = data[
    (data["category"] == "vcr")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected_rows.iterrows():
    # Path of the denoised image that belongs to this question
    image_path = f"Weighted AVG Denoised [TopK]/Brightness/{row['path']}"

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"],
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Open the image in a context manager so its file handle is released as soon
    # as the processor has turned the pixels into tensors.
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the option letter that follows "Answer:"; anything else is
    # recorded as "Unknown" and therefore counted as incorrect.
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # Index 2 of the stored answer is taken as the option letter — presumably the
    # column holds strings shaped like "['A']"; verify against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after all selected rows are scored
# (previously this loop sat inside the row loop and re-printed for every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the tallies held in noise_type_accuracy: one report per noise type,
# followed by the accuracy pooled over all of them.
total_correct = 0
total_predictions = 0

for noise_type in noise_type_accuracy:
    tally = noise_type_accuracy[noise_type]
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # A noise type with no scored rows reports 0 instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the pooled counters.
    total_correct += correct
    total_predictions += total

# Pooled accuracy over every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Contrast-VCR

In [None]:
# Score the model on rows whose category is "vcr" and whose
# modified_question_function_name is 'misspell_word', using the Contrast
# denoised images, and tally correct/total per noise type.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Select the rows of interest once instead of testing every row inside the loop.
selected_rows = data[
    (data["category"] == "vcr")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected_rows.iterrows():
    # Path of the denoised image that belongs to this question
    image_path = f"Weighted AVG Denoised [TopK]/Contrast/{row['path']}"

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"],
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Open the image in a context manager so its file handle is released as soon
    # as the processor has turned the pixels into tensors.
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the option letter that follows "Answer:"; anything else is
    # recorded as "Unknown" and therefore counted as incorrect.
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # Index 2 of the stored answer is taken as the option letter — presumably the
    # column holds strings shaped like "['A']"; verify against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after all selected rows are scored
# (previously this loop sat inside the row loop and re-printed for every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the tallies held in noise_type_accuracy: one report per noise type,
# followed by the accuracy pooled over all of them.
total_correct = 0
total_predictions = 0

for noise_type in noise_type_accuracy:
    tally = noise_type_accuracy[noise_type]
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # A noise type with no scored rows reports 0 instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the pooled counters.
    total_correct += correct
    total_predictions += total

# Pooled accuracy over every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Defocus-blur-VCR

In [None]:
# Score the model on rows whose category is "vcr" and whose
# modified_question_function_name is 'misspell_word', using the Defocus-blur
# denoised images, and tally correct/total per noise type.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Select the rows of interest once instead of testing every row inside the loop.
selected_rows = data[
    (data["category"] == "vcr")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected_rows.iterrows():
    # Path of the denoised image that belongs to this question
    image_path = f"Weighted AVG Denoised [TopK]/Defocus-blur/{row['path']}"

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"],
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Open the image in a context manager so its file handle is released as soon
    # as the processor has turned the pixels into tensors.
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the option letter that follows "Answer:"; anything else is
    # recorded as "Unknown" and therefore counted as incorrect.
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # Index 2 of the stored answer is taken as the option letter — presumably the
    # column holds strings shaped like "['A']"; verify against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after all selected rows are scored
# (previously this loop sat inside the row loop and re-printed for every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the tallies held in noise_type_accuracy: one report per noise type,
# followed by the accuracy pooled over all of them.
total_correct = 0
total_predictions = 0

for noise_type in noise_type_accuracy:
    tally = noise_type_accuracy[noise_type]
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # A noise type with no scored rows reports 0 instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the pooled counters.
    total_correct += correct
    total_predictions += total

# Pooled accuracy over every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Elastic-VCR

In [None]:
# Score the model on rows whose category is "vcr" and whose
# modified_question_function_name is 'misspell_word', using the Elastic
# denoised images, and tally correct/total per noise type.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Select the rows of interest once instead of testing every row inside the loop.
selected_rows = data[
    (data["category"] == "vcr")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected_rows.iterrows():
    # Path of the denoised image that belongs to this question
    image_path = f"Weighted AVG Denoised [TopK]/Elastic/{row['path']}"

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"],
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Open the image in a context manager so its file handle is released as soon
    # as the processor has turned the pixels into tensors.
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the option letter that follows "Answer:"; anything else is
    # recorded as "Unknown" and therefore counted as incorrect.
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # Index 2 of the stored answer is taken as the option letter — presumably the
    # column holds strings shaped like "['A']"; verify against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after all selected rows are scored
# (previously this loop sat inside the row loop and re-printed for every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the tallies held in noise_type_accuracy: one report per noise type,
# followed by the accuracy pooled over all of them.
total_correct = 0
total_predictions = 0

for noise_type in noise_type_accuracy:
    tally = noise_type_accuracy[noise_type]
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # A noise type with no scored rows reports 0 instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the pooled counters.
    total_correct += correct
    total_predictions += total

# Pooled accuracy over every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Fog-VCR

In [None]:
# Score the model on rows whose category is "vcr" and whose
# modified_question_function_name is 'misspell_word', using the Fog
# denoised images, and tally correct/total per noise type.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Select the rows of interest once instead of testing every row inside the loop.
selected_rows = data[
    (data["category"] == "vcr")
    & (data["modified_question_function_name"] == "misspell_word")
]

for _, row in selected_rows.iterrows():
    # Path of the denoised image that belongs to this question
    image_path = f"Weighted AVG Denoised [TopK]/Fog/{row['path']}"

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"],
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Open the image in a context manager so its file handle is released as soon
    # as the processor has turned the pixels into tensors.
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the option letter that follows "Answer:"; anything else is
    # recorded as "Unknown" and therefore counted as incorrect.
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # Index 2 of the stored answer is taken as the option letter — presumably the
    # column holds strings shaped like "['A']"; verify against the CSV.
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after all selected rows are scored
# (previously this loop sat inside the row loop and re-printed for every CSV row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the tallies held in noise_type_accuracy: one report per noise type,
# followed by the accuracy pooled over all of them.
total_correct = 0
total_predictions = 0

for noise_type in noise_type_accuracy:
    tally = noise_type_accuracy[noise_type]
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # A noise type with no scored rows reports 0 instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the pooled counters.
    total_correct += correct
    total_predictions += total

# Pooled accuracy over every noise type seen above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Frost-VCR

In [None]:
# Evaluate the model on the Frost images, restricted to "vcr" questions whose
# text was perturbed by 'misspell_word'.
# NOTE(review): the section heading says "Count" but the filter below selects
# category "vcr" — confirm which category this cell is meant to cover.

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Iterate over rows where category is "vcr"
for index, row in data.iterrows():
    if row["category"] == "vcr" and row['modified_question_function_name'] == 'misspell_word':

        # Prepare the prompt template
        question_type = row["category"]
        question = row["question"]
        options = {
            "A": row["A"],
            "B": row["B"],
            "C": row["C"],
            "D": row["D"]
        }

        prompt_text = (
            f"The following are multiple choice questions about {question_type}. "
            "You should directly answer the question by choosing the correct option given the image and the question. "
            "Give only the letter indicating the correct answer e.g. 'A'\n"
            f"Question: {question}\n"
            "Options:\n"
            f"A. {options['A']}\n"
            f"B. {options['B']}\n"
            f"C. {options['C']}\n"
            f"D. {options['D']}\n"
            "Answer:"
        )

        # Load the corresponding image and prepare inputs. The context manager
        # closes the image file as soon as the processor call returns, so open
        # handles do not accumulate over the dataset.
        image_path = f"Weighted AVG Denoised [TopK]/Frost/{row['path']}"
        with Image.open(image_path) as image:
            inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

        # Generate model output
        output = model.generate(**inputs, max_new_tokens=1000)
        full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

        print(f"Full model output for Question ID {row['id']}: {full_answer}")

        # Extract the predicted answer (option letter)
        if match := re.search(r"Answer:\s*([A-D])", full_answer):
            predicted_answer = match.group(1)  # Extract the letter (A, B, C, or D)
        else:
            print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
            predicted_answer = "Unknown"

        # Wrapped in a list so it compares (and prints) like actual_answer below
        predicted_answer = [predicted_answer]

        # Get the noise type
        noise_type = row["modified_question_function_name"]
        # Third character of the stored answer string
        # (presumably formatted like "['A']" — TODO confirm against the CSV).
        actual_answer = [row["answer"][2]]

        # Compare with the actual answer and update counters
        if predicted_answer == actual_answer:
            noise_type_accuracy[noise_type]["correct"] += 1
        noise_type_accuracy[noise_type]["total"] += 1

        # Print the prediction
        print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Calculate and print accuracy per noise type. Done after the loop so the
# summary is printed once rather than once per dataframe row.
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Aggregate the per-noise-type tallies into one overall score
total_correct, total_predictions = 0, 0

# Report each noise type, then fold its tallies into the running totals
for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Percentage for this noise type; 0 when nothing was evaluated
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    total_predictions += total
    total_correct += correct

# Overall percentage across all noise types; 0 when nothing was evaluated
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Gaussian-noise-Count

In [None]:
# Evaluate the model on the Gaussian-noise images, restricted to "vcr"
# questions whose text was perturbed by 'misspell_word'.
# NOTE(review): the section heading says "Count" but the filter below selects
# category "vcr" — confirm which category this cell is meant to cover.

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Iterate over rows where category is "vcr"
for index, row in data.iterrows():
    if row["category"] == "vcr" and row['modified_question_function_name'] == 'misspell_word':

        # Prepare the prompt template
        question_type = row["category"]
        question = row["question"]
        options = {
            "A": row["A"],
            "B": row["B"],
            "C": row["C"],
            "D": row["D"]
        }

        prompt_text = (
            f"The following are multiple choice questions about {question_type}. "
            "You should directly answer the question by choosing the correct option given the image and the question. "
            "Give only the letter indicating the correct answer e.g. 'A'\n"
            f"Question: {question}\n"
            "Options:\n"
            f"A. {options['A']}\n"
            f"B. {options['B']}\n"
            f"C. {options['C']}\n"
            f"D. {options['D']}\n"
            "Answer:"
        )

        # Load the corresponding image and prepare inputs. The context manager
        # closes the image file as soon as the processor call returns, so open
        # handles do not accumulate over the dataset.
        image_path = f"Weighted AVG Denoised [TopK]/Gaussian-noise/{row['path']}"
        with Image.open(image_path) as image:
            inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

        # Generate model output
        output = model.generate(**inputs, max_new_tokens=1000)
        full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

        print(f"Full model output for Question ID {row['id']}: {full_answer}")

        # Extract the predicted answer (option letter)
        if match := re.search(r"Answer:\s*([A-D])", full_answer):
            predicted_answer = match.group(1)  # Extract the letter (A, B, C, or D)
        else:
            print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
            predicted_answer = "Unknown"

        # Wrapped in a list so it compares (and prints) like actual_answer below
        predicted_answer = [predicted_answer]

        # Get the noise type
        noise_type = row["modified_question_function_name"]
        # Third character of the stored answer string
        # (presumably formatted like "['A']" — TODO confirm against the CSV).
        actual_answer = [row["answer"][2]]

        # Compare with the actual answer and update counters
        if predicted_answer == actual_answer:
            noise_type_accuracy[noise_type]["correct"] += 1
        noise_type_accuracy[noise_type]["total"] += 1

        # Print the prediction
        print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Calculate and print accuracy per noise type. Done after the loop so the
# summary is printed once rather than once per dataframe row.
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Aggregate the per-noise-type tallies into one overall score
total_correct, total_predictions = 0, 0

# Report each noise type, then fold its tallies into the running totals
for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Percentage for this noise type; 0 when nothing was evaluated
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    total_predictions += total
    total_correct += correct

# Overall percentage across all noise types; 0 when nothing was evaluated
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Impulse-Noise-Count

In [None]:
# Evaluate the model on the Impulse-noise images, restricted to "vcr"
# questions whose text was perturbed by 'misspell_word'.
# NOTE(review): the section heading says "Count" but the filter below selects
# category "vcr" — confirm which category this cell is meant to cover.

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Iterate over rows where category is "vcr"
for index, row in data.iterrows():
    if row["category"] == "vcr" and row['modified_question_function_name'] == 'misspell_word':

        # Prepare the prompt template
        question_type = row["category"]
        question = row["question"]
        options = {
            "A": row["A"],
            "B": row["B"],
            "C": row["C"],
            "D": row["D"]
        }

        prompt_text = (
            f"The following are multiple choice questions about {question_type}. "
            "You should directly answer the question by choosing the correct option given the image and the question. "
            "Give only the letter indicating the correct answer e.g. 'A'\n"
            f"Question: {question}\n"
            "Options:\n"
            f"A. {options['A']}\n"
            f"B. {options['B']}\n"
            f"C. {options['C']}\n"
            f"D. {options['D']}\n"
            "Answer:"
        )

        # Load the corresponding image and prepare inputs. The context manager
        # closes the image file as soon as the processor call returns, so open
        # handles do not accumulate over the dataset.
        image_path = f"Weighted AVG Denoised [TopK]/Impulse-noise/{row['path']}"
        with Image.open(image_path) as image:
            inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

        # Generate model output
        output = model.generate(**inputs, max_new_tokens=1000)
        full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

        print(f"Full model output for Question ID {row['id']}: {full_answer}")

        # Extract the predicted answer (option letter)
        if match := re.search(r"Answer:\s*([A-D])", full_answer):
            predicted_answer = match.group(1)  # Extract the letter (A, B, C, or D)
        else:
            print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
            predicted_answer = "Unknown"

        # Wrapped in a list so it compares (and prints) like actual_answer below
        predicted_answer = [predicted_answer]

        # Get the noise type
        noise_type = row["modified_question_function_name"]
        # Third character of the stored answer string
        # (presumably formatted like "['A']" — TODO confirm against the CSV).
        actual_answer = [row["answer"][2]]

        # Compare with the actual answer and update counters
        if predicted_answer == actual_answer:
            noise_type_accuracy[noise_type]["correct"] += 1
        noise_type_accuracy[noise_type]["total"] += 1

        # Print the prediction
        print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Calculate and print accuracy per noise type. Done after the loop so the
# summary is printed once rather than once per dataframe row.
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Aggregate the per-noise-type tallies into one overall score
total_correct, total_predictions = 0, 0

# Report each noise type, then fold its tallies into the running totals
for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Percentage for this noise type; 0 when nothing was evaluated
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    total_predictions += total
    total_correct += correct

# Overall percentage across all noise types; 0 when nothing was evaluated
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# JPEG-compression-Count

In [None]:
# Evaluate the model on the JPEG-compression images, restricted to "vcr"
# questions whose text was perturbed by 'misspell_word'.
# NOTE(review): the section heading says "Count" but the filter below selects
# category "vcr" — confirm which category this cell is meant to cover.

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Iterate over rows where category is "vcr"
for index, row in data.iterrows():
    if row["category"] == "vcr" and row['modified_question_function_name'] == 'misspell_word':

        # Prepare the prompt template
        question_type = row["category"]
        question = row["question"]
        options = {
            "A": row["A"],
            "B": row["B"],
            "C": row["C"],
            "D": row["D"]
        }

        prompt_text = (
            f"The following are multiple choice questions about {question_type}. "
            "You should directly answer the question by choosing the correct option given the image and the question. "
            "Give only the letter indicating the correct answer e.g. 'A'\n"
            f"Question: {question}\n"
            "Options:\n"
            f"A. {options['A']}\n"
            f"B. {options['B']}\n"
            f"C. {options['C']}\n"
            f"D. {options['D']}\n"
            "Answer:"
        )

        # Load the corresponding image and prepare inputs. The context manager
        # closes the image file as soon as the processor call returns, so open
        # handles do not accumulate over the dataset.
        image_path = f"Weighted AVG Denoised [TopK]/JPEG-compression/{row['path']}"
        with Image.open(image_path) as image:
            inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

        # Generate model output
        output = model.generate(**inputs, max_new_tokens=1000)
        full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

        print(f"Full model output for Question ID {row['id']}: {full_answer}")

        # Extract the predicted answer (option letter)
        if match := re.search(r"Answer:\s*([A-D])", full_answer):
            predicted_answer = match.group(1)  # Extract the letter (A, B, C, or D)
        else:
            print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
            predicted_answer = "Unknown"

        # Wrapped in a list so it compares (and prints) like actual_answer below
        predicted_answer = [predicted_answer]

        # Get the noise type
        noise_type = row["modified_question_function_name"]
        # Third character of the stored answer string
        # (presumably formatted like "['A']" — TODO confirm against the CSV).
        actual_answer = [row["answer"][2]]

        # Compare with the actual answer and update counters
        if predicted_answer == actual_answer:
            noise_type_accuracy[noise_type]["correct"] += 1
        noise_type_accuracy[noise_type]["total"] += 1

        # Print the prediction
        print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Calculate and print accuracy per noise type. Done after the loop so the
# summary is printed once rather than once per dataframe row.
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Aggregate the per-noise-type tallies into one overall score
total_correct, total_predictions = 0, 0

# Report each noise type, then fold its tallies into the running totals
for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Percentage for this noise type; 0 when nothing was evaluated
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    total_predictions += total
    total_correct += correct

# Overall percentage across all noise types; 0 when nothing was evaluated
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Pixelate-Count

In [None]:
# Evaluate the model on the Pixelate images, restricted to "vcr" questions
# whose text was perturbed by 'misspell_word'.
# NOTE(review): the section heading says "Count" but the filter below selects
# category "vcr" — confirm which category this cell is meant to cover.

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Iterate over rows where category is "vcr"
for index, row in data.iterrows():
    if row["category"] == "vcr" and row['modified_question_function_name'] == 'misspell_word':

        # Prepare the prompt template
        question_type = row["category"]
        question = row["question"]
        options = {
            "A": row["A"],
            "B": row["B"],
            "C": row["C"],
            "D": row["D"]
        }

        prompt_text = (
            f"The following are multiple choice questions about {question_type}. "
            "You should directly answer the question by choosing the correct option given the image and the question. "
            "Give only the letter indicating the correct answer e.g. 'A'\n"
            f"Question: {question}\n"
            "Options:\n"
            f"A. {options['A']}\n"
            f"B. {options['B']}\n"
            f"C. {options['C']}\n"
            f"D. {options['D']}\n"
            "Answer:"
        )

        # Load the corresponding image and prepare inputs. The context manager
        # closes the image file as soon as the processor call returns, so open
        # handles do not accumulate over the dataset.
        image_path = f"Weighted AVG Denoised [TopK]/Pixelate/{row['path']}"
        with Image.open(image_path) as image:
            inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

        # Generate model output
        output = model.generate(**inputs, max_new_tokens=1000)
        full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

        print(f"Full model output for Question ID {row['id']}: {full_answer}")

        # Extract the predicted answer (option letter)
        if match := re.search(r"Answer:\s*([A-D])", full_answer):
            predicted_answer = match.group(1)  # Extract the letter (A, B, C, or D)
        else:
            print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
            predicted_answer = "Unknown"

        # Wrapped in a list so it compares (and prints) like actual_answer below
        predicted_answer = [predicted_answer]

        # Get the noise type
        noise_type = row["modified_question_function_name"]
        # Third character of the stored answer string
        # (presumably formatted like "['A']" — TODO confirm against the CSV).
        actual_answer = [row["answer"][2]]

        # Compare with the actual answer and update counters
        if predicted_answer == actual_answer:
            noise_type_accuracy[noise_type]["correct"] += 1
        noise_type_accuracy[noise_type]["total"] += 1

        # Print the prediction
        print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Calculate and print accuracy per noise type. Done after the loop so the
# summary is printed once rather than once per dataframe row.
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Aggregate the per-noise-type tallies into one overall score
total_correct, total_predictions = 0, 0

# Report each noise type, then fold its tallies into the running totals
for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Percentage for this noise type; 0 when nothing was evaluated
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    total_predictions += total
    total_correct += correct

# Overall percentage across all noise types; 0 when nothing was evaluated
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Rain-Count

In [None]:
# Evaluate the model on the Rain images, restricted to "vcr" questions whose
# text was perturbed by 'misspell_word'.
# NOTE(review): the section heading says "Count" but the filter below selects
# category "vcr" — confirm which category this cell is meant to cover.

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Iterate over rows where category is "vcr"
for index, row in data.iterrows():
    if row["category"] == "vcr" and row['modified_question_function_name'] == 'misspell_word':

        # Prepare the prompt template
        question_type = row["category"]
        question = row["question"]
        options = {
            "A": row["A"],
            "B": row["B"],
            "C": row["C"],
            "D": row["D"]
        }

        prompt_text = (
            f"The following are multiple choice questions about {question_type}. "
            "You should directly answer the question by choosing the correct option given the image and the question. "
            "Give only the letter indicating the correct answer e.g. 'A'\n"
            f"Question: {question}\n"
            "Options:\n"
            f"A. {options['A']}\n"
            f"B. {options['B']}\n"
            f"C. {options['C']}\n"
            f"D. {options['D']}\n"
            "Answer:"
        )

        # Load the corresponding image and prepare inputs. The context manager
        # closes the image file as soon as the processor call returns, so open
        # handles do not accumulate over the dataset.
        image_path = f"Weighted AVG Denoised [TopK]/Rain/{row['path']}"
        with Image.open(image_path) as image:
            inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

        # Generate model output
        output = model.generate(**inputs, max_new_tokens=1000)
        full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

        print(f"Full model output for Question ID {row['id']}: {full_answer}")

        # Extract the predicted answer (option letter)
        if match := re.search(r"Answer:\s*([A-D])", full_answer):
            predicted_answer = match.group(1)  # Extract the letter (A, B, C, or D)
        else:
            print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
            predicted_answer = "Unknown"

        # Wrapped in a list so it compares (and prints) like actual_answer below
        predicted_answer = [predicted_answer]

        # Get the noise type
        noise_type = row["modified_question_function_name"]
        # Third character of the stored answer string
        # (presumably formatted like "['A']" — TODO confirm against the CSV).
        actual_answer = [row["answer"][2]]

        # Compare with the actual answer and update counters
        if predicted_answer == actual_answer:
            noise_type_accuracy[noise_type]["correct"] += 1
        noise_type_accuracy[noise_type]["total"] += 1

        # Print the prediction
        print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Calculate and print accuracy per noise type. Done after the loop so the
# summary is printed once rather than once per dataframe row.
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Aggregate the per-noise-type tallies into one overall score
total_correct, total_predictions = 0, 0

# Report each noise type, then fold its tallies into the running totals
for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Percentage for this noise type; 0 when nothing was evaluated
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    total_predictions += total
    total_correct += correct

# Overall percentage across all noise types; 0 when nothing was evaluated
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Saturation-Count

In [None]:
# Evaluate the model on the Saturation images, restricted to "vcr" questions
# whose text was perturbed by 'misspell_word'.
# NOTE(review): the section heading says "Count" but the filter below selects
# category "vcr" — confirm which category this cell is meant to cover.

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Iterate over rows where category is "vcr"
for index, row in data.iterrows():
    if row["category"] == "vcr" and row['modified_question_function_name'] == 'misspell_word':

        # Prepare the prompt template
        question_type = row["category"]
        question = row["question"]
        options = {
            "A": row["A"],
            "B": row["B"],
            "C": row["C"],
            "D": row["D"]
        }

        prompt_text = (
            f"The following are multiple choice questions about {question_type}. "
            "You should directly answer the question by choosing the correct option given the image and the question. "
            "Give only the letter indicating the correct answer e.g. 'A'\n"
            f"Question: {question}\n"
            "Options:\n"
            f"A. {options['A']}\n"
            f"B. {options['B']}\n"
            f"C. {options['C']}\n"
            f"D. {options['D']}\n"
            "Answer:"
        )

        # Load the corresponding image and prepare inputs. The context manager
        # closes the image file as soon as the processor call returns, so open
        # handles do not accumulate over the dataset.
        image_path = f"Weighted AVG Denoised [TopK]/Saturation/{row['path']}"
        with Image.open(image_path) as image:
            inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

        # Generate model output
        output = model.generate(**inputs, max_new_tokens=1000)
        full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

        print(f"Full model output for Question ID {row['id']}: {full_answer}")

        # Extract the predicted answer (option letter)
        if match := re.search(r"Answer:\s*([A-D])", full_answer):
            predicted_answer = match.group(1)  # Extract the letter (A, B, C, or D)
        else:
            print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
            predicted_answer = "Unknown"

        # Wrapped in a list so it compares (and prints) like actual_answer below
        predicted_answer = [predicted_answer]

        # Get the noise type
        noise_type = row["modified_question_function_name"]
        # Third character of the stored answer string
        # (presumably formatted like "['A']" — TODO confirm against the CSV).
        actual_answer = [row["answer"][2]]

        # Compare with the actual answer and update counters
        if predicted_answer == actual_answer:
            noise_type_accuracy[noise_type]["correct"] += 1
        noise_type_accuracy[noise_type]["total"] += 1

        # Print the prediction
        print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Calculate and print accuracy per noise type. Done after the loop so the
# summary is printed once rather than once per dataframe row.
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Aggregate the per-noise-type tallies into one overall score
total_correct, total_predictions = 0, 0

# Report each noise type, then fold its tallies into the running totals
for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Percentage for this noise type; 0 when nothing was evaluated
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    total_predictions += total
    total_correct += correct

# Overall percentage across all noise types; 0 when nothing was evaluated
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Shot-noise-Count

In [None]:
# Evaluate the model on the Shot-noise images, restricted to "vcr" questions
# whose text was perturbed by 'misspell_word'.
# NOTE(review): the section heading says "Count" but the filter below selects
# category "vcr" — confirm which category this cell is meant to cover.

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Iterate over rows where category is "vcr"
for index, row in data.iterrows():
    if row["category"] == "vcr" and row['modified_question_function_name'] == 'misspell_word':

        # Prepare the prompt template
        question_type = row["category"]
        question = row["question"]
        options = {
            "A": row["A"],
            "B": row["B"],
            "C": row["C"],
            "D": row["D"]
        }

        prompt_text = (
            f"The following are multiple choice questions about {question_type}. "
            "You should directly answer the question by choosing the correct option given the image and the question. "
            "Give only the letter indicating the correct answer e.g. 'A'\n"
            f"Question: {question}\n"
            "Options:\n"
            f"A. {options['A']}\n"
            f"B. {options['B']}\n"
            f"C. {options['C']}\n"
            f"D. {options['D']}\n"
            "Answer:"
        )

        # Load the corresponding image and prepare inputs. The context manager
        # closes the image file as soon as the processor call returns, so open
        # handles do not accumulate over the dataset.
        image_path = f"Weighted AVG Denoised [TopK]/Shot-noise/{row['path']}"
        with Image.open(image_path) as image:
            inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

        # Generate model output
        output = model.generate(**inputs, max_new_tokens=1000)
        full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

        print(f"Full model output for Question ID {row['id']}: {full_answer}")

        # Extract the predicted answer (option letter)
        if match := re.search(r"Answer:\s*([A-D])", full_answer):
            predicted_answer = match.group(1)  # Extract the letter (A, B, C, or D)
        else:
            print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
            predicted_answer = "Unknown"

        # Wrapped in a list so it compares (and prints) like actual_answer below
        predicted_answer = [predicted_answer]

        # Get the noise type
        noise_type = row["modified_question_function_name"]
        # Third character of the stored answer string
        # (presumably formatted like "['A']" — TODO confirm against the CSV).
        actual_answer = [row["answer"][2]]

        # Compare with the actual answer and update counters
        if predicted_answer == actual_answer:
            noise_type_accuracy[noise_type]["correct"] += 1
        noise_type_accuracy[noise_type]["total"] += 1

        # Print the prediction
        print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Calculate and print accuracy per noise type. Done after the loop so the
# summary is printed once rather than once per dataframe row.
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Aggregate the per-noise-type tallies into one overall score
total_correct, total_predictions = 0, 0

# Report each noise type, then fold its tallies into the running totals
for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Percentage for this noise type; 0 when nothing was evaluated
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    total_predictions += total
    total_correct += correct

# Overall percentage across all noise types; 0 when nothing was evaluated
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Snow-Count

In [None]:
# Evaluate the model on the Snow images, restricted to "vcr" questions whose
# text was perturbed by 'misspell_word'.
# NOTE(review): the section heading says "Count" but the filter below selects
# category "vcr" — confirm which category this cell is meant to cover.

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Iterate over rows where category is "vcr"
for index, row in data.iterrows():
    if row["category"] == "vcr" and row['modified_question_function_name'] == 'misspell_word':

        # Prepare the prompt template
        question_type = row["category"]
        question = row["question"]
        options = {
            "A": row["A"],
            "B": row["B"],
            "C": row["C"],
            "D": row["D"]
        }

        prompt_text = (
            f"The following are multiple choice questions about {question_type}. "
            "You should directly answer the question by choosing the correct option given the image and the question. "
            "Give only the letter indicating the correct answer e.g. 'A'\n"
            f"Question: {question}\n"
            "Options:\n"
            f"A. {options['A']}\n"
            f"B. {options['B']}\n"
            f"C. {options['C']}\n"
            f"D. {options['D']}\n"
            "Answer:"
        )

        # Load the corresponding image and prepare inputs. The context manager
        # closes the image file as soon as the processor call returns, so open
        # handles do not accumulate over the dataset.
        image_path = f"Weighted AVG Denoised [TopK]/Snow/{row['path']}"
        with Image.open(image_path) as image:
            inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

        # Generate model output
        output = model.generate(**inputs, max_new_tokens=1000)
        full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

        print(f"Full model output for Question ID {row['id']}: {full_answer}")

        # Extract the predicted answer (option letter)
        if match := re.search(r"Answer:\s*([A-D])", full_answer):
            predicted_answer = match.group(1)  # Extract the letter (A, B, C, or D)
        else:
            print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
            predicted_answer = "Unknown"

        # Wrapped in a list so it compares (and prints) like actual_answer below
        predicted_answer = [predicted_answer]

        # Get the noise type
        noise_type = row["modified_question_function_name"]
        # Third character of the stored answer string
        # (presumably formatted like "['A']" — TODO confirm against the CSV).
        actual_answer = [row["answer"][2]]

        # Compare with the actual answer and update counters
        if predicted_answer == actual_answer:
            noise_type_accuracy[noise_type]["correct"] += 1
        noise_type_accuracy[noise_type]["total"] += 1

        # Print the prediction
        print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Calculate and print accuracy per noise type. Done after the loop so the
# summary is printed once rather than once per dataframe row.
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the tallies currently held in `noise_type_accuracy`:
# one report per noise type, then the totals pooled over all of them.
total_correct = 0
total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Percentage for this noise type; 0 when nothing was scored
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the pooled totals
    total_correct += correct
    total_predictions += total

# Pooled result over every noise type seen above
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Spatter-Count

In [None]:
# Score the model on "vcr" questions whose text was perturbed by
# 'misspell_word', using the images stored under the Spatter folder.
# NOTE(review): the section heading says "Count" but the filter below selects
# category "vcr" — confirm which category is intended.

# Per-noise-type tallies: number of correct answers and number of questions
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for _, row in data.iterrows():
    # Only the "vcr" category with the 'misspell_word' perturbation is scored
    if row["category"] != "vcr" or row['modified_question_function_name'] != 'misspell_word':
        continue

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Open the image in a context manager so its file handle is released as
    # soon as the processor has turned it into tensors.
    image_path = f"Weighted AVG Denoised [TopK]/Spatter/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output (max_new_tokens is only an upper bound; the prompt
    # asks for a single option letter)
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the option letter that follows the first "Answer:" in the text.
    # NOTE(review): whether the decoded text echoes the prompt's trailing
    # "Answer:" may depend on the transformers version — verify on a few samples.
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)  # Extract the letter (A, B, C, or D)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Wrapped in a list so the printed prediction keeps its "['A']" form
    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # assumes the option letter is the third character of the "answer" column
    # — TODO confirm against the CSV
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after every row has been scored
# (previously this ran inside the row loop and printed after every row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the tallies currently held in `noise_type_accuracy`:
# one report per noise type, then the totals pooled over all of them.
total_correct = 0
total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Percentage for this noise type; 0 when nothing was scored
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the pooled totals
    total_correct += correct
    total_predictions += total

# Pooled result over every noise type seen above
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Speckle-Count

In [None]:
# Score the model on "vcr" questions whose text was perturbed by
# 'misspell_word', using the images stored under the Speckle-noise folder.
# NOTE(review): the section heading says "Count" but the filter below selects
# category "vcr" — confirm which category is intended.

# Per-noise-type tallies: number of correct answers and number of questions
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for _, row in data.iterrows():
    # Only the "vcr" category with the 'misspell_word' perturbation is scored
    if row["category"] != "vcr" or row['modified_question_function_name'] != 'misspell_word':
        continue

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Open the image in a context manager so its file handle is released as
    # soon as the processor has turned it into tensors.
    image_path = f"Weighted AVG Denoised [TopK]/Speckle-noise/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output (max_new_tokens is only an upper bound; the prompt
    # asks for a single option letter)
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the option letter that follows the first "Answer:" in the text.
    # NOTE(review): whether the decoded text echoes the prompt's trailing
    # "Answer:" may depend on the transformers version — verify on a few samples.
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)  # Extract the letter (A, B, C, or D)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Wrapped in a list so the printed prediction keeps its "['A']" form
    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # assumes the option letter is the third character of the "answer" column
    # — TODO confirm against the CSV
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after every row has been scored
# (previously this ran inside the row loop and printed after every row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the tallies currently held in `noise_type_accuracy`:
# one report per noise type, then the totals pooled over all of them.
total_correct = 0
total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Percentage for this noise type; 0 when nothing was scored
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the pooled totals
    total_correct += correct
    total_predictions += total

# Pooled result over every noise type seen above
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Zoom-blur-Count

In [None]:
# Score the model on "vcr" questions whose text was perturbed by
# 'misspell_word', using the images stored under the Zoom-Blur folder.
# NOTE(review): the section heading says "Count" but the filter below selects
# category "vcr" — confirm which category is intended.

# Per-noise-type tallies: number of correct answers and number of questions
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for _, row in data.iterrows():
    # Only the "vcr" category with the 'misspell_word' perturbation is scored
    if row["category"] != "vcr" or row['modified_question_function_name'] != 'misspell_word':
        continue

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Open the image in a context manager so its file handle is released as
    # soon as the processor has turned it into tensors.
    image_path = f"Weighted AVG Denoised [TopK]/Zoom-Blur/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output (max_new_tokens is only an upper bound; the prompt
    # asks for a single option letter)
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the option letter that follows the first "Answer:" in the text.
    # NOTE(review): whether the decoded text echoes the prompt's trailing
    # "Answer:" may depend on the transformers version — verify on a few samples.
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)  # Extract the letter (A, B, C, or D)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Wrapped in a list so the printed prediction keeps its "['A']" form
    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # assumes the option letter is the third character of the "answer" column
    # — TODO confirm against the CSV
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after every row has been scored
# (previously this ran inside the row loop and printed after every row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the tallies currently held in `noise_type_accuracy`:
# one report per noise type, then the totals pooled over all of them.
total_correct = 0
total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Percentage for this noise type; 0 when nothing was scored
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the pooled totals
    total_correct += correct
    total_predictions += total

# Pooled result over every noise type seen above
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


## Motion-blur Count

In [None]:
# Score the model on "vcr" questions whose text was perturbed by
# 'misspell_word', using the images stored under the Motion-blur folder.
# NOTE(review): the section heading says "Count" but the filter below selects
# category "vcr" — confirm which category is intended.

# Per-noise-type tallies: number of correct answers and number of questions
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for _, row in data.iterrows():
    # Only the "vcr" category with the 'misspell_word' perturbation is scored
    if row["category"] != "vcr" or row['modified_question_function_name'] != 'misspell_word':
        continue

    # Prepare the prompt template
    question_type = row["category"]
    question = row["question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Open the image in a context manager so its file handle is released as
    # soon as the processor has turned it into tensors.
    image_path = f"Weighted AVG Denoised [TopK]/Motion-blur/{row['path']}"
    with Image.open(image_path) as image:
        inputs = processor(images=image, text=prompt_text, return_tensors="pt").to("cuda:0")

    # Generate model output (max_new_tokens is only an upper bound; the prompt
    # asks for a single option letter)
    output = model.generate(**inputs, max_new_tokens=1000)
    full_answer = processor.batch_decode(output, skip_special_tokens=True)[0]

    print(f"Full model output for Question ID {row['id']}: {full_answer}")

    # Extract the option letter that follows the first "Answer:" in the text.
    # NOTE(review): whether the decoded text echoes the prompt's trailing
    # "Answer:" may depend on the transformers version — verify on a few samples.
    if match := re.search(r"Answer:\s*([A-D])", full_answer):
        predicted_answer = match.group(1)  # Extract the letter (A, B, C, or D)
    else:
        print(f"Could not extract answer for Question ID {row['id']}: {full_answer}")
        predicted_answer = "Unknown"

    # Wrapped in a list so the printed prediction keeps its "['A']" form
    predicted_answer = [predicted_answer]

    # Get the noise type
    noise_type = row["modified_question_function_name"]
    # assumes the option letter is the third character of the "answer" column
    # — TODO confirm against the CSV
    actual_answer = [row["answer"][2]]

    # Compare with the actual answer and update counters
    if predicted_answer == actual_answer:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

# Report accuracy per noise type once, after every row has been scored
# (previously this ran inside the row loop and printed after every row).
for noise_type, counts in noise_type_accuracy.items():
    accuracy = counts["correct"] / counts["total"] if counts["total"] > 0 else 0
    print(f"Noise Type: {noise_type} - Accuracy: {accuracy:.2%}")

In [None]:
# Summarise the tallies currently held in `noise_type_accuracy`:
# one report per noise type, then the totals pooled over all of them.
total_correct = 0
total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Percentage for this noise type; 0 when nothing was scored
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the pooled totals
    total_correct += correct
    total_predictions += total

# Pooled result over every noise type seen above
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")