# Speckle-noise

In [None]:
import pandas as pd
import os
import json
from PIL import Image
from collections import defaultdict
import google.generativeai as genai

# Configure Gemini API
# Prefer a key supplied through the environment so the secret never has to be
# written into (and accidentally committed with) this notebook. The inline
# placeholder is kept as a fallback for users who paste their key here.
API_KEY = (
    os.environ.get("GEMINI_API_KEY")
    or os.environ.get("GOOGLE_API_KEY")
    or "<INSERT YOUR GEMINI API KEY HERE>"
)
genai.configure(api_key=API_KEY)
model = genai.GenerativeModel("gemini-2.0-flash")

# Paths
DATA_PATH = "Noisy-Denoised_QuestionPairs[new].csv"
CHECKPOINT_PATH = "checkpoint_Speckle-noise_NoisyNoisy.json"

# Load dataset
data = pd.read_csv(DATA_PATH)

# Resume from a previous run when a checkpoint file is present. Without one,
# the empty mapping below makes every lookup fall back to its default, which
# yields a fresh state.
checkpoint_data = {}
if os.path.exists(CHECKPOINT_PATH):
    with open(CHECKPOINT_PATH, "r") as f:
        checkpoint_data = json.load(f)

# question id -> list of noise types already evaluated for that question.
# .get() supplies a default for any key missing from the checkpoint.
processed_questions = defaultdict(list)
processed_questions.update(checkpoint_data.get("processed_questions", {}))

# noise type -> running {"correct", "total"} counters.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})
noise_type_accuracy.update(checkpoint_data.get("accuracy", {}))

# Overall counters across all noise types.
total_correct = checkpoint_data.get("total_correct", 0)
total_predictions = checkpoint_data.get("total_predictions", 0)


def _save_checkpoint(path, processed, accuracy, correct, predictions):
    """Write the evaluation state to *path* as JSON and return the saved dict.

    The state is written to a sibling temporary file first and then moved
    over *path*, so an interruption mid-write cannot leave a truncated
    checkpoint behind.
    """
    state = {
        "processed_questions": dict(processed),
        "accuracy": dict(accuracy),
        "total_correct": correct,
        "total_predictions": predictions,
    }
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w") as f:
        json.dump(state, f)
    os.replace(tmp_path, path)
    return state


def _build_prompt(row):
    """Return the multiple-choice prompt text for one dataset row.

    Reads the ``category``, ``modified_question`` and ``A``-``D`` columns.
    """
    question_type = row["category"]
    question = row["modified_question"]
    options = {key: row[key] for key in ["A", "B", "C", "D"]}
    return (
        f"The following are multiple-choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g., 'A'.\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )


# Only rows whose category is "count" are evaluated; filter once up front
# instead of testing the category on every iteration.
count_rows = data[data["category"] == "count"]

# The try/finally guarantees the final checkpoint is written even when the
# run is interrupted (e.g. KeyboardInterrupt) or hits an unexpected error.
try:
    for _, row in count_rows.iterrows():
        question_id = row["id"]
        noise_type = row["modified_question_function_name"]

        # Skip if this specific question with this noise type was already processed
        if noise_type in processed_questions[question_id]:
            continue

        image_path = f"Noisy DARE TEST/Speckle-noise/{row['path']}"

        try:
            image = Image.open(image_path)
        except FileNotFoundError:
            print(f"Image not found: {image_path}. Skipping...")
            continue

        # Use the image as a context manager so its file handle is closed
        # once the request has been sent, rather than leaking one per row.
        with image:
            prompt_text = _build_prompt(row)
            try:
                # Send request to Gemini API
                response = model.generate_content([prompt_text, image])
                full_answer = response.text.strip()
            except Exception as e:
                print(f"API error: {e}. Saving checkpoint and exiting...")
                break  # Stop execution to prevent further API exhaustion

        # An empty reply is not an API failure: skip the row without
        # recording it, so a later run can retry it.
        answer_tokens = full_answer.split()
        if not answer_tokens:
            print(f"Empty response for question {question_id}. Skipping...")
            continue
        predicted_answer = answer_tokens[-1]  # Extract last word

        # Evaluate accuracy.
        # NOTE(review): the answer column appears to hold text such as "['C']"
        # (see the "Actual:" field in the log output), so index 2 is the
        # option letter - confirm against the dataset.
        if predicted_answer == row["answer"][2]:  # Compare with actual answer
            noise_type_accuracy[noise_type]["correct"] += 1
            total_correct += 1
        noise_type_accuracy[noise_type]["total"] += 1
        total_predictions += 1

        # Mark this (question_id, noise_type) pair as processed
        processed_questions[question_id].append(noise_type)

        # Save checkpoint every 50 iterations
        if total_predictions % 50 == 0:
            checkpoint_data = _save_checkpoint(
                CHECKPOINT_PATH,
                processed_questions,
                noise_type_accuracy,
                total_correct,
                total_predictions,
            )
            print(f"Checkpoint saved at {CHECKPOINT_PATH}")

        # Print progress
        print(f"Question ID {row['id']} - Predicted: {predicted_answer} - Actual: {row['answer']} - Noise: {noise_type}")
        print(f"Progress: {total_correct}/{total_predictions} and OVERALL accuracy percentage: {(total_correct / total_predictions) * 100}%")
        print("Noise Type Accuracy so far:")
        for n_type, counts in noise_type_accuracy.items():
            print(f"{n_type}: {counts['correct']}/{counts['total']} and {n_type} accuracy percentage: {(counts['correct'] / counts['total']) * 100}%")
        print("----------------------")
finally:
    # Final checkpoint save
    checkpoint_data = _save_checkpoint(
        CHECKPOINT_PATH,
        processed_questions,
        noise_type_accuracy,
        total_correct,
        total_predictions,
    )
    print(f"Final checkpoint saved at {CHECKPOINT_PATH}.")


Question ID count_1189 - Predicted: C - Actual: ['C'] - Noise: abbreviation_replacement
Progress: 1/1 and OVERALL accuracy percentage: 100.0%
Noise Type Accuracy so far:
abbreviation_replacement: 1/1 and abbreviation_replacement accuracy percentage: 100.0%
----------------------
Question ID count_2275 - Predicted: C - Actual: ['C'] - Noise: abbreviation_replacement
Progress: 2/2 and OVERALL accuracy percentage: 100.0%
Noise Type Accuracy so far:
abbreviation_replacement: 2/2 and abbreviation_replacement accuracy percentage: 100.0%
----------------------
Question ID count_283 - Predicted: B - Actual: ['B'] - Noise: abbreviation_replacement
Progress: 3/3 and OVERALL accuracy percentage: 100.0%
Noise Type Accuracy so far:
abbreviation_replacement: 3/3 and abbreviation_replacement accuracy percentage: 100.0%
----------------------
Question ID count_2217 - Predicted: B - Actual: ['B'] - Noise: abbreviation_replacement
Progress: 4/4 and OVERALL accuracy percentage: 100.0%
Noise Type Accuracy