# llava-v1.6-vicuna-7b-hf  

## General Inferencing

In [None]:
import torch

# Report which GPU (if any) PyTorch can see before running inference.
if not torch.cuda.is_available():
    print("CUDA is not available.")
else:
    print(f"CUDA is available. Device count: {torch.cuda.device_count()}")
    print(f"Using device: {torch.cuda.get_device_name(0)}")
    print(f"Current device index: {torch.cuda.current_device()}")

# Brightness-Count

In [None]:
import pandas as pd
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
import torch
from PIL import Image
from collections import defaultdict

# Evaluate the model on the "count" questions using the denoised Brightness
# images, tallying correct/total predictions per noise type.

# Load the model and processor
processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf")
model = LlavaNextForConditionalGeneration.from_pretrained(
    "llava-hf/llava-v1.6-vicuna-7b-hf",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)
model.to("cuda:0")

# Load the dataset and keep only the "count" questions up front, instead of
# skipping the other categories one row at a time inside the loop.
data = pd.read_csv("Noisy-Denoised_QuestionPairs[new].csv")
count_rows = data[data["category"] == "count"]

# Per-noise-type tallies: {"correct": int, "total": int}
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for _, row in count_rows.iterrows():
    # Load the corresponding image. The context manager closes the file
    # handle immediately; copy() reads the pixel data into an independent
    # image so it stays usable after the file is closed.
    image_path = f"DENOISED DARE DRUNET (MAIN)/Brightness/{row['path']}"
    try:
        with Image.open(image_path) as opened:
            image = opened.copy()
    except FileNotFoundError:
        print(f"Image not found: {image_path}. Skipping...")
        continue

    # Prepare the prompt template
    question_type = row["category"]
    question = row["modified_question"]
    options = {letter: row[letter] for letter in "ABCD"}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Single-turn conversation: the question text followed by the image slot
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {"type": "image"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    # Prepare inputs
    inputs = processor(images=image, text=prompt, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)

    # The returned sequence starts with the prompt tokens; decode only the
    # newly generated part so prompt text can never be mistaken for the
    # prediction (e.g. when the model produces no reply at all).
    prompt_length = inputs["input_ids"].shape[1]
    generated_answer = processor.decode(output[0][prompt_length:], skip_special_tokens=True)

    # Take the last word of the reply and drop surrounding punctuation so
    # replies such as "A." or "(A)" still count as option "A".
    words = generated_answer.split()
    predicted_answer = words[-1].strip("()[]{}.,:;!?'\"") if words else ""

    # Get the noise type
    noise_type = row["modified_question_function_name"]

    # Compare with the actual answer and update counters.
    # NOTE(review): index 2 presumably relies on answers being stored like
    # "['A']" -- confirm against the CSV.
    if predicted_answer == row["answer"][2]:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

In [None]:
# Summarise the tallies in noise_type_accuracy: one report per noise type,
# followed by the aggregate over all of them.
total_correct = 0
total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Guard against an empty bucket so the division cannot fail.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Aggregate result across every noise type
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Contrast-Count

In [None]:
import pandas as pd
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
import torch
from PIL import Image
from collections import defaultdict

# Evaluate the model on the "count" questions using the denoised Contrast
# images, tallying correct/total predictions per noise type.

# Load the model and processor
processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf")
model = LlavaNextForConditionalGeneration.from_pretrained(
    "llava-hf/llava-v1.6-vicuna-7b-hf",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)
model.to("cuda:0")

# Load the dataset and keep only the "count" questions up front, instead of
# skipping the other categories one row at a time inside the loop.
data = pd.read_csv("Noisy-Denoised_QuestionPairs[new].csv")
count_rows = data[data["category"] == "count"]

# Per-noise-type tallies: {"correct": int, "total": int}
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for _, row in count_rows.iterrows():
    # Load the corresponding image. The context manager closes the file
    # handle immediately; copy() reads the pixel data into an independent
    # image so it stays usable after the file is closed.
    image_path = f"DENOISED DARE DRUNET (MAIN)/Contrast/{row['path']}"
    try:
        with Image.open(image_path) as opened:
            image = opened.copy()
    except FileNotFoundError:
        print(f"Image not found: {image_path}. Skipping...")
        continue

    # Prepare the prompt template
    question_type = row["category"]
    question = row["modified_question"]
    options = {letter: row[letter] for letter in "ABCD"}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Single-turn conversation: the question text followed by the image slot
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {"type": "image"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    # Prepare inputs
    inputs = processor(images=image, text=prompt, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)

    # The returned sequence starts with the prompt tokens; decode only the
    # newly generated part so prompt text can never be mistaken for the
    # prediction (e.g. when the model produces no reply at all).
    prompt_length = inputs["input_ids"].shape[1]
    generated_answer = processor.decode(output[0][prompt_length:], skip_special_tokens=True)

    # Take the last word of the reply and drop surrounding punctuation so
    # replies such as "A." or "(A)" still count as option "A".
    words = generated_answer.split()
    predicted_answer = words[-1].strip("()[]{}.,:;!?'\"") if words else ""

    # Get the noise type
    noise_type = row["modified_question_function_name"]

    # Compare with the actual answer and update counters.
    # NOTE(review): index 2 presumably relies on answers being stored like
    # "['A']" -- confirm against the CSV.
    if predicted_answer == row["answer"][2]:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")

In [None]:
# Summarise the tallies in noise_type_accuracy: one report per noise type,
# followed by the aggregate over all of them.
total_correct = 0
total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Guard against an empty bucket so the division cannot fail.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Aggregate result across every noise type
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Defocus-blur-Count

In [None]:
import pandas as pd
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
import torch
from PIL import Image
from collections import defaultdict

# Evaluate the model on the "count" questions using the denoised Defocus-blur
# images, tallying correct/total predictions per noise type.

# Load the model and processor
processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf")
model = LlavaNextForConditionalGeneration.from_pretrained(
    "llava-hf/llava-v1.6-vicuna-7b-hf",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)
model.to("cuda:0")

# Load the dataset and keep only the "count" questions up front, instead of
# skipping the other categories one row at a time inside the loop.
data = pd.read_csv("Noisy-Denoised_QuestionPairs[new].csv")
count_rows = data[data["category"] == "count"]

# Per-noise-type tallies: {"correct": int, "total": int}
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for _, row in count_rows.iterrows():
    # Load the corresponding image. The context manager closes the file
    # handle immediately; copy() reads the pixel data into an independent
    # image so it stays usable after the file is closed.
    image_path = f"DENOISED DARE DRUNET (MAIN)/Defocus-blur/{row['path']}"
    try:
        with Image.open(image_path) as opened:
            image = opened.copy()
    except FileNotFoundError:
        print(f"Image not found: {image_path}. Skipping...")
        continue

    # Prepare the prompt template
    question_type = row["category"]
    question = row["modified_question"]
    options = {letter: row[letter] for letter in "ABCD"}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Single-turn conversation: the question text followed by the image slot
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {"type": "image"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    # Prepare inputs
    inputs = processor(images=image, text=prompt, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)

    # The returned sequence starts with the prompt tokens; decode only the
    # newly generated part so prompt text can never be mistaken for the
    # prediction (e.g. when the model produces no reply at all).
    prompt_length = inputs["input_ids"].shape[1]
    generated_answer = processor.decode(output[0][prompt_length:], skip_special_tokens=True)

    # Take the last word of the reply and drop surrounding punctuation so
    # replies such as "A." or "(A)" still count as option "A".
    words = generated_answer.split()
    predicted_answer = words[-1].strip("()[]{}.,:;!?'\"") if words else ""

    # Get the noise type
    noise_type = row["modified_question_function_name"]

    # Compare with the actual answer and update counters.
    # NOTE(review): index 2 presumably relies on answers being stored like
    # "['A']" -- confirm against the CSV.
    if predicted_answer == row["answer"][2]:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")


In [None]:
# Summarise the tallies in noise_type_accuracy: one report per noise type,
# followed by the aggregate over all of them.
total_correct = 0
total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Guard against an empty bucket so the division cannot fail.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Aggregate result across every noise type
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Elastic-Count

In [None]:
import pandas as pd
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
import torch
from PIL import Image
from collections import defaultdict

# Evaluate the model on the "count" questions using the denoised Elastic
# images, tallying correct/total predictions per noise type.

# Load the model and processor
processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf")
model = LlavaNextForConditionalGeneration.from_pretrained(
    "llava-hf/llava-v1.6-vicuna-7b-hf",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)
model.to("cuda:0")

# Load the dataset and keep only the "count" questions up front, instead of
# skipping the other categories one row at a time inside the loop.
data = pd.read_csv("Noisy-Denoised_QuestionPairs[new].csv")
count_rows = data[data["category"] == "count"]

# Per-noise-type tallies: {"correct": int, "total": int}
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for _, row in count_rows.iterrows():
    # Load the corresponding image. The context manager closes the file
    # handle immediately; copy() reads the pixel data into an independent
    # image so it stays usable after the file is closed.
    image_path = f"DENOISED DARE DRUNET (MAIN)/Elastic/{row['path']}"
    try:
        with Image.open(image_path) as opened:
            image = opened.copy()
    except FileNotFoundError:
        print(f"Image not found: {image_path}. Skipping...")
        continue

    # Prepare the prompt template
    question_type = row["category"]
    question = row["modified_question"]
    options = {letter: row[letter] for letter in "ABCD"}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Single-turn conversation: the question text followed by the image slot
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {"type": "image"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    # Prepare inputs
    inputs = processor(images=image, text=prompt, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)

    # The returned sequence starts with the prompt tokens; decode only the
    # newly generated part so prompt text can never be mistaken for the
    # prediction (e.g. when the model produces no reply at all).
    prompt_length = inputs["input_ids"].shape[1]
    generated_answer = processor.decode(output[0][prompt_length:], skip_special_tokens=True)

    # Take the last word of the reply and drop surrounding punctuation so
    # replies such as "A." or "(A)" still count as option "A".
    words = generated_answer.split()
    predicted_answer = words[-1].strip("()[]{}.,:;!?'\"") if words else ""

    # Get the noise type
    noise_type = row["modified_question_function_name"]

    # Compare with the actual answer and update counters.
    # NOTE(review): index 2 presumably relies on answers being stored like
    # "['A']" -- confirm against the CSV.
    if predicted_answer == row["answer"][2]:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")


In [None]:
# Summarise the tallies in noise_type_accuracy: one report per noise type,
# followed by the aggregate over all of them.
total_correct = 0
total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Guard against an empty bucket so the division cannot fail.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Aggregate result across every noise type
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Fog-Count

In [None]:
import pandas as pd
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
import torch
from PIL import Image
from collections import defaultdict

# Evaluate the model on the "count" questions using the denoised Fog
# images, tallying correct/total predictions per noise type.

# Load the model and processor
processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf")
model = LlavaNextForConditionalGeneration.from_pretrained(
    "llava-hf/llava-v1.6-vicuna-7b-hf",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)
model.to("cuda:0")

# Load the dataset and keep only the "count" questions up front, instead of
# skipping the other categories one row at a time inside the loop.
data = pd.read_csv("Noisy-Denoised_QuestionPairs[new].csv")
count_rows = data[data["category"] == "count"]

# Per-noise-type tallies: {"correct": int, "total": int}
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for _, row in count_rows.iterrows():
    # Load the corresponding image. The context manager closes the file
    # handle immediately; copy() reads the pixel data into an independent
    # image so it stays usable after the file is closed.
    image_path = f"DENOISED DARE DRUNET (MAIN)/Fog/{row['path']}"
    try:
        with Image.open(image_path) as opened:
            image = opened.copy()
    except FileNotFoundError:
        print(f"Image not found: {image_path}. Skipping...")
        continue

    # Prepare the prompt template
    question_type = row["category"]
    question = row["modified_question"]
    options = {letter: row[letter] for letter in "ABCD"}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Single-turn conversation: the question text followed by the image slot
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {"type": "image"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    # Prepare inputs
    inputs = processor(images=image, text=prompt, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)

    # The returned sequence starts with the prompt tokens; decode only the
    # newly generated part so prompt text can never be mistaken for the
    # prediction (e.g. when the model produces no reply at all).
    prompt_length = inputs["input_ids"].shape[1]
    generated_answer = processor.decode(output[0][prompt_length:], skip_special_tokens=True)

    # Take the last word of the reply and drop surrounding punctuation so
    # replies such as "A." or "(A)" still count as option "A".
    words = generated_answer.split()
    predicted_answer = words[-1].strip("()[]{}.,:;!?'\"") if words else ""

    # Get the noise type
    noise_type = row["modified_question_function_name"]

    # Compare with the actual answer and update counters.
    # NOTE(review): index 2 presumably relies on answers being stored like
    # "['A']" -- confirm against the CSV.
    if predicted_answer == row["answer"][2]:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")


In [None]:
# Summarise the tallies in noise_type_accuracy: one report per noise type,
# followed by the aggregate over all of them.
total_correct = 0
total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Guard against an empty bucket so the division cannot fail.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Aggregate result across every noise type
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Frost-Count

In [None]:
import pandas as pd
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
import torch
from PIL import Image
from collections import defaultdict

# Evaluate the model on the "count" questions using the denoised Frost
# images, tallying correct/total predictions per noise type.

# Load the model and processor
processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf")
model = LlavaNextForConditionalGeneration.from_pretrained(
    "llava-hf/llava-v1.6-vicuna-7b-hf",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)
model.to("cuda:0")

# Load the dataset and keep only the "count" questions up front, instead of
# skipping the other categories one row at a time inside the loop.
data = pd.read_csv("Noisy-Denoised_QuestionPairs[new].csv")
count_rows = data[data["category"] == "count"]

# Per-noise-type tallies: {"correct": int, "total": int}
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for _, row in count_rows.iterrows():
    # Load the corresponding image. The context manager closes the file
    # handle immediately; copy() reads the pixel data into an independent
    # image so it stays usable after the file is closed.
    image_path = f"DENOISED DARE DRUNET (MAIN)/Frost/{row['path']}"
    try:
        with Image.open(image_path) as opened:
            image = opened.copy()
    except FileNotFoundError:
        print(f"Image not found: {image_path}. Skipping...")
        continue

    # Prepare the prompt template
    question_type = row["category"]
    question = row["modified_question"]
    options = {letter: row[letter] for letter in "ABCD"}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Single-turn conversation: the question text followed by the image slot
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {"type": "image"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    # Prepare inputs
    inputs = processor(images=image, text=prompt, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)

    # The returned sequence starts with the prompt tokens; decode only the
    # newly generated part so prompt text can never be mistaken for the
    # prediction (e.g. when the model produces no reply at all).
    prompt_length = inputs["input_ids"].shape[1]
    generated_answer = processor.decode(output[0][prompt_length:], skip_special_tokens=True)

    # Take the last word of the reply and drop surrounding punctuation so
    # replies such as "A." or "(A)" still count as option "A".
    words = generated_answer.split()
    predicted_answer = words[-1].strip("()[]{}.,:;!?'\"") if words else ""

    # Get the noise type
    noise_type = row["modified_question_function_name"]

    # Compare with the actual answer and update counters.
    # NOTE(review): index 2 presumably relies on answers being stored like
    # "['A']" -- confirm against the CSV.
    if predicted_answer == row["answer"][2]:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")


In [None]:
# Summarise the tallies in noise_type_accuracy: one report per noise type,
# followed by the aggregate over all of them.
total_correct = 0
total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Guard against an empty bucket so the division cannot fail.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Aggregate result across every noise type
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Gaussian-noise-Count

In [None]:
import pandas as pd
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
import torch
from PIL import Image
from collections import defaultdict

# Evaluate the model on the "count" questions using the denoised
# Gaussian-noise images, tallying correct/total predictions per noise type.

# Load the model and processor
processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf")
model = LlavaNextForConditionalGeneration.from_pretrained(
    "llava-hf/llava-v1.6-vicuna-7b-hf",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)
model.to("cuda:0")

# Load the dataset and keep only the "count" questions up front, instead of
# skipping the other categories one row at a time inside the loop.
data = pd.read_csv("Noisy-Denoised_QuestionPairs[new].csv")
count_rows = data[data["category"] == "count"]

# Per-noise-type tallies: {"correct": int, "total": int}
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for _, row in count_rows.iterrows():
    # Load the corresponding image. The context manager closes the file
    # handle immediately; copy() reads the pixel data into an independent
    # image so it stays usable after the file is closed.
    image_path = f"DENOISED DARE DRUNET (MAIN)/Gaussian-noise/{row['path']}"
    try:
        with Image.open(image_path) as opened:
            image = opened.copy()
    except FileNotFoundError:
        print(f"Image not found: {image_path}. Skipping...")
        continue

    # Prepare the prompt template
    question_type = row["category"]
    question = row["modified_question"]
    options = {letter: row[letter] for letter in "ABCD"}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Single-turn conversation: the question text followed by the image slot
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {"type": "image"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    # Prepare inputs
    inputs = processor(images=image, text=prompt, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)

    # The returned sequence starts with the prompt tokens; decode only the
    # newly generated part so prompt text can never be mistaken for the
    # prediction (e.g. when the model produces no reply at all).
    prompt_length = inputs["input_ids"].shape[1]
    generated_answer = processor.decode(output[0][prompt_length:], skip_special_tokens=True)

    # Take the last word of the reply and drop surrounding punctuation so
    # replies such as "A." or "(A)" still count as option "A".
    words = generated_answer.split()
    predicted_answer = words[-1].strip("()[]{}.,:;!?'\"") if words else ""

    # Get the noise type
    noise_type = row["modified_question_function_name"]

    # Compare with the actual answer and update counters.
    # NOTE(review): index 2 presumably relies on answers being stored like
    # "['A']" -- confirm against the CSV.
    if predicted_answer == row["answer"][2]:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")


In [None]:
# Summarise the tallies in noise_type_accuracy: one report per noise type,
# followed by the aggregate over all of them.
total_correct = 0
total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Guard against an empty bucket so the division cannot fail.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Aggregate result across every noise type
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Impulse-noise-Count

In [None]:
import pandas as pd
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
import torch
from PIL import Image
from collections import defaultdict

# Evaluate the model on the "count" questions using the denoised
# Impulse-noise images, tallying correct/total predictions per noise type.

# Load the model and processor
processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf")
model = LlavaNextForConditionalGeneration.from_pretrained(
    "llava-hf/llava-v1.6-vicuna-7b-hf",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)
model.to("cuda:0")

# Load the dataset and keep only the "count" questions up front, instead of
# skipping the other categories one row at a time inside the loop.
data = pd.read_csv("Noisy-Denoised_QuestionPairs[new].csv")
count_rows = data[data["category"] == "count"]

# Per-noise-type tallies: {"correct": int, "total": int}
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

for _, row in count_rows.iterrows():
    # Load the corresponding image. The context manager closes the file
    # handle immediately; copy() reads the pixel data into an independent
    # image so it stays usable after the file is closed.
    image_path = f"DENOISED DARE DRUNET (MAIN)/Impulse-noise/{row['path']}"
    try:
        with Image.open(image_path) as opened:
            image = opened.copy()
    except FileNotFoundError:
        print(f"Image not found: {image_path}. Skipping...")
        continue

    # Prepare the prompt template
    question_type = row["category"]
    question = row["modified_question"]
    options = {letter: row[letter] for letter in "ABCD"}

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Single-turn conversation: the question text followed by the image slot
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {"type": "image"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    # Prepare inputs
    inputs = processor(images=image, text=prompt, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)

    # The returned sequence starts with the prompt tokens; decode only the
    # newly generated part so prompt text can never be mistaken for the
    # prediction (e.g. when the model produces no reply at all).
    prompt_length = inputs["input_ids"].shape[1]
    generated_answer = processor.decode(output[0][prompt_length:], skip_special_tokens=True)

    # Take the last word of the reply and drop surrounding punctuation so
    # replies such as "A." or "(A)" still count as option "A".
    words = generated_answer.split()
    predicted_answer = words[-1].strip("()[]{}.,:;!?'\"") if words else ""

    # Get the noise type
    noise_type = row["modified_question_function_name"]

    # Compare with the actual answer and update counters.
    # NOTE(review): index 2 presumably relies on answers being stored like
    # "['A']" -- confirm against the CSV.
    if predicted_answer == row["answer"][2]:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")


In [None]:
# Summarise the tallies in noise_type_accuracy: one report per noise type,
# followed by the aggregate over all of them.
total_correct = 0
total_predictions = 0

for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # Guard against an empty bucket so the division cannot fail.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Aggregate result across every noise type
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# JPEG-compression-Count

In [None]:
import pandas as pd
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
import torch
from PIL import Image
from collections import defaultdict

# Evaluate the model on the "count" questions, using the images stored under
# "DENOISED DARE DRUNET (MAIN)/JPEG-compression", and tally correct/total
# predictions per noise type in noise_type_accuracy.

# Load the model and processor
processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf")
model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf", torch_dtype=torch.float16, low_cpu_mem_usage=True)
model.to("cuda:0")

# Load the dataset
data = pd.read_csv("Noisy-Denoised_QuestionPairs[new].csv")

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})


def _extract_option_letter(generated_text):
    """Return the option letter ("A"-"D") contained in the model's reply.

    The last word of the reply is checked first and the first word second,
    each with surrounding punctuation removed, so replies such as "A",
    "A.", "(A)" or "A. 3" all yield "A". If neither word is an option
    letter, the raw last word is returned unchanged; an empty reply yields
    an empty string.
    """
    words = generated_text.split()
    if not words:
        return ""
    for candidate in (words[-1], words[0]):
        letter = candidate.strip(".,:;!?()[]'\"")
        if letter in ("A", "B", "C", "D"):
            return letter
    return words[-1]


# Iterate over rows where category is "count"
for index, row in data.iterrows():
    if row["category"] != "count":
        continue  # Skip non-count categories

    # Load the corresponding image
    image_path = f"DENOISED DARE DRUNET (MAIN)/JPEG-compression/{row['path']}"
    try:
        image = Image.open(image_path)
    except FileNotFoundError:
        print(f"Image not found: {image_path}. Skipping...")
        continue

    # Prepare the prompt template
    question_type = row["category"]
    question = row["modified_question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Define conversation for the processor
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {"type": "image"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    # Prepare inputs
    inputs = processor(images=image, text=prompt, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)

    # The generated sequence starts with the prompt tokens; decode only what
    # follows them so prompt text can never be mistaken for the answer.
    prompt_length = inputs["input_ids"].shape[1]
    generated_text = processor.decode(output[0][prompt_length:], skip_special_tokens=True)

    # Extract only the final answer (the option letter), tolerating
    # punctuation or trailing option text around it
    predicted_answer = _extract_option_letter(generated_text)

    # Get the noise type
    noise_type = row["modified_question_function_name"]

    # Compare with the actual answer and update counters
    # NOTE(review): this uses the third character of the stored answer, which
    # presumably looks like "['A']" -- confirm against the CSV.
    if predicted_answer == row["answer"][2]:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")


In [None]:
# Summarise the tallies collected in noise_type_accuracy: first the accuracy
# of each noise type, then the accuracy over all noise types combined.
total_correct = total_predictions = 0

for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket reports 0% instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Aggregate accuracy, with the same zero-total guard as above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Pixelate-Count

In [None]:
import pandas as pd
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
import torch
from PIL import Image
from collections import defaultdict

# Evaluate the model on the "count" questions, using the images stored under
# "DENOISED DARE DRUNET (MAIN)/Pixelate", and tally correct/total
# predictions per noise type in noise_type_accuracy.

# Load the model and processor
processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf")
model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf", torch_dtype=torch.float16, low_cpu_mem_usage=True)
model.to("cuda:0")

# Load the dataset
data = pd.read_csv("Noisy-Denoised_QuestionPairs[new].csv")

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})


def _extract_option_letter(generated_text):
    """Return the option letter ("A"-"D") contained in the model's reply.

    The last word of the reply is checked first and the first word second,
    each with surrounding punctuation removed, so replies such as "A",
    "A.", "(A)" or "A. 3" all yield "A". If neither word is an option
    letter, the raw last word is returned unchanged; an empty reply yields
    an empty string.
    """
    words = generated_text.split()
    if not words:
        return ""
    for candidate in (words[-1], words[0]):
        letter = candidate.strip(".,:;!?()[]'\"")
        if letter in ("A", "B", "C", "D"):
            return letter
    return words[-1]


# Iterate over rows where category is "count"
for index, row in data.iterrows():
    if row["category"] != "count":
        continue  # Skip non-count categories

    # Load the corresponding image
    image_path = f"DENOISED DARE DRUNET (MAIN)/Pixelate/{row['path']}"
    try:
        image = Image.open(image_path)
    except FileNotFoundError:
        print(f"Image not found: {image_path}. Skipping...")
        continue

    # Prepare the prompt template
    question_type = row["category"]
    question = row["modified_question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Define conversation for the processor
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {"type": "image"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    # Prepare inputs
    inputs = processor(images=image, text=prompt, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)

    # The generated sequence starts with the prompt tokens; decode only what
    # follows them so prompt text can never be mistaken for the answer.
    prompt_length = inputs["input_ids"].shape[1]
    generated_text = processor.decode(output[0][prompt_length:], skip_special_tokens=True)

    # Extract only the final answer (the option letter), tolerating
    # punctuation or trailing option text around it
    predicted_answer = _extract_option_letter(generated_text)

    # Get the noise type
    noise_type = row["modified_question_function_name"]

    # Compare with the actual answer and update counters
    # NOTE(review): this uses the third character of the stored answer, which
    # presumably looks like "['A']" -- confirm against the CSV.
    if predicted_answer == row["answer"][2]:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")


In [None]:
# Summarise the tallies collected in noise_type_accuracy: first the accuracy
# of each noise type, then the accuracy over all noise types combined.
total_correct = total_predictions = 0

for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket reports 0% instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Aggregate accuracy, with the same zero-total guard as above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")


# Rain-Count

In [None]:
import pandas as pd
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
import torch
from PIL import Image
from collections import defaultdict

# Evaluate the model on the "count" questions, using the images stored under
# "DENOISED DARE DRUNET (MAIN)/Rain", and tally correct/total
# predictions per noise type in noise_type_accuracy.

# Load the model and processor
processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf")
model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf", torch_dtype=torch.float16, low_cpu_mem_usage=True)
model.to("cuda:0")

# Load the dataset
data = pd.read_csv("Noisy-Denoised_QuestionPairs[new].csv")

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})


def _extract_option_letter(generated_text):
    """Return the option letter ("A"-"D") contained in the model's reply.

    The last word of the reply is checked first and the first word second,
    each with surrounding punctuation removed, so replies such as "A",
    "A.", "(A)" or "A. 3" all yield "A". If neither word is an option
    letter, the raw last word is returned unchanged; an empty reply yields
    an empty string.
    """
    words = generated_text.split()
    if not words:
        return ""
    for candidate in (words[-1], words[0]):
        letter = candidate.strip(".,:;!?()[]'\"")
        if letter in ("A", "B", "C", "D"):
            return letter
    return words[-1]


# Iterate over rows where category is "count"
for index, row in data.iterrows():
    if row["category"] != "count":
        continue  # Skip non-count categories

    # Load the corresponding image
    image_path = f"DENOISED DARE DRUNET (MAIN)/Rain/{row['path']}"
    try:
        image = Image.open(image_path)
    except FileNotFoundError:
        print(f"Image not found: {image_path}. Skipping...")
        continue

    # Prepare the prompt template
    question_type = row["category"]
    question = row["modified_question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Define conversation for the processor
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {"type": "image"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    # Prepare inputs
    inputs = processor(images=image, text=prompt, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)

    # The generated sequence starts with the prompt tokens; decode only what
    # follows them so prompt text can never be mistaken for the answer.
    prompt_length = inputs["input_ids"].shape[1]
    generated_text = processor.decode(output[0][prompt_length:], skip_special_tokens=True)

    # Extract only the final answer (the option letter), tolerating
    # punctuation or trailing option text around it
    predicted_answer = _extract_option_letter(generated_text)

    # Get the noise type
    noise_type = row["modified_question_function_name"]

    # Compare with the actual answer and update counters
    # NOTE(review): this uses the third character of the stored answer, which
    # presumably looks like "['A']" -- confirm against the CSV.
    if predicted_answer == row["answer"][2]:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")


In [None]:
# Summarise the tallies collected in noise_type_accuracy: first the accuracy
# of each noise type, then the accuracy over all noise types combined.
total_correct = total_predictions = 0

for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket reports 0% instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Aggregate accuracy, with the same zero-total guard as above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Saturation-Count

In [None]:
import pandas as pd
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
import torch
from PIL import Image
from collections import defaultdict

# Evaluate the model on the "count" questions, using the images stored under
# "DENOISED DARE DRUNET (MAIN)/Saturation", and tally correct/total
# predictions per noise type in noise_type_accuracy.

# Load the model and processor
processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf")
model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf", torch_dtype=torch.float16, low_cpu_mem_usage=True)
model.to("cuda:0")

# Load the dataset
data = pd.read_csv("Noisy-Denoised_QuestionPairs[new].csv")

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})


def _extract_option_letter(generated_text):
    """Return the option letter ("A"-"D") contained in the model's reply.

    The last word of the reply is checked first and the first word second,
    each with surrounding punctuation removed, so replies such as "A",
    "A.", "(A)" or "A. 3" all yield "A". If neither word is an option
    letter, the raw last word is returned unchanged; an empty reply yields
    an empty string.
    """
    words = generated_text.split()
    if not words:
        return ""
    for candidate in (words[-1], words[0]):
        letter = candidate.strip(".,:;!?()[]'\"")
        if letter in ("A", "B", "C", "D"):
            return letter
    return words[-1]


# Iterate over rows where category is "count"
for index, row in data.iterrows():
    if row["category"] != "count":
        continue  # Skip non-count categories

    # Load the corresponding image
    image_path = f"DENOISED DARE DRUNET (MAIN)/Saturation/{row['path']}"
    try:
        image = Image.open(image_path)
    except FileNotFoundError:
        print(f"Image not found: {image_path}. Skipping...")
        continue

    # Prepare the prompt template
    question_type = row["category"]
    question = row["modified_question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Define conversation for the processor
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {"type": "image"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    # Prepare inputs
    inputs = processor(images=image, text=prompt, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)

    # The generated sequence starts with the prompt tokens; decode only what
    # follows them so prompt text can never be mistaken for the answer.
    prompt_length = inputs["input_ids"].shape[1]
    generated_text = processor.decode(output[0][prompt_length:], skip_special_tokens=True)

    # Extract only the final answer (the option letter), tolerating
    # punctuation or trailing option text around it
    predicted_answer = _extract_option_letter(generated_text)

    # Get the noise type
    noise_type = row["modified_question_function_name"]

    # Compare with the actual answer and update counters
    # NOTE(review): this uses the third character of the stored answer, which
    # presumably looks like "['A']" -- confirm against the CSV.
    if predicted_answer == row["answer"][2]:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")


In [None]:
# Summarise the tallies collected in noise_type_accuracy: first the accuracy
# of each noise type, then the accuracy over all noise types combined.
total_correct = total_predictions = 0

for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket reports 0% instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Aggregate accuracy, with the same zero-total guard as above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Shot-noise-Count

In [None]:
import pandas as pd
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
import torch
from PIL import Image
from collections import defaultdict

# Evaluate the model on the "count" questions, using the images stored under
# "DENOISED DARE DRUNET (MAIN)/Shot-noise", and tally correct/total
# predictions per noise type in noise_type_accuracy.

# Load the model and processor
processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf")
model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf", torch_dtype=torch.float16, low_cpu_mem_usage=True)
model.to("cuda:0")

# Load the dataset
data = pd.read_csv("Noisy-Denoised_QuestionPairs[new].csv")

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})


def _extract_option_letter(generated_text):
    """Return the option letter ("A"-"D") contained in the model's reply.

    The last word of the reply is checked first and the first word second,
    each with surrounding punctuation removed, so replies such as "A",
    "A.", "(A)" or "A. 3" all yield "A". If neither word is an option
    letter, the raw last word is returned unchanged; an empty reply yields
    an empty string.
    """
    words = generated_text.split()
    if not words:
        return ""
    for candidate in (words[-1], words[0]):
        letter = candidate.strip(".,:;!?()[]'\"")
        if letter in ("A", "B", "C", "D"):
            return letter
    return words[-1]


# Iterate over rows where category is "count"
for index, row in data.iterrows():
    if row["category"] != "count":
        continue  # Skip non-count categories

    # Load the corresponding image
    image_path = f"DENOISED DARE DRUNET (MAIN)/Shot-noise/{row['path']}"
    try:
        image = Image.open(image_path)
    except FileNotFoundError:
        print(f"Image not found: {image_path}. Skipping...")
        continue

    # Prepare the prompt template
    question_type = row["category"]
    question = row["modified_question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Define conversation for the processor
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {"type": "image"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    # Prepare inputs
    inputs = processor(images=image, text=prompt, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)

    # The generated sequence starts with the prompt tokens; decode only what
    # follows them so prompt text can never be mistaken for the answer.
    prompt_length = inputs["input_ids"].shape[1]
    generated_text = processor.decode(output[0][prompt_length:], skip_special_tokens=True)

    # Extract only the final answer (the option letter), tolerating
    # punctuation or trailing option text around it
    predicted_answer = _extract_option_letter(generated_text)

    # Get the noise type
    noise_type = row["modified_question_function_name"]

    # Compare with the actual answer and update counters
    # NOTE(review): this uses the third character of the stored answer, which
    # presumably looks like "['A']" -- confirm against the CSV.
    if predicted_answer == row["answer"][2]:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")


In [None]:
# Summarise the tallies collected in noise_type_accuracy: first the accuracy
# of each noise type, then the accuracy over all noise types combined.
total_correct = total_predictions = 0

for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket reports 0% instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Aggregate accuracy, with the same zero-total guard as above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Snow-Count

In [None]:
import pandas as pd
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
import torch
from PIL import Image
from collections import defaultdict

# Evaluate the model on the "count" questions, using the images stored under
# "DENOISED DARE DRUNET (MAIN)/Snow", and tally correct/total
# predictions per noise type in noise_type_accuracy.

# Load the model and processor
processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf")
model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf", torch_dtype=torch.float16, low_cpu_mem_usage=True)
model.to("cuda:0")

# Load the dataset
data = pd.read_csv("Noisy-Denoised_QuestionPairs[new].csv")

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})


def _extract_option_letter(generated_text):
    """Return the option letter ("A"-"D") contained in the model's reply.

    The last word of the reply is checked first and the first word second,
    each with surrounding punctuation removed, so replies such as "A",
    "A.", "(A)" or "A. 3" all yield "A". If neither word is an option
    letter, the raw last word is returned unchanged; an empty reply yields
    an empty string.
    """
    words = generated_text.split()
    if not words:
        return ""
    for candidate in (words[-1], words[0]):
        letter = candidate.strip(".,:;!?()[]'\"")
        if letter in ("A", "B", "C", "D"):
            return letter
    return words[-1]


# Iterate over rows where category is "count"
for index, row in data.iterrows():
    if row["category"] != "count":
        continue  # Skip non-count categories

    # Load the corresponding image
    image_path = f"DENOISED DARE DRUNET (MAIN)/Snow/{row['path']}"
    try:
        image = Image.open(image_path)
    except FileNotFoundError:
        print(f"Image not found: {image_path}. Skipping...")
        continue

    # Prepare the prompt template
    question_type = row["category"]
    question = row["modified_question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Define conversation for the processor
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {"type": "image"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    # Prepare inputs
    inputs = processor(images=image, text=prompt, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)

    # The generated sequence starts with the prompt tokens; decode only what
    # follows them so prompt text can never be mistaken for the answer.
    prompt_length = inputs["input_ids"].shape[1]
    generated_text = processor.decode(output[0][prompt_length:], skip_special_tokens=True)

    # Extract only the final answer (the option letter), tolerating
    # punctuation or trailing option text around it
    predicted_answer = _extract_option_letter(generated_text)

    # Get the noise type
    noise_type = row["modified_question_function_name"]

    # Compare with the actual answer and update counters
    # NOTE(review): this uses the third character of the stored answer, which
    # presumably looks like "['A']" -- confirm against the CSV.
    if predicted_answer == row["answer"][2]:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")


In [None]:
# Summarise the tallies collected in noise_type_accuracy: first the accuracy
# of each noise type, then the accuracy over all noise types combined.
total_correct = total_predictions = 0

for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket reports 0% instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Aggregate accuracy, with the same zero-total guard as above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Spatter-Count

In [None]:
import pandas as pd
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
import torch
from PIL import Image
from collections import defaultdict

# Evaluate the model on the "count" questions, using the images stored under
# "DENOISED DARE DRUNET (MAIN)/Spatter", and tally correct/total
# predictions per noise type in noise_type_accuracy.

# Load the model and processor
processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf")
model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf", torch_dtype=torch.float16, low_cpu_mem_usage=True)
model.to("cuda:0")

# Load the dataset
data = pd.read_csv("Noisy-Denoised_QuestionPairs[new].csv")

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})


def _extract_option_letter(generated_text):
    """Return the option letter ("A"-"D") contained in the model's reply.

    The last word of the reply is checked first and the first word second,
    each with surrounding punctuation removed, so replies such as "A",
    "A.", "(A)" or "A. 3" all yield "A". If neither word is an option
    letter, the raw last word is returned unchanged; an empty reply yields
    an empty string.
    """
    words = generated_text.split()
    if not words:
        return ""
    for candidate in (words[-1], words[0]):
        letter = candidate.strip(".,:;!?()[]'\"")
        if letter in ("A", "B", "C", "D"):
            return letter
    return words[-1]


# Iterate over rows where category is "count"
for index, row in data.iterrows():
    if row["category"] != "count":
        continue  # Skip non-count categories

    # Load the corresponding image
    image_path = f"DENOISED DARE DRUNET (MAIN)/Spatter/{row['path']}"
    try:
        image = Image.open(image_path)
    except FileNotFoundError:
        print(f"Image not found: {image_path}. Skipping...")
        continue

    # Prepare the prompt template
    question_type = row["category"]
    question = row["modified_question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Define conversation for the processor
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {"type": "image"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    # Prepare inputs
    inputs = processor(images=image, text=prompt, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)

    # The generated sequence starts with the prompt tokens; decode only what
    # follows them so prompt text can never be mistaken for the answer.
    prompt_length = inputs["input_ids"].shape[1]
    generated_text = processor.decode(output[0][prompt_length:], skip_special_tokens=True)

    # Extract only the final answer (the option letter), tolerating
    # punctuation or trailing option text around it
    predicted_answer = _extract_option_letter(generated_text)

    # Get the noise type
    noise_type = row["modified_question_function_name"]

    # Compare with the actual answer and update counters
    # NOTE(review): this uses the third character of the stored answer, which
    # presumably looks like "['A']" -- confirm against the CSV.
    if predicted_answer == row["answer"][2]:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")


In [None]:
# Summarise the tallies collected in noise_type_accuracy: first the accuracy
# of each noise type, then the accuracy over all noise types combined.
total_correct = total_predictions = 0

for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket reports 0% instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Aggregate accuracy, with the same zero-total guard as above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Speckle-Count

In [None]:
import pandas as pd
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
import torch
from PIL import Image
from collections import defaultdict

# Evaluate the model on the "count" questions, using the images stored under
# "DENOISED DARE DRUNET (MAIN)/Speckle-noise", and tally correct/total
# predictions per noise type in noise_type_accuracy.

# Load the model and processor
processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf")
model = LlavaNextForConditionalGeneration.from_pretrained("llava-hf/llava-v1.6-vicuna-7b-hf", torch_dtype=torch.float16, low_cpu_mem_usage=True)
model.to("cuda:0")

# Load the dataset
data = pd.read_csv("Noisy-Denoised_QuestionPairs[new].csv")

# Initialize counters for accuracy per noise type
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})


def _extract_option_letter(generated_text):
    """Return the option letter ("A"-"D") contained in the model's reply.

    The last word of the reply is checked first and the first word second,
    each with surrounding punctuation removed, so replies such as "A",
    "A.", "(A)" or "A. 3" all yield "A". If neither word is an option
    letter, the raw last word is returned unchanged; an empty reply yields
    an empty string.
    """
    words = generated_text.split()
    if not words:
        return ""
    for candidate in (words[-1], words[0]):
        letter = candidate.strip(".,:;!?()[]'\"")
        if letter in ("A", "B", "C", "D"):
            return letter
    return words[-1]


# Iterate over rows where category is "count"
for index, row in data.iterrows():
    if row["category"] != "count":
        continue  # Skip non-count categories

    # Load the corresponding image
    image_path = f"DENOISED DARE DRUNET (MAIN)/Speckle-noise/{row['path']}"
    try:
        image = Image.open(image_path)
    except FileNotFoundError:
        print(f"Image not found: {image_path}. Skipping...")
        continue

    # Prepare the prompt template
    question_type = row["category"]
    question = row["modified_question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"]
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Define conversation for the processor
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {"type": "image"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    # Prepare inputs
    inputs = processor(images=image, text=prompt, return_tensors="pt").to("cuda:0")

    # Generate model output
    output = model.generate(**inputs, max_new_tokens=1000)

    # The generated sequence starts with the prompt tokens; decode only what
    # follows them so prompt text can never be mistaken for the answer.
    prompt_length = inputs["input_ids"].shape[1]
    generated_text = processor.decode(output[0][prompt_length:], skip_special_tokens=True)

    # Extract only the final answer (the option letter), tolerating
    # punctuation or trailing option text around it
    predicted_answer = _extract_option_letter(generated_text)

    # Get the noise type
    noise_type = row["modified_question_function_name"]

    # Compare with the actual answer and update counters
    # NOTE(review): this uses the third character of the stored answer, which
    # presumably looks like "['A']" -- confirm against the CSV.
    if predicted_answer == row["answer"][2]:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")


In [None]:
# Summarise the tallies collected in noise_type_accuracy: first the accuracy
# of each noise type, then the accuracy over all noise types combined.
total_correct = total_predictions = 0

for noise_type, tally in noise_type_accuracy.items():
    correct, total = tally["correct"], tally["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # An empty bucket reports 0% instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

    # Fold this noise type into the running totals.
    total_correct += correct
    total_predictions += total

# Aggregate accuracy, with the same zero-total guard as above.
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Zoom-blur-Count

In [None]:
import pandas as pd
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
import torch
from PIL import Image
from collections import defaultdict

# Load the processor and the half-precision model, then move the model to the first GPU.
model_id = "llava-hf/llava-v1.6-vicuna-7b-hf"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(
    model_id, torch_dtype=torch.float16, low_cpu_mem_usage=True
)
model.to("cuda:0")

# Load the question/answer table.
data = pd.read_csv("Noisy-Denoised_QuestionPairs[new].csv")

# Per-noise-type tallies of correct and total predictions.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Characters the model may wrap around the option letter (the prompt's own
# example is quoted: 'A'). They are stripped before scoring so that replies
# such as "A." or "'A'" are compared as "A".
answer_wrappers = "'\"`.,:;()[]"

# Only rows whose category is "count" are evaluated in this cell.
count_rows = data[data["category"] == "count"]

for _, row in count_rows.iterrows():
    # Load the corresponding image; rows whose image file is missing are skipped
    # and therefore not counted in the totals.
    image_path = f"DENOISED DARE DRUNET (MAIN)/Zoom-Blur/{row['path']}"
    try:
        image = Image.open(image_path)
    except FileNotFoundError:
        print(f"Image not found: {image_path}. Skipping...")
        continue

    # Prepare the prompt template
    question_type = row["category"]
    question = row["modified_question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"],
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Single-turn conversation: the text prompt followed by the image placeholder.
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {"type": "image"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    # Put the inputs on the same device as the model.
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(model.device)

    # NOTE(review): the prompt asks for a single letter, so 1000 new tokens is far
    # more than needed; left unchanged so long replies are scored as before.
    output = model.generate(**inputs, max_new_tokens=1000)

    # The generated sequence starts with the prompt tokens. Decode only what the
    # model appended, so an empty reply is not scored against prompt text.
    prompt_length = inputs["input_ids"].shape[1]
    reply_text = processor.decode(output[0][prompt_length:], skip_special_tokens=True)

    # The prediction is the last word of the reply without surrounding
    # quotes/punctuation; an empty reply yields "" and is counted as wrong.
    reply_words = reply_text.split()
    predicted_answer = reply_words[-1].strip(answer_wrappers) if reply_words else ""

    # Get the noise type
    noise_type = row["modified_question_function_name"]

    # NOTE(review): the ground-truth letter is read from position 2 of the answer
    # string - presumably values look like "['A']"; confirm against the CSV.
    if predicted_answer == row["answer"][2]:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")


In [None]:
# Report the tallies held in `noise_type_accuracy`: one pair of lines per noise
# type, followed by the pooled result over every noise type.
for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # A noise type with no scored questions reports 0 instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

# Pool the per-type counters into overall totals.
total_correct = sum(entry["correct"] for entry in noise_type_accuracy.values())
total_predictions = sum(entry["total"] for entry in noise_type_accuracy.values())
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")

# Same zero guard for the pooled accuracy.
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Uncorrupted-Count


In [None]:
import pandas as pd
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
import torch
from PIL import Image
from collections import defaultdict

# Load the processor and the half-precision model, then move the model to the first GPU.
model_id = "llava-hf/llava-v1.6-vicuna-7b-hf"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(
    model_id, torch_dtype=torch.float16, low_cpu_mem_usage=True
)
model.to("cuda:0")

# Load the question/answer table.
data = pd.read_csv("Noisy-Denoised_QuestionPairs[new].csv")

# Per-noise-type tallies of correct and total predictions.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Characters the model may wrap around the option letter (the prompt's own
# example is quoted: 'A'). They are stripped before scoring so that replies
# such as "A." or "'A'" are compared as "A".
answer_wrappers = "'\"`.,:;()[]"

# Only rows whose category is "count" are evaluated in this cell.
count_rows = data[data["category"] == "count"]

for _, row in count_rows.iterrows():
    # Load the corresponding image; rows whose image file is missing are skipped
    # and therefore not counted in the totals.
    image_path = f"DENOISED DARE DRUNET (MAIN)/Uncorrupted/{row['path']}"
    try:
        image = Image.open(image_path)
    except FileNotFoundError:
        print(f"Image not found: {image_path}. Skipping...")
        continue

    # Prepare the prompt template
    question_type = row["category"]
    question = row["modified_question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"],
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Single-turn conversation: the text prompt followed by the image placeholder.
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {"type": "image"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    # Put the inputs on the same device as the model.
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(model.device)

    # NOTE(review): the prompt asks for a single letter, so 1000 new tokens is far
    # more than needed; left unchanged so long replies are scored as before.
    output = model.generate(**inputs, max_new_tokens=1000)

    # The generated sequence starts with the prompt tokens. Decode only what the
    # model appended, so an empty reply is not scored against prompt text.
    prompt_length = inputs["input_ids"].shape[1]
    reply_text = processor.decode(output[0][prompt_length:], skip_special_tokens=True)

    # The prediction is the last word of the reply without surrounding
    # quotes/punctuation; an empty reply yields "" and is counted as wrong.
    reply_words = reply_text.split()
    predicted_answer = reply_words[-1].strip(answer_wrappers) if reply_words else ""

    # Get the noise type
    noise_type = row["modified_question_function_name"]

    # NOTE(review): the ground-truth letter is read from position 2 of the answer
    # string - presumably values look like "['A']"; confirm against the CSV.
    if predicted_answer == row["answer"][2]:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")


In [None]:
# Report the tallies held in `noise_type_accuracy`: one pair of lines per noise
# type, followed by the pooled result over every noise type.
for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # A noise type with no scored questions reports 0 instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

# Pool the per-type counters into overall totals.
total_correct = sum(entry["correct"] for entry in noise_type_accuracy.values())
total_predictions = sum(entry["total"] for entry in noise_type_accuracy.values())
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")

# Same zero guard for the pooled accuracy.
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Motion-blur-Count

In [None]:
import pandas as pd
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration
import torch
from PIL import Image
from collections import defaultdict

# Load the processor and the half-precision model, then move the model to the first GPU.
model_id = "llava-hf/llava-v1.6-vicuna-7b-hf"
processor = LlavaNextProcessor.from_pretrained(model_id)
model = LlavaNextForConditionalGeneration.from_pretrained(
    model_id, torch_dtype=torch.float16, low_cpu_mem_usage=True
)
model.to("cuda:0")

# Load the question/answer table.
data = pd.read_csv("Noisy-Denoised_QuestionPairs[new].csv")

# Per-noise-type tallies of correct and total predictions.
noise_type_accuracy = defaultdict(lambda: {"correct": 0, "total": 0})

# Characters the model may wrap around the option letter (the prompt's own
# example is quoted: 'A'). They are stripped before scoring so that replies
# such as "A." or "'A'" are compared as "A".
answer_wrappers = "'\"`.,:;()[]"

# Only rows whose category is "count" are evaluated in this cell.
count_rows = data[data["category"] == "count"]

for _, row in count_rows.iterrows():
    # Load the corresponding image; rows whose image file is missing are skipped
    # and therefore not counted in the totals.
    image_path = f"DENOISED DARE DRUNET (MAIN)/Motion-blur/{row['path']}"
    try:
        image = Image.open(image_path)
    except FileNotFoundError:
        print(f"Image not found: {image_path}. Skipping...")
        continue

    # Prepare the prompt template
    question_type = row["category"]
    question = row["modified_question"]
    options = {
        "A": row["A"],
        "B": row["B"],
        "C": row["C"],
        "D": row["D"],
    }

    prompt_text = (
        f"The following are multiple choice questions about {question_type}. "
        "You should directly answer the question by choosing the correct option given the image and the question. "
        "Give only the letter indicating the correct answer e.g. 'A'\n"
        f"Question: {question}\n"
        "Options:\n"
        f"A. {options['A']}\n"
        f"B. {options['B']}\n"
        f"C. {options['C']}\n"
        f"D. {options['D']}\n"
        "Answer:"
    )

    # Single-turn conversation: the text prompt followed by the image placeholder.
    conversation = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {"type": "image"},
            ],
        },
    ]
    prompt = processor.apply_chat_template(conversation, add_generation_prompt=True)

    # Put the inputs on the same device as the model.
    inputs = processor(images=image, text=prompt, return_tensors="pt").to(model.device)

    # NOTE(review): the prompt asks for a single letter, so 1000 new tokens is far
    # more than needed; left unchanged so long replies are scored as before.
    output = model.generate(**inputs, max_new_tokens=1000)

    # The generated sequence starts with the prompt tokens. Decode only what the
    # model appended, so an empty reply is not scored against prompt text.
    prompt_length = inputs["input_ids"].shape[1]
    reply_text = processor.decode(output[0][prompt_length:], skip_special_tokens=True)

    # The prediction is the last word of the reply without surrounding
    # quotes/punctuation; an empty reply yields "" and is counted as wrong.
    reply_words = reply_text.split()
    predicted_answer = reply_words[-1].strip(answer_wrappers) if reply_words else ""

    # Get the noise type
    noise_type = row["modified_question_function_name"]

    # NOTE(review): the ground-truth letter is read from position 2 of the answer
    # string - presumably values look like "['A']"; confirm against the CSV.
    if predicted_answer == row["answer"][2]:
        noise_type_accuracy[noise_type]["correct"] += 1
    noise_type_accuracy[noise_type]["total"] += 1

    # Print the prediction
    print(f"Question ID {row['id']} - Predicted Answer: {predicted_answer} - Actual Answer: {row['answer']} - Noise Type: {noise_type}")


In [None]:
# Report the tallies held in `noise_type_accuracy`: one pair of lines per noise
# type, followed by the pooled result over every noise type.
for noise_type, counts in noise_type_accuracy.items():
    correct, total = counts["correct"], counts["total"]
    print(f"Noise Type : {noise_type}, Correct Predictions: {correct}, Total: {total}")

    # A noise type with no scored questions reports 0 instead of dividing by zero.
    if total > 0:
        accuracy = (correct / total) * 100
    else:
        accuracy = 0
    print(f"Accuracy for noise type '{noise_type}': {accuracy:.2f}%")

# Pool the per-type counters into overall totals.
total_correct = sum(entry["correct"] for entry in noise_type_accuracy.values())
total_predictions = sum(entry["total"] for entry in noise_type_accuracy.values())
print(f"Total Correct: {total_correct}, Total Predictions: {total_predictions}")

# Same zero guard for the pooled accuracy.
if total_predictions > 0:
    overall_accuracy = (total_correct / total_predictions) * 100
else:
    overall_accuracy = 0
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")