In [None]:
import json 

# JSON file holding one prediction record per image
file_path = 'output/84_task1.json'

# Read all prediction records into memory
with open(file_path, 'r') as file:
    parsed_data = json.load(file)

# Collect the 'index' of every record whose 'prediction' is exactly "fake"
fake_indices = [
    record['index']
    for record in parsed_data
    if record['prediction'] == "fake"
]


In [None]:
# Install the third-party packages imported by the following cells.
# NOTE(review): transformers and torch are not version-pinned here, so a fresh
# environment may resolve to different releases on different days.
!pip install --upgrade transformers
!pip install torch
!pip install 'accelerate>=0.26.0'

In [None]:
import requests
import torch
from PIL import Image
from transformers import MllamaForConditionalGeneration, AutoProcessor

import os

model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"

# Do not paste an access token into the notebook (it would be saved and shared
# with the file). The token is read from the HF_TOKEN environment variable;
# when that is unset or empty, `token` is None, which lets the Hugging Face
# libraries fall back to the credentials stored by `huggingface-cli login`.
token = os.environ.get("HF_TOKEN") or None

# Load the vision-language model in bfloat16.
model = MllamaForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",  # This automatically uses multiple GPUs if available, can change to None if you want to specify a single GPU.
    token=token  # Hugging Face authentication (None -> stored login)
)

# Load the matching processor (used below for chat templating, input
# preparation and decoding) with the same credentials.
processor = AutoProcessor.from_pretrained(
    model_id,
    token=token  # Hugging Face authentication (None -> stored login)
)



In [None]:
import torch
# Prefer the GPU whenever torch reports CUDA as available; otherwise use the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
# import torch
# import torch.nn as nn
# import torch.optim as optim
# from torchvision import models, transforms
# from torch.utils.data import DataLoader, Dataset
# from sklearn.model_selection import train_test_split
# from tqdm import tqdm

# # Step 1: Load the pre-trained DenseNet-121 model
# classifier = models.densenet121(pretrained=True)

# # Step 2: Replace the final fully connected layer with a 10-class output layer
# classifier.classifier = nn.Sequential(
#     nn.Linear(classifier.classifier.in_features, 10),   # Change the output to 10 unit
# )

# classifier.load_state_dict(torch.load('../classify-10/densenet121_classify10_weights.pth', map_location = device))

In [None]:
import json

# Accumulator for the per-image explanation records
json_data = []


def add_to_json(json_data, index, artifact_name, explanation):
    """Record `explanation` for `artifact_name` under the image `index`.

    The first record in `json_data` whose 'index' equals `index` gets the
    artifact added to (or overwritten in) its 'explanation' dict. When no such
    record exists, a new one is appended. The list is modified in place and
    also returned.
    """
    existing = next((record for record in json_data if record['index'] == index), None)

    if existing is None:
        # First artifact seen for this image: start a fresh record
        json_data.append({
            "index": index,
            "explanation": {artifact_name: explanation}
        })
    else:
        existing['explanation'][artifact_name] = explanation

    return json_data


In [None]:
import random

import numpy as np

unnatural_colors = ["blue", "magenta", "pink", "purple", "cyan"]


def _parse_answer(text):
    """Reduce decoded model output to a short, lower-case answer string.

    Only the last line of `text` is considered. It is cut at the first "." or
    "<" that is actually present, then mapped to "yes" / "no" when one of those
    appears as a whole word; otherwise the remaining letters and inner spaces
    are returned.
    """
    lines = text.strip().splitlines()
    last_line = lines[-1] if lines else ""

    # str.find returns -1 for "not found", so missing delimiters must be
    # dropped before taking the minimum (otherwise the slice ends at -1, which
    # chops the last character and lets special-token text leak through).
    cut_points = [pos for pos in (last_line.find("."), last_line.find("<")) if pos != -1]
    if cut_points:
        last_line = last_line[:min(cut_points)]
    line = last_line.lower()

    # Compare whole words: a substring test reads "eyes" as "yes" and
    # "nose" / "none" as "no".
    words = ''.join(char if char.isalpha() else ' ' for char in line).split()
    if "yes" in words:
        return "yes"
    if "no" in words:
        return "no"
    return ''.join(char for char in line if char.isalpha() or char.isspace()).strip()


def extract_ans(prompt, image, obj, index):
    """Ask the model a chain of questions about `image` and record the finding.

    Args:
        prompt: list of prompt strings. They are asked in order and the chain
            stops at the first answer that is not "yes".
        image: image handed to the processor together with each prompt.
        obj: object label inserted into the explanation text.
        index: image index used for the JSON record. -1 means "only return the
            answer"; nothing is recorded in that case.

    Returns:
        The parsed answer to the last question asked ("" for an empty chain).

    Side effects:
        Prints each prompt and answer. Unless the answer is "no"/"none" or
        `index` is -1, adds an explanation to the global `json_data`.
    """
    global json_data

    if not prompt:
        return ""

    ans = ""
    for sub_prompt in prompt:
        # Re-seed every RNG before each generation so a given prompt always
        # yields the same answer.
        seed = 10
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        np.random.seed(seed)
        random.seed(seed)

        print("Prompt: ", sub_prompt)
        messages = [
            {"role": "user", "content": [
                {"type": "image"},
                {"type": "text", "text": sub_prompt}
            ]}
        ]
        input_text = processor.apply_chat_template(messages, add_generation_prompt=True)

        # Process the input, ensure the inputs are on the same device as the model
        inputs = processor(
            image,
            input_text,
            add_special_tokens=False,
            return_tensors="pt"
        ).to(model.device)

        output = model.generate(**inputs, max_new_tokens=4, temperature=0.7)

        ans = _parse_answer(processor.decode(output[0]))
        print("Answer: ", ans)

        # Follow-up questions are only asked after a "yes"
        if ans != "yes":
            break

    final_ans = ans
    if index == -1 or final_ans == "no" or final_ans == "none":
        return final_ans

    artifact = prompt_to_artifact(obj, prompt[0])
    if artifact == "Incorrect skin tones":
        # NOTE(review): this branch expects a colour word as the answer. A
        # plain "yes" (e.g. from a yes/no prompt mapped to this artifact) is
        # not in unnatural_colors and therefore records nothing - confirm that
        # this is intended.
        if final_ans in unnatural_colors:
            if obj == "bird":
                explanation = get_explanations(artifact, obj, "beak", final_ans)
            else:
                explanation = get_explanations(artifact, obj, "body", final_ans)
        else:
            return final_ans
    elif final_ans != "yes":
        # The follow-up question named the affected feature
        explanation = get_explanations(artifact, obj, final_ans, "NULL")
    else:
        explanation = get_explanations(artifact, obj, "NULL", "NULL")

    json_data = add_to_json(json_data, index, artifact, explanation)

    return final_ans

In [None]:
def prompt_to_artifact(obj, prompt):
    """Return the artifact name associated with the first prompt of a chain.

    The lookup table is rebuilt on every call because `common_prompts` and
    `animal_prompts` are regenerated for each classified image.

    Args:
        obj: object label (currently unused; kept for interface compatibility).
        prompt: the exact prompt string that was asked first.

    Returns:
        The artifact name used as key for the explanation templates.

    Raises:
        KeyError: if `prompt` is not one of the known first prompts.
    """
    mapping = {
        common_prompts[0][0] : "Unnatural Lighting Gradients",
        common_prompts[1][0] : "Glow or light bleed around object boundaries",
        common_prompts[2][0] : "Texture repetition patterns",
        common_prompts[3][0] : "Unrealistic specular highlights",
    }

    # animal_prompts stays empty until an animal has been classified; indexing
    # it unconditionally raised IndexError for every non-animal image.
    if len(animal_prompts) > 1:
        mapping[animal_prompts[1][0]] = "Incorrect skin tones"

    mapping.update({
        prompts['airplane'][0][0]: "Implausible aerodynamic structures",
        prompts['airplane'][1][0]: "Misaligned body panels",
        prompts['airplane'][2][0]: "Inconsistent scale of mechanical parts",
        prompts['airplane'][3][0]: "Abruptly cut off objects",

        prompts['car'][0][0]:"Incorrect wheel geometry",
        prompts['car'][1][0]: "Misaligned body panels",

        prompts['truck'][0][0]: "Incorrect wheel geometry",
        prompts['truck'][1][0]: "Misaligned body panels",

        prompts['ship'][0][0]: "Misaligned body panels",
        prompts['ship'][1][0]: "Inconsistent scale of mechanical parts",
        prompts['ship'][2][0]: "Incorrect reflection mapping",

        prompts['cat-face'][0][0]: "Misaligned bilateral elements in animal faces",
        prompts['cat-face'][1][0]: "Misshapen ears or appendages",
        prompts['cat-face'][2][0]: "Blurred boundaries in fine details",
        prompts['cat-body'][0][0]: "Abruptly cut off objects",
        prompts['cat-body'][1][0]: "Floating or disconnected components",
        prompts['cat-body'][2][0]: "Anatomically incorrect paw structures",
        # Was "... within same objects", for which no explanation template exists
        prompts['cat-body'][3][0]: "Scale inconsistencies within single objects",

        prompts['dog-face'][0][0]:"Misaligned bilateral elements in animal faces",
        prompts['dog-face'][1][0]: "Misshapen ears or appendages",
        prompts['dog-face'][2][0]: "Blurred boundaries in fine details",
        prompts['dog-body'][0][0]: "Abruptly cut off objects",
        prompts['dog-body'][1][0]: "Floating or disconnected components",
        prompts['dog-body'][2][0]: "Anatomically incorrect paw structures",
        prompts['dog-body'][3][0]: "Scale inconsistencies within single objects",

        prompts['horse-face'][0][0]: "Misaligned bilateral elements in animal faces",
        prompts['horse-face'][1][0]: "Misshapen ears or appendages",
        prompts['horse-face'][2][0]: "Blurred boundaries in fine details",

        prompts['horse-body'][0][0]: "Anatomically impossible joint configurations",
        prompts['horse-body'][1][0]: "Abruptly cut off objects",
        prompts['horse-body'][2][0]: "Floating or disconnected components",
        prompts['horse-body'][3][0]: "Scale inconsistencies within single objects",

        prompts['deer-face'][0][0]: "Misaligned bilateral elements in animal faces",
        prompts['deer-face'][1][0]: "Misshapen ears or appendages",
        prompts['deer-face'][2][0]: "Blurred boundaries in fine details",
        prompts['deer-body'][0][0]: "Anatomically impossible joint configurations",
        prompts['deer-body'][1][0]: "Abruptly cut off objects",
        prompts['deer-body'][2][0]: "Scale inconsistencies within single objects",

        prompts['bird'][0][0]: "Misaligned bilateral elements in animal faces",
        prompts['bird'][1][0]: "Incorrect skin tones",
        prompts['frog'][0][0]: "Anatomically impossible joint configurations",
        prompts['frog'][1][0]: "Unnaturally glossy surfaces",
    })
    return mapping[prompt]


In [None]:
def get_explanations(artifact, obj, feature, color):
    """Return the explanation sentence for `artifact`, filled in for this image.

    Args:
        artifact: artifact name; must be one of the keys below.
        obj: object label inserted into the sentence.
        feature: affected part of the object (only used by some templates).
        color: colour word (only used by "Incorrect skin tones").

    Returns:
        The formatted explanation string.

    Raises:
        KeyError: if there is no template for `artifact`.
    """
    artifact_explanations = {
        "Blurred boundaries in fine details": f"The eyes, nose and mouth of the {obj} are not easily seperatable, caused due to blurriness in the fine details of the face",
        "Floating or disconnected components": f"Parts of the {obj} appear to float unnaturally.",
        "Asymmetric features in naturally symmetric objects": f"The {obj}'s features are uneven or misaligned, making it look unnatural.",
        "Misaligned bilateral elements in animal faces": f"{feature} of the {obj}, are misaligned or asymmetrical.",
        "Texture bleeding between adjacent regions": f"Textures of different regions of the {obj} are not well seperated, and seem to bleed into one another.",
        "Texture repetition patterns": "Identical patterns appear unnaturally repeated on surfaces",
        "Unrealistic specular highlights": f"The {obj} shines on one side of its body, despite no direct light being incident on that side, which shows artificial nature of the image.",
        "Anatomically incorrect paw structures": f"The {obj}'s paws have incorrect shapes or structures.",
        "Misshapen ears or appendages": f"The {obj}'s ears have distorted or unusual shapes.",
        "Impossible mechanical connections": f"Connections between parts of the {obj} defy physics or engineering principles.",
        "Inconsistent scale of mechanical parts": f"{feature} of the {obj} appears at incorrect scales relative to the rest of the {obj}.",
        "Physically impossible structural elements": f"The {obj} has structures that would collapse in reality.",
        "Incorrect reflection mapping": f"The {obj}'s reflection doesn't match its own figure unlike in real world reflections.",
        "Incorrect wheel geometry": f"The wheels appear misshapen or distorted.",
        "Implausible aerodynamic structures": "The airplane's wings and other components are not aerodynamically viable, unlike those of a real aircraft.",
        "Misaligned body panels": f"the {obj}'s {feature} appear offset from their usual positions as in corresponding real {obj}",
        "Anatomically impossible joint configurations": f"The {obj} joints bend in directions that are physically impossible.",
        "Repeated element patterns": "The same feature, like a tree, appears unnaturally duplicated in the scene.",

        "Incorrect skin tones": f"The {obj}'s {feature} is {color} in color, which is very uncommon in actual real life {obj}s",

        "Unnatural color transitions": "Color changes appear harsh and unrealistic.",
        "Unnatural Lighting Gradients": f"The {obj} looks artificial amidst the unnatural lighting gradient visible in the background.",
        "Abruptly cut off objects": f"Parts of the {obj} are visibly truncated, as if cropped.",
        "Glow or light bleed around object boundaries": f"Bright halos appear around the boundaries of the {obj}, disrupting natural lighting.",
        "Cinematization Effects": f"The {obj} has high contrast, which resembles beautification of the character as in movies.",
        "Movie-poster-like composition of ordinary scenes": "The background appears unnaturally bright, with a striking contrast reminiscent of cinematic lighting.",
        "Artificial smoothness": "The background of the images appear overly smoothened, losing natural texture.",
        "Unnaturally glossy surfaces": f"The {obj}'s body seems to shine like a metallic surface, which should not be the case owing to its skin's natural nature.",
        # This key used to appear twice in the literal; the earlier entry was
        # silently overridden by this one, so only this text was ever returned.
        "Scale inconsistencies within single objects": f"Certain parts of the {obj} exhibit unnatural size proportions, which disrupt the overall balance and make the design appear less realistic."
    }
    return artifact_explanations[artifact]

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm


# Class names for the 10-way image classifier.
# NOTE(review): only referenced by commented-out code in the cells visible here.
class_list = [ 'airplane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
# Preprocessing pipeline: resize to 224x224, convert to a tensor, normalize.
# NOTE(review): likewise only referenced by commented-out code in this notebook.
transform = transforms.Compose([
  transforms.Resize((224, 224)),
  transforms.ToTensor(),
  transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize for ImageNet
])

# Prompt lists that classify_obj() fills in for the most recently classified
# image; both start out empty.
common_prompts = []
animal_prompts = []

def classify_obj(image):
    """Ask the model for the object class of `image` and build its prompts.

    The lower-cased one-word answer is returned. As a side effect the global
    `common_prompts` is rebuilt for that label, and - only when the label is
    one of cat / deer / dog / horse - the global `animal_prompts` as well
    (otherwise it keeps whatever value it had before the call).
    """
    global common_prompts, animal_prompts

    # index -1 tells extract_ans to return the answer without recording it
    label_question = ["Classify the image into one of 'airplane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck'. If neither, say 'others'. Answer in one word."]
    obj = extract_ans(label_question, image, "", -1).lower()

    # Checks that apply to every object class
    common_prompts = [
        [f"You are an artificial image artifact detector. This is a deepfake image of a {obj}. Does the 'background' show clearly a very smooth color gradient? Say 'yes' or 'no'. Answer in one word."],
        [f"You are an artificial image artifact detector. This is a deepfake image of a {obj}. Does the image show Light Glowing around the boundary of the {obj}? Simply say 'yes' or 'no'. Answer in one word."],
        [f"You are an artificial image artifact detector. This is a deepfake image of a {obj}. Do you see identical patterns appear unnaturally repeated on surfaces. Simply say 'yes' or 'no'. Answer in one word."],
        [f"You are an artificial image artifact detector. This is a deepfake image of a {obj}. Is the {obj} shining on the side light is not incident on it? Say 'yes' or 'no'. Answer in one word."]
    ]

    # Nothing more to prepare for non-animal labels
    if obj not in ('cat', 'deer', 'dog', 'horse'):
        return obj

    animal = obj
    animal_prompts = [
        [f"You are an artificial image artifact detector. This is a deepfake image of a {animal}. Is the annotated part of the image centered on the 'body' or 'face' of the {animal}? Answer in one word."],
        [f"You are an artificial image artifact detector. This is a deepfake image of a {animal}. Specify the {animal}'s color. Answer in one word."]
    ]
    return obj

# Class-specific prompt chains.
#
# Each key is an object class (animals with a separate face/body split use
# '<class>-face' and '<class>-body'). Each value is a list of chains; a chain is
# a list of prompt strings asked in order, where a follow-up is only asked after
# a "yes". The first prompt of every chain is also the lookup key used by
# prompt_to_artifact(), so those strings must stay unique.
prompts = {
    'airplane': [
        ["You are an artificial image artifact detector. This is a deepfake image of an airplane. Can you tell me if this image contains Implausible Aerodynamic Structures? Simply say 'yes' or 'no'. Answer in one word."],
        ["You are an artificial image artifact detector. This is a deepfake image of an airplane. Can you tell me if this image contains Misaligned Body Panels? Simply say 'yes' or 'no'. Answer in one word.", "Image shows deepfake of an airplane. Which airplane mechanical part is positioned 'wrongly' in the image? Say 'none' is everything is ok. Answer in one word."],
        ["You are an artificial image artifact detector. This is a deepfake image of an airplane. Are different airplane mechanical parts out of proportion in size? Simply say 'yes' or 'no'. Answer in one word.", "Image shows deepfake of an airplane. Check all four options before answering. Out of 'wings', 'tail', 'wheels', 'engines', which one is not of correct size? Answer in one word by choosing one."],
        ["You are an artificial image artifact detector. This is a deepfake image of an airplane. Does any part of the airplane abruptly end within the image? Simply say 'yes' or 'no'. Answer in one word.", "Image shows deepfake of an airplane. Which airplane mechanical part is abruptly cut off in the middle of the image? Say 'none' if all are ok. Answer in one word."] # Alternative follow-up: Check all three options before answering. Out of 'wings', 'tail', 'fuselage', which one is not complete? Answer in one word.
    ],

    'car': [
        ["You are an artificial image artifact detector. This is a deepfake image of a car. Are there issues with the wheel structure? Simply say 'yes' or 'no'. Say 'no' if not visible. If wheel is not visible completely, say 'no'. Answer in one word."],
        ["You are an artificial image artifact detector. This is a deepfake image of a car. Can you tell me if this image contains Misaligned Body Panels? Simply say 'yes' or 'no'.", "Image shows deepfake of a car. Check all four options before answering. Out of 'headlights', 'tail lights', 'rear view mirror', 'door', which one has some error in it? Answer in one word."],
    ],

    'truck': [
        ["You are an artificial image artifact detector. This is a deepfake image of a truck. Are there issues with the wheel structure? Simply say 'yes' or 'no'. If wheel is not visible completely, say 'no'. Answer in one word."],
        ["You are an artificial image artifact detector. This is a deepfake image of a truck. Can you tell me if this image contains Misaligned Body Panels? Simply say 'yes' or 'no'. Answer in one word.", "Image shows deepfake of a truck. Check all four options before answering. Out of 'headlights', 'tail lights', 'rear view mirror', 'door', which one has some error in it? Answer in one word."],
    ],

    'ship': [
        ["You are an artificial image artifact detector. This is a deepfake image of a ship. Does the image contain Misaligned Body Panels? Simply say 'yes' or 'no'. Answer in one word."],
        ["You are an artificial image artifact detector. This is a deepfake image of a ship. Does the image contain Inconsistent Scale of Mechanical Parts? Simply say 'yes' or 'no'. Answer in one word."],
        ["You are an artificial image artifact detector. This is a deepfake image of a ship. If the image shows the ship's reflection, is it symmetric with respect to the ship? Simply say 'yes' or 'no'. Say 'no' if no reflection. Answer in one word."],
    ],

    'cat-face': [
        ["You are an artificial image artifact detector. This is a deepfake image of a cat. Does the image contain Asymmetry in the cat's facial features? Simply say 'yes' or 'no'.  If only one side of face is visible, say 'no'. Answer in one word.", "Image shows deepfake of a cat. Check all four options before answering. Out of 'eyes', 'ears', 'nose', 'mouth', which one is not symmetrical? Answer in one word by choosing one."],
        ["You are an artificial image artifact detector. This is a deepfake image of a cat. Are the ears like a normal cat? Simply say 'yes' or 'no'. Answer in one word."],
        ["You are an artificial image artifact detector. This is a deepfake image of a cat. Are the eyes, nose, and mouth clearly distinguishable from each other? Simply say 'yes' or 'no'. Answer in one word."],
    ],

    'cat-body': [
        ["Image shows deepfake of a cat. Check all three before choosing one. Is the cat missing any 'limb' or 'tail', or is it 'ok'? Simply say 'yes' or 'no'. Answer in one word by choosing one."],
        ["Image shows deepfake of a cat. Check all three before choosing one. Is any 'limb' or 'tail' disconnected from the main body, or is it 'ok'? Answer in one word by choosing one."],
        ["This is a deepfake image of a cat. Can you tell me if this image contains Anatomically incorrect paw structures? Say 'yes' or 'no'. If paw shape is not visible properly, say 'no'. Answer in one word."],
        ["This is a deepfake image of a cat. Is the head size abnormal compared to the body size? Simply say 'yes' or 'no'. Answer in one word."],
    ],

    'dog-face': [
        ["This is a deepfake image of a dog. Does the image contain Asymmetry in the dog's facial features? Simply say 'yes' or 'no'.  If only one side of face is visible, say 'no'. Answer in one word.", "Image shows deepfake of a dog. Check all four options before answering. Out of 'eyes', 'ears', 'nose', 'mouth', which one is not symmetrical? Answer in one word by choosing one."],
        ["This is a deepfake image of a dog. Are the ears like a normal dog? Simply say 'yes' or 'no'. Answer in one word."],
        ["This is a deepfake image of a dog. Are the eyes, nose, and mouth distinguishable from each other? Simply say 'yes' or 'no'. Answer in one word."],
    ],

    'dog-body': [
        ["Image shows deepfake of a dog. Check all three before answering. Is the dog missing any 'limb' or 'tail', or is it 'ok'? Simply say 'yes' or 'no'. Answer in one word by choosing one."],
        ["Image shows deepfake of a dog. Check all three before answering. Is any 'limb' or 'tail' disconnected from the main body, or is it 'ok'? Answer in one word by choosing one."],
        ["This is a deepfake image of a dog. Can you tell me if this image contains Anatomically incorrect paw structures? Say 'yes' or 'no'. If paw shape is not visible properly, say 'no'. Answer in one word."],
        ["This is a deepfake image of a dog. Is the head size abnormal compared to the body size? Simply say 'yes' or 'no'."],
    ],

    'horse-face': [
        ["This is a deepfake image of a horse. Does the image contain Asymmetry in the horse's facial features? Simply say 'yes' or 'no'.  If only one side of face is visible, say 'no'. Answer in one word.", "Image shows deepfake of a horse. Check all four options before answering. Out of 'eyes', 'ears', 'nose', 'mouth', which one is not symmetrical? Answer in one word by choosing one."],
        ["This is a deepfake image of a horse. Are the ears like a normal horse? Simply say 'yes' or 'no'. Answer in one word."],
        ["This is a deepfake image of a horse. Are the eyes, nose, and mouth distinguishable from each other? Simply say 'yes' or 'no'. Answer in one word."],
    ],

    'horse-body': [
        ["This is a deepfake image of a horse. Are the leg joints bent in an odd manner? Simply say 'yes' or 'no'. Answer in one word."],
        # Fixed copy-paste error: this horse prompt used to ask about "the dog"
        ["Image shows deepfake of a horse. Check all three before choosing one. Is the horse missing any 'limb' or 'tail', or is it 'ok'? Simply say 'yes' or 'no'. Answer in one word by choosing one. Answer in one word."],
        ["Image shows deepfake of a horse. Check all three before choosing one. Is any 'limb' or 'tail' disconnected from the main body, or is it 'ok'? Simply say 'yes' or 'no'. Answer in one word."],
        ["You are a helpful assistant. This is a deepfake image of a horse. Is the head size abnormal compared to the body size? Simply say 'yes' or 'no'. Answer in one word."],
    ],

    'deer-face': [
        ["This is a deepfake image of a deer. Does the image contain Asymmetry in the deer's facial features? Simply say 'yes' or 'no'. If only one side of face is visible, say 'no'. Answer in one word.", "Image shows deepfake of a deer. Check all four options before answering. Out of 'eyes', 'ears', 'antlers', 'nose', 'mouth', which one is not symmetrical? Answer in one word by choosing one."],
        ["This is a deepfake image of a deer. Are the 'ears' and 'antlers' both like a normal deer? Simply say 'yes' or 'no'. Answer in one word."],
        ["This is a deepfake image of a deer. Are the eyes, nose, and mouth distinguishable from each other? Simply say 'yes' or 'no'. Answer in one word."],
    ],

    'deer-body': [
        ["You are a helpful assistant. This is a deepfake image of a deer. Does the image contain anatomically incorrect joint configurations? Simply say 'yes' or 'no'. Answer in one word."],
        ["Is the deer missing any 'limb' or its 'tail', or is it 'ok'? Simply say 'yes' or 'no'."],
        ["This is a deepfake image of a deer. Is the head size abnormal compared to the body size? Simply say 'yes' or 'no'. Answer in one word."],
    ],

    'bird': [
        ["This is a deepfake image of a bird. If the bird's head is small, say 'no'. Otherwise, are the eyes and beak not prominent? Simply say 'yes' or 'no'. Answer in one word.", "You are a helpful assistant. This is a deepfake image of a bird. Can you tell me if this image contains Asymmetry in the bird's facial features? Simply say 'yes' or 'no'. If face is not visible completely, say 'no'.", "Image shows deepfake of a bird. Check both options before answering. Out of 'eyes' and 'beak', which one is not symmetrical? Answer in one word by choosing one."],
        ["This is a deepfake image of a bird. Does it have a blue beak? Say 'yes' or 'no'. If not visible, say 'no'. Answer in one word."],
    ],

    'frog': [
        ["This is a deepfake image of a frog. Look at the frog's hindlimbs. Is the joint configuration broken or discontinous? Simply say 'yes' or 'no'. Answer in one word."],
        ["This is a deepfake image of a frog. Is the body glossy like a metal shiny surface? Say 'yes' or 'no'. Answer in one word."],
    ],
}


### FROG BIG FEET
                # "You are a helpful assistant. This is a deepfake image of an airplane. Can you tell me if this image contains Oversized Wheels? Simply say 'yes' or 'no'.",


# "You are a helpful assistant. This is a deepfake image of a cat. Cam you tell me if this image contains Abruptly cut off objects? Simply say 'yes' or 'no'.",
                # "You are a helpful assistant. This is a deepfake image of an airplane. Cam you tell me if this image contains Non Manifold Geometries ? Simply say 'yes' or 'no'.",
                # "You are a helpful assistant. This is a deepfake image of an airplane. Cam you tell me if this image contains Impossible Mechanical Connections? Simply say 'yes' or 'no'."
 

# "Describe the oddest color in the {animal}"
# "Are the surroundings dark but the {object} still shining? Say 'yes' or 'no'.", --> Unrealistic Specular Highlights

#"You are a helpful assistant. This is a deepfake image of a cat. Cam you tell me if this image contains Anatomically incorrect paw structures? Simply say 'yes' or 'no'.",\

#    "You are a helpful assistant. This is a deepfake image of a truck. Can you tell me if this image contains Inconsistent Scale of Mechanical Parts? Simply say 'yes' or 'no'."],

In [None]:
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt 


def evaluate_img(image_path, annotated_image_path, index):
    """Run every artifact check for one image.

    Shows the image next to its annotated version, classifies the object, then
    asks the common prompts, the animal prompts (for cat / deer / dog / horse)
    and finally the class-specific prompts. Findings are recorded by
    extract_ans under `index`.

    Args:
        image_path: path of the image to analyse.
        annotated_image_path: path of the annotated version of the same image;
            it is used to decide between the '-face' and '-body' prompt sets.
        index: image index under which explanations are recorded.

    Returns:
        None.
    """
    image = Image.open(image_path)
    annotated_image = Image.open(annotated_image_path)
    fig, ax = plt.subplots(1, 2, figsize = (6, 3))
    ax[0].imshow(image)
    ax[1].imshow(annotated_image)
    plt.show()
    # This function runs once per image; release the figure explicitly so
    # figures cannot pile up over a long run.
    plt.close(fig)

    obj = classify_obj(image)
    print(obj)

    # Checks that apply to every object class
    for prompt in common_prompts:
        extract_ans(prompt, image, obj, index)

    if obj == 'others':
        return

    if obj in ['cat', 'deer', 'dog', 'horse']:
        # Ask (without recording, hence index -1) which region is annotated
        region = extract_ans(animal_prompts[0], annotated_image, obj, -1).lower()

        for animal_prompt in animal_prompts[1:]:
            extract_ans(animal_prompt, image, obj, index)

        class_key = f'{obj}-face' if region == "face" else f'{obj}-body'
    else:
        class_key = f'{obj}'

    # The label comes from free-form model output, so it may be a word that has
    # no prompt set; skip the class-specific checks instead of raising KeyError
    # and aborting the whole run.
    class_prompts = prompts.get(class_key)
    if class_prompts is None:
        print(f"No class-specific prompts for '{obj}'; skipping.")
        return

    for prompt in class_prompts:
        extract_ans(prompt, image, obj, index)
                

In [None]:
# Run the artifact checks on every image whose index is in fake_indices
for i in fake_indices:
    image_path = f"output/temp/super_res/{i}.png"
    annotated_image_path = f"output/temp/grad_cam/{i}.png"
    evaluate_img(
        image_path=image_path,
        annotated_image_path=annotated_image_path,
        index=i,
    )

# Write the collected explanations to disk
output_json_file = 'output/84_task2.json'
with open(output_json_file, 'w') as file:
    file.write(json.dumps(json_data, indent=4))

In [None]:
# from PIL import Image
# import numpy as np
# # for i in range(1, 10): 
# image_path = f"sr_4 (2).jpg"
# image = Image.open(image_path)
# # annotated_image_path = f"grad/{i}_grad.jpg" 
# # annotated_image = Image.open(annotated_image_path)
# # class = classifier(image_path)
# # prompt = prompts[class]
# # print(class)
# # print(prompt)

# messages = [
#     {"role": "user", "content": [
#         {"type": "image"},
#       #  {"type": "text", "text": "Is the heatmap red part concentrated on the 'face','body' or 'background'? Answer in one word."}
#         {"type": "text", "text": "You are a helpful assistant. This is a deepfake image of a car. Can you tell me if this image contains Misaligned Body Panels? Simply say 'yes' or 'no'. "}
#     ]}
# ]
# input_text = processor.apply_chat_template(messages, add_generation_prompt=True)

# # Process the input, ensure the inputs are on the same device as the model
# inputs = processor(
#     image,
#     input_text,
#     add_special_tokens=False,
#     return_tensors="pt"
# ).to(model.device)

# # Generate output from the model
# output = model.generate(**inputs, max_new_tokens=10)
# text = processor.decode(output[0])
# # start = text.find('<|end_header_id|>') + len('<|end_header_id|>')
# # end = text.find('<|eot_id|>')

# # extracted_sentence = text[start:end].strip()

# print(text)#extracted_sentence)

In [None]:
import cv2
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
import json

def image_black(image):
    """
    Black out the darkest pixels of an image.

    The image is converted to grayscale (BGR channel order, as given to
    cv2.cvtColor). A pixel counts as dark when exp((255 - gray) / 255) / e
    exceeds 0.9; those pixels are set to zero in every channel and all other
    pixels are left unchanged.

    Parameters:
        image (numpy.ndarray): Input colour image.

    Returns:
        numpy.ndarray: Copy of the image with the dark pixels zeroed.
    """
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Invert, scale to [0, 1] and apply exp / e, so pure black maps to 1.0
    darkness = np.exp((255 - gray_image) / 255.0) / np.e
    is_dark = darkness > 0.9

    # 0 where the pixel is dark, 255 everywhere else
    keep_mask = np.clip((1 - is_dark) * 255.0, 0, 255).astype('uint8')
    keep_mask_bgr = cv2.cvtColor(keep_mask, cv2.COLOR_GRAY2BGR)

    # AND-ing with 255 keeps a pixel, AND-ing with 0 clears it
    return cv2.bitwise_and(image, keep_mask_bgr)

def analyze_image(actual_path, image_path, gradient_threshold=20, black_threshold=30):
    """
    Count the non-black pixels of an image and how many of them lie in
    low-gradient (smooth) regions.

    The image is loaded, cropped by 2 pixels on every side, passed through
    image_black(), and then analysed with a 3x3 Sobel gradient whose magnitude
    is min-max normalised to 0-255.

    Parameters:
        actual_path (str): Accepted for backward compatibility; the file is not
            read (it was previously decoded only for a debug plot).
        image_path (str): Path to the image that is analysed.
        gradient_threshold (int): A non-black pixel is counted as smooth when
            its normalised gradient magnitude is below this value.
        black_threshold (int): Currently unused.

    Returns:
        non_black_count (int): Number of pixels with at least one non-zero
            channel after masking.
        gradient_mask_sum (int): Number of those pixels whose normalised
            gradient magnitude is below gradient_threshold.

    Raises:
        ValueError: If the image at image_path cannot be loaded.
    """
    # Load the image
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError("Image not found or unable to load.")

    # Trim a thin border before analysing
    height, width = image.shape[:2]
    crop_pixels = 2
    image = image[crop_pixels:height-crop_pixels, crop_pixels:width-crop_pixels]
    image = image_black(image)

    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Identify non-black pixels (any channel above zero)
    non_black_mask = np.any(image > 0, axis=-1)
    non_black_count = np.sum(non_black_mask)

    # Compute the gradient in the x and y directions
    grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)  # Gradient in x-direction
    grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)  # Gradient in y-direction

    # Compute the magnitude of the gradient
    gradient_magnitude = np.sqrt(grad_x**2 + grad_y**2)

    # Normalize the gradient magnitude to range 0-255
    gradient_normalized = cv2.normalize(gradient_magnitude, None, 0, 255, cv2.NORM_MINMAX)

    # Smooth pixels: below the gradient threshold AND not black
    gradient_mask = (gradient_normalized < gradient_threshold) & non_black_mask
    gradient_mask_sum = np.sum(gradient_mask)

    return non_black_count, gradient_mask_sum
# Thresholds for the over-smoothing check
gradient_threshold = 20  # Gradient threshold
black_threshold = 30     # Black pixel threshold
oversmooth_threshold = 0.45  # Minimum smooth-pixel ratio that gets flagged

json_file_path = "output/84_task2.json"  # Replace with your JSON file path
with open(json_file_path, "r") as file:
    data = json.load(file)

# Process each entry in the JSON
for entry in data:
    i = entry["index"]
    image_path = f"output/temp/Foreground/foreground_{i}.png"
    actual_path = f"output/temp/super_res/{i}.png"

    # Analyze the image
    background_pixel_count, gradient_mask_sum = analyze_image(actual_path, image_path, gradient_threshold, black_threshold)

    # No pixel survived masking: the ratio would be 0/0 (NaN plus a
    # RuntimeWarning), which can never exceed the threshold, so skip explicitly.
    if background_pixel_count == 0:
        continue

    ratio = gradient_mask_sum / background_pixel_count

    if ratio > oversmooth_threshold:
        print(f"Image {image_path}: Oversmoothened background detected.")
        # Initialize explanation if it doesn't exist
        if "explanation" not in entry:
            entry["explanation"] = {}

        # Add or update the explanation for artificial smoothness
        entry["explanation"]["Artificial smoothness"] = "The background exhibits a high degree of smoothing with minimal gradients."

# Save the updated JSON with the appended explanations
with open(json_file_path, "w") as file:
    json.dump(data, file, indent=4)
