In [None]:
!pip install qwen-vl-utils
!pip install jsonformer

In [None]:
import pandas as pd
import time
import torch
from PIL import Image
import requests
import matplotlib.pyplot as plt
from transformers import Qwen2VLForConditionalGeneration, AutoModelForCausalLM, AutoTokenizer, AutoProcessor, BitsAndBytesConfig, AutoModelForVision2Seq
from qwen_vl_utils import process_vision_info
import os
import gc
from tqdm.notebook import tqdm_notebook as tqdm   
import json
import re
from jsonformer import Jsonformer

In [None]:
# Local checkpoint directory (a Kaggle input dataset; the path name suggests a
# fine-tuned Qwen2-VL 7B model).
model_id = "/kaggle/input/qwen7bfinetuned/qwen2-7b-instruct-artifact"

# 4-bit NF4 quantisation with double quantisation; matmuls are computed in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Weights come from the local checkpoint; the processor is taken from the
# hub id of the base model.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    model_id,
    device_map="auto",
    quantization_config=bnb_config,
)
processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")

In [None]:
# Stage-1 prompt: asks the model which defect groups are present in the image.
# The six group names listed here are the same strings classify_image uses as
# keys of its `categories` dict, so they must stay in sync with that function.
classification_prompt = """
TASK: You are given an AI Generated Image and you are supposed to find out which groups of problems are present in the particular image. You can select multiple groups from the provided context.
Explain reasoning behind choosing the groups. 

Groups:-
AI Defects: Floating parts, Noise on flat areas, Weird perspective, Blurred details, Ghosting/Repeats

Biological Defects: Misaligned, Deformed, Fur errors, Unrealistic eyes, Asymmetry

Overprocessing: Grid artifacts, Cinematic look, Over-sharpening, Dramatic lighting, Scale issues

Reality Breaks: Non-manifold, Asymmetric, Proportion errors, Impossible joints, Jagged edges

Scene Oddities: Metallic artifacts, Distorted reflections, Specular issues, Shadow inconsistencies, Glossy surfaces

Texture Defects: Depth anomalies, Blurred edges, Aliasing, Texture bleeding, Fake depth, Synthetic look, Color breaks

Remember: Do not bolden or number your headings, Just mention the heading and a newline between the heading and explanation
"""

In [None]:
# Stage-2 prompt for the "AI Defects" group (see the `categories` dict in
# classify_image): lists the candidate artifacts the model should shortlist
# and explain for the given image.
AI_defects = """
TASK: For the provided artifacts, Shortlist the artifacts which are applicable to the given image.
Generate proper explanations for the artifacts shortlisted. EXPLAIN IN WHICH PART OF THE IMAGE THE ARTIFACT IS EXHIBITED.
Omit the artifact if it is not applicable to the image. Explain strictly in context of the given image.

artifacts:- 
Floating or disconnected components
Artificial noise patterns in uniform surfaces
Incorrect perspective rendering
Spatial relationship errors
Random noise patterns in detailed areas
Loss of fine detail in complex structures
Artificial enhancement artifacts
Abruptly cut off objects
Ghosting effects: Semi-transparent duplicates of elements
Repeated Element Patterns

"""

In [None]:
# Stage-2 prompt for the "Reality Breaks" group (see the `categories` dict in
# classify_image): candidate artifacts about physically or mechanically
# implausible structure.
Reality_breaks = """
TASK: For the provided artifacts, Shortlist the artifacts which are applicable to the given image. 
Generate proper explanations for the artifacts shortlisted. EXPLAIN IN WHICH PART OF THE IMAGE THE ARTIFACT IS EXHIBITED.
Omit the artifact if it is not applicable to the image. Explain strictly in context of the given image.

artifacts:- 
Non-manifold geometries in rigid structures
Asymmetric features in naturally symmetric objects
Irregular proportions in mechanical components 
Inconsistent material properties
Impossible mechanical connections
Inconsistent scale of mechanical parts
Physically impossible structural elements
Jagged edges in curved structures
Implausible aerodynamic structures
Impossible mechanical joints
Impossible foreshortening in animal bodies
Discontinuous surfaces

"""

In [None]:
# Stage-2 prompt used for the "Texture Defects" group (that is the key this
# constant is mapped to in classify_image's `categories` dict): candidate
# artifacts about surfaces, depth, edges and colour.
Surface_Depth_Edge_Problems = """
TASK: For the provided artifacts, Shortlist the artifacts which are applicable to the given image. 
Generate proper explanations for the artifacts shortlisted. EXPLAIN IN WHICH PART OF THE IMAGE THE ARTIFACT IS EXHIBITED.
Omit the artifact if it is not applicable to the image. Explain strictly in context of the given image.

artifacts:- 
Discontinuous surfaces
Over-smoothing of natural textures 
Depth perception anomalies
Blurred boundaries in fine details
Aliasing along high-contrast edges
Texture bleeding between adjacent regions
Fake depth of field
Glow or light bleed around object boundaries
Artificial depth of field in object presentation
Synthetic material appearance
Color coherence breaks
Inconsistent object boundaries
Systematic color distribution anomalies
Resolution inconsistencies within regions
Texture repetition patterns

"""

In [None]:
# Stage-2 prompt for the "Biological Defects" group (see the `categories`
# dict in classify_image): candidate artifacts about anatomy of living
# subjects.
# NOTE(review): "Misaligned body panels" is listed here although it reads like
# a vehicle artifact — confirm the grouping is intentional.
Biological_issues = """
TASK: For the provided artifacts, Shortlist the artifacts which are applicable to the given image. 
Generate proper explanations for the artifacts shortlisted. EXPLAIN IN WHICH PART OF THE IMAGE THE ARTIFACT IS EXHIBITED.
Omit the artifact if it is not applicable to the image. Explain strictly in context of the given image.

artifacts:- 
Misaligned bilateral elements in animal faces 
Dental anomalies in mammals 
Anatomically incorrect paw structures
Improper fur direction flows
Unrealistic eye reflections
Misshapen ears or appendages
Anatomically impossible joint configurations
Unnatural pose artifacts
Biological asymmetry errors
Incorrect Skin Tones
Misaligned body panels

"""

In [None]:
# Stage-2 prompt for the "Scene Oddities" group (see the `categories` dict in
# classify_image): candidate artifacts about lighting, shadows, reflections
# and surface gloss.
Scene_oddities = """
TASK: For the provided artifacts, Shortlist the artifacts which are applicable to the given image.
Generate proper explanations for the artifacts shortlisted. EXPLAIN IN WHICH PART OF THE IMAGE THE ARTIFACT IS EXHIBITED.
Omit the artifact if it is not applicable to the image. Explain strictly in context of the given image.

artifacts:- 
Metallic surface artifacts 
Distorted window reflections
Unrealistic specular highlights
Inconsistent shadow directions
Multiple light source conflicts
Missing ambient occlusion
Incorrect reflection mapping
Unnaturally glossy surfaces
Unnatural Lighting Gradients
Unnatural color transitions
Multiple inconsistent shadow sources
Glow or light bleed around object boundaries

"""

In [None]:
# Stage-2 prompt for the "Overprocessing" group (see the `categories` dict in
# classify_image): candidate artifacts about sharpening, smoothing,
# cinematic styling and scale.
Over_Processing = """
TASK: For the provided artifacts, Shortlist the artifacts which are applicable to the given image. 
Generate proper explanations for the artifacts shortlisted. EXPLAIN IN WHICH PART OF THE IMAGE THE ARTIFACT IS EXHIBITED.
Omit the artifact if it is not applicable to the image. Explain strictly in context of the given image.

artifacts:- 
Regular grid-like artifacts in textures
Movie-poster like composition of ordinary scenes
Cinematization Effects
Excessive sharpness in certain image regions
Dramatic lighting that defies natural physics
Exaggerated characteristic features
Scale inconsistencies within the same object class
Frequency domain signatures
Over-sharpening artifacts
Artificial smoothness
Over-smoothing of natural textures 
Scale inconsistencies within single objects

"""

In [None]:
# Master list of artifact names, one per "- " bullet line.
# NOTE(review): "Incorrect wheel geometry" appears here but in none of the
# per-group prompt constants defined in this notebook — confirm whether that
# is intended.
artifacts_text = """
- Inconsistent object boundaries
- Discontinuous surfaces
- Non-manifold geometries in rigid structures
- Floating or disconnected components
- Asymmetric features in naturally symmetric objects 
- Misaligned bilateral elements in animal faces 
- Irregular proportions in mechanical components 
- Texture bleeding between adjacent regions
- Texture repetition patterns
- Over-smoothing of natural textures 
- Artificial noise patterns in uniform surfaces
- Unrealistic specular highlights
- Inconsistent material properties
- Metallic surface artifacts 
- Dental anomalies in mammals 
- Anatomically incorrect paw structures
- Improper fur direction flows
- Unrealistic eye reflections
- Misshapen ears or appendages
- Impossible mechanical connections
- Inconsistent scale of mechanical parts
- Physically impossible structural elements
- Inconsistent shadow directions
- Multiple light source conflicts
- Missing ambient occlusion
- Incorrect reflection mapping
- Incorrect perspective rendering
- Scale inconsistencies within single objects
- Spatial relationship errors
- Depth perception anomalies
- Over-sharpening artifacts
- Aliasing along high-contrast edges
- Blurred boundaries in fine details
- Jagged edges in curved structures
- Random noise patterns in detailed areas
- Loss of fine detail in complex structures
- Artificial enhancement artifacts
- Incorrect wheel geometry
- Implausible aerodynamic structures
- Misaligned body panels
- Impossible mechanical joints
- Distorted window reflections
- Anatomically impossible joint configurations
- Unnatural pose artifacts
- Biological asymmetry errors
- Regular grid-like artifacts in textures
- Repeated element patterns
- Systematic color distribution anomalies
- Frequency domain signatures
- Color coherence breaks
- Unnatural color transitions
- Resolution inconsistencies within regions
- Unnatural Lighting Gradients
- Incorrect Skin Tones
- Fake depth of field
- Abruptly cut off objects
- Glow or light bleed around object boundaries
- Ghosting effects: Semi-transparent duplicates of elements
- Cinematization Effects
- Excessive sharpness in certain image regions
- Artificial smoothness
- Movie-poster like composition of ordinary scenes
- Dramatic lighting that defies natural physics
- Artificial depth of field in object presentation
- Unnaturally glossy surfaces
- Synthetic material appearance
- Multiple inconsistent shadow sources
- Exaggerated characteristic features
- Impossible foreshortening in animal bodies
- Scale inconsistencies within the same object class
"""

# Turn the bullet text into a clean list: drop blank lines, then peel the
# leading "- " marker and any stray surrounding spaces off each entry.
artifacts_list = [
    bullet.strip("- ").strip()
    for bullet in artifacts_text.strip().split("\n")
    if bullet.strip()
]

In [None]:
def process_image_and_prompt(model, processor, image_path, prompt, max_tokens=1024):
    """Send one image plus a text prompt to the vision-language model.

    Args:
        model: Generation model; must expose ``generate`` and ``device``.
        processor: Processor providing ``apply_chat_template``, ``__call__``
            and ``batch_decode`` for that model.
        image_path: Local file path, or a URL (anything starting with
            ``http`` is downloaded).
        prompt: User text placed after the image in the chat message.
        max_tokens: Upper bound on newly generated tokens.

    Returns:
        list[str]: Decoded continuations with the prompt tokens removed, one
        entry per input sequence. Callers in this notebook take element 0.

    Raises:
        requests.HTTPError: If ``image_path`` is a URL and the server answers
            with an error status.
    """
    import io  # local import: only the URL branch needs it

    # Load the image (it is not displayed here).
    if image_path.startswith('http'):
        # A timeout prevents a stalled download from hanging the whole batch
        # run, and the status check stops an error page from being handed to
        # PIL as if it were image bytes. The response is closed on exit.
        with requests.get(image_path, timeout=30) as http_response:
            http_response.raise_for_status()
            image = Image.open(io.BytesIO(http_response.content))
    else:
        image = Image.open(image_path)

    # Prepare messages: a fixed system role plus the user turn (image + text).
    messages = [
        {
            "role": "system",
            "content" : [
                {"type": "text", "text": "You are an expert in identifying and analyzing artifacts that indicate why the image may appear unnatural or fake"}
            ]
        },
        {
            "role": "user",
            "content": [
                {"type": "image", "image": image},
                {"type": "text", "text": prompt}
            ]
        }
    ]

    # Render the chat template to text and collect the vision inputs.
    input_text = processor.apply_chat_template(messages, tokenize = False, add_generation_prompt=True)
    image_inputs, video_inputs = process_vision_info(messages)
    inputs = processor(
        text = input_text,
        images = image_inputs,
        videos = video_inputs,
        padding = True,
        add_special_tokens=False,
        return_tensors="pt"
    ).to(model.device)

    with torch.inference_mode():
        # Generate output
        output = model.generate(**inputs, temperature = 0.1, max_new_tokens=max_tokens)

        # Decode output: drop the echoed prompt tokens from each sequence so
        # only the newly generated part is decoded.
        generated_ids_trimmed = [
            out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, output)
        ]
        response = processor.batch_decode(
            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )

    # Drop the local references to the input and output tensors.
    del inputs, output

    return response

In [None]:
def extract_artifacts(text):
    """Parse ``Category: item, item`` lines into a dict of item lists.

    Every line containing a colon is split at its FIRST colon; the left part
    (whitespace-stripped) becomes the key and the right part is split on
    commas. Lines without a colon are ignored.

    Args:
        text: Multi-line string, typically the model's classification answer.

    Returns:
        dict[str, list[str]]: Category name mapped to its stripped, non-empty
        items. A category whose value is exactly "none" (any case) is left
        out. A category with nothing after the colon is kept with an empty
        list, so callers still see that the heading was present. If a
        category occurs on several lines, the last one wins.
    """
    artifacts = {}

    for line in text.strip().split('\n'):
        # partition() splits on the first colon only, so colons inside the
        # item text (e.g. "ratio 1:2") no longer raise "too many values to
        # unpack" as a plain split(':') did.
        category, separator, items = line.partition(':')
        if not separator:
            continue

        # Strip the key so an indented heading still matches the known names.
        category = category.strip()
        items = items.strip()

        # "None" means the model reported nothing for this category.
        if items.lower() == "none":
            continue

        # Skip empty fragments produced by trailing or doubled commas.
        artifacts[category] = [item.strip() for item in items.split(',') if item.strip()]

    return artifacts

In [None]:
def get_json_outputs(model, processor, response, max_tokens=1024):
    """Ask the model to re-express an explanation text in JSON form.

    This is a text-only turn: the conversation holds a system instruction and
    ``response`` as the user message, with no image attached.

    Args:
        model: Generation model; must expose ``generate`` and ``device``.
        processor: Processor providing ``apply_chat_template``, ``__call__``
            and ``batch_decode``.
        response: Explanation text to be converted.
        max_tokens: Upper bound on newly generated tokens.

    Returns:
        list[str]: Decoded continuations with prompt tokens removed. The text
        is returned as generated; it is not parsed or validated as JSON here.
    """
    system_prompt = """
            You are an assistant that refines explanations of potential artifacts in images. 
            The explanations are given in a structured format, and your job is to analyse each explanation parse them in json format.
                
        """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": response},
    ]

    # Render the chat template and gather (here: absent) vision inputs.
    chat_text = processor.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(conversation)
    model_inputs = processor(
        text=chat_text,
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        add_special_tokens=False,
        return_tensors="pt",
    ).to(model.device)

    with torch.inference_mode():
        generated = model.generate(
            **model_inputs, temperature=0.1, max_new_tokens=max_tokens
        )

        # Keep only the tokens produced after each prompt.
        continuations = []
        for prompt_ids, full_ids in zip(model_inputs.input_ids, generated):
            continuations.append(full_ids[len(prompt_ids):])

        json_response = processor.batch_decode(
            continuations,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )

    # Drop the local references to the input and output tensors.
    del model_inputs, generated

    return json_response


In [None]:
def classify_image(imagelink):
    """Run the two-stage artifact analysis on one image.

    Stage 1 asks the model which defect groups apply (``classification_prompt``)
    and parses the answer with ``extract_artifacts``. Stage 2 runs, for every
    recognised group, that group's detailed prompt and then passes the
    explanation to ``get_json_outputs``. The image is also shown with
    matplotlib.

    Uses the module-level ``model`` and ``processor``.

    Args:
        imagelink: Path of the image file. It is opened directly with PIL for
            display, so it has to be a local path.

    Returns:
        tuple: ``(classification_response, group_responses, json_responses)``
        where ``classification_response`` is the stage-1 answer (str),
        ``group_responses`` maps group name to the stage-2 explanation (str),
        and ``json_responses`` is a list holding, per processed group, the
        un-indexed return value of ``get_json_outputs``.
    """
    # Display the image; the context manager releases the file handle, which
    # matters when this function is called in a loop over many files.
    with Image.open(imagelink) as img:
        plt.imshow(img)
        plt.axis('off')
        plt.title('Input Image')
        plt.show()

    # Group name (as it appears in classification_prompt) -> detailed prompt.
    categories = {
        "AI Defects" : AI_defects,
        "Biological Defects" : Biological_issues,
        "Overprocessing" : Over_Processing,
        "Scene Oddities" : Scene_oddities,
        "Reality Breaks" : Reality_breaks,
        "Texture Defects": Surface_Depth_Edge_Problems
    }

    # Token budget for the short stage-1 answer.
    classification_max_tokens = 142
    classification_response = process_image_and_prompt(
        model, processor, imagelink, classification_prompt, classification_max_tokens
    )[0]

    artifacts = extract_artifacts(classification_response)
    group_responses = {}
    json_responses = []

    for key, values in artifacts.items():
        # Ignore parsed headings that are not one of the known groups.
        if key not in categories:
            continue

        # Each problem is followed by ", " (including the last one), which
        # keeps the prompt text identical to what the model was run with before.
        problems = "".join(value + ", " for value in values)
        prompt = f""" The image is detected to have the following problems: \n {problems}

        {categories[key]}
        """
        group_response = process_image_and_prompt(model, processor, imagelink, prompt)[0]
        group_responses[key] = group_response

        json_response = get_json_outputs(model, processor, group_response)
        json_responses.append(json_response)

    return classification_response, group_responses, json_responses

In [None]:
# Run the full pipeline on a single image; the three results are bound to
# module-level names (the batch loop further down rebinds the same names).
classification_response, group_responses, json_responses = classify_image("/kaggle/input/testdataadobe/perturbed_images_32/67.png")

In [None]:
# Directory holding the numbered PNG inputs; image_paths is built from it.
image_dir = "/kaggle/input/testdataadobe/perturbed_images_32"

In [None]:
# Numbered inputs <image_dir>/<start>.png … <image_dir>/<end - 1>.png
# (`end` is exclusive, as in range()).
start = 1
end = 301
image_paths = [f"{image_dir}/{index}.png" for index in range(start, end)]

In [None]:
# Per-image results, keyed by image path. The name must be `all_classification`
# (singular): that is the name the batch loop writes to and the pickle cell
# dumps, so the former `all_classifications` caused a NameError on a fresh kernel.
all_classification = {}
all_explanations = {}
all_jsons = {}

In [None]:
# Run the pipeline on every image and store the three results keyed by path.
# Each iteration also displays the image (classify_image calls plt.show()).
# NOTE(review): `all_classification` has to be the exact name of the dict
# created in the initialisation cell, otherwise this raises NameError.
for path in tqdm(image_paths):
    classification_response, group_responses, json_responses = classify_image(path)
    all_classification[path] = classification_response
    all_explanations[path] = group_responses
    all_jsons[path] = json_responses

In [None]:
import pickle

# Output file name for each result dictionary
classification_file = "all_classification.pkl"
explanations_file = "all_explanations.pkl"
jsons_file = "all_jsons.pkl"

# Write each dictionary to its own pickle file, in the order listed.
for target_name, payload in (
    (classification_file, all_classification),
    (explanations_file, all_explanations),
    (jsons_file, all_jsons),
):
    with open(target_name, "wb") as f:
        pickle.dump(payload, f)

print("Files saved successfully:")
for saved_name in (classification_file, explanations_file, jsons_file):
    print(f"- {saved_name}")