In [11]:
import os
from openai import OpenAI

# Shared OpenAI client used by every request in this notebook. The key is read
# from the OPENAI_API_KEY environment variable (the client's default lookup,
# spelled out here for clarity).
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [12]:
# Prompt text sent together with the image by forgery_image_analysis.
#
# The sections are joined with explicit "\n" characters. The previous version
# relied on backslash line continuations inside one string literal, which drop
# the newline, so headings and body text were fused into a single run
# ("...Semantic CoherenceConsistency with Common SenseThe content...").
#
# NOTE(review): the opening paragraph assumes the image is Fake, while the
# checklist that follows argues for authenticity. The wording is kept as-is;
# confirm which of the two framings is actually intended.
# NOTE(review): despite its name, this text is sent in a "user" message, not a
# "system" message.
SYSTEM_PROMPT = (
    "You are a rigorous and responsible image tampering (altering) detection expert. "
    "You can localize the exact tampered region and analyze your detection decision "
    "according to tampering clues at different levels. "
    "Assuming that you have detected this is a Fake image and the manipulation type "
    "is one of {photoshop, ai-generated}, "
    "The following analysis supports the authenticity of the image, with observations "
    "categorized into high-level semantic coherence, middle-level visual consistency, "
    "and low-level pixel statistics.\n"
    "\n"
    "# High-Level Semantic Coherence\n"
    "Consistency with Common Sense\n"
    "The content is entirely plausible, aligning with real-world expectations. "
    "The scene reflects a natural, truthful setting with no misleading elements.\n"
    "\n"
    "# Middle-Level Visual Consistency\n"
    "Consistent Lighting\n"
    "The lighting throughout the image is coherent, with shadows, highlights, and "
    "reflections properly aligned with the light source, creating a realistic appearance.\n"
    "Compliance with Physical Constraints\n"
    "All interactions and placements of objects adhere to physical laws, such as gravity "
    "and balance, ensuring that the scene is plausible in the real world.\n"
    "Consistent Perspective\n"
    "The spatial relationships between objects are logically arranged, with no distortion, "
    "and the size, scale, and orientation of elements align with natural perspective rules.\n"
    "\n"
    "# Low-Level Pixel Statistics\n"
    "Cohesive Color Distribution\n"
    "The colors and tones in the image are smooth and cohesive, with no abrupt transitions, "
    "ensuring a natural blend across the scene.\n"
    "Uniform Texture and Sharpness\n"
    "The texture and sharpness are evenly distributed, with no areas appearing artificially "
    "smoothed, grainy, or oversharpened.\n"
    "Consistent Noise Patterns\n"
    "The noise distribution across the image is uniform, with no localized discrepancies or "
    "abrupt changes that could suggest editing or manipulation.\n"
    "\n"
    "Please give your answer based on the image:"
)

In [13]:
import base64
from io import BytesIO
from PIL import Image

def encode_image(image, quality=100):
    """Serialize an image as JPEG and return it as a base64 text string.

    Args:
        image: Image object exposing ``mode``, ``convert`` and ``save``
            (a PIL image in this notebook).
        quality: Value forwarded as the ``quality`` argument of ``image.save``.

    Returns:
        The base64 encoding of the JPEG bytes, decoded to ``str``.
    """
    # Anything that is not already RGB is converted before saving as JPEG.
    rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
    with BytesIO() as jpeg_buffer:
        rgb_image.save(jpeg_buffer, format="JPEG", quality=quality)
        jpeg_bytes = jpeg_buffer.getvalue()
    return base64.b64encode(jpeg_bytes).decode("utf-8")

In [16]:
def forgery_image_analysis(image_path, quality=50):
    """Request a multi-level forgery analysis of one image from the chat model.

    The file at ``image_path`` is opened, re-encoded by ``encode_image`` and
    embedded as a base64 ``data:`` URL. It is sent together with
    ``SYSTEM_PROMPT`` in a single user message to ``gpt-4o-mini``.

    Args:
        image_path: Path of the image file to analyze.
        quality: JPEG quality forwarded to ``encode_image``.

    Returns:
        The object returned by ``client.chat.completions.create``, unmodified.
    """
    with Image.open(image_path) as source_image:
        encoded_jpeg = encode_image(source_image, quality=quality)

    mime_type = "image/jpeg"
    text_part = {"type": "text", "text": SYSTEM_PROMPT}
    image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:{mime_type};base64,{encoded_jpeg}"},
    }
    # One user turn carrying the instructions first, then the image.
    user_message = {"role": "user", "content": [text_part, image_part]}

    return client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[user_message],
    )

In [19]:
# Run the analysis step on a single CASIA2 sample; the raw API response is kept
# in `response` so later cells can read its text.
# NOTE(review): the "Tp_" filename prefix presumably marks a tampered sample —
# confirm against the dataset documentation.
response = forgery_image_analysis("./datasets/CASIA2/images/Tp_D_CNN_M_B_nat00056_nat00099_11105.jpg")

In [26]:
# Display the text of the first choice returned by the analysis request.
response.choices[0].message.content

"To assess the authenticity of the image you provided using the structured analysis approach, we will examine it across three levels: high-level semantic coherence, middle-level visual consistency, and low-level pixel statistics.\n\n### High-Level Semantic Coherence\n\n- **Consistency with Common Sense**: The image appears to depict a natural landscape featuring water and mountains, which is plausible. No misleading elements are present that would suggest an unnatural setting.\n\n### Middle-Level Visual Consistency\n\n- **Consistent Lighting**: The lighting seems coherent, with highlights and reflections appropriate for the water's surface and shadows consistent with the mountainous backdrop. This adds to the realism of the image.\n  \n- **Compliance with Physical Constraints**: The interaction of the water with the land and the mountains appears realistic, adhering to natural laws such as gravity and flow.\n\n- **Consistent Perspective**: The perspective is logical; the size and scale

In [32]:
# Instruction text for the detection step; forgery_image_detect sends it
# followed by the analysis text. The value is deliberately a single line with
# no newline characters, and the spacing inside the JSON template is kept
# exactly as it was so the prompt sent to the model is unchanged.
prompt_pridict = (
    'You are an image forgery detector. '
    'Based on the semantic analysis provided for the image and the original image itself, '
    'determine if the image has been tampered with. You should analyze the image at three levels: '
    'high-level semantic coherence, middle-level visual consistency, and low-level pixel statistics. '
    'Based on these analyses, provide your result in the following JSON format:'
    '{'
    '      "forgery": true/false,'
    '  "explain": "Explanation of your analysis at high-level semantic coherence, middle-level visual consistency, and low-level pixel statistics.",'
    '  "confidence": 0-100'
    '}'
)


In [33]:
def forgery_image_detect(image_path, analysis, quality=50):
    """Ask the chat model for a JSON forgery verdict on one image.

    The file at ``image_path`` is opened, re-encoded by ``encode_image`` and
    embedded as a base64 ``data:`` URL. It is sent in a single user message to
    ``gpt-4o-mini`` together with ``prompt_pridict`` and the supplied analysis.

    Args:
        image_path: Path of the image file to judge.
        analysis: Text of a previous analysis of the same image. When it is
            empty or ``None`` the analysis section is left out of the prompt.
        quality: JPEG quality forwarded to ``encode_image``.

    Returns:
        The object returned by ``client.chat.completions.create``, unmodified.
    """
    with Image.open(image_path) as img:
        img_b64_str = encode_image(img, quality=quality)
    img_type = "image/jpeg"

    # Keep the instructions and the analysis clearly delimited. Plain
    # concatenation glued the closing "}" of the JSON template straight onto
    # the first word of the analysis, and raised TypeError for a None analysis
    # (for example when the previous response carried no text).
    prompt_text = prompt_pridict
    if analysis:
        prompt_text = f"{prompt_pridict}\n\nSemantic analysis:\n{analysis}"

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt_text},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{img_type};base64,{img_b64_str}"},
                    },
                ],
            }
        ],
    )
    return response

In [34]:
# Detection step: pass the same image plus the analysis text from `response`
# (set by the earlier analysis cell). The bare call displays the raw response.
forgery_image_detect("./datasets/CASIA2/images/Tp_D_CNN_M_B_nat00056_nat00099_11105.jpg", response.choices[0].message.content)

ChatCompletion(id='chatcmpl-Au3Mf4wx2Hbc0ycMTXYdeqw2zsCoK', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='```json\n{\n  "forgery": false,\n  "explain": "The image demonstrates high-level semantic coherence as it features a natural landscape with water and mountains, which is plausible and lacks any misleading elements. At the middle level, the lighting is consistent, the interaction of elements adheres to physical constraints, and the perspective appears logical. Low-level analysis reveals cohesive color distribution, uniform texture and sharpness, and consistent noise patterns throughout the image, indicating no signs of manipulation.",\n  "confidence": 85\n}\n```', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1737922665, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier='default', system_fingerprint='fp_bd83329f63', usage=CompletionUsage(completion_tokens=109, pr