In [None]:
!pip install diffusers
!pip install sentence_transformers

In [None]:
import os

import requests
import torch
import torch.nn as nn
from diffusers import StableDiffusionPipeline
from PIL import Image
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoProcessor, Blip2ForConditionalGeneration, AutoImageProcessor, AutoModel

In [None]:
# BLIP-2 setup (image-to-text model used to caption each image)
BLIP2_CHECKPOINT = "Salesforce/blip2-opt-2.7b"

# Pick the device first so the weight precision can follow it.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision is only requested on GPU; the CPU fallback loads full precision
# because float16 inference on CPU is poorly supported and can fail or be very slow.
blip2_dtype = torch.float16 if device == "cuda" else torch.float32

processor = AutoProcessor.from_pretrained(BLIP2_CHECKPOINT)
model = Blip2ForConditionalGeneration.from_pretrained(BLIP2_CHECKPOINT, torch_dtype=blip2_dtype)
# Assign the result instead of leaving a bare expression, so the cell does not
# echo the whole module tree as its output.
model = model.to(device)

In [None]:
# Stable Diffusion setup (text-to-image model used to re-render each caption)
model_id = "valhalla/sd-wikiart-v2"
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    # float16 only when running on GPU: a half-precision pipeline is not meant
    # to be run on the CPU fallback selected by `device`.
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)

In [None]:
# BERT setup
# Sentence Transformer checkpoint: BERT with mean-tokens pooling, used to embed
# captions for the semantic-similarity metric.
SEMANTIC_MODEL_NAME = 'bert-base-nli-mean-tokens'
model_semantic_similarity = SentenceTransformer(SEMANTIC_MODEL_NAME)

In [None]:
# DINOv2 setup
# One checkpoint id shared by the image processor and the vision backbone that
# together produce the features for the visual-similarity metric.
DINOV2_CHECKPOINT = 'facebook/dinov2-base'
processor_visual_similarity = AutoImageProcessor.from_pretrained(DINOV2_CHECKPOINT)
model_visual_similarity = AutoModel.from_pretrained(DINOV2_CHECKPOINT)
model_visual_similarity = model_visual_similarity.to(device)

In [None]:
def semantic_similarities(captions):
    """Score how far the generated captions drift, using sentence embeddings.

    Args:
        captions: Sequence of caption strings, in generation order.

    Returns:
        A pair of lists ``(initial, consecutive)``, each with
        ``len(captions) - 1`` floats (empty when fewer than two captions):
        ``initial[k]`` is the cosine similarity between caption 0 and caption
        ``k + 1``; ``consecutive[k]`` is the cosine similarity between caption
        ``k`` and caption ``k + 1``.
    """
    # One embedding per caption from the module-level sentence encoder.
    embeddings = model_semantic_similarity.encode(captions)

    def _pair_score(left, right):
        # cosine_similarity expects 2-D inputs and returns a 1x1 matrix here.
        score_matrix = cosine_similarity([embeddings[left]], [embeddings[right]])
        return float(score_matrix[0, 0])

    semantic_similarity_initial = []
    semantic_similarity_consecutive = []
    for current in range(1, len(embeddings)):
        # Similarity to the first generated caption.
        semantic_similarity_initial.append(_pair_score(0, current))
        # Similarity to the caption from the previous iteration.
        semantic_similarity_consecutive.append(_pair_score(current - 1, current))

    return semantic_similarity_initial, semantic_similarity_consecutive

In [None]:
def _dinov2_image_features(image_path):
    """Return the mean-pooled DINOv2 feature vector for the image at ``image_path``."""
    # Open inside a context manager so the file handle is released immediately;
    # copy() loads the pixel data so the image stays usable after the file closes.
    with Image.open(image_path) as image_file:
        image = image_file.copy()
    with torch.no_grad():
        inputs = processor_visual_similarity(images=image, return_tensors="pt").to(device)
        outputs = model_visual_similarity(**inputs)
    # Average last_hidden_state over dim=1, then take the single batch entry.
    return outputs.last_hidden_state.mean(dim=1)[0]


def visual_similarities(artwork_name, style, iterations):
    """Score how far the generated images drift, using DINOv2 features.

    Reads ``image_0.png`` .. ``image_{iterations}.png`` from
    ``data/{style}/{artwork_name}`` (the folder ``self_poison_image`` writes to,
    resolved relative to the current working directory).

    Args:
        artwork_name: Name of the artwork sub-folder.
        style: Name of the style folder containing the artwork folder.
        iterations: Number of generated images to compare (``image_1`` onwards).

    Returns:
        A pair of lists ``(initial, consecutive)`` with ``iterations`` floats
        each. ``initial[k]`` compares ``image_0`` with ``image_{k+1}``;
        ``consecutive[k]`` compares ``image_{k}`` with ``image_{k+1}``. Cosine
        similarities are rescaled from [-1, 1] to [0, 1] via ``(s + 1) / 2``.

    Raises:
        FileNotFoundError: If one of the expected image files is missing.
    """
    # Same relative layout as self_poison_image, so reading and writing agree
    # regardless of where the notebook is run from.
    folder = f'data/{style}/{artwork_name}'
    initial_image_features = _dinov2_image_features(os.path.join(folder, 'image_0.png'))

    # Features are 1-D vectors, hence dim=0; built once instead of per iteration.
    cos = nn.CosineSimilarity(dim=0)

    visual_similarities_initial = []
    visual_similarities_consecutive = []
    previous_image_features = initial_image_features

    for i in range(iterations):
        image_features = _dinov2_image_features(os.path.join(folder, f'image_{i + 1}.png'))

        sim_initial = cos(initial_image_features, image_features).item()
        sim_consecutive = cos(previous_image_features, image_features).item()

        # Map cosine similarity from [-1, 1] onto [0, 1].
        visual_similarities_initial.append((sim_initial + 1) / 2)
        visual_similarities_consecutive.append((sim_consecutive + 1) / 2)

        previous_image_features = image_features

    return visual_similarities_initial, visual_similarities_consecutive

In [None]:
import os
from PIL import Image
import matplotlib.pyplot as plt

def self_poison_image(image_path, artwork_name, style, iterations):
    """Run the caption -> image loop starting from an original artwork.

    The source image is saved as ``image_0.png``. Each iteration captions the
    current image with BLIP-2, renders that caption with Stable Diffusion, and
    saves the result as ``image_{i + 1}.png``; the rendered image becomes the
    input of the next iteration.

    Args:
        image_path: Path of the original artwork image.
        artwork_name: Sub-folder name used for this artwork's outputs.
        style: Style folder name; outputs go to ``data/{style}/{artwork_name}``.
        iterations: Number of caption/render rounds to run.

    Returns:
        ``(images, captions)``: ``images`` holds ``iterations + 1`` file paths
        (the saved original first), ``captions`` holds ``iterations`` strings,
        where ``captions[i]`` is the text that produced ``images[i + 1]``.
    """
    output_folder = f'data/{style}/{artwork_name}'
    os.makedirs(output_folder, exist_ok=True)

    # Close the source file promptly, and normalise to RGB so that modes PNG
    # cannot store (e.g. CMYK JPEGs) do not make the save below fail.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB")
    image_output_path = os.path.join(output_folder, 'image_0.png')
    image.save(image_output_path)

    # The prompt is constant, so build it once outside the loop.
    prompt = "Question: Describe the contents of this artwork. Answer:"

    captions = []
    images = [image_output_path]

    for i in range(iterations):
        # Image-to-text. Floating-point inputs are cast to the model's dtype so
        # they match half-precision weights; integer token ids are left as-is.
        inputs = processor(images=image, text=prompt, return_tensors="pt").to(device, model.dtype)
        generated_ids = model.generate(**inputs, min_length=16, max_length=50, num_beams=5)
        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()

        # Text-to-image: the new image feeds the next iteration's captioning step.
        image = pipe(generated_text).images[0]

        image_output_path = os.path.join(output_folder, f'image_{i + 1}.png')
        image.save(image_output_path)
        images.append(image_output_path)
        captions.append(generated_text)

    return images, captions

In [None]:
def process_image(image_path, artwork_name, style, iterations=10):
    """Run the full pipeline for one artwork and collect its metrics.

    Generates the image/caption chain with ``self_poison_image``, then scores
    caption drift with ``semantic_similarities`` (BERT) and image drift with
    ``visual_similarities`` (DINOv2). The assembled result is printed and returned.

    Args:
        image_path: Path of the original artwork image.
        artwork_name: Name used for the artwork's output folder.
        style: Style folder name the artwork belongs to.
        iterations: Number of caption/render rounds (default 10).

    Returns:
        Dict with keys ``images``, ``captions``, ``semantic_similarity_initial``,
        ``semantic_similarity_consecutive``, ``visual_similarity_initial`` and
        ``visual_similarity_consecutive``.
    """
    # Generation step: file paths of every image plus the caption behind each render.
    image_paths, caption_texts = self_poison_image(image_path, artwork_name, style, iterations)

    # Text metric (BERT sentence embeddings).
    text_vs_first, text_vs_previous = semantic_similarities(caption_texts)

    # Image metric (DINOv2 features), read back from the files written above.
    image_vs_first, image_vs_previous = visual_similarities(artwork_name, style, iterations)

    generated_data = {}
    generated_data['images'] = image_paths
    generated_data['captions'] = caption_texts
    generated_data['semantic_similarity_initial'] = text_vs_first
    generated_data['semantic_similarity_consecutive'] = text_vs_previous
    generated_data['visual_similarity_initial'] = image_vs_first
    generated_data['visual_similarity_consecutive'] = image_vs_previous

    print(generated_data)
    return generated_data

In [None]:
import json
import os
from PIL import Image
import shutil

# Batch driver: run the pipeline for every artwork in every style folder under
# `input_dir`, then store the metrics as JSON and zip each artwork's output folder.
input_dir = 'wikiart_samples'
NUM_ITERATIONS = 100  # caption -> image rounds per artwork
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')

# os.listdir returns entries in arbitrary order; sort so runs are repeatable.
for style in sorted(os.listdir(input_dir)):
    style_path = os.path.join(input_dir, style)
    if not os.path.isdir(style_path):
        continue
    print(f"Processing style: {style}")

    for image_filename in sorted(os.listdir(style_path)):
        if not image_filename.lower().endswith(IMAGE_EXTENSIONS):
            continue
        artwork_name = os.path.splitext(image_filename)[0]
        print(f"Processing artwork {artwork_name} in style {style}")

        image_path = os.path.join(style_path, image_filename)
        generated_data = process_image(image_path, artwork_name, style, NUM_ITERATIONS)

        output_folder = f'data/{style}/{artwork_name}'
        os.makedirs(output_folder, exist_ok=True)

        json_file_path = os.path.join(output_folder, 'data.json')
        with open(json_file_path, 'w', encoding='utf-8') as json_file:
            json.dump(generated_data, json_file, indent=4)

        # make_archive appends ".zip" itself, so pass the extension-less path.
        # (Building "<name>.zip" and stripping it with str.replace would also
        # remove any other ".zip" occurrence inside the style or artwork name.)
        # The archive lands next to the folder: data/{style}/{artwork_name}.zip
        shutil.make_archive(base_name=output_folder, format='zip', root_dir=output_folder)