Install libraries

In [None]:
!pip install open_clip_torch transformers


Mount and access drive

In [None]:
from google.colab import drive
# Directory under which Google Drive is mounted; the image folder used by the
# notebook is addressed relative to this location.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)


In [None]:
import os

# Folder on the mounted Drive that holds the images to process.
folder_path = '/content/drive/MyDrive/NUS Summer 2023/ModelOutputs'

# File extensions treated as images. The comparison below is done on the
# lower-cased name so that files such as "render.PNG" or "IMG_01.JPG" are not
# silently skipped.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.gif', '.bmp')

# os.listdir returns entries in arbitrary order; sorting makes the row order of
# every downstream result reproducible between runs.
# The listing is deliberately NOT stored in a variable called `files`: that
# name is later bound to the `google.colab.files` module, and re-running this
# cell would otherwise clobber it and break the download calls.
folder_entries = sorted(os.listdir(folder_path))

# Keep only the entries whose extension marks them as an image.
image_files = [
  entry for entry in folder_entries
  if entry.lower().endswith(IMAGE_EXTENSIONS)
]

# Each item is [file name, full path], the shape the later cells unpack.
images = [[name, os.path.join(folder_path, name)] for name in image_files]

# Show a count and a short preview instead of dumping the whole list.
print(f"Found {len(images)} image file(s) in {folder_path}")
print(images[:5])


In [None]:
import open_clip
import torch
#from open_clip import tokenizer

# Architecture / checkpoint identifiers, named once so the model and the
# tokenizer are always requested for the same architecture.
coca_architecture = "coca_ViT-L-14"
coca_checkpoint = "mscoco_finetuned_laion2B-s13B-b90k"

# The factory returns three values; the second one is not used in this
# notebook and is discarded.
model, _, transform = open_clip.create_model_and_transforms(
  model_name=coca_architecture,
  pretrained=coca_checkpoint,
)

# Move the weights to the GPU and switch to inference mode.
model = model.cuda()
model.eval()

tokenizer = open_clip.get_tokenizer(coca_architecture)



Identify the prompt that best matches each image

In [19]:
from PIL import Image

# Preprocessed image tensors and their file names. Both lists are filled in the
# same loop iteration, so index i of one corresponds to index i of the other.
loaded_images = []
file_names = []
for file_name, path_name in images:
  # Image.open keeps the underlying file open until the image is closed; the
  # context manager releases the handle as soon as the RGB copy exists, so a
  # large folder does not exhaust file descriptors.
  with Image.open(path_name) as raw_image:
    rgb_image = raw_image.convert("RGB")
  loaded_images.append(transform(rgb_image))
  file_names.append(file_name)


In [7]:
import numpy as np

# Evaluation prompts grouped by the criterion they are meant to probe.
# Every criterion maps to a list of prompt strings.
prompts = {
    # Mixing several artistic styles in one coherent picture.
    "Style Consistency": [
        "Produce an artwork that seamlessly combines elements from the Cubist, Surrealist, and Pop Art movements, showcasing the distinctive characteristics and visual styles of each movement in a unified composition",
        # NOTE(review): "Frida Kahl" looks like a typo for "Frida Kahlo". Left
        # unchanged because the text must match whatever was used upstream.
        "Generate a portrait that fuses the unique artistic traits of Vincent van Gogh, Pablo Picasso, and Frida Kahl. The artwork should capture the essence of each artist's style while maintaining coherence and balance",
        "Create a landscape that transitions smoothly through all four seasons (spring, summer, autumn, and winter). The artwork should depict the changing colors, textures, and atmospheric qualities associated with each season, while maintaining a harmonious overall composition",
        "Produce an illustration that combines elements from different architectural styles, such as Gothic, Art Deco, and Modernist, in a cohesive and visually engaging manner. The artwork should highlight key architectural features from each style while maintaining a sense of unity"
    ],
    # Depth, foreshortening and spatial relationships.
    "Perspective": [
        "Generate an image of a winding road disappearing into the distance, effectively conveying a sense of depth",
        "Design a composition featuring a person looking out from a high balcony, showcasing accurate foreshortening",
        "Create an artwork that showcases a bustling market scene with multiple figures, demonstrating proper spatial relationships",
        "Produce a still life painting of objects placed on a table, capturing the correct proportions and angles from the viewer's perspective"
    ],
    # Abstract, surreal or otherwise unexpected imagery.
    "Creativity": [
        "Generate an abstract artwork that combines geometric shapes and vibrant colors to evoke a sense of energy",
        "Design an imaginative composition featuring surreal elements that challenge the viewer's perception",
        "Create a digital painting that portrays a familiar object or scene in an unexpected and visually compelling way",
        "Produce an artwork inspired by nature that incorporates organic forms and unconventional textures"
    ],
    # Aesthetic appeal of portraits, landscapes, light and still life.
    "Beauty": [
        "Design a portrait of a person, capturing their unique features and conveying their inner beauty",
        "Generate a visually stunning landscape that showcases the grandeur of a mountain range or a serene countryside",
        "Create an artwork that explores the beauty of light and shadow, emphasizing the interplay of highlights and lowlights",
        "Produce a still life painting featuring a bouquet of flowers, focusing on capturing their delicate beauty and vibrant colors"
    ],
    # Layout rules: thirds, focal point, symmetry and asymmetry.
    "Composition": [
        "Create a balanced and visually pleasing artwork using the rule of thirds, depicting a serene forest landscape",
        "Generate a dynamic composition with a clear focal point that leads the viewer through a bustling city street during rush hour",
        "Design an asymmetrical composition that captures the intensity and movement of a live music performance",
        "Produce a symmetrical composition featuring a still life arrangement of fruit and a vintage teapot, showcasing harmony and elegance"
    ],
    # Conveying a named mood or feeling.
    "Emotional Expression": [
        "Generate an artwork that conveys a sense of tranquility and calmness, depicting a peaceful lakeside sunset",
        "Create an abstract composition that evokes feelings of joy, energy, or excitement, inspired by the vibrant colors of a carnival",
        "Design an artwork that portrays a specific emotion, such as profound sadness, capturing the solitude of a rainy day",
        "Produce a painting that captures the essence of anticipation and curiosity, depicting a child exploring a mysterious forest"
    ]
}

# Flat list of every prompt, in category order and then in list order.
cleanPrompts = [
    prompt
    for category_prompts in prompts.values()
    for prompt in category_prompts
]

# Fail early with a clear message instead of an opaque stacking error when the
# image-loading cell produced nothing.
if not loaded_images:
  raise ValueError("loaded_images is empty; run the image-loading cell first")

# Tokenize the prompts exactly once and move the token ids to the GPU.
text_tokens = tokenizer(cleanPrompts).cuda()

# The preprocessed images are already torch tensors, so they are stacked
# directly into one batch instead of round-tripping through NumPy.
image_input = torch.stack(loaded_images).cuda()

with torch.no_grad():
  image_features = model.encode_image(image_input).float()
  text_features = model.encode_text(text_tokens).float()

  # Scale every feature vector to unit length so the dot product below is a
  # cosine similarity.
  image_features /= image_features.norm(dim=-1, keepdim=True)
  text_features /= text_features.norm(dim=-1, keepdim=True)

# NumPy matrix with one row per prompt and one column per image.
similarity = text_features.cpu().numpy() @ image_features.cpu().numpy().T


'\nwith torch.no_grad(), torch.cuda.amp.autocast():\n    text_features = model.encode_text(text.cuda())\n    for [file_name, path_name] in images:\n        im = Image.open(path_name).convert("RGB")\n        im = transform(im).unsqueeze(0).cuda()\n        image_features = model.encode_image(im)\n        image_features /= image_features.norm(dim=-1, keepdim=True)\n        text_features /= text_features.norm(dim=-1, keepdim=True)\n        similarity = text_features.cpu().numpy() @ image_features.cpu().numpy().T\n\n        file_names.append(file_name)\n\n'

In [22]:
# Show the similarity matrix followed by its shape, one per line.
print(similarity, similarity.shape, sep="\n")

[[0.3738452  0.37470043 0.37929142 ... 0.27295932 0.29953417 0.32333595]
 [0.21157524 0.28261715 0.22001626 ... 0.17199063 0.20059684 0.22808413]
 [0.24332859 0.20587526 0.24330911 ... 0.26297915 0.245386   0.27642614]
 ...
 [0.34998846 0.32330188 0.3435575  ... 0.20139146 0.2508207  0.21528473]
 [0.26377052 0.28932643 0.27320552 ... 0.21706283 0.24557796 0.24550864]
 [0.23706752 0.23009327 0.25854674 ... 0.27595165 0.23826617 0.29024276]]
(24, 326)


In [65]:
# For every image: the (weight, prompt) pairs ranked from best to worst match,
# plus the weights and the prompt texts as two separate parallel lists.
sorted_labels = []
sorted_weights = []
sorted_text = []

# Iterating over the transpose walks the similarity matrix one image at a time;
# each row then holds that image's score against every prompt.
for image_scores in similarity.T:
  # .tolist() converts the NumPy scalars to plain Python floats. Lists of NumPy
  # scalars stringify as "np.float32(...)" under NumPy 2, which would make the
  # exported CSV column unreadable.
  scored_prompts = list(zip(image_scores.tolist(), cleanPrompts))
  # Tuples compare by weight first, so this puts the best-matching prompt first.
  ranked = sorted(scored_prompts, reverse=True)
  sorted_labels.append(ranked)
  sorted_weights.append([weight for weight, _ in ranked])
  sorted_text.append([prompt for _, prompt in ranked])

In [None]:
# Sanity check: preview the ranking of the first image only. Printing the whole
# structure emits every prompt for every image and floods the cell output.
print(sorted_labels[:1])
# Both counts should equal the number of images.
print(len(sorted_labels))
print(len(sorted_weights))


In [66]:
import pandas as pd

# One row per image: its file name, the ranked weights and the ranked prompts.
label_columns = {
  "File": file_names,
  "Weights": sorted_weights,
  "Labels": sorted_text,
}
labelDF = pd.DataFrame(label_columns)

In [None]:
from google.colab import files

# Write the ranking table to the notebook's working directory, then hand the
# same file to the Colab download helper.
labels_csv_name = "labels.csv"
labelDF.to_csv(labels_csv_name)
files.download(labels_csv_name)

Generate original captions

In [None]:

# Captions produced by the model and the matching file names; both lists are
# appended to in the same iteration, so they stay index-aligned.
new_captions = []
file_names = []
for file_name, path_name in images:
  # Close the image file as soon as the preprocessed tensor exists so that
  # file handles do not accumulate over a large folder.
  with Image.open(path_name) as raw_image:
    # unsqueeze(0) adds a leading batch dimension of size one.
    im = transform(raw_image.convert("RGB")).unsqueeze(0).cuda()
  # torch.autocast(device_type="cuda") is the current spelling of the
  # deprecated torch.cuda.amp.autocast().
  with torch.no_grad(), torch.autocast(device_type="cuda"):
    generated = model.generate(im)
  # Keep the text before the end marker, remove the start marker, and drop the
  # last two characters (presumably trailing punctuation/whitespace - TODO
  # confirm against real model output).
  caption = open_clip.decode(generated[0]).split("<end_of_text>")[0].replace("<start_of_text>", "")[:-2]
  new_captions.append(caption)
  file_names.append(file_name)

# One row per image: file name and generated caption.
df = pd.DataFrame({"File" : file_names, "Caption" : new_captions})

# Save the table and trigger a browser download of the CSV.
df.to_csv("captions.csv")
files.download('captions.csv')



