<a href="https://colab.research.google.com/github/imemmul/google-research/blob/rarity/dpok/dpok_nft/ImageReward/score_evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!git clone https://github.com/imemmul/google-research.git

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!ls

In [None]:
%cd google-research/dpok/dpok_nft

In [None]:
%%writefile run_clip.py
import os
import argparse
from PIL import Image
import requests
from transformers import CLIPProcessor, CLIPModel
from ImageReward.models.CLIPScore import CLIPScore
import torch
import torchvision.transforms as transforms
import numpy as np
import pandas as pd

def parse_arguments():
    """Parse the command line of the CLIP scoring script.

    Returns:
        argparse.Namespace: holds ``dataset_path`` (str, required), the
        directory that contains the images to score.
    """
    cli = argparse.ArgumentParser(
        description="Run aesthetic scoring on a dataset using CLIP.",
    )
    cli.add_argument(
        "--dataset_path",
        required=True,
        type=str,
        help="Path to the dataset containing images.",
    )
    parsed = cli.parse_args()
    return parsed

def _natural_sort_key(filename):
    """Return a sort key that orders digit runs numerically (``2.png`` before ``10.png``)."""
    import re  # local import so this block does not depend on the file's import header

    # Splitting on a capturing group alternates text (even positions) and
    # digit runs (odd positions), so ints are only ever compared with ints.
    tokens = re.split(r"([0-9]+)", filename)
    return [int(token) if position % 2 else token for position, token in enumerate(tokens)]


def main():
    """Score each ``.png`` in ``--dataset_path`` against its caption with CLIP and print the mean.

    Prompts are the ``text`` column of the metadata CSV, selected by the row
    positions stored in ``indexes.npy``. The i-th prompt is paired with the
    i-th image file in natural filename order.

    Raises:
        SystemExit: if there is no (prompt, image) pair to score.
    """
    args = parse_arguments()
    model_name = "openai/clip-vit-large-patch14"
    # Fall back to CPU so the script still runs on a runtime without a GPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    clip_score = CLIPScore(model_name, device=device)

    # os.listdir() returns entries in arbitrary order; sort them so every run
    # (and every dataset folder) pairs images with prompts the same way.
    # NOTE(review): assumes natural filename order matches the order of
    # indexes.npy -- confirm against the script that generated the images.
    filenames = sorted(
        (name for name in os.listdir(args.dataset_path) if name.endswith(".png")),
        key=_natural_sort_key,
    )
    image_paths = [os.path.join(args.dataset_path, name) for name in filenames]

    indexes = np.load("/content/drive/MyDrive/FID_evaluation/indexes.npy")
    df = pd.read_csv("/content/drive/MyDrive/output_captioning/NFT_DATASET_MERGED/train/metadata.csv")
    captions = df['text']  # look the column up once instead of once per row
    prompts = [captions.iloc[idx] for idx in indexes]

    if len(prompts) != len(image_paths):
        # zip() stops at the shorter sequence; report the mismatch instead of hiding it.
        print(
            f"Warning: {len(prompts)} prompts but {len(image_paths)} images; "
            f"scoring only the first {min(len(prompts), len(image_paths))} pairs."
        )

    pairs = list(zip(prompts, image_paths))
    if not pairs:
        # Without this guard the average below fails with a bare ZeroDivisionError.
        raise SystemExit(f"No (prompt, .png image) pairs to score in {args.dataset_path!r}.")

    scores = [clip_score.score(prompt, image_path) for prompt, image_path in pairs]

    final_clip_score = sum(scores) / len(scores)
    print("Final CLIP Score:", final_clip_score)


if __name__ == "__main__":
    main()


In [None]:
!pip install git+https://github.com/openai/CLIP.git

In [None]:
!pip install timm

In [None]:
!pip install fairscale

In [None]:
!python run_clip.py --dataset_path /content/drive/MyDrive/FID_evaluation/real_images

In [None]:
!python run_clip.py --dataset_path /content/drive/MyDrive/FID_evaluation/generated_sft

In [None]:
!python run_clip.py --dataset_path /content/drive/MyDrive/FID_evaluation/generated_dpok_sonolsunartik

In [None]:
%%writefile med_config.json
{
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "type_vocab_size": 2,
  "vocab_size": 30524,
  "encoder_width": 768,
  "add_cross_attention": true
}

In [None]:
%%writefile run_blip.py
import os
import argparse
from PIL import Image
from ImageReward.models.BLIPScore import BLIPScore
import torch
import torchvision.transforms as transforms
import numpy as np
import pandas as pd

def parse_arguments():
    """Read the BLIP scoring script's options from the command line.

    Returns:
        argparse.Namespace: its only attribute is ``dataset_path`` (str,
        required), the directory holding the images to score.
    """
    dataset_option = {
        "type": str,
        "required": True,
        "help": "Path to the dataset containing images.",
    }
    cli = argparse.ArgumentParser(description="Run aesthetic scoring on a dataset using BLIP.")
    cli.add_argument("--dataset_path", **dataset_option)
    return cli.parse_args()

def _natural_sort_key(filename):
    """Return a sort key that orders digit runs numerically (``2.png`` before ``10.png``)."""
    import re  # local import so this block does not depend on the file's import header

    # Splitting on a capturing group alternates text (even positions) and
    # digit runs (odd positions), so ints are only ever compared with ints.
    tokens = re.split(r"([0-9]+)", filename)
    return [int(token) if position % 2 else token for position, token in enumerate(tokens)]


def main():
    """Score each ``.png`` in ``--dataset_path`` against its caption with BLIP and print the mean.

    Prompts are the ``text`` column of the metadata CSV, selected by the row
    positions stored in ``indexes.npy``. The i-th prompt is paired with the
    i-th image file in natural filename order. The scorer is built from the
    ``med_config.json`` path hard-coded below and runs on CPU.

    Raises:
        SystemExit: if there is no (prompt, image) pair to score.
    """
    args = parse_arguments()
    blip_score = BLIPScore("/content/google-research/dpok/dpok_nft/med_config.json", device="cpu")

    # os.listdir() returns entries in arbitrary order; sort them so every run
    # (and every dataset folder) pairs images with prompts the same way.
    # NOTE(review): assumes natural filename order matches the order of
    # indexes.npy -- confirm against the script that generated the images.
    filenames = sorted(
        (name for name in os.listdir(args.dataset_path) if name.endswith(".png")),
        key=_natural_sort_key,
    )
    image_paths = [os.path.join(args.dataset_path, name) for name in filenames]

    indexes = np.load("/content/drive/MyDrive/FID_evaluation/indexes.npy")
    df = pd.read_csv("/content/drive/MyDrive/output_captioning/NFT_DATASET_MERGED/train/metadata.csv")
    captions = df['text']  # look the column up once instead of once per row
    prompts = [captions.iloc[idx] for idx in indexes]

    if len(prompts) != len(image_paths):
        # zip() stops at the shorter sequence; report the mismatch instead of hiding it.
        print(
            f"Warning: {len(prompts)} prompts but {len(image_paths)} images; "
            f"scoring only the first {min(len(prompts), len(image_paths))} pairs."
        )

    pairs = list(zip(prompts, image_paths))
    if not pairs:
        # Without this guard the average below fails with a bare ZeroDivisionError.
        raise SystemExit(f"No (prompt, .png image) pairs to score in {args.dataset_path!r}.")

    scores = [blip_score.score(prompt, image_path) for prompt, image_path in pairs]

    final_blip_score = sum(scores) / len(scores)
    print("Final BLIP Score:", final_blip_score)


if __name__ == "__main__":
    main()

In [None]:
!python run_blip.py --dataset_path /content/drive/MyDrive/FID_evaluation/real_images

In [None]:
!python run_blip.py --dataset_path /content/drive/MyDrive/FID_evaluation/generated_sft

In [None]:
!python run_blip.py --dataset_path /content/drive/MyDrive/FID_evaluation/generated_dpok_sonolsunartik

In [None]:
%%writefile run_ir.py
import os
import argparse
from PIL import Image
from ImageReward.ImageReward import ImageReward
import torch
import torchvision.transforms as transforms
import numpy as np
import pandas as pd

def parse_arguments():
    """Parse the command line of the ImageReward scoring script.

    Returns:
        argparse.Namespace: holds two required string attributes,
        ``dataset_path`` (image directory) and ``med_config`` (path to the
        ``med_config.json`` file).
    """
    cli = argparse.ArgumentParser(
        description="Run aesthetic scoring on a dataset using ImageReward.",
    )
    # Both options are mandatory strings; register them in the original order
    # so the generated --help text is unchanged.
    required_options = (
        ("--dataset_path", "Path to the dataset containing images."),
        ("--med_config", "Path to the med_config.json file."),
    )
    for flag, help_text in required_options:
        cli.add_argument(flag, type=str, required=True, help=help_text)
    return cli.parse_args()

def _natural_sort_key(filename):
    """Return a sort key that orders digit runs numerically (``2.png`` before ``10.png``)."""
    import re  # local import so this block does not depend on the file's import header

    # Splitting on a capturing group alternates text (even positions) and
    # digit runs (odd positions), so ints are only ever compared with ints.
    tokens = re.split(r"([0-9]+)", filename)
    return [int(token) if position % 2 else token for position, token in enumerate(tokens)]


def main():
    """Score each ``.png`` in ``--dataset_path`` against its caption with ImageReward and print the mean.

    Prompts are the ``text`` column of the metadata CSV, selected by the row
    positions stored in ``indexes.npy``. The i-th prompt is paired with the
    i-th image file in natural filename order. The scorer is built from the
    ``--med_config`` path and runs on CPU.

    Raises:
        SystemExit: if there is no (prompt, image) pair to score.
    """
    args = parse_arguments()
    image_reward = ImageReward(args.med_config, device="cpu")

    # os.listdir() returns entries in arbitrary order; sort them so every run
    # (and every dataset folder) pairs images with prompts the same way.
    # NOTE(review): assumes natural filename order matches the order of
    # indexes.npy -- confirm against the script that generated the images.
    filenames = sorted(
        (name for name in os.listdir(args.dataset_path) if name.endswith(".png")),
        key=_natural_sort_key,
    )
    image_paths = [os.path.join(args.dataset_path, name) for name in filenames]

    indexes = np.load("/content/drive/MyDrive/FID_evaluation/indexes.npy")
    df = pd.read_csv("/content/drive/MyDrive/output_captioning/NFT_DATASET_MERGED/train/metadata.csv")
    captions = df['text']  # look the column up once instead of once per row
    prompts = [captions.iloc[idx] for idx in indexes]

    if len(prompts) != len(image_paths):
        # zip() stops at the shorter sequence; report the mismatch instead of hiding it.
        print(
            f"Warning: {len(prompts)} prompts but {len(image_paths)} images; "
            f"scoring only the first {min(len(prompts), len(image_paths))} pairs."
        )

    pairs = list(zip(prompts, image_paths))
    if not pairs:
        # Without this guard the average below fails with a bare ZeroDivisionError.
        raise SystemExit(f"No (prompt, .png image) pairs to score in {args.dataset_path!r}.")

    scores = [image_reward.score(prompt, image_path) for prompt, image_path in pairs]

    final_image_reward_score = sum(scores) / len(scores)
    print("Final ImageReward Score:", final_image_reward_score)


if __name__ == "__main__":
    main()


In [None]:
!python run_ir.py --dataset_path /content/drive/MyDrive/FID_evaluation/real_images --med_config /content/google-research/dpok/dpok_nft/med_config.json

In [None]:
!python run_ir.py --dataset_path /content/drive/MyDrive/FID_evaluation/generated_sft --med_config /content/google-research/dpok/dpok_nft/med_config.json

In [None]:
!python run_ir.py --dataset_path /content/drive/MyDrive/FID_evaluation/generated_dpok_sonolsunartik --med_config /content/google-research/dpok/dpok_nft/med_config.json

In [None]:
%%writefile run_as.py
import os
import argparse
from PIL import Image
from ImageReward.models.AestheticScore import AestheticScore
import torch
import torchvision.transforms as transforms
import numpy as np
import pandas as pd

def parse_arguments():
    """Collect the AestheticScore script's command-line options.

    Returns:
        argparse.Namespace: exposes ``dataset_path`` (str, required), the
        directory that contains the images to score.
    """
    description = "Run aesthetic scoring on a dataset using AestheticScore."
    cli = argparse.ArgumentParser(description=description)
    cli.add_argument(
        "--dataset_path",
        help="Path to the dataset containing images.",
        type=str,
        required=True,
    )
    options = cli.parse_args()
    return options

def _natural_sort_key(filename):
    """Return a sort key that orders digit runs numerically (``2.png`` before ``10.png``)."""
    import re  # local import so this block does not depend on the file's import header

    # Splitting on a capturing group alternates text (even positions) and
    # digit runs (odd positions), so ints are only ever compared with ints.
    tokens = re.split(r"([0-9]+)", filename)
    return [int(token) if position % 2 else token for position, token in enumerate(tokens)]


def main():
    """Score each ``.png`` in ``--dataset_path`` with AestheticScore and print the mean.

    Prompts are the ``text`` column of the metadata CSV, selected by the row
    positions stored in ``indexes.npy``. The i-th prompt is passed to the
    scorer together with the i-th image file in natural filename order. The
    scorer is built for ``openai/clip-vit-large-patch14`` and runs on CPU.

    Raises:
        SystemExit: if there is no (prompt, image) pair to score.
    """
    args = parse_arguments()
    aesthetic_score = AestheticScore("openai/clip-vit-large-patch14", device="cpu")

    # os.listdir() returns entries in arbitrary order; sort them so every run
    # (and every dataset folder) pairs images with prompts the same way.
    # NOTE(review): assumes natural filename order matches the order of
    # indexes.npy -- confirm against the script that generated the images.
    filenames = sorted(
        (name for name in os.listdir(args.dataset_path) if name.endswith(".png")),
        key=_natural_sort_key,
    )
    image_paths = [os.path.join(args.dataset_path, name) for name in filenames]

    indexes = np.load("/content/drive/MyDrive/FID_evaluation/indexes.npy")
    df = pd.read_csv("/content/drive/MyDrive/output_captioning/NFT_DATASET_MERGED/train/metadata.csv")
    captions = df['text']  # look the column up once instead of once per row
    prompts = [captions.iloc[idx] for idx in indexes]

    if len(prompts) != len(image_paths):
        # zip() stops at the shorter sequence; report the mismatch instead of hiding it.
        print(
            f"Warning: {len(prompts)} prompts but {len(image_paths)} images; "
            f"scoring only the first {min(len(prompts), len(image_paths))} pairs."
        )

    pairs = list(zip(prompts, image_paths))
    if not pairs:
        # Without this guard the average below fails with a bare ZeroDivisionError.
        raise SystemExit(f"No (prompt, .png image) pairs to score in {args.dataset_path!r}.")

    scores = [aesthetic_score.score(prompt, image_path) for prompt, image_path in pairs]

    final_aesthetic_score = sum(scores) / len(scores)
    print("Final Aesthetic Score:", final_aesthetic_score)


if __name__ == "__main__":
    main()
