In [None]:
# Run this cell in Colab to install the GLIDE text-to-image package
# (glide-text2im) straight from GitHub if it is not already installed.
!pip install git+https://github.com/openai/glide-text2im

# Clone PaddleGAN, the library containing the art-style transfer tool.
# The clone lands in the current working directory; a later cell does
# `%cd PaddleGAN/` and runs `applications/tools/lapstyle.py` from it.
!git clone https://github.com/PaddlePaddle/PaddleGAN

In [None]:
# Mount Google Drive at /content/drive so that the dataset stored under
# /content/drive/MyDrive can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Import the COCO API used to read the dataset annotations
from pycocotools.coco import COCO

# Location of the COCO airplane subset on the mounted drive, the split name,
# and the instance-annotation file derived from both.
dataDir, dataType = (
    '/content/drive/MyDrive/COCOdataset2017Airplanes',
    'train2017',
)
annFile = '{}/annotations/instances_{}.json'.format(dataDir, dataType)

In [None]:
# Different imports used by the Text to Image model
from PIL import Image
from IPython.display import display
import torch as th
import cv2

from glide_text2im.download import load_checkpoint
from glide_text2im.model_creation import (
    create_model_and_diffusion,
    model_and_diffusion_defaults,
    model_and_diffusion_defaults_upsampler
)

In [None]:
# Pick the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
# Rough cost per generated sample: on the order of 20 minutes on CPU,
# under a minute on a GPU.
has_cuda = th.cuda.is_available()
device = th.device('cuda' if has_cuda else 'cpu')

In [None]:
# Build the base GLIDE model together with its diffusion process.
options = model_and_diffusion_defaults()
options.update(
    use_fp16=has_cuda,         # half precision only when a GPU is available
    timestep_respacing='100',  # use 100 diffusion steps for fast sampling
)
model, diffusion = create_model_and_diffusion(**options)

# Inference only: switch to eval mode, then match the precision chosen above
# before moving the network to the device and loading the 'base' checkpoint.
model.eval()
if has_cuda:
    model.convert_to_fp16()
model.to(device)
model.load_state_dict(load_checkpoint('base', device))

print('total base parameters', sum(p.numel() for p in model.parameters()))

In [None]:
# Build the upsampler model together with its own diffusion process.
options_up = model_and_diffusion_defaults_upsampler()
options_up.update(
    use_fp16=has_cuda,            # half precision only when a GPU is available
    timestep_respacing='fast27',  # use 27 diffusion steps for very fast sampling
)
model_up, diffusion_up = create_model_and_diffusion(**options_up)

# Inference only: switch to eval mode, then match the precision chosen above
# before moving the network to the device and loading the 'upsample' checkpoint.
model_up.eval()
if has_cuda:
    model_up.convert_to_fp16()
model_up.to(device)
model_up.load_state_dict(load_checkpoint('upsample', device))

print('total upsampler parameters', sum(p.numel() for p in model_up.parameters()))

In [None]:
def show_images(batch: th.Tensor):
    """Show a batch of images inline as one horizontal strip.

    The values are mapped to 8-bit pixels with ``(x + 1) * 127.5``, rounded and
    clamped to [0, 255]. The batch is then laid out side by side: the tensor is
    permuted to put dimension 2 first and dimension 1 last, and reshaped to
    ``(batch.shape[2], -1, 3)``.

    Args:
        batch: image tensor; the reshape to three trailing channels implies a
            (N, 3, H, W) layout -- TODO confirm against the sampler output.
    """
    pixels = (batch + 1) * 127.5
    pixels = pixels.round().clamp(0, 255).to(th.uint8).cpu()
    height = batch.shape[2]
    strip = pixels.permute(2, 0, 3, 1).reshape([height, -1, 3])
    display(Image.fromarray(strip.numpy()))

In [None]:
def save_image(batch: th.Tensor, i, output_dir="/content/Images"):
  """ Save a batch of images as a single PNG file in the colab space.

  The values are mapped to 8-bit pixels with ``(x + 1) * 127.5``, rounded and
  clamped to [0, 255], and the batch is laid out side by side in one image.
  The file is written to ``<output_dir>/output_text2img_<i>.png``.

  Args:
    batch: image tensor; the reshape to three trailing channels implies a
      (N, 3, H, W) layout -- TODO confirm against the sampler output.
    i: index used to build the file name.
    output_dir: folder receiving the PNG; it is created when missing. The
      default keeps the location the rest of the notebook reads from.
  """
  import os  # local import so this cell does not depend on another cell

  scaled = ((batch + 1)*127.5).round().clamp(0,255).to(th.uint8).cpu()
  reshaped = scaled.permute(2, 0, 3, 1).reshape([batch.shape[2], -1, 3])

  # Nothing visible in this notebook creates the output folder, and saving
  # into a missing folder fails -- which would only surface after a full
  # (expensive) generation. Create it up front instead.
  os.makedirs(output_dir, exist_ok=True)

  img = Image.fromarray(reshaped.numpy())
  path = os.path.join(output_dir, "output_text2img_" + str(i) + ".png")
  img.save(path,"PNG")

In [None]:
# Path of the caption annotations restricted to planes
captions_annFile = '{}/annotations/captions_{}_planes.json'.format(dataDir, dataType)

# Open the instance annotations through the COCO API
coco = COCO(annFile)

# Keep every category id and the matching category records at hand
catIDs = coco.getCatIds()
cats = coco.loadCats(catIDs)

# Category names to keep; every other class is ignored.
filterClasses = ['airplane']

# Category ids for the selected names, then every image id containing them
catIds = coco.getCatIds(catNms=filterClasses)
imgIds = coco.getImgIds(catIds=catIds)

# Open the caption annotations through a second COCO API instance
coco_caps = COCO(captions_annFile)

In [None]:
# Lists to contain the images info (index-aligned with each other)
images_annot = []
images_paths = []
images_styles = []

# Dataset split, expressed as half-open index ranges into imgIds:
#   Training set   : [0, 2000)
#   Validation set : [2000, 2586)
#   Test set       : [2586, 2986)
# The end bound is exclusive. The previous range(2586, 2985) dropped the last
# test image and produced 399 entries, while the following cells index
# 400 of them.
TEST_START, TEST_END = 2586, 2986

# Run on the Test set part
for i in range(TEST_START, TEST_END):
  # Get the caption annotations of this image
  annIds = coco_caps.getAnnIds(imgIds=imgIds[i])
  anns = coco_caps.loadAnns(annIds)

  # The first caption is expected to look like "<description> as <style>."
  # Split it to have the art style and the caption separated.
  splitted = anns[0]['caption'].split(" as ")

  # Keep the caption, and the style without the trailing sentence dot
  images_annot.append(splitted[0])
  images_styles.append(splitted[1].split(".")[0])

  # Get file paths
  img=coco.loadImgs(imgIds[i])[0]
  content_img_path='{}/New_Art_Airplanes_Final/{}'.format(dataDir,img['file_name'])

  # Keep the path
  images_paths.append(content_img_path)

In [None]:
# Generate one image per collected test caption (400 for the full test split).

# Sampling parameters (the same for every prompt, so they are set once)
batch_size = 1
guidance_scale = 3.0

# Tune this parameter to control the sharpness of 256x256 images.
# A value of 1.0 is sharper, but sometimes results in grainy artifacts.
upsample_temp = 0.997

# Classifier-free guidance feeds a conditional and an unconditional copy of
# every sample, so the base model sees twice the batch size.
full_batch_size = batch_size * 2

# Create the classifier-free guidance tokens (empty prompt)
uncond_tokens, uncond_mask = model.tokenizer.padded_tokens_and_mask(
    [], options['text_ctx']
)


def model_fn(x_t, ts, **kwargs):
    """Classifier-free guidance sampling function for the base model.

    The first half of ``x_t`` is duplicated and run through ``model``. The
    first three output channels are split into a conditional and an
    unconditional half, which are blended with ``guidance_scale``. The blended
    result is duplicated again and re-joined with the remaining channels.
    """
    half = x_t[: len(x_t) // 2]
    combined = th.cat([half, half], dim=0)
    model_out = model(combined, ts, **kwargs)
    eps, rest = model_out[:, :3], model_out[:, 3:]
    cond_eps, uncond_eps = th.split(eps, len(eps) // 2, dim=0)
    half_eps = uncond_eps + guidance_scale * (cond_eps - uncond_eps)
    eps = th.cat([half_eps, half_eps], dim=0)
    return th.cat([eps, rest], dim=1)


# Iterate over what was actually collected instead of a hard-coded 400, so a
# shorter caption list cannot raise an IndexError part-way through the run.
for i in range(len(images_annot)):
  prompt = images_annot[i]

  ##############################
  # Sample from the base model #
  ##############################

  # Create the text tokens to feed to the model.
  tokens = model.tokenizer.encode(prompt)
  tokens, mask = model.tokenizer.padded_tokens_and_mask(
      tokens, options['text_ctx']
  )

  # Pack the tokens together into model kwargs:
  # conditional entries first, unconditional entries second.
  model_kwargs = dict(
      tokens=th.tensor(
          [tokens] * batch_size + [uncond_tokens] * batch_size, device=device
      ),
      mask=th.tensor(
          [mask] * batch_size + [uncond_mask] * batch_size,
          dtype=th.bool,
          device=device,
      ),
  )

  # Sample from the base model; only the first batch_size samples are kept.
  model.del_cache()
  samples = diffusion.p_sample_loop(
      model_fn,
      (full_batch_size, 3, options["image_size"], options["image_size"]),
      device=device,
      clip_denoised=True,
      progress=True,
      model_kwargs=model_kwargs,
      cond_fn=None,
  )[:batch_size]
  model.del_cache()

  ##############################
  # Upsample the 64x64 samples #
  ##############################

  tokens = model_up.tokenizer.encode(prompt)
  tokens, mask = model_up.tokenizer.padded_tokens_and_mask(
      tokens, options_up['text_ctx']
  )

  # Create the model conditioning dict.
  model_kwargs = dict(
      # Low-res image to upsample, snapped to the values an 8-bit image holds.
      low_res=((samples+1)*127.5).round()/127.5 - 1,

      # Text tokens
      tokens=th.tensor(
          [tokens] * batch_size, device=device
      ),
      mask=th.tensor(
          [mask] * batch_size,
          dtype=th.bool,
          device=device,
      ),
  )

  # Sample from the upsampler model, starting from noise scaled by upsample_temp.
  model_up.del_cache()
  up_shape = (batch_size, 3, options_up["image_size"], options_up["image_size"])
  up_samples = diffusion_up.ddim_sample_loop(
      model_up,
      up_shape,
      noise=th.randn(up_shape, device=device) * upsample_temp,
      device=device,
      clip_denoised=True,
      progress=True,
      model_kwargs=model_kwargs,
      cond_fn=None,
  )[:batch_size]
  model_up.del_cache()

  # Save the output (call show_images(up_samples) here to preview it inline)
  save_image(up_samples, i)

In [None]:
# Start with Paddle, begin the Art Style
%cd PaddleGAN/

# Install required packages
!pip install -r requirements.txt
!pip install paddlepaddle

import requests

# Initialize the distances variables
distanceTotal = 0
distances_list = []

for i in range(0,400):
  # Paths
  PATH_OF_CONTENT_IMG = '/content/Images/output_text2img_' + str(i) + '.png'
  PATH_OF_STYLE_IMG = '/content/Style_Images/' + images_styles[i] + '.png'
  OUTPUT_PATH = '/content/Output/'
  STYLE = images_styles[i]

  # Use the lapstyle model from the PaddleGan library
  !python applications/tools/lapstyle.py --content_img_path {PATH_OF_CONTENT_IMG} --style_image_path {PATH_OF_STYLE_IMG} --style {STYLE} --output_path {OUTPUT_PATH}

  # Use the DeepAI API to get similarity between two images
  r = requests.post(
      "https://api.deepai.org/api/image-similarity",
      files={
          'image1': open('/content/Images/output_text2img_' + str(i) + '.png', 'rb'),
          'image2': open('/content/Output/LapStyle/stylized.png', 'rb'),
      },
      headers={'api-key': '##API-KEY##'}
  )
  # Get and store the different distances
  distance = r.json()['output']['distance']
  distanceTotal += distance
  distances_list.append(distance)



In [None]:
# Export the collected distances: wrap the list in a pandas DataFrame
# and write it out as a CSV file.
import pandas as pd

df = pd.DataFrame(data=distances_list)
df.to_csv(path_or_buf='distances.csv')
