# Sorting images using artificial intelligence


## Outline

1.   Install libraries and mount drive
2.   Extract image features/embeddings
3.   Reduce dimensionality using UMAP
4.   Create a chart of the sorted images
5.   Create a grid of the sorted images


# Setup


In [None]:
#@title ▶ Install the required tools

!pip -q install pointgrid iiif-downloader yale-dhlab-rasterfairy yale-dhlab-keras-preprocessing
!pip -q install umap-learn==0.5.1 hdbscan
!pip -q install lap
!pip -q install mediapy

## Google drive

Connect Google Colab to Google Drive to facilitate file management. This way your drive content will appear as a folder on the remote server and you can read and write files to it. Let's see how it works.

First we have to authorise the connection by executing the following code.

> There will be some warnings ⚠⚠⚠ , and there's a good reason: if you do this, the colab notebook code will have access to all your files there. In this case you can trust me 👼



In [None]:
# Mount Google Drive at /content/drive; the input images are read from there
# in a later cell.
from google.colab import drive
drive.mount('/content/drive')

# Processing

In [None]:
#@title ▶ Define necessary functions

import datetime
import glob
import os
import json
import math

from urllib.parse import unquote
from tqdm import tqdm
from PIL import Image, ImageDraw
from google.colab import files
import mediapy as media
from matplotlib import pyplot as plt
%matplotlib inline

from scipy.spatial.distance import cdist
from sklearn.decomposition import PCA
import numpy as np
import lap
from umap import UMAP

from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications.inception_v3 import preprocess_input
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.models import Model


def timestamp():
    '''Return the current local date and time as text, followed by a colon.

    Used as a prefix for the progress messages printed by this notebook.
    '''
    now = datetime.datetime.now()
    return f'{now}:'


def clean_filename(s):
    '''Return a file-system friendly file name derived from a path or URL.

    The directory part is dropped first, then percent-escapes are decoded,
    and finally every character in the forbidden set below is removed.
    '''
    forbidden = str.maketrans('', '', '<>:;,"/\\|?*[]')
    decoded_name = unquote(os.path.basename(s))
    return decoded_name.translate(forbidden)


class Img:
    '''An image file loaded from disk together with the path it came from.'''

    def __init__(self, *args):
        # The first positional argument is the image path; any further
        # arguments are ignored.
        image_path = args[0]
        self.path = image_path
        # The image is loaded eagerly, so an unreadable file fails here.
        self.original = load_img(image_path)


def stream_images(image_path):
    '''Lazily yield an Img for every entry of image_path, a list of image paths.

    A path that raises while being processed is reported on stdout and
    skipped, so the generator may yield fewer items than it was given.
    '''
    for path in image_path:
        try:
            yield Img(path)
        except Exception as exc:
            print(timestamp(), 'Image', path, 'could not be processed --', exc)


def get_inception_vectors(image_paths, out_dir):
    '''Compute (or load from cache) one InceptionV3 feature vector per image.

    Each vector is the output of the network's 'avg_pool' layer for the image
    resized to 299x299. Vectors are cached as .npy files under
    <out_dir>/image-vectors/inception, keyed by the cleaned file name, and an
    existing cache file is reused instead of running the network again.

    Args:
        image_paths: list of image file paths.
        out_dir: directory under which the vector cache is created.

    Returns:
        A numpy array with one row per image that could be loaded. Images
        that fail to load are skipped by stream_images, so the result can be
        shorter than image_paths; a warning is printed when that happens
        because the rows then no longer line up with image_paths.
    '''
    print(timestamp(), 'Creating Inception vectors for {} images'.format(len(image_paths)))
    vector_dir = os.path.join(out_dir, 'image-vectors', 'inception')
    # exist_ok avoids the check-then-create race and the explicit exists() test.
    os.makedirs(vector_dir, exist_ok=True)
    base = InceptionV3(include_top=True, weights='imagenet')
    model = Model(inputs=base.input, outputs=base.get_layer('avg_pool').output)
    print(timestamp(), 'Creating image array')
    vecs = []
    with tqdm(total=len(image_paths)) as progress_bar:
        for img in stream_images(image_paths):
            vector_path = os.path.join(vector_dir, clean_filename(img.path) + '.npy')
            if os.path.exists(vector_path):
                vec = np.load(vector_path)
            else:
                im = preprocess_input(img_to_array(img.original.resize((299, 299))))
                # verbose=0 keeps Keras from printing its own progress output
                # for every single image on top of the tqdm bar.
                vec = model.predict(np.expand_dims(im, 0), verbose=0).squeeze()
                np.save(vector_path, vec)
            vecs.append(vec)
            progress_bar.update(1)
    skipped = len(image_paths) - len(vecs)
    if skipped:
        print(timestamp(), 'WARNING:', skipped,
              'image(s) could not be processed; the returned vectors are not aligned with the input list')
    return np.array(vecs)

def generate_matrix(grid_xy, image_names, width, height, image_size, proportion_w, proportion_h, margin, background_color, swap=False):
  '''Paste every image into its grid cell, save the mosaic as a JPEG and preview it.

  Relies on the notebook globals input_path, output_path and output_name, and
  on the Colab helpers files.download and display.

  Args:
    grid_xy: one (x, y) position per image. The placement maths maps 0 to the
      first column/row and 1 to the last one (presumably the positions
      produced by get_grid_layout -- confirm against the caller).
    image_names: image file names or paths, in the same order as grid_xy.
      They are joined onto input_path; os.path.join leaves absolute paths
      unchanged.
    width: number of grid columns.
    height: number of grid rows.
    image_size: tile width in pixels.
    proportion_w: horizontal part of the tile aspect ratio.
    proportion_h: vertical part of the tile aspect ratio; the tile height is
      floor(image_size * proportion_h / proportion_w).
    margin: empty border in pixels added on every side of each tile.
    background_color: hexadecimal colour without the leading '#'.
    swap: when True the two coordinates of every position are exchanged
      (second value used as x, first as y). width and height are NOT
      exchanged, so the caller must pass them for the swapped layout.
  '''
  # Indices into each position; swap exchanges the two axes.
  pos_x, pos_y = (1, 0) if swap else (0, 1)

  tile_size_dst_w = image_size
  tile_size_dst_h = math.floor(tile_size_dst_w * proportion_h / proportion_w)

  # Size of one grid cell: the tile plus its margin on both sides.
  cell_w = tile_size_dst_w + 2 * margin
  cell_h = tile_size_dst_h + 2 * margin

  image_width = width * cell_w
  image_height = height * cell_h

  im = Image.new("RGB", (image_width, image_height), f'#{background_color}')

  for idx, filename in enumerate(image_names):
      pos = grid_xy[idx]

      # Round instead of truncating: the positions are floats, and a value
      # such as 127.999999 must land on pixel 128, not 127.
      left = int(round(margin + pos[pos_x] * (width - 1) * cell_w))
      top = int(round(margin + pos[pos_y] * (height - 1) * cell_h))

      # The context manager closes the source file once the tile is pasted.
      with Image.open(os.path.join(input_path, filename)) as tile_im:
          tile_im.thumbnail((tile_size_dst_w, tile_size_dst_h), Image.Resampling.LANCZOS)
          tile_w, tile_h = tile_im.size
          # Centre tiles that are smaller than the cell.
          tile_disp_w = math.floor((tile_size_dst_w - tile_w)/2)
          tile_disp_h = math.floor((tile_size_dst_h - tile_h)/2)
          im.paste(tile_im, (left + tile_disp_w, top + tile_disp_h))

  output_image = os.path.join(output_path, f'{output_name}.jpg')
  im.save(output_image, quality=90)
  files.download(output_image)

  # Show a reduced copy in the notebook. thumbnail() shrinks in place, keeps
  # the aspect ratio and never enlarges, so the saved image stays untouched.
  preview = im.copy()
  preview.thumbnail((1920, 1920))
  display(preview)

def get_umap_layout(vectors):
  '''Get the x,y positions of images passed through a umap projection.

  The vectors are first reduced with PCA (to at most 100 components) and the
  result is projected to 2 dimensions with UMAP.

  Args:
    vectors: 2D array-like with one feature vector per row.

  Returns:
    The fitted UMAP embedding_ attribute (one 2D position per input row).
  '''
  vectors = np.asarray(vectors)
  n_samples, n_features = vectors.shape
  # PCA cannot produce more components than there are samples or features.
  n_components = min(100, n_samples, n_features)
  w = PCA(n_components=n_components).fit_transform(vectors)

  # No random_state is passed, so the layout can differ between runs.
  model = UMAP(
      n_neighbors=15,
      min_dist=0.01,
      n_components=2,
      metric='correlation')
  z = model.fit(w).embedding_
  return z


def get_grid_layout(data2d):
  '''Snap a cloud of 2D points onto a regular grid, one cell per point.

  The grid is (nearly) square, spans 0..1 on both axes and has at least as
  many cells as there are points. Points are matched to cells by solving a
  linear assignment problem on the squared Euclidean distances.

  Args:
    data2d: array with one (x, y) row per point; the grid covers 0..1, so
      the points are expected to be normalised to that range.

  Returns:
    (width, height, positions): the number of grid columns and rows, and an
    array with the grid position selected for each row of data2d.
    For empty input returns (0, 0, empty array of shape (0, 2)).
  '''
  # Size the grid from the data actually passed in, not from a global list.
  count = len(data2d)
  if count == 0:
    return 0, 0, np.empty((0, 2))

  side = math.ceil(math.sqrt(count))
  width = side
  # Drop the last row when side * (side - 1) cells are already enough.
  height = side if side * (side - 1) < count else side - 1

  xv, yv = np.meshgrid(np.linspace(0, 1, width), np.linspace(0, 1, height))
  grid = np.dstack((xv, yv)).reshape(-1, 2)

  cost = cdist(grid, data2d, 'sqeuclidean')
  peak = cost.max()
  # Rescale the costs to a fixed range; skip when every cost is zero to
  # avoid dividing by zero.
  if peak > 0:
    cost = cost * (10000000. / peak)

  # Using cost.astype(int) is in principle faster, but speed does not matter here.
  # extend_cost=True lets lapjv accept the non-square cost matrix.
  min_cost, row_assigns, col_assigns = lap.lapjv(cost, extend_cost=True)
  # col_assigns is used as the grid-cell index for each data point.
  return width, height, grid[col_assigns]


def generate_chart(data2d, image_names, image_size, proportion_w, proportion_h, background_color):
  '''Draw every image at its 2D position on a 1920x1920 canvas, save and show it.

  Relies on the notebook globals input_path, output_path and output_name, and
  on the Colab helpers files.download and display.

  Args:
    data2d: array with one (x, y) row per image; it is normalised to 0..1
      per axis on a copy, so the caller's array is left unchanged.
    image_names: image file names or paths, in the same order as data2d.
      They are joined onto input_path; os.path.join leaves absolute paths
      unchanged.
    image_size: tile width in pixels.
    proportion_w: horizontal part of the tile aspect ratio.
    proportion_h: vertical part of the tile aspect ratio; the tile height is
      floor(image_size * proportion_h / proportion_w).
    background_color: hexadecimal colour without the leading '#'.
  '''
  # Normalise on a float copy instead of modifying the argument in place.
  points = np.array(data2d, dtype=float)
  points -= points.min(axis=0)
  span = points.max(axis=0)
  # An axis on which all points coincide would divide by zero.
  span[span == 0] = 1.0
  points /= span

  tile_size_dst_w = image_size
  tile_size_dst_h = math.floor(tile_size_dst_w * proportion_h / proportion_w)

  image_width = 1920
  image_height = 1920

  # Largest top-left offset that still keeps a whole tile on the canvas.
  # Scaling by the full canvas size would paste the tiles at coordinate 1.0
  # completely outside the image.
  max_left = max(0, image_width - tile_size_dst_w)
  max_top = max(0, image_height - tile_size_dst_h)

  im = Image.new("RGB", (image_width, image_height), f'#{background_color}')

  for idx, filename in enumerate(image_names):
      pos = points[idx]

      left = int(pos[0] * max_left)
      top = int(pos[1] * max_top)

      # The context manager closes the source file once the tile is pasted.
      with Image.open(os.path.join(input_path, filename)) as tile_im:
          tile_im.thumbnail((tile_size_dst_w, tile_size_dst_h), Image.Resampling.LANCZOS)
          tile_w, tile_h = tile_im.size
          # Centre tiles that are smaller than the tile box.
          tile_disp_w = math.floor((tile_size_dst_w - tile_w)/2)
          tile_disp_h = math.floor((tile_size_dst_h - tile_h)/2)
          im.paste(tile_im, (left + tile_disp_w, top + tile_disp_h))

  output_image = os.path.join(output_path, f'{output_name}-chart.jpg')
  im.save(output_image, quality=90)
  files.download(output_image)

  display(im)

In [None]:
#@title ▶ Extract image features (embeddings) using an image classification neural network (InceptionV3)

#@markdown We could use any other embedding model (https://huggingface.co/models?pipeline_tag=image-feature-extraction)

#@markdown This should be the path of the folder inside Drive. The best way is to find it in the file explorer on the left, click on the 3 points, use the option "copy path" and paste the value here.

#@markdown ⚠ Only images whose file names end in .jpg or .png are processed.
input_path = "/content/drive/MyDrive/BAU-DwD/UMAP-cefalopodes" #@param {type:"string"}
#@markdown The name you want to give to the UMAP (it must not contain spaces)
output_name = "BAU" #@param {type:"string"}

# All generated files are written under /content/<output_name>.
output_path = os.path.join("/", "content", output_name)

# Collect the *.jpg and *.png files of the input folder, sorted by path.
# This list fixes the image order used by the later cells.
imagelist = sorted(glob.glob(os.path.join(input_path, "*.jpg")) + glob.glob(os.path.join(input_path, "*.png")))
# One feature vector per image; vectors are cached under /content/<output_name>/data.
vectors = get_inception_vectors(imagelist, os.path.join("/content", output_name, "data"))

In [None]:
#@title 🖼 Reduce dimensionality using UMAP

#@markdown It may take some time depending on the number of images

#@markdown It also displays the calculated positions in a 2D chart

print(f'Reducing dimensionality from {len(vectors[0])} to 2 dimensions')

data2d = get_umap_layout(vectors)
# The projection is saved before normalisation; later cells reload this file.
np.save(os.path.join("/content", output_name, "data", "umap.npy"), data2d)

# Rescale each axis to the 0..1 range (in place).
data2d -= data2d.min(axis=0)
data2d /= data2d.max(axis=0)

# Scatter plot of the normalised positions.
plt.figure(figsize=(8, 8))
plt.scatter(data2d[:,0], data2d[:,1], s=12)
plt.show()


In [None]:
#@title (optional) Download positions in json

# create a json object with the positions and images

def save_layout(imagelist, vectors, name):
  '''Write one {"image", "x", "y"} record per image to a JSON file and download it.

  Images and vectors are paired in order; x and y are the first two values
  of each vector. The file is written to /content/<output_name>/data/<name>.json,
  where output_name is a notebook global.
  '''
  records = [
      {"image": image_path, "x": position[0], "y": position[1]}
      for image_path, position in zip(imagelist, vectors)
  ]

  output_json = os.path.join("/content", output_name, "data", f"{name}.json")
  with open(output_json, 'w') as outfile:
      json.dump(records, outfile)

  # Imported here, after the file has been written, as in the original cell.
  from google.colab import files
  files.download(output_json)

# Export the 2D UMAP positions (data2d), not the raw Inception feature
# vectors: save_layout stores the first two values of each row as x and y.
save_layout(imagelist, data2d.astype(float), "umap")

In [None]:
#@title 🖼 Generate image chart

#@markdown Individual image size. Big values make the grid image bigger and may fail.
image_size = 128 #@param {type:"number"}

#@markdown Individual image proportion. Leave at 1/1 if images have different orientations or 16/9 if they are frames from a video.
proportion_w = 1 #@param {type:"number"}
proportion_h = 1 #@param {type:"number"}

#@markdown Background color in hexadecimal format (you can copy/paste from photoshop). 000000 is black, ffffff is white
background_color = "000000" #@param {type:"string"}

# Reload the UMAP positions saved by the dimensionality-reduction cell.
data2d = np.load(os.path.join("/content", output_name, "data", "umap.npy"))

generate_chart(data2d, imagelist, image_size, proportion_w, proportion_h, background_color)

In [None]:
#@title 🖼 Convert from 2D positions to 2D grid

#@markdown It also displays the calculated positions in a 2D grid

# Reload the UMAP positions saved by the dimensionality-reduction cell.
data2d = np.load(os.path.join("/content", output_name, "data", "umap.npy"))

# Rescale each axis to the 0..1 range before matching points to grid cells.
data2d -= data2d.min(axis=0)
data2d /= data2d.max(axis=0)

# width and height are also used later by the image-matrix cell.
width, height, grid = get_grid_layout(data2d)
np.save(os.path.join("/content", output_name, "data", "grid.npy"), grid)

# Scatter plot of the grid position assigned to each image.
plt.figure(figsize=(8,8))
plt.scatter(grid[:,0], grid[:,1], marker='o', s=12)
plt.show()


In [None]:
#@title (optional) Download grid positions in json

# Export the grid position of every image as JSON and download the file.
save_layout(imagelist, grid.astype(float), "grid")

In [None]:
#@title 🖼 Generate image matrix

#@markdown Individual image size. Big values make the grid image bigger and may fail.
image_size = 64 #@param {type:"number"}

#@markdown Individual image proportion. Leave at 1/1 if images have different orientations or 16/9 if they are frames from a video.
proportion_w = 1 #@param {type:"number"}
proportion_h = 1 #@param {type:"number"}

#@markdown Margin between images (in pixels)
margin = 0 #@param {type:"number"}

#@markdown Background color in hexadecimal format (you can copy/paste from photoshop). 000000 is black, ffffff is white
background_color = "000000" #@param {type:"string"}

# Reload the grid positions saved by the grid-layout cell.
grid = np.load(os.path.join("/content", output_name, "data", "grid.npy"))

# width and height are not defined in this cell; they come from the
# grid-layout cell, which therefore has to be run first in the same session.
generate_matrix(grid, imagelist, width, height, image_size, proportion_w, proportion_h, margin, background_color)

# Credits

Taller Estampa https://tallerestampa.com / https://github.com/estampa

### Based on
Kyle McDonald notebook https://github.com/kylemcdonald/CloudToGrid/blob/master/CloudToGrid.ipynb
