In [1]:
import glob
import io
import json
import os
import sqlite3

import matplotlib.pyplot as plt # plotting
import numpy as np
import numpy as np
import pandas as pd
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_v3 import preprocess_input, decode_predictions
from keras.preprocessing import image
from PIL import Image
from PIL import Image, ImageFile
from tqdm.auto import tqdm # progress bars

ImageFile.LOAD_TRUNCATED_IMAGES = True
tqdm.pandas()

In [2]:
%%time
df = pd.read_sql_query("SELECT * from IMAGES", sqlite3.connect('D2019.11.29_S00522_I3171_P.pdb'))
df

CPU times: user 359 ms, sys: 1.74 s, total: 2.1 s
Wall time: 2.1 s


Unnamed: 0,Well,Run,Focal,Time,Image
0,1,1,-75,43798.572520,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
1,1,1,-60,43798.572520,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
2,1,1,-45,43798.572520,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
3,1,1,-30,43798.572520,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
4,1,1,-15,43798.572520,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
...,...,...,...,...,...
49957,6,757,15,43804.348874,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
49958,6,757,30,43804.348874,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
49959,6,757,45,43804.348874,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
49960,6,757,60,43804.348874,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...


In [3]:
def images_to_sprite(data):
    """
    Tile a batch of images into a single square sprite sheet.

    Every image is min-max normalised on its own to [0, 1], the batch is
    zero-padded up to the next perfect square, and the thumbnails are laid
    out row by row on an n x n grid (n = ceil(sqrt(N))).

    Source : https://github.com/tensorflow/tensorflow/issues/6322
    Args:
      data: NxHxW[xC] tensor containing the images. A 3-D (grayscale) batch
        is replicated across three channels first.
    Returns:
      data: uint8 array of shape (n*H, n*W, C) holding the tiled images;
        grid cells beyond the N-th image are zero (black). An image whose
        pixels are all equal comes out as all zeros.
    """
    if len(data.shape) == 3:
        data = np.tile(data[..., np.newaxis], (1, 1, 1, 3))
    # astype copies, so the caller's array is never modified.
    data = data.astype(np.float32)

    # Shape that broadcasts one scalar per image over all of that image's pixels.
    per_image = (-1,) + (1,) * (data.ndim - 1)

    lowest = data.reshape((data.shape[0], -1)).min(axis=1)
    data = data - lowest.reshape(per_image)

    highest = data.reshape((data.shape[0], -1)).max(axis=1)
    # A constant image has range 0 after the shift; dividing by it would
    # produce NaN, and casting NaN to uint8 is undefined. Its pixels are
    # already 0, so dividing by 1 instead leaves it black.
    highest[highest == 0] = 1
    data = data / highest.reshape(per_image)

    n = int(np.ceil(np.sqrt(data.shape[0])))
    # Pad only the batch axis so the image count becomes n * n.
    padding = ((0, n ** 2 - data.shape[0]), (0, 0),
            (0, 0)) + ((0, 0),) * (data.ndim - 3)
    data = np.pad(data, padding, mode='constant',
            constant_values=0)
    # Tile the individual thumbnails into an image:
    # (n, n, H, W, C) -> (n, H, n, W, C) -> (n*H, n*W, C).
    data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3)
            + tuple(range(4, data.ndim + 1)))
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])
    data = (data * 255).astype(np.uint8)
    return data


def populate_img_arr(images, target_size=(100,100),should_preprocess= False):
    """
    Decode a collection of encoded images into one stacked array
    Args:
        images: iterable of encoded image byte strings; each one is opened with PIL
        target_size: the size each image is resized to, in pixels (handed to PIL's resize)
        should_preprocess: if the stacked images should be processed (according to InceptionV3 requirements)
    Returns:
        arr: An array of the loaded images, in input order
    """
    def _decode(img_bytes):
        # Force three channels so every decoded image has the same layout.
        rgb = Image.open(io.BytesIO(img_bytes)).convert('RGB')
        return image.img_to_array(rgb.resize(target_size, Image.NEAREST))

    stacked = np.array([_decode(img_bytes) for img_bytes in images])
    return preprocess_input(stacked) if should_preprocess else stacked

## Model Definition
### If you want to use another model, you can change it here

In [4]:
# InceptionV3 without its classification head; global average pooling turns
# the last feature map into one flat feature vector per image.
model = InceptionV3(
    pooling='avg',
    include_top=False,
)
model.summary()

Model: "inception_v3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, None,  0           []                               
                                 3)]                                                              
                                                                                                  
 conv2d (Conv2D)                (None, None, None,   864         ['input_1[0][0]']                
                                32)                                                               
                                                                                                  
 batch_normalization (BatchNorm  (None, None, None,   96         ['conv2d[0][0]']                 
 alization)                     32)                                                    

 global_average_pooling2d (Glob  (None, 2048)        0           ['mixed10[0][0]']                
 alAveragePooling2D)                                                                              
                                                                                                  
Total params: 21,802,784
Trainable params: 21,768,352
Non-trainable params: 34,432
__________________________________________________________________________________________________


In [6]:
# Draw the subset of rows that gets embedded and exported below.
# A fixed random_state makes the draw repeatable, so the tensor, sprite and
# metadata files come out the same on every Restart & Run All. The size is
# capped at len(df) because DataFrame.sample raises when asked for more rows
# than exist (without replacement).
sample = df.sample(min(1000, len(df)), random_state=42)
sample

Unnamed: 0,Well,Run,Focal,Time,Image
38368,3,582,-75,43803.063510,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
3740,5,57,-75,43799.005581,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
23020,5,349,45,43801.194649,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
36269,4,550,-45,43802.843957,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
13959,4,212,-75,43800.187019,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
...,...,...,...,...,...
13798,1,210,-15,43800.172122,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
8304,5,126,75,43799.551712,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
44558,1,676,45,43803.748420,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
236,4,4,0,43798.590103,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...


In [8]:
%%time
img_arr = populate_img_arr(sample.Image,target_size=(100,100),should_preprocess=True)
preds = model.predict(img_arr,batch_size=64)
preds.tofile("./oss_data/tensor.bytes")
del img_arr,preds

CPU times: user 22.7 s, sys: 2.11 s, total: 24.8 s
Wall time: 4.3 s


In [9]:
%%time
raw_imgs = populate_img_arr(sample.Image, target_size=(100,100),should_preprocess=False)
sprite = Image.fromarray(images_to_sprite(raw_imgs).astype(np.uint8))
sprite.save('./oss_data/sprites.png')
del raw_imgs

CPU times: user 3.31 s, sys: 202 ms, total: 3.51 s
Wall time: 3.52 s


In [26]:
# Every sampled row gets the same placeholder label.
sample = sample.assign(label="unknown")

In [27]:
# Write the per-image metadata as TSV, leaving out the Image bytes column;
# the frame's index is written as the first column under the header "index".
sample.loc[:, ["Well", "Run", "Focal", "Time", "label"]].to_csv(
    './oss_data/metadata.tsv',
    index_label="index",
    sep='\t',
)