In [1]:
from PIL import Image
import glob
from keras.applications.inception_v3 import InceptionV3
from keras.applications.inception_v3 import preprocess_input, decode_predictions
from keras.preprocessing import image
import numpy as np
import json
import sqlite3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt # plotting
import io
from datetime import datetime
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
from tqdm.auto import tqdm # progress bars
tqdm.pandas()

In [2]:
%%time
con = sqlite3.connect('D2019.11.29_S00522_I3171_P.pdb')
df = pd.read_sql_query("SELECT * from IMAGES WHERE Focal=0", con)
df

CPU times: user 78.1 ms, sys: 68.6 ms, total: 147 ms
Wall time: 144 ms


Unnamed: 0,Well,Run,Focal,Time,Image
0,1,1,0,43798.572520,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
1,1,2,0,43798.575104,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
2,1,3,0,43798.582538,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
3,1,4,0,43798.589974,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
4,1,5,0,43798.605711,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
...,...,...,...,...,...
4537,6,753,0,43804.315712,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
4538,6,754,0,43804.323078,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
4539,6,755,0,43804.330442,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...
4540,6,756,0,43804.340713,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...


Time appears to be represented as fractional days since Jan 1, 1900 (43798 days is ~119.9 years)

In [3]:
cur = con.cursor()
cur.execute("SELECT Val FROM GENERAL WHERE Par = 'Fertilization'")
fert = float(cur.fetchone()[0])
fert

43798.5520833333

In [4]:
df["datetime"] = pd.to_datetime(df.Time, origin=datetime(1900,1,1), unit="D").dt.floor("s")
df["datetime"].describe()

  df["datetime"].describe()


count                    4542
unique                   4542
top       2019-12-01 13:44:25
freq                        1
first     2019-12-01 13:44:25
last      2019-12-07 08:22:22
Name: datetime, dtype: object

In [5]:
df["Time since fertilisation (minutes)"] = (df.Time - fert) * 1440
df["Time since fertilisation (minutes)"].describe()

count    4542.000000
mean     4181.387300
std      2408.822391
min        29.428533
25%      2120.260183
50%      4139.253083
75%      6358.219979
max      8347.378417
Name: Time since fertilisation (minutes), dtype: float64

In [6]:
def images_to_sprite(data):
    """
    Creates the sprite image along with any necessary padding
    Source : https://github.com/tensorflow/tensorflow/issues/6322
    Args:
      data: NxHxW[x3] tensor containing the images.
    Returns:
      data: Properly shaped HxWx3 image with any necessary padding.
    """
    if len(data.shape) == 3:
        data = np.tile(data[...,np.newaxis], (1,1,1,3))
    data = data.astype(np.float32)
    min = np.min(data.reshape((data.shape[0], -1)), axis=1)
    data = (data.transpose(1,2,3,0) - min).transpose(3,0,1,2)
    max = np.max(data.reshape((data.shape[0], -1)), axis=1)
    data = (data.transpose(1,2,3,0) / max).transpose(3,0,1,2)
    
    n = int(np.ceil(np.sqrt(data.shape[0])))
    padding = ((0, n ** 2 - data.shape[0]), (0, 0),
            (0, 0)) + ((0, 0),) * (data.ndim - 3)
    data = np.pad(data, padding, mode='constant',
            constant_values=0)
    # Tile the individual thumbnails into an image.
    data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3)
            + tuple(range(4, data.ndim + 1)))
    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])
    data = (data * 255).astype(np.uint8)
    return data


def populate_img_arr(images, target_size=(100,100),should_preprocess= False):
    """
    Get an array of images for a list of image paths
    Args:
        target_size: the size of image , in pixels 
        should_preprocess: if the images should be processed (according to InceptionV3 requirements)
    Returns:
        arr: An array of the loaded images
    """
    arr = []
    for i,img_bytes in enumerate(images):
        img = Image.open(io.BytesIO(img_bytes))
        img = img.convert('RGB')
        img = img.resize(target_size, Image.NEAREST)
        x = image.img_to_array(img)
        arr.append(x)
    arr = np.array(arr)
    if should_preprocess:
        arr = preprocess_input(arr)
    return arr  

## Model Definition
### If you want to use another model, you can change it here

In [7]:
model = InceptionV3(include_top=False,pooling='avg')
model.summary()

Model: "inception_v3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, None,  0           []                               
                                 3)]                                                              
                                                                                                  
 conv2d (Conv2D)                (None, None, None,   864         ['input_1[0][0]']                
                                32)                                                               
                                                                                                  
 batch_normalization (BatchNorm  (None, None, None,   96         ['conv2d[0][0]']                 
 alization)                     32)                                                    

In [8]:
sample = df.sample(1000)
sample

Unnamed: 0,Well,Run,Focal,Time,Image,datetime,Time since fertilisation (minutes)
563,1,564,0,43802.939902,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,2019-12-05 22:33:27,6318.458283
1866,3,353,0,43801.223537,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,2019-12-04 05:21:53,3846.893717
1111,2,355,0,43801.237980,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,2019-12-04 05:42:41,3867.691033
4056,6,272,0,43800.636920,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,2019-12-03 15:17:09,3002.164883
1521,3,8,0,43798.628857,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,2019-12-01 15:05:33,110.554433
...,...,...,...,...,...,...,...
2756,4,486,0,43802.180721,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,2019-12-05 04:20:14,5225.238533
1880,3,367,0,43801.324951,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,2019-12-04 07:47:55,3992.928917
2841,4,571,0,43802.988067,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,2019-12-05 23:42:48,6387.816550
4151,6,367,0,43801.325080,b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00...,2019-12-04 07:48:06,3993.115917


In [9]:
%%time
img_arr = populate_img_arr(sample.Image,target_size=(100,100),should_preprocess=True)
preds = model.predict(img_arr,batch_size=64)
preds.tofile("./oss_data/tensor.bytes")
del img_arr,preds

CPU times: user 24.1 s, sys: 2.7 s, total: 26.8 s
Wall time: 4.51 s


In [10]:
%%time
raw_imgs = populate_img_arr(sample.Image, target_size=(100,100),should_preprocess=False)
sprite = Image.fromarray(images_to_sprite(raw_imgs).astype(np.uint8))
sprite.save('./oss_data/sprites.png')
del raw_imgs

CPU times: user 3.46 s, sys: 84 ms, total: 3.54 s
Wall time: 3.57 s


In [11]:
sample["label"] = "unknown"

In [12]:
sample[["Well", "Run", "datetime", "Time since fertilisation (minutes)", "label"]].to_csv('./oss_data/metadata.tsv',sep='\t',index_label="index")