In [2]:
import h5py
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation,Flatten, Conv2D,Dropout,MaxPooling2D
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import Callback

In [13]:
class LossHistory(Callback):
    """Keras callback that records the training loss after every batch.

    ``self.losses`` is (re)created whenever ``on_train_begin`` is invoked and
    then receives one entry per finished batch, in order.
    """

    def on_train_begin(self, logs=None):
        """Start a fresh, empty loss record.

        Args:
            logs: Unused; accepted for compatibility with the callback API.
        """
        self.losses = []

    def on_batch_end(self, batch, logs=None):
        """Append the loss reported for the batch that just finished.

        Args:
            batch: Index of the batch (unused).
            logs: Mapping of metric names to values. When it is ``None`` or
                has no ``'loss'`` key, ``None`` is recorded for this batch.
        """
        # `logs=None` replaces the shared mutable default `{}`; fall back to
        # an empty mapping so a missing `logs` does not raise.
        logs = logs or {}
        self.losses.append(logs.get('loss'))

In [3]:
# Relative path of the preprocessed HDF5 dataset.
INPUT_PATH = "../preprocessed/mydataset.hdf5"

# The dataset is split into NUMBER_OF_CHUNKS chunks of PHOTOS_IN_CHUNK photos.
NUMBER_OF_CHUNKS, PHOTOS_IN_CHUNK = 30, 1000
NUMBER_OF_PHOTOS = PHOTOS_IN_CHUNK * NUMBER_OF_CHUNKS

In [54]:
def create_generator(start=0,stop=0.7,batch_size=300,verbose = False):
    """Endlessly yield ``(X, Y)`` batches from a slice of the HDF5 dataset.

    The file at ``INPUT_PATH`` is read as chunks stored under
    ``f["data"][str(i)]`` and ``f["labels"][str(i)]``; each chunk is expected
    to hold ``PHOTOS_IN_CHUNK`` photos. Photos are addressed by a global index
    in ``[0, NUMBER_OF_PHOTOS)``. The generator walks the half-open range
    ``[int(start * NUMBER_OF_PHOTOS), int(stop * NUMBER_OF_PHOTOS))`` in steps
    of ``batch_size`` and starts over once the range is exhausted.

    Args:
        start: Fraction (0..1) of the dataset at which the range begins.
        stop: Fraction (0..1) of the dataset at which the range ends
            (exclusive).
        batch_size: Photos per batch; must be positive and smaller than
            ``PHOTOS_IN_CHUNK`` so that a batch touches at most two chunks.
        verbose: When truthy, print where each batch was read from.

    Yields:
        ``(X, Y)`` where ``X`` is the pixel data divided by 255 and ``Y`` the
        matching labels. The last batch of a pass is shorter than
        ``batch_size`` when the range length is not a multiple of it, so a
        batch never contains photos from beyond ``stop``.

    Raises:
        AssertionError: If the fractions or ``batch_size`` are out of bounds.
        ValueError: If the requested range contains no photos.
        (Both are raised on the first ``next()`` call, as for any generator.)
    """
    assert 0 <= start <= stop <= 1 and 0 < batch_size < PHOTOS_IN_CHUNK

    start = int(start*NUMBER_OF_PHOTOS)
    end = int(stop*NUMBER_OF_PHOTOS)
    if start >= end:
        raise ValueError("the requested range [start, stop) contains no photos")

    counter = start
    while True:
        container = counter // PHOTOS_IN_CHUNK
        next_container = (container + 1) % NUMBER_OF_CHUNKS
        _from = counter % PHOTOS_IN_CHUNK
        # Clip the batch at `end` (a global index) so it cannot spill into
        # the photos of the following split or wrap around to chunk 0.
        _to = _from + min(batch_size, end - counter)
        # Number of photos that have to come from the next chunk.
        excess = max(0, _to - PHOTOS_IN_CHUNK)
        # The file is opened per batch and closed before yielding, so no
        # handle stays open while the generator is suspended.
        with h5py.File(INPUT_PATH,"r") as f:
            X = f["data"][str(container)][_from:min(PHOTOS_IN_CHUNK,_to)]/255
            Y = f["labels"][str(container)][_from:min(PHOTOS_IN_CHUNK,_to)]
            if excess > 0:
                X = np.concatenate([X,f["data"][str(next_container)][0:excess]/255],axis=0)
                Y = np.concatenate([Y,f["labels"][str(next_container)][0:excess]],axis=0)
        counter += batch_size
        if counter >= end:
            counter = start
        if verbose:
            # Printed before the yield so the message appears when the batch
            # is produced, not when the next one is requested.
            print("Generated X of shape {} and Y of shape: {} from container [\"{}\"][{}:{}] and container [\"{}\"][0:{}]."
                  .format(X.shape,Y.shape,container,_from,min(PHOTOS_IN_CHUNK,_to),next_container,excess))
        yield X,Y
        # Drop the generator's references before the next batch is loaded.
        del X,Y

In [5]:
# Shape of one input sample, as passed to the first Conv2D layer.
input_shape = (224, 224, 3)
batch_size = 32

# Fractions of the whole dataset used for each split.
train_size, validation_size, test_size = 0.2, 0.05, 0.05

In [6]:
# Training batches come from the first `train_size` fraction of the dataset,
# validation batches from the `validation_size` fraction right after it.
train_generator = create_generator(0, train_size, batch_size)
validation_generator = create_generator(
    train_size,
    train_size + validation_size,
    batch_size,
)

In [44]:
# Test batches follow the validation split; the stop fraction is capped at 1.
test_generator = create_generator(
    train_size + validation_size,
    min(1, train_size + validation_size + test_size),
    batch_size,
)

In [7]:
# Three convolutional stages (two 3x3 convolutions, 2x2 max-pooling, dropout)
# followed by a dense head. The single linear output unit together with the
# MSE loss makes this a regression model.
model = Sequential(
    [
        # Stage 1: 32 filters
        Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=input_shape),
        Conv2D(32, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        # Stage 2: 64 filters
        Conv2D(64, (3, 3), padding='same', activation='relu'),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        # Stage 3: 64 filters
        Conv2D(64, (3, 3), padding='same', activation='relu'),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        # Dense head
        Flatten(),
        Dense(512, activation='relu'),
        Dropout(0.5),
        Dense(1),
    ],
    name="First model",
)

model.summary()
optimizer = Adam()
model.compile(optimizer=optimizer, loss='mse')

Model: "First model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 224, 224, 32)      896       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 222, 222, 32)      9248      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 111, 111, 32)      0         
_________________________________________________________________
dropout (Dropout)            (None, 111, 111, 32)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 111, 111, 64)      18496     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 109, 109, 64)      36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 54, 54, 64)        

In [14]:
# Callback instance that collects the per-batch training loss in `history.losses`.
history = LossHistory()

In [15]:
# Train for one epoch on the training generator and validate on the validation
# generator. `Model.fit` accepts generators directly; `fit_generator` is
# deprecated. The step counts cover each split once, rounded down to whole
# batches.
model.fit(
    train_generator,
    steps_per_epoch=int(NUMBER_OF_PHOTOS*train_size/batch_size),
    validation_data=validation_generator,
    validation_steps=int(NUMBER_OF_PHOTOS*validation_size/batch_size),
    epochs=1,
    callbacks=[history],
)



<tensorflow.python.keras.callbacks.History at 0x7f2df59b87f0>

In [21]:
# Re-create the generator so evaluation starts at the first test photo, then
# report the loss (MSE, as configured in `compile`) over the whole batches of
# the test split.
test_generator = create_generator(
    train_size + validation_size,
    min(1, train_size + validation_size + test_size),
    batch_size,
)
model.evaluate(
    test_generator,
    steps=int(NUMBER_OF_PHOTOS * test_size / batch_size),
)



0.12438814609271029

In [23]:
# Re-create the generator so prediction starts at the first test photo, then
# predict for the whole batches of the test split.
test_generator = create_generator(
    train_size + validation_size,
    min(1, train_size + validation_size + test_size),
    batch_size,
)
Y_pred = model.predict(
    test_generator,
    steps=int(NUMBER_OF_PHOTOS * test_size / batch_size),
)

array([[0.9349745 ],
       [0.9381377 ],
       [0.9360941 ],
       ...,
       [0.93140465],
       [0.9275928 ],
       [0.9367203 ]], dtype=float32)

In [27]:
# One prediction per test photo that was fed to the model.
np.shape(Y_pred)

(1472, 1)

In [45]:
# Collect the true labels for the same batches that `model.predict` received.
# The generator is re-created here so the labels start at the first test photo
# even if an earlier cell has already drawn batches from `test_generator`.
test_generator = create_generator(start = train_size+validation_size,stop=min(1,train_size+validation_size+test_size),batch_size=batch_size)
steps = int(NUMBER_OF_PHOTOS*test_size/batch_size)
# `zip` with `range(steps)` first stops after exactly `steps` batches (and
# draws nothing when `steps` is 0). The leading empty float64 array keeps the
# result float64 and makes the concatenation valid for zero batches; joining
# once avoids re-copying the accumulated array on every iteration.
Y = np.concatenate(
    [np.empty(shape=(0,))] + [y for _, (x, y) in zip(range(steps), test_generator)],
    axis=0,
)

Generated X of shape (32, 224, 224, 3) and Y of shape: (32,) from container ["7"][500:532] and container ["8"][0:0].
Generated X of shape (32, 224, 224, 3) and Y of shape: (32,) from container ["7"][532:564] and container ["8"][0:0].
Generated X of shape (32, 224, 224, 3) and Y of shape: (32,) from container ["7"][564:596] and container ["8"][0:0].
Generated X of shape (32, 224, 224, 3) and Y of shape: (32,) from container ["7"][596:628] and container ["8"][0:0].
Generated X of shape (32, 224, 224, 3) and Y of shape: (32,) from container ["7"][628:660] and container ["8"][0:0].
Generated X of shape (32, 224, 224, 3) and Y of shape: (32,) from container ["7"][660:692] and container ["8"][0:0].
Generated X of shape (32, 224, 224, 3) and Y of shape: (32,) from container ["7"][692:724] and container ["8"][0:0].
Generated X of shape (32, 224, 224, 3) and Y of shape: (32,) from container ["7"][724:756] and container ["8"][0:0].
Generated X of shape (32, 224, 224, 3) and Y of shape: (32,) fro

In [60]:
# Mean of the model's predictions on the test split.
np.mean(Y_pred)

0.92975616

In [62]:
# Spread of the model's predictions on the test split.
np.std(Y_pred)

0.006988769

In [63]:
# Mean of the true labels collected from the test generator.
np.mean(Y)

1.0277006567896756

In [65]:
# Spread of the true labels collected from the test generator.
np.std(Y)

0.3387483870983686

<h1>Conclusion</h1>
<p> <h4> Because the ratios are distributed with a low standard deviation (about 33% of the mean), our model misrepresents reality: it finds a single value that gives it the smallest error over all examples and sticks to it. As a result, the predictions have a microscopic standard deviation (0.75% of their mean). A possible solution is to discretize the dataset into a set of equally populated classes.</h4> </p> 