In [1]:
import h5py
import os
import numpy as np
from IPython.display import clear_output
from PIL import Image
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation,Flatten, Conv2D,Dropout,MaxPooling2D
from sklearn.model_selection import train_test_split

from tensorflow.keras.callbacks import History 

import matplotlib.pyplot as plt

In [None]:
from tensorflow.keras.optimizers import Adam

In [2]:
NUMBER_OF_PHOTOS = 30000

In [3]:
def size_MB(array):
    return int(array.nbytes/1000000)

In [4]:
### DONT EDIT - VALUES FROM THIS CELL ARE USED LATER IN SOME FUNCTIONS

path = "../input_data/"
photographers = [f for f in os.listdir(path) if f.endswith('.hdf5')]
photographers_part = [os.path.getsize(path+photographer) for photographer in photographers]
photographers_part = [x/sum(photographers_part) for x in photographers_part]

In [6]:
def datasets_generator(start=0.0,end = 0.7,examples_in_chunk = 100):
    
    assert start >= 0
    assert end <= 1
    assert start < end
    assert examples_in_chunk < 1500
    
    start = int(start*NUMBER_OF_PHOTOS/examples_in_chunk)
    end = int(np.ceil(end*NUMBER_OF_PHOTOS/examples_in_chunk))
    for b in range(start,end):
        X = np.ndarray(shape = (0,224,224,3))
        Y = np.ndarray(shape = (0,))
        for i in range (len(photographers)):
            if i%3==0:
                clear_output()
                print("Batch: #{}. Progress: {}%".format(b+1,round(i/len(photographers),2)))
            start = round(photographers_part[i]*examples_in_chunk*b)
            stop  = start+round(photographers_part[i]*examples_in_chunk)
            with h5py.File("{}{}".format(path, photographers[i]),"r") as f:
                X = np.concatenate([X,f["data"][start:stop]],axis=0)
                Y = np.concatenate([Y,f["labels"][start:stop]],axis=0)
        yield X/255,Y

In [7]:
def create_dataset(examples_in_chunk = 1000):
    input_path = "../input_data/"
    output_path = "../preprocessed/mydataset.hdf5"
    
    with h5py.File(output_path, 'a') as f:
        f.create_group("data")
        f.create_group("labels")
        f.create_group("scores")
        
    finish = int(np.ceil(1*NUMBER_OF_PHOTOS/examples_in_chunk))
    for b in range(finish):
        X = np.ndarray(dtype = "int8", shape = (0,224,224,3))
        Y = np.ndarray(shape = (0,))
        S = np.ndarray(shape = (0,))
        for i in range (len(photographers)):
            if i%3==0:
                clear_output()
                print("Batch: #{}. Progress: {}%".format(b+1,round(i/len(photographers),2)))
            start = round(photographers_part[i]*examples_in_chunk*b)
            stop  = start+round(photographers_part[i]*examples_in_chunk)
            with h5py.File("{}{}".format(input_path, photographers[i]),"r") as f:
                X = np.concatenate([X,f["data"][start:stop]],axis=0)
                Y = np.concatenate([Y,f["labels"][start:stop]],axis=0)
                S = np.concatenate([S,f["scores"][start:stop]],axis=0)
        with h5py.File(output_path, 'a') as f:
            f['data'].create_dataset("{}".format(b),data = X)
            f['labels'].create_dataset("{}".format(b),data = Y)
            f['scores'].create_dataset("{}".format(b),data = S)

In [8]:
#create_dataset()

In [13]:
with h5py.File("../preprocessed/mydataset.hdf5","r") as f:
    X = f["data"]["29"][:]/255
    Y = f["labels"]["29"][:]

In [9]:
# for data,labels in datasets_generator(start = 0,end=0.05):
#     X = data
#     Y = labels
#X_train, X_test, Y_train, Y_test = train_test_split(X,Y,test_size = 0.2)

In [10]:
input_shape = (224,244,3)

In [11]:
model = Sequential(name= "First model")
model.add(Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=input_shape))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
  
model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
  
model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
  
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1))

model.summary()
optimizer = Adam()
model.compile(optimizer=optimizer, loss='mse')

Model: "First model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 224, 244, 32)      896       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 222, 242, 32)      9248      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 111, 121, 32)      0         
_________________________________________________________________
dropout (Dropout)            (None, 111, 121, 32)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 111, 121, 64)      18496     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 109, 119, 64)      36928     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 54, 59, 64)        

In [18]:
batch_size = 100
train_size = 0.01
validation_size = 0.002

train_generator = datasets_generator(start = 0,end=train_size,examples_in_chunk=batch_size)
validation_generator = datasets_generator(start = train_size,end=train_size+validation_size,examples_in_chunk=batch_size)
test_generator = datasets_generator(start = train_size+validation_size,end=1,examples_in_chunk=batch_size)

lol = model.fit_generator(generator=train_generator,
                              steps_per_epoch=NUMBER_OF_PHOTOS*train_size/batch_size-1,
                             validation_data=validation_generator,
                             validation_steps=NUMBER_OF_PHOTOS*validation_size/batch_size-1)

Batch: #3. Progress: 0.98%


In [41]:
#history = model.fit(X_train,Y_train,epochs=5,batch_size=30,validation_split=0.2)

In [42]:
# # Loss Curves
# plt.figure(figsize=[8,6])
# plt.plot(history.history['loss'][1:],'r',linewidth=3.0)
# plt.plot(history.history['val_loss'][1:],'b',linewidth=3.0)
# plt.legend(['Training loss', 'Validation Loss'],fontsize=18)
# plt.xlabel('Epochs ',fontsize=16)
# plt.ylabel('Loss',fontsize=16)
# plt.title('Loss Curves',fontsize=16)
  