In [31]:
from __future__ import print_function
from keras.layers import Dense, Conv2D, BatchNormalization, Activation, MaxPooling2D
from keras.layers import AveragePooling2D, Input, Flatten, Activation, Dropout, Dense
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from keras.callbacks import ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras.regularizers import l2
from keras import backend as K
from keras.models import Sequential
from painting_loader import PaintingFolder
import pandas as pd
import random
import numpy as np

In [3]:
# Load the metadata for ALL paintings; it is stored in all_data_info.csv.
df = pd.read_csv("/home/ivana/data/all_data_info.csv")

# Seed for the random calls. It is passed as `random_state` to the pandas
# sampling further down, and it also seeds the `random` and NumPy global
# generators so that the random crops and the reservoir sampling helpers
# give the same results on every Restart & Run All.
seed = 123
random.seed(seed)
np.random.seed(seed)

# Show the first rows as a sanity check.
df.head()

Unnamed: 0,artist,date,genre,pixelsx,pixelsy,size_bytes,source,style,title,artist_group,in_train,new_filename
0,Barnett Newman,1955.0,abstract,15530.0,6911.0,9201912.0,wikiart,Color Field Painting,Uriel,train_only,True,102257.jpg
1,Barnett Newman,1950.0,abstract,14559.0,6866.0,8867532.0,wikiart,Color Field Painting,Vir Heroicus Sublimis,train_only,True,75232.jpg
2,kiri nichol,2013.0,,9003.0,9004.0,1756681.0,,Neoplasticism,,test_only,False,32145.jpg
3,kiri nichol,2013.0,,9003.0,9004.0,1942046.0,,Neoplasticism,,test_only,False,20304.jpg
4,kiri nichol,2013.0,,9003.0,9004.0,1526212.0,,Neoplasticism,,test_only,False,836.jpg


In [17]:
############# SANITY CHECK: TRAIN_2 FOLDER ###########################
# `threshold` is the minimum number of paintings an artist must have
# in order to be taken into account.
threshold = 20

# Rows flagged as training data whose file name starts with '2'
# (per the original note, these are the images belonging to train_2).
is_train_2 = (df['in_train'] == True) & df['new_filename'].str.startswith('2')
df2 = df.loc[is_train_2]
# x2.head()

# Number of paintings per artist in this sub-table, one entry per artist.
x2 = list(df2['artist'].value_counts())

# How many artists have at least `threshold` paintings.
print(sum(1 for count in x2 if count >= threshold))

# Shape of the sub-table.
print(df2.shape)
# df2.head()

# From here on the notebook works with the sub-table under the name `df`.
df = df2

87
(8476, 12)


In [18]:
# Number of artists that have 10 paintings or fewer.
print(sum(1 for count in x2 if count <= 10))

1229


In [19]:
# A small problem: painters with a single painting, or more generally
# those with 10 paintings or fewer.
small_artists = list(filter(lambda count: count <= 10, x2))
print('Autora s <= 10 slika ima ' + str(len(small_artists)))

# The following lines are kept for debugging:
# tmp2 = df['artist'].value_counts()
# small_artist_df = tmp2[tmp2 <= 10].index.tolist()
# print(len(small_artist_df))

Autora s <= 10 slika ima 1229


In [28]:
# Per-artist split into train / validation / test in an 80 / 10 / 10 ratio.
# The counts below are the values of that ratio for threshold = 20.
num_train = 16       # threshold * 0.8
num_val = 2          # threshold * 0.1
num_test = num_val
num_samples = num_train + num_val + num_test   # equals threshold

# Number of paintings per artist.
temp = df['artist'].value_counts()
# Artists we keep: those with at least `threshold` paintings.
artists = temp[temp >= threshold].index.tolist()
# print(artists)
num_artists = len(artists)
print('Prepoznajemo ' + str(num_artists) + ' slikara')

train_dfs = []
val_dfs = []
test_dfs = []

# For every kept artist draw `num_samples` paintings and distribute them
# over the train, validation and test parts.
for a in artists:
    # Exact match on the artist name. A prefix match (`str.startswith(a)`)
    # would also select rows of any other artist whose name merely begins
    # with `a`, mixing foreign paintings into this artist's sample. Equality
    # also needs no special NaN handling: a missing name compares as False.
    tmp = df[df['artist'] == a].sample(n=num_samples, random_state=seed)
    t_df = tmp.sample(n=num_train, random_state=seed)
    rest_df = tmp.loc[~tmp.index.isin(t_df.index)]  # complement of t_df
    v_df = rest_df.sample(n=num_val, random_state=seed)
    te_df = rest_df.loc[~rest_df.index.isin(v_df.index)]  # what is left is test

    train_dfs.append(t_df)
    val_dfs.append(v_df)
    test_dfs.append(te_df)

train_df = pd.concat(train_dfs)
val_df = pd.concat(val_dfs)
test_df = pd.concat(test_dfs)

# Every artist must contribute exactly the configured number of rows.
assert len(train_df) == num_artists * num_train
assert len(val_df) == num_artists * num_val
assert len(test_df) == num_artists * num_test

print('train tablica\t\t', train_df.shape)
print('validation tablica\t', val_df.shape)
print('test tablica\t\t', test_df.shape)

Prepoznajemo 87 slikara
train tablica		 (1392, 12)
validation tablica	 (174, 12)
test tablica		 (174, 12)


In [32]:
# Per-channel mean and standard deviation, three values each.
# NOTE(review): named after ResNet; presumably the ImageNet RGB normalisation
# statistics for inputs scaled to [0, 1] — confirm before using them.
_RESNET_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_RESNET_CHANNEL_STD = [0.229, 0.224, 0.225]
mean_resnet = np.asarray(_RESNET_CHANNEL_MEAN)
std_resnet = np.asarray(_RESNET_CHANNEL_STD)

def center_crop(img, center_crop_size):
    """Return the central window of a 3-channel, channels-last image array.

    Args:
        img: array indexed as (rows, cols, channels) with exactly 3 channels.
        center_crop_size: pair (crop_rows, crop_cols), in the same axis order
            as ``img.shape[:2]``.

    Returns:
        The slice ``img[top:top + crop_rows, left:left + crop_cols, :]``
        centred in the image. Its first two dimensions are exactly
        ``center_crop_size``, for odd sizes as well as even ones.

    Raises:
        ValueError: if the requested window is larger than the image.
    """
    assert img.shape[2] == 3
    rows, cols = img.shape[0], img.shape[1]
    crop_rows, crop_cols = center_crop_size[0], center_crop_size[1]
    if crop_rows > rows or crop_cols > cols:
        # Without this check the start offsets go negative and the slice
        # silently wraps around, producing a window of the wrong size.
        raise ValueError(
            'crop size %r is larger than image size %r'
            % ((crop_rows, crop_cols), (rows, cols)))
    # Work from the top-left corner and add the full crop size. Computing
    # `centre -/+ size // 2` loses one pixel whenever the size is odd.
    top = (rows - crop_rows) // 2
    left = (cols - crop_cols) // 2
    return img[top:top + crop_rows, left:left + crop_cols, :]

# https://jkjung-avt.github.io/keras-image-cropping/
def random_crop(img, random_crop_size):
    """Cut a randomly positioned window out of a 3-channel image array.

    Args:
        img: array indexed as (rows, cols, channels) with exactly 3 channels.
        random_crop_size: pair (crop_rows, crop_cols).

    Returns:
        A slice of ``img`` with ``crop_rows`` rows and ``crop_cols`` columns
        whose top-left corner is drawn with ``np.random.randint`` from all
        positions at which the window still fits inside the image.
    """
    # Note: image_data_format is 'channel_last'
    assert img.shape[2] == 3
    img_rows, img_cols = img.shape[:2]
    crop_rows, crop_cols = random_crop_size
    # The column offset is drawn before the row offset.
    left = np.random.randint(0, img_cols - crop_cols + 1)
    top = np.random.randint(0, img_rows - crop_rows + 1)
    bottom, right = top + crop_rows, left + crop_cols
    return img[top:bottom, left:right, :]


def crop_generator(batches, crop_length, random_crop_bool=True):
    '''
    Wrap a Keras image iterator and yield square crops of its batches.

    Args:
        batches: iterator yielding (batch_x, batch_y) pairs; batch_x is
            indexed as (sample, rows, cols, channel) with 3 channels.
        crop_length: side length of the square crop.
        random_crop_bool: if truthy, every image is cropped at a random
            position with random_crop; otherwise its centre is taken with
            center_crop.

    Yields:
        (batch_crops, batch_y), where batch_crops is a new float array of
        shape (len(batch_x), crop_length, crop_length, 3) and batch_y is
        passed through unchanged.
    '''
    crop_one = random_crop if random_crop_bool else center_crop
    crop_size = (crop_length, crop_length)
    # Iterate with `for` rather than `while True: next(batches)`: if the
    # wrapped iterator is finite, a StopIteration raised by next() inside a
    # generator body is converted into RuntimeError (PEP 479). With `for`
    # this generator simply ends when `batches` ends; for endless iterators
    # the behaviour is the same as before.
    for batch_x, batch_y in batches:
        batch_crops = np.zeros((batch_x.shape[0], crop_length, crop_length, 3))
        for i in range(batch_x.shape[0]):
            batch_crops[i] = crop_one(batch_x[i], crop_size)
        yield (batch_crops, batch_y)


# https://tutorials.technology/blog/02-Selecting-random-elements-from-list-using-python-Reservoir-Sampling-algorithm.html
def select_k_random_elements_from(collection_iterator, k):
    """Pick up to ``k`` items from an iterable by reservoir sampling.

    The first ``k`` items fill slots ``0 .. k-1``. For every later item a
    slot number is drawn from ``[0, items_seen)`` with ``random.random()``;
    the item replaces the occupant of that slot when the number is below
    ``k`` and is discarded otherwise.

    Args:
        collection_iterator: any iterable; it is consumed exactly once.
        k: maximum number of items to keep.

    Returns:
        The values view of the slot dictionary (at most ``k`` items).
    """
    reservoir = {}
    for seen, element in enumerate(collection_iterator, start=1):
        if len(reservoir) < k:
            # Still filling the reservoir: take the item unconditionally.
            reservoir[seen - 1] = element
            continue
        slot = int(random.random() * seen)
        if slot < k:
            reservoir[slot] = element
    return reservoir.values()

def select_k_elements_from(collection_iterator, k, artist_index):
    """Keep ``k`` items, letting later items of one artist replace slots.

    The first ``k`` items fill slots ``0 .. k-1``. Every later
    ``(index, item)`` pair replaces slot ``index`` only when
    ``item[0] == artist_index`` and ``index < k``; all other pairs are
    ignored.

    Args:
        collection_iterator: iterable of ``(index, item)`` pairs, where
            ``item[0]`` is compared against ``artist_index``.
        k: number of slots.
        artist_index: value that ``item[0]`` must equal for a later item to
            be allowed to replace a slot.

    Returns:
        The values view of the slot dictionary.
    """
    result = {}
    n = 0

    for index, item in collection_iterator:
        n += 1
        if len(result) < k:
            result[n - 1] = item
        elif item[0] == artist_index and index < k:
            # The slot test is tied to the match. With the test outside the
            # match branch, a non-matching item either raised
            # UnboundLocalError (no match seen yet) or overwrote a slot
            # using the index left over from an earlier match.
            result[index] = item
    return result.values()

In [33]:
# Size of the images fed to the network's input layer.
input_shape = (224, 224, 3)
# Batch size.
b_size = 60

# Training images are augmented with random horizontal flips.
train_datagen = ImageDataGenerator(
                horizontal_flip=True)

# Validation images are NOT augmented, so that evaluation is not perturbed
# by random flips.
val_datagen = ImageDataGenerator()

# class_mode='sparse' makes the generators yield integer class labels.
# NOTE(review): the recorded output reports 1180 classes for train and 506
# for validation. If class indices are assigned per directory, the same
# integer may denote different artists in the two generators — confirm, and
# consider passing one shared `classes=` list to both calls.
train_generator = train_datagen.flow_from_directory(
                    'data/images/train',
                    batch_size=b_size,
                    class_mode='sparse')
# Training images are cropped at a random position; the crop side is taken
# from input_shape so that the two cannot drift apart.
train_crops = crop_generator(train_generator, input_shape[0])

validation_generator = val_datagen.flow_from_directory(
                    'data/images/validation',
                    batch_size=b_size,
                    class_mode='sparse')
# Validation images are cropped at the centre.
val_crops = crop_generator(validation_generator, input_shape[0], False)

Found 6780 images belonging to 1180 classes.
Found 848 images belonging to 506 classes.


# početak ćelija za debuggiranje

In [34]:
# Debug / sanity-check cell, does not need to be run.
# For the first three validation batches print: the type of the batch, the
# shape of the image array, the labels, and the shape of the label array.
for batch_no, batch in enumerate(validation_generator):
    images, labels = batch[0], batch[1]
    print(type(batch))
    print(images.shape)
    print(labels)
    print(labels.shape)
    if batch_no >= 2:
        break

<class 'tuple'>
(60, 256, 256, 3)
[123 328 315 338 286 254 417 230 424  53 485 431 312 486 308 238 197 214
 269 350 326 117  71 144 155 359 336  74 256 417 323 496  44 198 390  80
 199  43 412 419 304 317 141 131  97 222 456 488 244 251 154  81 235 135
 240 303 312  19 296  51]
(60,)
<class 'tuple'>
(60, 256, 256, 3)
[489 375 482 405  31  91  41 140 267  35 178 356  59 150 464  34 211  89
 493 112 141 206  95 288 343 501 451 267 503 453 245 281  49 489  50 246
  94  67 466  97 472 220 279  12 342 256  96 216   0 117 368 467 366 102
  60 154 474 228 375 137]
(60,)
<class 'tuple'>
(60, 256, 256, 3)
[186 439 248 419 443 478 504 468 147  16 159 384 131 486 466 232 394 504
  90 485 425 476 335 247  39 171 207  28   2 452 350 488 289 332 166 439
 383 334 156 446 375 137 374 474 309 393 272  60 197 341  47 332 252 427
 167  17 329 410  30 485]
(60,)


In [35]:
# Another debug / sanity-check cell, does not need to be run.
label_map_train = train_generator.class_indices
label_map_val = validation_generator.class_indices

labels_train = set(label_map_train)
labels_val = set(label_map_val)
# Names that are class labels in both generators and also in `artists`.
intersection = set(artists).intersection(labels_train, labels_val)

print(intersection)

# NOTE(review): this rebinds the three accumulator lists of the split cell
# to empty lists; nothing in this cell fills them again.
train_dfs, val_dfs, test_dfs = [], [], []

print('Prepoznajemo ' + str(len(intersection)) + ' slikara')

{'Edouard Manet', 'Jean Auguste Dominique Ingres', 'Odilon Redon', 'Charles M. Russell', 'David Burliuk', 'Gene Davis', 'Francisco Goya', 'Giovanni Boldini', 'Henri Matisse', 'Amedeo Modigliani', 'Dante Gabriel Rossetti', 'Pyotr Konchalovsky', 'Raphael Kirchner', 'Louis Comfort Tiffany', 'Alfred Sisley', 'William-Adolphe Bouguereau', 'Ilya Mashkov', 'Lucian Freud', 'Karl Bryullov', 'Zinaida Serebriakova', 'James Tissot', 'Ernst Ludwig Kirchner', 'Joan Miro', "Georgia O'Keeffe", 'Fernand Leger', 'Felix Vallotton ', 'Konstantin Yuon', 'M.C. Escher', 'Jan Matejko', 'Henri de Toulouse-Lautrec', 'Zdislav Beksinski', 'Sam Francis', 'Paolo Veronese', 'Theodor Severin Kittelsen'}
Prepoznajemo 34 slikara


In [None]:
# Debug / sanity-check cell.
# While the local counter is below the generator's own `batch_index`, fetch
# a batch and store its image array under the label found at position
# `batch_index` of that batch's label array.
artists_and_indices = {}
batch_index = 0

while batch_index < train_generator.batch_index:
    batch = train_generator.next()
    label_at_position = batch[1][batch_index]
    artists_and_indices.setdefault(label_at_position, []).append(batch[0])
    batch_index += 1

print(artists_and_indices)

In [36]:
# Debug / sanity-check cell: fetch one batch of training crops, print the
# shapes involved and display the first crop as an image.

batch_x, batch_y = next(train_crops)
for batch_part in (batch_x, batch_y):
    print(batch_part.shape)
first_crop = batch_x[0]
batch_xx = first_crop.reshape(224, 224, 3)
print(batch_xx.shape)
cropped_image = array_to_img(batch_xx)
cropped_image.show()

(60, 224, 224, 3)
(60,)
(224, 224, 3)


# kraj ćelija za debuggiranje

# Model mreže

In [None]:
# Network model inspired by the main paper: two convolution blocks followed
# by two dense layers. The `nn.*` notes are the PyTorch layers being mirrored.
# NOTE(review): the output layer has `num_artists` units, while the recorded
# generator output reports 1180 / 506 classes. Sparse labels must lie in
# [0, num_artists) — confirm that the generators and `num_artists` agree.

model = Sequential()
# nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1)      224 -> 112
# Only the first layer of a Sequential model needs `input_shape`.
model.add(Conv2D(32, kernel_size=3, strides=2, padding="same", input_shape=input_shape))
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=2))                         # 112 -> 56
model.add(Conv2D(32, kernel_size=3, strides=2, padding="same"))  # 56 -> 28
model.add(Activation('relu'))
model.add(BatchNormalization())
model.add(MaxPooling2D(pool_size=2))                         # 28 -> 14
model.add(Flatten())                                         # 14 * 14 * 32 = 6272
model.add(Dense(4 * num_artists))   # nn.Linear(6272, 4*num_artists)
model.add(Activation('relu'))
model.add(Dense(num_artists))       # nn.Linear(4*num_artists, num_artists)
# The crossentropy loss below expects class probabilities, so the raw scores
# of the last Dense layer are normalised with softmax.
model.add(Activation('softmax'))

# Adam optimiser; the metric is accuracy. The generators are created with
# class_mode='sparse' (integer labels), which requires the sparse variant of
# the loss: 'categorical_crossentropy' expects one-hot encoded targets.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=Adam(lr=1e-4),
              metrics=['accuracy'])

# Print the table of network layers.
model.summary()

In [None]:
# Train the network.

# Number of whole batches per pass over each data set.
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
# The validation iterator is named `validation_generator`; `valid_generator`
# does not exist and raised NameError.
STEP_SIZE_VALID = validation_generator.n // validation_generator.batch_size

model.fit_generator(
    train_crops,
    steps_per_epoch=STEP_SIZE_TRAIN,
    epochs=3,
    # validation_data must be the generator of centre-cropped validation
    # batches, and validation_steps the number of batches to draw from it
    # (not the batch size).
    validation_data=val_crops,
    validation_steps=STEP_SIZE_VALID)

model.save_weights('prvi_pokusaj.h5')

# iduće ćelije služe/služile su za debuggiranje

In [None]:
# Build one PaintingFolder per split, each given its image directory and the
# matching split table; no transform is applied.
# NOTE(review): the validation directory here is 'data/images/val/', whereas
# the Keras generator reads 'data/images/validation' — confirm which path is
# the intended one.
train_dset = PaintingFolder('data/images/train/', transform=None, df=train_df)
val_dset = PaintingFolder('data/images/val/', transform=None, df=val_df)
test_dset = PaintingFolder('data/images/test/', transform=None, df=test_df)

In [None]:
# Debug: show the first training sample and the size of the validation set.
print(train_dset[0])
print(len(val_dset))

In [None]:
# Generator that applies random horizontal flips.
datagen = ImageDataGenerator(horizontal_flip=True)

# Convert the first element of every dataset sample to an array and give it
# a leading axis of length 1.
# NOTE(review): the list is called train_x but is filled from val_dset —
# confirm which split was intended.
train_x = [
    img_to_array(sample[0])[np.newaxis, ...]
    for sample in val_dset
]

In [None]:
# Write the string form of every entry of train_x to a text file, one entry
# per write. The `with` block closes the file even if a write fails, so the
# handle is not leaked and buffered data is flushed to disk.
with open('train2-val.txt', 'w') as thefile:
    for item in train_x:
        thefile.write('%s\n' % item)

In [None]:
# Debug cell: load two images from data/train_2, give each a leading axis of
# length 1, feed them through datagen.flow and the centre-crop generator,
# then print shapes and display the resulting crop.
# NOTE(review): this cell rebinds `x2` (earlier the per-artist counts) and
# `train_crops` (the generator passed to fit_generator in the training
# cell). Running it before training changes what the training cell sees.
img = load_img('data/train_2/2.jpg')
img2 = load_img('data/train_2/20.jpg')
# img.show()
x = img_to_array(img)
# print(x.shape)
x = x.reshape((1,)+x.shape)
# print(x.shape)
x2 = img_to_array(img2)
x2 = x2.reshape((1,)+x2.shape)
print('x2', x2.shape)
# A plain Python list holding the two arrays (their shapes may differ).
X = [x, x2]


# NOTE(review): flow() receives a list of two arrays and no labels, while
# crop_generator unpacks every batch into two values — confirm what flow()
# yields for this input and that the unpacking is intended.
train_batch = datagen.flow(X, batch_size=1)
train_crops = crop_generator(train_batch, 224, False)

batch_x, batch_y = next(train_crops)
print(batch_x.shape)
print(batch_y.shape)
# Drop the leading axis; this only works when the batch holds one image.
batch_x = batch_x.reshape(224, 224, 3)
print(batch_x.shape)
cropped_image = array_to_img(batch_x)
cropped_image.show()