In [1]:
import pandas as pd
import numpy as np
import PIL
from keras.applications import VGG16,imagenet_utils
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import load_img
from keras.models import Model
from keras.layers import Dense
import numpy as np
import glob
import os

import pickle
import time


Using TensorFlow backend.


In [2]:
df = pd.read_table('Data/DeepFashion/list_attr_img.txt',skiprows=1,sep='\s+',header=None)

In [3]:
bbox = pd.read_table('Data/DeepFashion/list_bbox.txt',sep='\s+')

In [4]:
joined = bbox.join(df,lsuffix='image_name',rsuffix='0')

In [5]:
attr = (df.iloc[:,6:].values>0)
a_counts = (np.sum(attr,axis=0))
top_a = np.flip(a_counts.argsort(),0)[:206]
top_classes = np.where(np.sum(attr,axis=0) > 1000)[0]
attr_subset = attr[:,top_a]


In [None]:
subset = pd.DataFrame([], columns=joined.columns)
for a in top_a[::-1]:
    needed = 1000 - np.sum(subset.iloc[:,6+a]>0)
    if needed > 0:
        has_attr = joined[joined.iloc[:,6+a]>0]
        sample = np.random.randint(has_attr.shape[0],size=needed)
        subset = subset.append(has_attr.iloc[sample])

In [None]:
pickle.dump(subset,open('../rebalanced_attr_subset.p','wb'))

In [6]:
subset = pickle.load(open('../rebalanced_attr_subset.p','rb'))

In [7]:
attr_subset = (subset.iloc[:,6+top_a].values>0)

In [8]:
subset.shape

(85682, 1006)

In [9]:
df = pickle.load(open('../df_subset/attr.p','rb'))

In [10]:
df.shape

(25000, 1006)

In [11]:
inputShape = (224, 224)
preprocess = imagenet_utils.preprocess_input

def preprocess_DF(bbox):
    image = crop_resize_DF(bbox)
    image = img_to_array(image)
    image = np.expand_dims(image,axis=0)
    
    return preprocess(image)

def crop_resize_DF(bbox):
    img = PIL.Image.open('Data/DeepFashion/'+bbox['image_name'])
    lx = bbox['x_1']
    ly = bbox['y_1']
    ux = bbox['x_2']
    uy = bbox['y_2']
    
    img = img.crop((lx,ly,ux,uy))
    img = img.resize(inputShape, PIL.Image.ANTIALIAS)
    return img

In [12]:
def generator(df, batch_size):
    while True:
        subset = np.random.randint(df.shape[0],size=batch_size)
        proc_imgs = []
        for i in subset:
            proc_imgs.append(np.squeeze(preprocess_DF(df.iloc[i])))
    
        attr = (df.iloc[subset,6+top_a].values>0)
        yield np.array(proc_imgs), attr

In [None]:
joined.head()

In [13]:
attr = attr_subset

In [14]:
pos_weight = np.prod(attr.shape)/np.sum(attr)

In [15]:
pos_weight

221.41995860252149

In [16]:
class_weights = 1/(np.mean(attr,axis=0)*1000+1e-8)

In [17]:
import keras.backend as K
import tensorflow as tf

def weighted_sigmoid_loss(y_true,y_pred):
    return tf.nn.weighted_cross_entropy_with_logits(y_true,y_pred,pos_weight=pos_weight*5)

def H_pred(y_true, y_pred, threshold=0.5):
    p = tf.where(y_pred>threshold,tf.ones_like(y_pred),tf.zeros_like(y_pred))
    return K.mean(K.sum(K.abs(y_true - p),axis=1))

def total_labels(y_true, y_pred, threshold=0.5):
    p = tf.where(y_pred>threshold,tf.ones_like(y_pred),tf.zeros_like(y_pred))
    return K.mean(K.sum(p,axis=1))

In [None]:
t = np.array([[1,1,0,0,1,0],[1,1,0,0,1,0]])
p = np.array([[0,1,0.6,0,1,0],[0,1,0,0,1,0]])
K.eval(H_pred(t,p))

In [None]:
t = [[0.7,0.2,0.8,0.3,0.2],[0.7,0.2,0.8,0.3,0.2]]
np.flip(np.argsort(t),axis=0)[:3]

In [18]:
def global_precision(y_true, y_pred, k=5):
    _,i = tf.nn.top_k(y_pred,k=k)
    t = tf.sparse_to_dense(i,(-1,len(top_a)),1.0,0.0)
    N_c = tf.reduce_sum(tf.cast(K.equal(y_true,t), tf.float32))
    N_p = K.sum(t)
    return N_c/N_p

In [19]:
from keras import optimizers
from keras import regularizers
from keras.layers.normalization import BatchNormalization
from keras.layers import Dropout,Flatten
l = 0.0

base_model = VGG16(weights='imagenet')
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
x = base_model.output

x = Flatten(name='flatten')(x)
x = BatchNormalization()(x)
x = Dropout(rate=0.2)(x)

x = Dense(4096, activation='relu', name='fc1', kernel_initializer='glorot_normal',
         bias_initializer='glorot_uniform', kernel_regularizer=regularizers.l2(l))(x)
x = BatchNormalization()(x)
x = Dropout(rate=0.2)(x)

predictions = Dense(len(top_a), activation='sigmoid',name='predictions', kernel_initializer='glorot_normal',
         bias_initializer='glorot_uniform', kernel_regularizer=regularizers.l2(l))(x)

In [20]:
new_model = Model(inputs=base_model.input, outputs=predictions)




In [21]:
new_model.compile(optimizer='adam', loss=weighted_sigmoid_loss, 
                  metrics=[H_pred,total_labels])

In [22]:
from keras import callbacks
import time
tbCB = callbacks.TensorBoard(log_dir="logs/attr-retrain{}".format(time.time()), histogram_freq=0, write_graph=False, write_images=False)
checkpointCB = callbacks.ModelCheckpoint("checkpoints/retrain{}".format(time.time()),period=10)

h = new_model.fit_generator(generator(subset,64),steps_per_epoch=50,epochs=80,verbose=1,
                            callbacks=[tbCB,checkpointCB],class_weight=class_weights)

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
 7/50 [===>..........................] - ETA: 144s - loss: 2.7272 - H_pred: 70.5022 - total_labels: 71.3125

KeyboardInterrupt: 

In [None]:
h = new_model.fit_generator(generator(joined,128),steps_per_epoch=50,epochs=80,verbose=1,
                            callbacks=[tbCB,checkpointCB],class_weight=class_weights)

In [None]:
p = new_model.predict(proc_tops[:20])

In [None]:
np.where(p[0,:]>0.5)

In [None]:
np.where(p[1,:]>0.5)

In [23]:
new_model.save_weights('DF-attributes-weighted_sigmoid-7-24.h5')

In [None]:
new_model.layers[:19]

In [None]:
new_model.load_weights('Data/Trained Models/DF-Retrain 7-19.h5')

In [None]:
feat_model = Model(inputs=new_model.input,outputs=new_model.get_layer('fc1').output)

In [None]:
base_model = VGG16(weights='imagenet')

feat_model2 = Model(inputs=base_model.input,outputs=base_model.get_layer('fc1').output)

In [None]:
import pickle
tops = pickle.load(open('Data/feature_matrix/tops_10000_df.p','rb'))

In [None]:
proc_tops = []
for index, row in tops.iloc[:100].iterrows():
    proc_tops.append(np.squeeze(preprocess_DF(row)))
proc_tops = np.array(proc_tops)

In [None]:
start = time.time()
fc6_retrain_tops = feat_model.predict(proc_tops)
#pickle.dump(fc6_retrain_tops,open('Data/feature_matrix/fc6_retrain_tops.p','wb'))
print(f'{time.time()-start:.2f} s')

In [None]:
p = feat_model.predict(proc_tops[:20])

In [None]:
p

In [None]:
p2 = feat_model2.predict(proc_tops[:20])

In [None]:
p2

In [None]:
plt.hist(new_model.get_layer('predictions').get_weights()[0][:,0])

In [None]:
plt.hist(new_model.get_layer('fc1').get_weights()[0][:,0])

In [None]:
l = list(generator(joined,4))

In [None]:
for x,y in generator(joined,64):
    print(K.get_value(H_pred(new_model.predict(x),y)))
    

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.hist(np.sum(attr[:1000],axis=1))

In [None]:
229000/64

In [None]:
64*224*224