In [1]:
import os
# Expose only GPU 0 to this process.  This is set before keras/tensorflow are
# imported further down so the setting is already in place when they start up.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# Number of GPUs; only referenced by the commented-out multi_gpu_model call in this notebook.
n_gpus = 1
# Identifier for this experiment; not referenced again in the cells shown here.
model_name = 'inceptionresnetv2_fine_tuning_8clf_2'

import keras.backend as K
import tensorflow as tf

# gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.3)
# sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

Using TensorFlow backend.


# 导入必要的库

In [2]:
import numpy as np
import pandas as pd
from keras.layers import *
from keras.models import *
from keras.optimizers import *
from keras.applications import *
from keras.regularizers import l2

from keras.preprocessing.image import *

import matplotlib.pyplot as plt
import random
import os
import cv2
from tqdm import tqdm
from glob import glob
import multiprocessing

from sklearn.cross_validation import train_test_split
from collections import Counter
from keras import backend as K
from keras.utils import multi_gpu_model

from IPython.display import display, Image

%matplotlib inline
%config InlineBackend.figure_format = 'retina'



# 读取数据集

In [3]:
# Fixed seed for the shuffle below.  The train/validation split later in the
# notebook is positional (first 90% of rows vs. the rest), so an unseeded
# shuffle would produce a different split -- and different validation
# numbers -- on every Restart & Run All.
SEED = 2018

# label.csv has no header row: one line per (image, attribute task) pair.
df = pd.read_csv('/ext/fashionai/Gluon-FashionAI-Attributes-master/data/base/Annotations/label.csv', header=None)
df.columns = ['filename', 'label_name', 'label']
df = df.sample(frac=1, random_state=SEED).reset_index(drop=True) # reproducible shuffle

# Strip the '_labels' suffix so task names read e.g. 'coat_length'.
df.label_name = df.label_name.str.replace('_labels', '')

display(df.head())
# Number of annotated rows per attribute task; its key order defines the task order used below.
c = Counter(df.label_name)
c

Unnamed: 0,filename,label_name,label
0,Images/neckline_design_labels/1db2ec51497b10b0...,neckline_design,nnynnnnnnn
1,Images/neckline_design_labels/b36dbed87c6b16d4...,neckline_design,nnnnnnnnyn
2,Images/coat_length_labels/5549f0164e2556ad4050...,coat_length,nnnnnynn
3,Images/sleeve_length_labels/24f543d99020376dd4...,sleeve_length,nnnnnnnyn
4,Images/lapel_design_labels/4f3dcaf18d84ddb6d0e...,lapel_design,ynnnn


Counter({'coat_length': 11320,
         'collar_design': 8393,
         'lapel_design': 7034,
         'neck_design': 5696,
         'neckline_design': 17148,
         'pant_length': 7460,
         'skirt_length': 19333,
         'sleeve_length': 13299})

In [4]:
# For every attribute task, the number of classes is the length of its label
# string (one character per class); the first row of each task is inspected.
# Tasks are visited in the key order of the Counter `c`.
label_count = {
    task: len(df.loc[df.label_name == task, 'label'].values[0])
    for task in c.keys()
}
label_names = list(label_count)
display(label_count)

{'coat_length': 8,
 'collar_design': 5,
 'lapel_design': 5,
 'neck_design': 5,
 'neckline_design': 10,
 'pant_length': 6,
 'skirt_length': 6,
 'sleeve_length': 9}

## 生成 y

In [5]:
fnames = df['filename'].values
width = 331  # side length (pixels) every image is resized to
n = len(df)

# One (n, n_classes) one-hot target matrix per attribute task.  A row stays
# all-zero for every task the image is not annotated for.
y = [np.zeros((n, label_count[x])) for x in label_count.keys()]

# Resolve task name -> position once instead of a list search per row.
task_position = {name: pos for pos, name in enumerate(label_names)}
for i, (label_name, label) in enumerate(zip(df.label_name.values, df.label.values)):
    class_idx = label.find('y')
    if class_idx < 0:
        # str.find returns -1 when 'y' is absent; used as an index that would
        # silently mark the LAST class as positive.
        raise ValueError("row %d (%s): label %r contains no 'y'" % (i, label_name, label))
    y[task_position[label_name]][i, class_idx] = 1

## 读取图片

In [6]:
def f(index):
    """Load and resize one training image (worker function for the process pool).

    Parameters
    ----------
    index : int
        Position into the module-level ``fnames`` array.

    Returns
    -------
    (int, image)
        ``index`` unchanged (results may arrive out of order) and the image
        as returned by ``cv2.resize`` at ``(width, width)``.

    Raises
    ------
    FileNotFoundError
        If ``cv2.imread`` could not read the file.  ``imread`` signals failure
        by returning ``None`` rather than raising, which would otherwise
        surface as an opaque error inside ``cv2.resize``.
    """
    path = '/ext/fashionai/Gluon-FashionAI-Attributes-master/data/base/' + fnames[index]
    img = cv2.imread(path)
    if img is None:
        raise FileNotFoundError('cv2.imread could not read image: ' + path)
    return index, cv2.resize(img, (width, width))

# Pre-allocate the uint8 image tensor, then fill it from a pool of 32 worker
# processes.  Results come back in completion order, so each one carries the
# row index it belongs to.
X = np.zeros(shape=(n, width, width, 3), dtype=np.uint8)
with multiprocessing.Pool(processes=32) as pool, \
        tqdm(pool.imap_unordered(f, range(n)), total=n) as pbar:
    for i, img in pbar:
        # Reverse the channel axis (OpenCV loads BGR; the model is fed RGB).
        X[i] = img[..., ::-1]

100%|██████████| 89683/89683 [01:11<00:00, 1251.18it/s]


In [7]:
# Positional 90/10 split: the first n_train rows train, the remainder validate.
n_train = int(n * 0.9)
X_train, X_valid = X[:n_train], X[n_train:]
# Apply the same cut to every per-task target matrix.
y_train = [task_y[:n_train] for task_y in y]
y_valid = [task_y[n_train:] for task_y in y]

In [8]:
def display_images(imgs, w=8, h=4, figsize=(24, 12)):
    """Show the first images of ``imgs`` on an ``h`` x ``w`` subplot grid.

    Parameters
    ----------
    imgs : sequence of images
        Anything supporting ``len`` and integer indexing whose items
        ``plt.imshow`` accepts.
    w, h : int
        Number of grid columns and rows; at most ``w * h`` images are drawn.
    figsize : tuple
        Figure size passed to ``plt.figure``.

    If fewer than ``w * h`` images are supplied the remaining grid cells are
    simply left empty instead of raising ``IndexError``.
    """
    plt.figure(figsize=figsize)
    for i in range(min(len(imgs), w*h)):
        plt.subplot(h, w, i+1)
        plt.imshow(imgs[i])

In [9]:
class Generator():
    """Endless mini-batch generator over ``X`` and a list of per-task targets.

    Parameters
    ----------
    X : array-like
        Samples; sliced along the first axis and copied per batch.
    y : list of array-like
        One target array per output head, each sliced with the same window as ``X``.
    batch_size : int
        Maximum number of samples per batch (the last batch of a pass may be smaller).
    aug : bool
        When true, each sample of the batch copy is passed through
        ``ImageDataGenerator.random_transform`` (flip / rotation / zoom).

    Attributes
    ----------
    generator : generator
        Yields ``(X_batch, y_batch)`` forever, cycling over the data in order.
    steps : int
        Number of batches in one full pass, i.e. ``ceil(len(X) / batch_size)``.
    """
    def __init__(self, X, y, batch_size=8, aug=False):
        def generator():
            # Only build the augmenter when it is actually going to be used.
            idg = ImageDataGenerator(horizontal_flip=True,
                                     rotation_range=20,
                                     zoom_range=0.2) if aug else None
            while True:
                for i in range(0, len(X), batch_size):
                    # Copy so that augmentation never writes into the source array.
                    X_batch = X[i:i+batch_size].copy()
                    y_batch = [x[i:i+batch_size] for x in y]
                    if aug:
                        for j in range(len(X_batch)):
                            X_batch[j] = idg.random_transform(X_batch[j])
                    yield X_batch, y_batch
        self.generator = generator()
        # Ceil division: `len(X) // batch_size + 1` over-counts by one whenever
        # len(X) is an exact multiple of batch_size, which would make every
        # "epoch" spill one batch into the next pass over the data.
        self.steps = (len(X) + batch_size - 1) // batch_size

In [10]:
# Augmented mini-batch stream over the training split (8 images per batch).
gen_train = Generator(X=X_train, y=y_train, batch_size=8, aug=True)

# 数据集探索

## 类别分布

In [11]:
# plt.figure(figsize=(26, 14))
# for i in range(8):
#     plt.subplot(2, 4, i+1)
#     counts = Counter(y[i].argmax(axis=-1)[np.where(y[i].any(axis=-1))])
#     pd.Series(counts).plot('bar')

## 抽样可视化

In [12]:
# plt.figure(figsize=(26, 14))
# w = 8
# h = 4
# for i in range(w*h):
#     plt.subplot(h, w, i+1)
#     index = np.random.randint(n)
#     plt.title(str([y[x][index].argmax() if y[x][index].any() else -1 for x in range(8) ]))
#     plt.imshow(X[index])
#     plt.axis('off')

# 搭建模型并加载权重

## 搭建模型

In [13]:
def acc(y_true, y_pred):
    """Accuracy over the rows that actually carry a label for this head.

    A row of ``y_true`` with no entry above 0.5 is treated as unlabelled and
    contributes neither to the numerator nor to the denominator.  The small
    constant in the denominator avoids a division by zero when a batch has no
    labelled row at all.
    """
    # 1.0 for labelled rows, 0.0 otherwise.
    labelled = tf.cast(tf.reduce_any(y_true > 0.5, axis=-1), tf.float32)
    # 1.0 where the predicted class equals the true class.
    hits = tf.cast(
        tf.equal(tf.argmax(y_true, axis=-1), tf.argmax(y_pred, axis=-1)),
        tf.float32,
    )
    return tf.reduce_sum(hits * labelled) / (tf.reduce_sum(labelled) + 1e-7)

In [14]:
# Backbone: ImageNet-initialised InceptionResNetV2 without its classification
# top; pooling='avg' requests a globally average-pooled output.
base_model = InceptionResNetV2(weights='imagenet', input_shape=(width, width, 3), include_top=False, pooling='avg')
input_tensor = Input((width, width, 3))
x = input_tensor
# The backbone's own input preprocessing is applied inside the model, so the
# raw uint8 RGB arrays built above can be passed to predict() directly.
x = Lambda(inception_resnet_v2.preprocess_input)(x)
x = base_model(x)
x = Dropout(0.5)(x)
# One softmax head per attribute task, named after the task.  Iterating
# label_count keeps the outputs in the same order as the per-task targets in `y`.
x = [Dense(count, activation='softmax', name=name)(x) for name, count in label_count.items()]
model = Model(input_tensor, x)
# NOTE(review): the checkpoint file name says "nasnet" while the backbone built
# here is InceptionResNetV2.  With by_name=True only layers whose names match
# the file are loaded and the rest keep their current weights -- confirm this
# is the intended checkpoint.
model.load_weights('/ext/fashionai/Gluon-FashionAI-Attributes-master/model_nasnet_weights.h5', by_name = True)
# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])

# from IPython.display import SVG
# from keras.utils.vis_utils import model_to_dot, plot_model

# plot_model(model, show_shapes=True, to_file='model_simple.png')
# SVG(model_to_dot(model, show_shapes=True).create(prog='dot', format='svg'))

In [15]:
# model2 = multi_gpu_model(model, n_gpus)

In [16]:
# opt = SGD(1e-3, momentum=0.9, nesterov=True, decay=1e-5)

# 计算验证集准确率

In [17]:
# Per-task accuracy on the validation split.
y_pred = model.predict(X_valid, batch_size=64, verbose=1)

# Derive the number of heads from the task list instead of hard-coding 8.
n_tasks = len(label_names)

# a[i, t] == 1 iff validation sample i carries a label for task t.
a = np.array([x.any(axis=-1) for x in y_valid]).T.astype('uint8')
# b[t]: indices of the validation samples labelled for task t and only task t.
b = [np.where((a == np.eye(n_tasks)[x]).all(axis=-1))[0] for x in range(n_tasks)]

# The loop variable is deliberately NOT called `c`: that name holds the
# Counter of label names created when the dataset was loaded, and overwriting
# it breaks any re-run of the cells that still use it.
for task_idx in range(n_tasks):
    y_pred2 = y_pred[task_idx][b[task_idx]].argmax(axis=-1)
    y_true2 = y_valid[task_idx][b[task_idx]].argmax(axis=-1)
    print(label_names[task_idx], (y_pred2 == y_true2).mean())

skirt_length 0.962577962578
sleeve_length 0.954209748892
pant_length 0.933609958506
collar_design 0.922363847045
lapel_design 0.95
neck_design 0.91621129326
coat_length 0.929180887372
neckline_design 0.917067307692


In [18]:
# Number of annotated rows per attribute task over the whole labelled dataset.
counts = Counter(df['label_name'])
counts

Counter({'coat_length': 11320,
         'collar_design': 8393,
         'lapel_design': 7034,
         'neck_design': 5696,
         'neckline_design': 17148,
         'pant_length': 7460,
         'skirt_length': 19333,
         'sleeve_length': 13299})

In [19]:
# Overall accuracy: per-task validation accuracy weighted by how many rows
# each task has in the full labelled dataset (`counts`).
# NOTE(review): the weights are full-dataset counts, not validation-split
# counts -- confirm that this is the intended weighting.
#
# The accumulators use their own names: the previous `n = 0` overwrote the
# dataset size `n` defined when the targets were built, and the loop variable
# `c` overwrote the Counter of label names, so re-running earlier cells
# afterwards silently used the wrong values.
weighted_correct = 0
total_weight = 0
for task_idx in range(len(label_names)):
    y_pred2 = y_pred[task_idx][b[task_idx]].argmax(axis=-1)
    y_true2 = y_valid[task_idx][b[task_idx]].argmax(axis=-1)
    task_weight = counts[label_names[task_idx]]
    weighted_correct += task_weight * (y_pred2 == y_true2).mean()
    total_weight += task_weight
print(weighted_correct / total_weight)

0.938315206043


# 在测试集上预测

In [20]:
# question.csv has no header row; it uses the same three columns as the
# training annotations, with the label column still to be filled in.
df_test = pd.read_csv('/ext/fashionai/Gluon-FashionAI-Attributes-master/data/z_rank/Tests/question.csv', header=None)
df_test.columns = ['filename', 'label_name', 'label']

n_test = len(df_test)

# Relative image paths, indexed by row number.
fnames_test = df_test['filename']

df_test.head()

Unnamed: 0,filename,label_name,label
0,Images/collar_design_labels/faad3490a16c7f3d4f...,collar_design_labels,?
1,Images/collar_design_labels/0b2b4254f35ce3a41a...,collar_design_labels,?
2,Images/collar_design_labels/7f2be608e06f804dd5...,collar_design_labels,?
3,Images/collar_design_labels/4b09d4dca80caac42e...,collar_design_labels,?
4,Images/collar_design_labels/de91f00a05e84d7239...,collar_design_labels,?


In [21]:
def f(index):
    """Load and resize one test image (worker function for the process pool).

    Note: this rebinds the module-level name ``f`` that was used earlier for
    the training images; from here on ``f`` reads from ``fnames_test``.

    Parameters
    ----------
    index : int
        Row number into the module-level ``fnames_test``.

    Returns
    -------
    (int, image)
        ``index`` unchanged (results may arrive out of order) and the image
        as returned by ``cv2.resize`` at ``(width, width)``.

    Raises
    ------
    FileNotFoundError
        If ``cv2.imread`` could not read the file.  ``imread`` signals failure
        by returning ``None`` rather than raising, which would otherwise
        surface as an opaque error inside ``cv2.resize``.
    """
    path = '/ext/fashionai/Gluon-FashionAI-Attributes-master/data/z_rank/' + fnames_test[index]
    img = cv2.imread(path)
    if img is None:
        raise FileNotFoundError('cv2.imread could not read image: ' + path)
    return index, cv2.resize(img, (width, width))

# Pre-allocate the uint8 test tensor and fill it from 12 worker processes.
# Results arrive in completion order, so each one carries its own row index.
X_test = np.zeros(shape=(n_test, width, width, 3), dtype=np.uint8)
with multiprocessing.Pool(processes=12) as pool, \
        tqdm(pool.imap_unordered(f, range(n_test)), total=n_test) as pbar:
    for i, img in pbar:
        # Reverse the channel axis (OpenCV loads BGR; the model is fed RGB).
        X_test[i] = img[..., ::-1]

100%|██████████| 15042/15042 [00:24<00:00, 622.11it/s]


In [22]:
# Per-task class probabilities for every test image.  Note that this rebinds
# `y_pred`, which previously held the validation-set predictions.
y_pred = model.predict(X_test, verbose=1, batch_size=64)



In [23]:
# For each test row, take the probabilities of the head that matches the
# row's task and format them as 'p0;p1;...' with 8 decimals.
#
# The strings are collected first and written with ONE column assignment.
# The previous per-row `df_test.label[i] = ...` is chained indexing: it
# triggers SettingWithCopyWarning and is not guaranteed to write into
# df_test at all (it does not under pandas copy-on-write).
formatted_probs = []
for i, raw_name in enumerate(df_test['label_name'].values):
    problem_name = raw_name.replace('_labels', '')
    problem_index = label_names.index(problem_name)
    probs = y_pred[problem_index][i]
    formatted_probs.append(';'.join(np.char.mod('%.8f', probs)))
df_test['label'] = formatted_probs

In [24]:
# Write the submission file: the three columns only, no header and no index.
fname_csv = 'pred_nasnet.csv'
df_test.to_csv(path_or_buf=fname_csv, header=None, index=None)