In [1]:
import os
import shutil
import cv2
import random
import re
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image

%matplotlib inline

In [2]:
# Base name (without the ".csv" extension) of the label file under ./data/
# that the loading cell reads.
label_csv_name = 'labels_validation'

# 数据集目录结构
```
FashionAI-Attributes
├── data/
│   ├── base/
│   ├── rank/
│   └── web/
└── solution.ipynb
```

# Load one sample image and display its array shape as a quick sanity check
# that the data directory is readable.
image_demo_path = './data/base/Images/coat_length_labels/fff3f9da02b33c0d2619a1dde0914737.jpg'
image_demo = cv2.imread(image_demo_path)
# cv2.imread signals a missing or unreadable file by returning None rather than
# raising, which would otherwise surface as an opaque AttributeError on .shape.
if image_demo is None:
    raise FileNotFoundError('Could not read image: ' + image_demo_path)
image_demo.shape

In [3]:
# Read at most 19999 rows of the label CSV. With header=None the file's own
# header line is loaded as data row 0, so the column names are assigned by hand.
df_train = pd.read_csv(
    './data/' + label_csv_name + '.csv',
    nrows=19999,
    header=None,
)
df_train.columns = ['id', 'imageId', 'url', 'labelId']
df_train.head()

Unnamed: 0,id,imageId,url,labelId
0,,imageId,url,labelId
1,0.0,1,https://contestimg.wish.com/api/webimage/568e1...,"['62', '17', '66', '214', '105', '137', '85']"
2,1.0,2,https://contestimg.wish.com/api/webimage/5452f...,"['95', '17', '66', '214', '164', '137', '20', ..."
3,2.0,3,https://contestimg.wish.com/api/webimage/54058...,"['122', '19', '66', '186', '180', '44', '154',..."
4,3.0,4,https://contestimg.wish.com/api/webimage/540c6...,"['190', '222', '66', '153', '164', '226', '53'..."


In [4]:
# Add an explicit 'index' column holding each row's position.
# Any 'index' column left over from an earlier run of this cell is dropped
# first, so re-running the cell gives the same frame instead of accumulating
# extra 'level_0' columns.
df_train = (
    df_train
    .drop(columns=['index'], errors='ignore')
    .reset_index(drop=True)
    .reset_index()
)
print('{0}: {1}'.format(label_csv_name, len(df_train)))
df_train.head()

labels_validation: 9898


Unnamed: 0,index,id,imageId,url,labelId
0,0,,imageId,url,labelId
1,1,0.0,1,https://contestimg.wish.com/api/webimage/568e1...,"['62', '17', '66', '214', '105', '137', '85']"
2,2,1.0,2,https://contestimg.wish.com/api/webimage/5452f...,"['95', '17', '66', '214', '164', '137', '20', ..."
3,3,2.0,3,https://contestimg.wish.com/api/webimage/54058...,"['122', '19', '66', '186', '180', '44', '154',..."
4,4,3.0,4,https://contestimg.wish.com/api/webimage/540c6...,"['190', '222', '66', '153', '164', '226', '53'..."


In [5]:
# Number of class sub-directories (named 0 .. label_length - 1) and the width
# of the network's output layer.
label_length = 16

In [6]:
# Create ./data/train1 and ./data/valid1, each holding one sub-directory per
# class index (0 .. label_length - 1), which is the layout that
# flow_from_directory reads.
base_dir = './data/'
train_dir = os.path.join(base_dir, 'train1')
valid_dir = os.path.join(base_dir, 'valid1')
data_sets = ['train1', 'valid1']

for data_set in data_sets:
    set_dir = os.path.join(base_dir, data_set)
    # exist_ok=True makes the cell safely re-runnable without a separate
    # (race-prone) os.path.exists check.
    os.makedirs(set_dir, exist_ok=True)
    for i in range(label_length):
        label_dir = os.path.join(set_dir, str(i))
        os.makedirs(label_dir, exist_ok=True)

In [7]:
data_length = len(df_train)

# Copy every image into the class folder derived from its label list.
# Row 0 is skipped: it holds the CSV's own header text, not a real sample.
for i in range(1, data_length):
    image_id = df_train['imageId'][i]
    # labelId is the text form of a list such as "['62', '17']": strip the
    # brackets and quotes, then split on commas and convert to integers.
    tmp_label = df_train['labelId'][i].replace('[', '').replace(']', '').replace('\'', '')
    tmp_label = [int(piece) for piece in tmp_label.split(",")]
    # The class folder is the position of the largest label id in the list.
    # NOTE(review): nothing here bounds that position by label_length — confirm
    # the label lists never make it reach 16 or more.
    label = tmp_label.index(max(tmp_label))
    src = os.path.join(base_dir + 'train', image_id + '.jpg')
    dst = os.path.join(base_dir + 'train1', str(label), image_id + '.jpg')
    shutil.copyfile(src, dst)

In [8]:
# Display the number of rows in df_train (this count includes the header row
# that was loaded as data).
data_length

9898

In [9]:
from keras import models
from keras import layers
from keras import optimizers
from keras.applications import ResNet50
from keras import backend as K
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


In [10]:
# Training pipeline: scale pixel values by 1/255 and augment with random
# rotations (rotation_range=40) and random horizontal/vertical flips.
train_datagen = ImageDataGenerator(
    vertical_flip=True,
    horizontal_flip=True,
    rotation_range=40,
    rescale=1. / 255,
)

# Validation pipeline: the same rescaling, without augmentation.
valid__datagen = ImageDataGenerator(rescale=1. / 255)

In [11]:
# Side length, in pixels, that every image is resized to (square input).
image_width = 512

# Stream shuffled batches of 32 images with one-hot labels from the class
# sub-directories of the training folder.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    shuffle=True,
    class_mode="categorical",
    batch_size=32,
    target_size=(image_width, image_width),
)

# Same settings for the validation folder, using the non-augmenting generator.
valid_generator = valid__datagen.flow_from_directory(
    valid_dir,
    shuffle=True,
    class_mode="categorical",
    batch_size=32,
    target_size=(image_width, image_width),
)

Found 28545 images belonging to 16 classes.
Found 12 images belonging to 16 classes.


In [12]:
# ResNet50 with ImageNet weights and without its classification head, sized
# for image_width x image_width RGB input.
conv_base = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(image_width, image_width, 3),
)

In [13]:
# Print the layer-by-layer structure of the convolutional base.
conv_base.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 512, 512, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 518, 518, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 256, 256, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 256, 256, 64) 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

In [14]:
# Fine-tune only the layers whose name starts with "res5"; every other layer of
# the convolutional base is frozen.
for layer in conv_base.layers:
    layer.trainable = re.search(r'^res5', layer.name) is not None

In [15]:
# Classifier: ResNet50 feature extractor followed by a small dense head.
model = models.Sequential()
model.add(conv_base)
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dropout(0.5))
# Each image belongs to exactly one class (the generators use
# class_mode="categorical" and training uses categorical_crossentropy), so the
# output must be a probability distribution over the classes: softmax, not
# independent per-class sigmoids.
model.add(layers.Dense(label_length, activation='softmax'))

In [16]:
# Print the assembled model's layers and trainable/non-trainable parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, 2, 2, 2048)        23587712  
_________________________________________________________________
flatten_1 (Flatten)          (None, 8192)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 256)               2097408   
_________________________________________________________________
dropout_1 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 16)                4112      
Total params: 25,689,232
Trainable params: 17,054,992
Non-trainable params: 8,634,240
_________________________________________________________________


In [None]:

# Configure training: Adam optimizer with categorical cross-entropy loss,
# reporting accuracy.
model.compile(optimizer=optimizers.Adam(lr=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train from the directory generators. The result is bound to `history`
# (previously misspelled `histroy`), the name the plotting cell reads.
# NOTE(review): steps_per_epoch and validation_steps are hard-coded and do not
# track the image counts reported by flow_from_directory — confirm they are
# the intended values.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=625,
    epochs=10,
    validation_data=valid_generator,
    validation_steps=50
)

Epoch 1/10


In [None]:
# Persist the trained model to model.h5 in the current working directory.
model.save("model.h5")

In [None]:
# Per-epoch metrics recorded during training.
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)

# Figure 1: training (dots) versus validation (line) accuracy.
acc_fig, acc_axes = plt.subplots()
acc_axes.plot(epochs, acc, 'bo', label='Training acc')
acc_axes.plot(epochs, val_acc, 'b', label='Validation acc')
acc_axes.set_title('Training and validation accuracy')
acc_axes.legend()

# Figure 2: training (dots) versus validation (line) loss.
loss_fig, loss_axes = plt.subplots()
loss_axes.plot(epochs, loss, 'bo', label='Training loss')
loss_axes.plot(epochs, val_loss, 'b', label='Validation loss')
loss_axes.set_title('Training and validation loss')
loss_axes.legend()

plt.show()