In [1]:
import os
import shutil
import cv2
import random
import re
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image

%matplotlib inline

# 数据集目录结构
```
FashionAI-Attributes
├── data/
│   ├── base/
│   ├── rank/
│   └── web/
└── solution.ipynb
```

image_demo = cv2.imread('./data/base/Images/coat_length_labels/fff3f9da02b33c0d2619a1dde0914737.jpg')
image_demo.shape

In [2]:
df_train = pd.read_csv('../data/labels_train.csv', header=None)
df_train.columns = ['id','imageId', 'url', 'labelId']
df_train.head()

Unnamed: 0,id,imageId,url,labelId
0,,imageId,url,labelId
1,0.0,1,https://contestimg.wish.com/api/webimage/570f3...,"['95', '66', '137', '70', '20']"
2,1.0,2,https://contestimg.wish.com/api/webimage/5468f...,"['36', '66', '44', '214', '105', '133']"
3,2.0,3,https://contestimg.wish.com/api/webimage/54641...,"['170', '66', '97', '153', '105', '138']"
4,3.0,4,https://contestimg.wish.com/api/webimage/550b9...,"['18', '66', '44', '153', '164']"


In [3]:
df_train.reset_index(inplace=True)
del df_train['index']
df_train.reset_index(inplace=True)
print('{0}: {1}'.format('labels_train', len(df_train)))
df_train.head()

labels_train: 8714


Unnamed: 0,index,id,imageId,url,labelId
0,0,,imageId,url,labelId
1,1,0.0,1,https://contestimg.wish.com/api/webimage/570f3...,"['95', '66', '137', '70', '20']"
2,2,1.0,2,https://contestimg.wish.com/api/webimage/5468f...,"['36', '66', '44', '214', '105', '133']"
3,3,2.0,3,https://contestimg.wish.com/api/webimage/54641...,"['170', '66', '97', '153', '105', '138']"
4,4,3.0,4,https://contestimg.wish.com/api/webimage/550b9...,"['18', '66', '44', '153', '164']"


In [4]:
label_length = 10

In [5]:
base_dir = './data/'
train_dir = os.path.join(base_dir, 'train')
valid_dir = os.path.join(base_dir, 'valid')
data_sets = ['train','valid']

for data_set in data_sets:
    set_dir = os.path.join(base_dir, data_set)
    if not os.path.exists(set_dir):
        os.makedirs(set_dir)
    for i in range(label_length):
        label_dir = os.path.join(set_dir, str(i))
        if not os.path.exists(label_dir):
            os.makedirs(label_dir)

In [110]:
data_length = len(set_dir)

for i in range(data_length):
    tmp_label = df_train['labelId'][i]
    image_id = df_train['imageId'][i]
    tmp_label = tmp_label.replace('[','')
    tmp_label = tmp_label.replace(']','')
    tmp_label = tmp_label.replace('\'','')
    tmp_label = tmp_label.split(",")
    tmp_label = map(int,tmp_label)

#     label = str(tmp_label[tmp_label.where(x == np.max(x))])
    
#     src = os.path.join(base_dir, image_id)
    
#     dst = os.path.join(base_dir, 'train',  label, image_id+'.jpg')
#     shutil.copyfile(src, dst)

In [None]:
tmp_label = list(tmp_label)
label = tmp_label.index(max(tmp_label))
label

4

In [10]:
from keras import models
from keras import layers
from keras import optimizers
from keras.applications import ResNet50
from keras import backend as K
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator

In [11]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    horizontal_flip=True,
    vertical_flip=True
) 
valid__datagen = ImageDataGenerator(rescale=1./255)

In [12]:
image_width = 512
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(image_width, image_width),
    batch_size=32,
    class_mode="categorical",
    shuffle=True
)
valid_generator = valid__datagen.flow_from_directory(
    valid_dir,
    target_size=(image_width, image_width),
    batch_size=32,
    class_mode="categorical",
    shuffle=True
)

Found 9099 images belonging to 6 classes.
Found 1011 images belonging to 6 classes.


In [13]:
conv_base = ResNet50(weights='imagenet', include_top=False, input_shape=(image_width, image_width, 3))

In [None]:
conv_base.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 512, 512, 3)  0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 518, 518, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 256, 256, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 256, 256, 64) 256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

In [15]:
for layer in conv_base.layers:
    if re.search(r'^res5', layer.name) is not None:
        layer.trainable = True
    else:
         layer.trainable = False

In [17]:
model = models.Sequential() 
model.add(conv_base) 
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu')) 
model.add(layers.Dropout(0.5))
model.add(layers.Dense(n_class, activation='sigmoid'))

In [None]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, 2, 2, 2048)        23587712  
_________________________________________________________________
flatten_2 (Flatten)          (None, 8192)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 256)               2097408   
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 6)                 1542      
Total params: 25,686,662
Trainable params: 17,052,422
Non-trainable params: 8,634,240
_________________________________________________________________


In [19]:

model.compile(optimizer=optimizers.Adam(lr=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

histroy = model.fit_generator(
    train_generator,
    steps_per_epoch=236,
    epochs=1,
    validation_data=valid_generator,
    validation_steps=50
)

Epoch 1/1

In [None]:

def cur_predict_generator(cur_generator):
    nb_samples = len(cur_generator.filenames)
    features_predict = model.predict_generator(cur_generator, nb_samples)
    return features_predict;

predict_train = cur_predict_generator(train_generator)
predict_valid = cur_predict_generator(valid_generator)

In [None]:
from keras.callbacks import CSVLogger

csv_logger = CSVLogger('log.csv', append=True, separator=';')
model.fit(X_train, Y_train, callbacks=[csv_logger])

In [None]:
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)
plt.plot(epochs, acc, 'bo', label='Training acc')
plt.plot(epochs, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()

history
