 # Mask RCNN FashionAI

In [1]:
import os
import sys
import random
import math
import re
import time
import numpy as np
import matplotlib

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import pandas as pd
import tensorflow as tf
from config import Config
import utils
import model as modellib
import visualize
from model import log

%matplotlib inline 

# Root directory of the project
ROOT_DIR = os.getcwd()

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Local path to trained weights file
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "logs/mask_rcnn_coco.h5")
print(COCO_MODEL_PATH)
    
'''添加fashion ai'''
fi_class_names_=['neckline_left', 'neckline_right', 'center_front', 'shoulder_left', 
                'shoulder_right', 'armpit_left','armpit_right', 'waistline_left', 
                'waistline_right', 'cuff_left_in','cuff_left_out', 'cuff_right_in', 
                'cuff_right_out', 'top_hem_left','top_hem_right', 'waistband_left', 
                'waistband_right', 'hemline_left','hemline_right', 'crotch',
                'bottom_left_in', 'bottom_left_out','bottom_right_in', 'bottom_right_out']
fi_class_names=['clothing']

  from ._conv import register_converters as _register_converters


Using TensorFlow backend.


/home/shenhuaifeng/Documents/Mask_RCNN_Humanpose-master/logs/mask_rcnn_coco.h5


# Configurations

In [2]:
class FIConfig(Config):
    """Configuration for training on the toy shapes dataset.
    Derives from the base Config class and overrides values specific
    to the toy shapes dataset.
    """
    # Give the configuration a recognizable name
    NAME = "FI"

    # We use a GPU with 12GB memory, which can fit two images.
    # Adjust down if you use a smaller GPU.
    IMAGES_PER_GPU = 2

    # Uncomment to train on 8 GPUs (default is 1)
    # GPU_COUNT = 8

    # Number of classes (including background)
    # NUM_CLASSES = 1 + 80  # COCO has 80 classes
    NUM_CLASSES = 1 + 1  # Person and background

    NUM_KEYPOINTS = 24
    MASK_SHAPE = [28, 28]
    KEYPOINT_MASK_SHAPE = [56,56]
    # DETECTION_MAX_INSTANCES = 50
    TRAIN_ROIS_PER_IMAGE = 100
    MAX_GT_INSTANCES = 128
    RPN_TRAIN_ANCHORS_PER_IMAGE = 150
    USE_MINI_MASK = True
    MASK_POOL_SIZE = 14
    KEYPOINT_MASK_POOL_SIZE = 7
    LEARNING_RATE = 0.002
    STEPS_PER_EPOCH = 1000
    WEIGHT_LOSS = True
    KEYPOINT_THRESHOLD = 0.005
config = FIConfig()

# DataSet

In [3]:
class FIDataset(utils.Dataset):
    """Generates the shapes synthetic dataset. The dataset consists of simple
    shapes (triangles, squares, circles) placed randomly on a blank surface.
    The images are generated on the fly. No file access required.
    """
    """参数:category决定数据类别为train validation test"""
        
    def load_FI(self,category):
        """Generate the requested number of synthetic images.
        count: number of images to generate.
        height, width: the size of the generated images.
        """
        train_data_path='./data/train/'
        # Add classes
        for i,class_name in enumerate(fi_class_names):
            self.add_class("FI", i+1,class_name)
            
        annotations=pd.read_csv('./data/train/Annotations/annotations.csv')
        annotations=annotations.append(pd.read_csv('./data/train/Annotations/train.csv'),ignore_index=True)
        
        #切分test数据集和train数据集
        np.random.seed(42)
        shuffled_indces=np.random.permutation(annotations.shape[0])
        
        val_set_size=int(annotations.shape[0]*0.01)
        val_indices=shuffled_indces[:val_set_size]
        train_indices=shuffled_indces[val_set_size:]
        
        
        if category =='train':
            annotations=annotations.iloc[train_indices]
        elif category=='val':
            annotations=annotations.iloc[val_indices]
        else:
            #test 数据集
            pass
        # Add images
        # Generate random specifications of images (i.e. color and
        # list of shapes sizes and locations). This is more compact than
        # actual images. Images are generated on the fly in load_image().
        
        annotations=annotations.reset_index(drop=True)#更新索引
        
        for x in range(annotations.shape[0]):
            #bg_color, shapes = self.random_image(height, width)
            id=annotations.loc[x,'image_id']
            category=annotations.loc[x,'image_category']
            
            im_path=os.path.join(train_data_path,id)
    
            height,width=mpimg.imread(im_path).shape[0:2]
        
            key_points=[]
            for key_point in annotations.loc[x,fi_class_names_].values:
                loc_cat=[int(j) for j in key_point.split('_')]
                key_points.append(loc_cat)
            
            self.add_image("FI", image_id=id, path=im_path,
                           width=width-1, height=height-1,
                           key_points=key_points,image_category=category)#添加我的数据

    def load_image(self, image_id):
        """Generate an image from the specs of the given image ID.
        Typically this function loads the image from a file, but
        in this case it generates the image on the fly from the
        specs in image_info.
        根据image_id读取图片
        """
        info = self.image_info[image_id]
        image=mpimg.imread(info['path'])
        return image

    def image_reference(self, image_id):
        """Return the key_points data of the image."""
        info = self.image_info[image_id]
        if info["source"] == "FI":
            return info["key_points"],info["image_category"]
        else:
            super(self.__class__).image_reference(self, image_id)
    def load_keypoints(self, image_id):
        """Load clothing keypoints for the given image.

        Returns:
        key_points: num_keypoints coordinates and visibility (x,y,v)  [num_person,num_keypoints,3] of num_person
        masks: A bool array of shape [height, width, instance count] with
            one mask per instance.
        class_ids: a 1D array of class IDs of the instance masks, here is always equal to [num_person, 1]
        """
        # If not a COCO image, delegate to parent class.
        image_info = self.image_info[image_id]
        if image_info["source"] != "FI":
            return super(CocoDataset, self).load_mask(image_id)

        keypoints = []
        class_ids = []
        instance_masks = []
        annotations = self.image_info[image_id]["key_points"]
        # Build mask of shape [height, width, instance_count] and list
        # of class IDs that correspond to each channel of the mask.
        for annotation in annotations:
            class_id = self.map_source_class_id(
                "coco.{}".format(annotation['key_points']))
            assert class_id == 1
            if class_id:

                #load masks
                m = self.annToMask(annotation, image_info["height"],
                                   image_info["width"])
                # Some objects are so small that they're less than 1 pixel area
                # and end up rounded out. Skip those objects.
                if m.max() < 1:
                    continue
                # Is it a crowd? If so, use a negative class ID.
                if annotation['iscrowd']:
                    # Use negative class ID for crowds
                    class_id *= -1
                    # For crowd masks, annToMask() sometimes returns a mask
                    # smaller than the given dimensions. If so, resize it.
                    if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]:
                        m = np.ones([image_info["height"], image_info["width"]], dtype=bool)
                instance_masks.append(m)
                #load keypoints
                keypoint = annotation["keypoints"]
                keypoint = np.reshape(keypoint,(-1,3))

                keypoints.append(keypoint)
                class_ids.append(class_id)

        # Pack instance masks into an array
        if class_ids:
            keypoints = np.array(keypoints,dtype=np.int32)
            class_ids = np.array(class_ids, dtype=np.int32)
            masks = np.stack(instance_masks, axis=2)
            return keypoints, masks, class_ids
        else:
            # Call super class to return an empty mask
            return super(CocoDataset, self).load_keypoints(image_id)
    def load_mask(self, image_id):
        """Generate instance masks for shapes of the given image ID.
        """
        info = self.image_info[image_id]
        
        key_points=np.array(info['key_points'])
        clothing_nums=int(len(key_points)/24)
        
        m = np.zeros([clothing_nums,info['height'], info['width'], 24])#生成24个mask,因为有24个关键点。
        
        class_mask = np.zeros([clothing_nums,24])  #点存在的状态经过处理有三种状态 不存在为0  1为不可见.2 为可见 三分类
        class_ids=[]
       
        for clothing_num in range(clothing_nums):
            
            for part_num, bp in enumerate(key_points):
                if bp[2] > -1: #AI数据编码为bp[2]=1为可见，=2为不可见，=3为不在图内或者不可推测，FI编码为=-1为不存在，0为不可见，1为可   
                    m[clothing_num,bp[1],bp[0],part_num] = 1
                    class_mask[clothing_num,part_num] = bp[2]+1
            class_ids.append(1)
            
        #Pach instance masks into an array
        if class_ids:
            mask = m
            class_ids = np.array(class_ids, dtype=np.int32)
            return mask, class_ids, class_mask   
        else:
            return super(self.__class__).load_mask(image_id)

In [7]:
# Training dataset
dataset_train = FIDataset()
dataset_train.load_FI(category='train')
dataset_train.prepare()

# Validation dataset
dataset_val = FIDataset()
dataset_val.load_FI(category='val')
dataset_val.prepare()

print("Classes: {}.\n".format(dataset_train.class_names))
print("Train Images: {}.\n".format(len(dataset_train.image_ids)))
print("Valid Images: {}".format(len(dataset_val.image_ids)))

Classes: ['BG', 'clothing'].

Train Images: 43724.

Valid Images: 441


# Create model

In [7]:
# Create model object in inference mode.
model = modellib.MaskRCNN(mode="training", model_dir=MODEL_DIR, config=config)

# Load weights trained on MS-COCO
model.load_weights(COCO_MODEL_PATH, by_name=True,exclude=["mrcnn_class_logits", "mrcnn_bbox_fc", 
                                "mrcnn_bbox", "mrcnn_mask"])
print("Loading weights from ", COCO_MODEL_PATH)
# model.keras_model.summary()

Loading weights from  /home/shenhuaifeng/Documents/Mask_RCNN_Humanpose-master/logs/mask_rcnn_coco.h5


In [12]:
# Training - Stage 1
print("Train heads")
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE,
            epochs=15,
            layers='heads')
# Training - Stage 2
# Finetune layers from ResNet stage 4 and up
print("Training Resnet layer 4+")
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE / 10,
            epochs=20,
            layers='4+')
# Training - Stage 3
# Finetune layers from ResNet stage 3 and up
print("Training Resnet layer 3+")
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE / 100,
            epochs=100,
            layers='all')

Train heads



Starting at epoch 0. LR=0.002

Checkpoint Path: /home/shenhuaifeng/Documents/Mask_RCNN_Humanpose-master/logs/fi20180330T1331/mask_rcnn_fi_{epoch:04d}.h5
Selecting layers to train
fpn_c5p5               (Conv2D)
fpn_c4p4               (Conv2D)
fpn_c3p3               (Conv2D)
fpn_c2p2               (Conv2D)
fpn_p5                 (Conv2D)
fpn_p2                 (Conv2D)
fpn_p3                 (Conv2D)
fpn_p4                 (Conv2D)
In model:  rpn_model
    rpn_conv_shared        (Conv2D)
    rpn_class_raw          (Conv2D)
    rpn_bbox_pred          (Conv2D)
mrcnn_keypoint_mask_conv1   (TimeDistributed)
mrcnn_keypoint_mask_bn1   (TimeDistributed)
mrcnn_keypoint_mask_conv2   (TimeDistributed)
mrcnn_keypoint_mask_bn2   (TimeDistributed)
mrcnn_keypoint_mask_conv3   (TimeDistributed)
mrcnn_keypoint_mask_bn3   (TimeDistributed)
mrcnn_keypoint_mask_conv4   (TimeDistributed)
mrcnn_keypoint_mask_bn4   (TimeDistributed)
mrcnn_keypoint_mask_conv5   (TimeDistributed)
mrcnn_keypoint_mask_bn5   (Ti

ERROR:root:Error processing image {'height': 511, 'path': './data/train/Images/skirt/034d23b57ff1f8020b186854d627d597.jpg', 'image_category': 'skirt', 'key_points': [[-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [199, 218, 1], [279, 234, 1], [155, 417, 1], [292, 446, 1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1]], 'source': 'FI', 'id': 'Images/skirt/034d23b57ff1f8020b186854d627d597.jpg', 'width': 479}
Traceback (most recent call last):
  File "/home/shenhuaifeng/Documents/Mask_RCNN_Humanpose-master/model.py", line 2195, in data_generator_keypoint
    #image_meta:image_id,image_shape,windows.active_class_ids
  File "/home/shenhuaifeng/Documents/Mask_RCNN_Humanpose-master/model.py", line 1735, in load_image_gt_keypoints
    # mask, class_ids = dataset.load_mask(image_id)
  File "<ipython-input-10-d0f

ERROR:root:Error processing image {'height': 511, 'path': './data/train/Images/blouse/e9231dc8192c8bb73d21f17f0aa7002b.jpg', 'image_category': 'blouse', 'key_points': [[241, 71, 1], [294, 67, 1], [266, 80, 1], [163, 117, 1], [378, 122, 1], [164, 224, 1], [362, 232, 1], [-1, -1, -1], [-1, -1, -1], [150, 433, 1], [126, 433, 1], [380, 440, 1], [405, 441, 1], [187, 446, 1], [340, 451, 1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1]], 'source': 'FI', 'id': 'Images/blouse/e9231dc8192c8bb73d21f17f0aa7002b.jpg', 'width': 511}
Traceback (most recent call last):
  File "/home/shenhuaifeng/Documents/Mask_RCNN_Humanpose-master/model.py", line 2195, in data_generator_keypoint
    #image_meta:image_id,image_shape,windows.active_class_ids
  File "/home/shenhuaifeng/Documents/Mask_RCNN_Humanpose-master/model.py", line 1735, in load_image_gt_keypoints
    # mask, class_ids = dataset.load_mask(image_id)
  File "<ipython-inp

ERROR:root:Error processing image {'height': 511, 'path': './data/train/Images/trousers/20ef1d53a9ea239ed1c8b89a3bcdf0dc.jpg', 'image_category': 'trousers', 'key_points': [[-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [150, 179, 1], [213, 168, 1], [-1, -1, -1], [-1, -1, -1], [189, 232, 1], [219, 378, 1], [192, 388, 1], [196, 381, 0], [221, 379, 1]], 'source': 'FI', 'id': 'Images/trousers/20ef1d53a9ea239ed1c8b89a3bcdf0dc.jpg', 'width': 383}
Traceback (most recent call last):
  File "/home/shenhuaifeng/Documents/Mask_RCNN_Humanpose-master/model.py", line 2195, in data_generator_keypoint
    #image_meta:image_id,image_shape,windows.active_class_ids
  File "/home/shenhuaifeng/Documents/Mask_RCNN_Humanpose-master/model.py", line 1735, in load_image_gt_keypoints
    # mask, class_ids = dataset.load_mask(image_id)
  File "<ipython-

ERROR:root:Error processing image {'height': 511, 'path': './data/train/Images/skirt/d95ba38d4615f31d88ef19930e93c93f.jpg', 'image_category': 'skirt', 'key_points': [[-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [222, 256, 1], [318, 244, 1], [221, 443, 1], [373, 431, 1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1]], 'source': 'FI', 'id': 'Images/skirt/d95ba38d4615f31d88ef19930e93c93f.jpg', 'width': 511}
Traceback (most recent call last):
  File "/home/shenhuaifeng/Documents/Mask_RCNN_Humanpose-master/model.py", line 2195, in data_generator_keypoint
    #image_meta:image_id,image_shape,windows.active_class_ids
  File "/home/shenhuaifeng/Documents/Mask_RCNN_Humanpose-master/model.py", line 1735, in load_image_gt_keypoints
    # mask, class_ids = dataset.load_mask(image_id)
  File "<ipython-input-10-d0f

ERROR:root:Error processing image {'height': 511, 'path': './data/train/Images/blouse/5ed30456a56df2d8ee0f62d45c748a8d.jpg', 'image_category': 'blouse', 'key_points': [[245, 131, 1], [299, 139, 0], [276, 157, 1], [189, 159, 1], [352, 176, 1], [208, 244, 1], [331, 249, 0], [-1, -1, -1], [-1, -1, -1], [202, 294, 1], [144, 279, 1], [326, 304, 1], [367, 303, 1], [195, 366, 1], [340, 367, 1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1]], 'source': 'FI', 'id': 'Images/blouse/5ed30456a56df2d8ee0f62d45c748a8d.jpg', 'width': 511}
Traceback (most recent call last):
  File "/home/shenhuaifeng/Documents/Mask_RCNN_Humanpose-master/model.py", line 2195, in data_generator_keypoint
    #image_meta:image_id,image_shape,windows.active_class_ids
  File "/home/shenhuaifeng/Documents/Mask_RCNN_Humanpose-master/model.py", line 1735, in load_image_gt_keypoints
    # mask, class_ids = dataset.load_mask(image_id)
  File "<ipython-

ERROR:root:Error processing image {'height': 511, 'path': './data/train/Images/skirt/87ada8d86094e619123c259a9622d5a0.jpg', 'image_category': 'skirt', 'key_points': [[-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [118, 55, 1], [231, 54, 1], [54, 360, 1], [290, 365, 1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1], [-1, -1, -1]], 'source': 'FI', 'id': 'Images/skirt/87ada8d86094e619123c259a9622d5a0.jpg', 'width': 340}
Traceback (most recent call last):
  File "/home/shenhuaifeng/Documents/Mask_RCNN_Humanpose-master/model.py", line 2195, in data_generator_keypoint
    #image_meta:image_id,image_shape,windows.active_class_ids
  File "/home/shenhuaifeng/Documents/Mask_RCNN_Humanpose-master/model.py", line 1735, in load_image_gt_keypoints
    # mask, class_ids = dataset.load_mask(image_id)
  File "<ipython-input-10-d0f5ce

TypeError: list indices must be integers or slices, not str