In [None]:
# -*- coding: utf-8 -*-
'''
@time: 2019/01/11 11:28
spytensor
'''

import os
import json
import numpy as np
import pandas as pd
import glob
import cv2
import os
import shutil
from IPython import embed
from sklearn.model_selection import train_test_split
np.random.seed(41)  # fix NumPy's global random seed

# Class name -> category id, read by Csv2CoCo when building the categories
# and the per-annotation category_id. Id 0 is the background.
classname_to_id = {"person": 1}

class Csv2CoCo:
    """Build a COCO-style instance dictionary from per-image box annotations.

    ``total_annos`` maps an image file name to an iterable of rows. Each row
    holds the box corners ``x1, y1, x2, y2`` followed by a class label that
    must be a key of the module-level ``classname_to_id``.
    """

    def __init__(self,image_dir,total_annos):
        self.images = []
        self.annotations = []
        self.categories = []
        self.img_id = 0
        self.ann_id = 0
        self.image_dir = image_dir
        self.total_annos = total_annos

    def save_coco_json(self, instance, save_path):
        """Write ``instance`` to ``save_path`` as indented JSON."""
        # ensure_ascii=False emits non-ASCII text verbatim, so the file is
        # opened as UTF-8 explicitly; the context manager closes the handle.
        with open(save_path, 'w', encoding='utf-8') as f:
            json.dump(instance, f, ensure_ascii=False, indent=2)

    def to_coco(self, keys):
        """Return the COCO dictionary for the images named in ``keys``.

        Image and annotation ids continue from the instance counters, so
        repeated calls on the same instance accumulate images and annotations.
        """
        self._init_categories()
        for key in keys:
            self.images.append(self._image(key))
            for shape in self.total_annos[key]:
                # every value but the last is a coordinate; the last is the label
                bbox = [int(cor) for cor in shape[:-1]]
                self.annotations.append(self._annotation(bbox, shape[-1]))
                self.ann_id += 1
            self.img_id += 1
        return {
            'info': 'spytensor created',
            # NOTE(review): the COCO format names this key 'licenses'; the
            # original key is kept so existing consumers see no change - confirm.
            'license': ['license'],
            'images': self.images,
            'annotations': self.annotations,
            'categories': self.categories,
        }

    def _init_categories(self):
        """Rebuild ``self.categories`` from ``classname_to_id``."""
        # Assign instead of append so a second to_coco() call on the same
        # instance does not duplicate every category.
        self.categories = [
            {'id': class_id, 'name': name}
            for name, class_id in classname_to_id.items()
        ]

    def _image(self, path):
        """Return the COCO ``image`` record for the file ``path`` in ``image_dir``.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image.
        """
        print(path)
        full_path = os.path.join(self.image_dir, path)
        img = cv2.imread(full_path)
        if img is None:
            # cv2.imread signals a missing or unreadable file by returning
            # None; fail here with the path instead of on ``None.shape``.
            raise FileNotFoundError('cannot read image: %s' % full_path)
        return {
            'height': img.shape[0],
            'width': img.shape[1],
            'id': self.img_id,
            'file_name': path,
        }

    def _annotation(self, shape,label):
        """Return the COCO ``annotation`` record for one box on the current image.

        ``shape`` starts with ``x1, y1, x2, y2``; ``label`` must be a key of
        ``classname_to_id`` (a ``KeyError`` is raised otherwise).
        """
        points = shape[:4]
        return {
            'id': self.ann_id,
            'image_id': self.img_id,
            'category_id': int(classname_to_id[label]),
            'segmentation': self._get_seg(points),
            'bbox': self._get_box(points),
            'iscrowd': 0,
            'area': self._get_area(points),
        }

    def _get_box(self, points):
        """Convert corners ``[x1, y1, x2, y2]`` to ``[x1, y1, w, h]``."""
        min_x, min_y, max_x, max_y = points[:4]
        return [min_x, min_y, max_x - min_x, max_y - min_y]

    def _get_area(self, points):
        """Return the box area, counting both corner pixels as inside the box."""
        min_x, min_y, max_x, max_y = points[:4]
        # The +1 terms make this larger than w*h of the box from _get_box.
        return (max_x - min_x+1) * (max_y - min_y+1)

    def _get_seg(self, points):
        """Return the box outline as a single 8-point polygon.

        The polygon visits the four corners and the midpoint of each side,
        starting at ``(x1, y1)`` and moving along the left edge first.
        """
        min_x, min_y, max_x, max_y = points[:4]
        half_h = 0.5 * (max_y - min_y)
        half_w = 0.5 * (max_x - min_x)
        return [[
            min_x, min_y,
            min_x, min_y + half_h,
            min_x, max_y,
            min_x + half_w, max_y,
            max_x, max_y,
            max_x, max_y - half_h,
            max_x, min_y,
            max_x - half_w, min_y,
        ]]
   


   

In [None]:
# Clothing-category label -> integer id.
# '古装' and '古风' intentionally share id 1.
label2num = {
    '长马甲': 0,
    '古装': 1,
    '短马甲': 2,
    '背心上衣': 3,
    '背带裤': 4,
    '连体衣': 5,
    '吊带上衣': 6,
    '中裤': 7,
    '短袖衬衫': 8,
    '无袖上衣': 9,
    '长袖衬衫': 10,
    '中等半身裙': 11,
    '长半身裙': 12,
    '长外套': 13,
    '短裙': 14,
    '无袖连衣裙': 15,
    '短裤': 16,
    '短外套': 17,
    '长袖连衣裙': 18,
    '长袖上衣': 19,
    '长裤': 20,
    '短袖连衣裙': 21,
    '短袖上衣': 22,
    '古风': 1,
}

In [None]:
def creat_csv_image_dataset(data_root_path='data/', mode='train', aug_label=None):
    """Index annotated images and video frames into ``<root><mode>_down_sample.csv``.

    Every directory matching ``data_root_path + mode + '*'`` is scanned for
    ``image_annotation/<folder>/*.json`` and ``video_annotation/*.json``.
    Within one image folder (or one video) entries that carry at least one
    annotation are recorded in sorted order, and scanning of that folder or
    video stops right after the first recorded entry that contains a label
    outside ``aug_label``.

    The CSV has the columns ``file`` (image or video path), ``ann``
    (annotation JSON path) and ``frame`` (-1 for still images, otherwise the
    frame index).

    Args:
        data_root_path: Prefix of the dataset directories, ending with a slash.
        mode: Dataset name prefix, e.g. ``'train'``.
        aug_label: Container of label names that do not stop the scan. When
            omitted, a module-level ``aug_label`` is used if one exists.

    Raises:
        NameError: if ``aug_label`` is neither passed nor defined globally.
    """
    if aug_label is None:
        # Earlier versions read a notebook-level ``aug_label`` global; keep
        # honouring it, but fail with an explicit message when it is absent.
        try:
            aug_label = globals()['aug_label']
        except KeyError:
            raise NameError(
                "aug_label is not defined: pass aug_label=... to "
                "creat_csv_image_dataset"
            ) from None

    def _has_other_label(annotations):
        """True if any annotation carries a label outside ``aug_label``."""
        return any(ann['label'] not in aug_label for ann in annotations)

    image_db = []
    video_db = []
    # glob order is unspecified and the early ``break`` below makes the result
    # order-dependent, so every listing is sorted for reproducibility.
    for dataset_path in sorted(glob.glob(data_root_path + mode + '*')):
        # Still images: <dataset>/image_annotation/<folder>/<name>.json
        # describes <dataset>/image/<folder>/<name>.jpg
        for ann_folder in sorted(glob.glob(dataset_path + '/image_annotation/*')):
            # Derive the image folder from the dataset actually being scanned
            # rather than from a hard-coded 'data/' prefix.
            img_folder_path = dataset_path + '/image/' + os.path.basename(ann_folder) + '/'
            for json_path in sorted(glob.glob(ann_folder + '/*.json')):
                with open(json_path, 'r', encoding='utf-8') as f:
                    img_anns = json.load(f)
                annotations = img_anns['annotations']
                if not annotations:
                    continue
                img_path = img_folder_path + os.path.basename(json_path).split('.')[0] + '.jpg'
                image_db.append([img_path, json_path, -1])
                if _has_other_label(annotations):
                    break

        # Videos: <dataset>/video_annotation/<name>.json describes
        # <dataset>/video/<name>.mp4
        for json_path in sorted(glob.glob(dataset_path + '/video_annotation/*.json')):
            with open(json_path, 'r', encoding='utf-8') as f:
                v_ann = json.load(f)
            video_path = dataset_path + '/video/' + os.path.basename(json_path).split('.')[0] + '.mp4'
            for fram in v_ann['frames']:
                annotations = fram['annotations']
                if not annotations:
                    continue
                video_db.append([video_path, json_path, fram['frame_index']])
                if _has_other_label(annotations):
                    break

    image_db = pd.DataFrame(image_db, columns=['file', 'ann', 'frame'])
    video_db = pd.DataFrame(video_db, columns=['file', 'ann', 'frame'])
    train_db = pd.concat([image_db, video_db])
    assert len(train_db) == len(image_db) + len(video_db)

    out_path = data_root_path + mode + '_down_sample.csv'
    train_db.to_csv(out_path, index=False)
    print('已生成csv路径文件：' + out_path)
    # DataFrame.info() prints by itself and returns None, so it is not
    # wrapped in print().
    train_db.info()

In [None]:
csv_file = "train.csv"
image_dir = "images/"
saved_coco_path = "./"

# Group the CSV rows by image file name. Column 0 is the image path and the
# remaining columns are passed to Csv2CoCo unchanged.
annotations = pd.read_csv(csv_file,header=None).values
rows_by_image = {}
for annotation in annotations:
    # basename() accepts both '/' and the platform separator
    key = os.path.basename(annotation[0])
    rows_by_image.setdefault(key, []).append(annotation[1:])
# One 2-D array per image, stacked once instead of concatenated per row.
total_csv_annotations = {key: np.stack(rows) for key, rows in rows_by_image.items()}

# Split by image so all boxes of one image land in the same subset.
# random_state is pinned so re-running this cell yields the same split
# regardless of how much of NumPy's global random state was consumed.
total_keys = list(total_csv_annotations.keys())
train_keys, val_keys = train_test_split(total_keys, test_size=0.2, random_state=41)
print("train_n:", len(train_keys), 'val_n:', len(val_keys))

# Create the output directory layout.
ann_dir = os.path.join(saved_coco_path, 'coco', 'annotations')
train_img_dir = os.path.join(saved_coco_path, 'coco', 'images', 'train2017')
val_img_dir = os.path.join(saved_coco_path, 'coco', 'images', 'val2017')
for folder in (ann_dir, train_img_dir, val_img_dir):
    os.makedirs(folder, exist_ok=True)

# Convert the training subset to COCO JSON.
l2c_train = Csv2CoCo(image_dir=image_dir,total_annos=total_csv_annotations)
train_instance = l2c_train.to_coco(train_keys)
l2c_train.save_coco_json(train_instance, os.path.join(ann_dir, 'instances_train2017.json'))

# Copy the images next to their annotations.
for file_name in train_keys:
    shutil.copy(os.path.join(image_dir, file_name), train_img_dir)
for file_name in val_keys:
    shutil.copy(os.path.join(image_dir, file_name), val_img_dir)

# Convert the validation subset to COCO JSON; a fresh converter restarts the
# image and annotation ids at 0.
l2c_val = Csv2CoCo(image_dir=image_dir,total_annos=total_csv_annotations)
val_instance = l2c_val.to_coco(val_keys)
l2c_val.save_coco_json(val_instance, os.path.join(ann_dir, 'instances_val2017.json'))