In [2]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from pybaseutils import file_utils, image_utils
import time
import json

In [36]:
def get_plate_licenses(plate):
    """
    Decode the CCPD index-encoded license plate into its text form.

    Ordinary blue plates have 7 characters, new-energy plates have 8:
    https://baike.baidu.com/item/%E8%BD%A6%E7%89%8C/8347320?fr=aladdin
    https://www.yoojia.com/ask/4-11906976349117851507.html
    A new-energy plate consists of a province abbreviation (1 Chinese
    character) + a regional code (1 letter) + a 6-character serial, where
    the letter "D" marks a pure electric vehicle and "F" a non-pure electric
    vehicle (plug-in hybrid, fuel cell, ...).

    :param plate: sequence of indices (ints or numeric strings); element 0
                  indexes the province table, element 1 the letter table and
                  every remaining element the alphanumeric table
    :return: the decoded plate as a single string
    """
    # Lookup tables; the position of a character is its CCPD index.
    province_chars = "皖沪津渝冀晋蒙辽吉黑苏浙京闽赣鲁豫鄂湘粤桂琼川贵云藏陕甘青宁新警学O"
    letter_chars = "ABCDEFGHJKLMNPQRSTUVWXYZO"
    serial_chars = "ABCDEFGHJKLMNPQRSTUVWXYZ0123456789O"

    decoded = province_chars[int(plate[0])] + letter_chars[int(plate[1])]
    for code in plate[2:]:
        decoded += serial_chars[int(code)]
    return decoded

In [3]:
def parser_annotations(image_file):
    """
    Parse the annotation that CCPD encodes in an image file name.

    The base name is split on "-" into fields:
      0. ratio of the plate area to the whole image (not used here)
      1. horizontal and vertical tilt angles, separated by "_"
      2. bounding box "x1&y1_x2&y2" (top-left and bottom-right corners)
      3. four corner points "x&y_x&y_x&y_x&y", ordered right-bottom,
         left-bottom, left-top, right-top
      4. plate characters as "_"-separated indices (see get_plate_licenses)

    :param image_file: image path; only its base name is inspected
    :return: dict with keys "filename", "bboxes", "points", "labels",
             "plates" and "angles"; each list holds one entry when the name
             parses, and every list is empty when the name is malformed
    """
    filename = os.path.basename(image_file)
    try:
        fields = filename.split("-")
        angle = fields[1].split("_")
        box = [int(v) for v in fields[2].replace("&", "_").split("_")]
        corners = [int(v) for v in fields[3].replace("&", "_").split("_")]
        point = np.asarray(corners).reshape(-1, 2)
        plate = get_plate_licenses(fields[4].split("_"))
        bboxes = [box]
        angles = [angle]
        points = [point]
        plates = [plate]
        labels = ["plate"] * len(bboxes)
    except (IndexError, ValueError):
        # Only a malformed name (missing field, non-numeric value, odd number
        # of coordinates, out-of-range character index) yields an empty
        # annotation. Anything else, e.g. a NameError because the helper cell
        # was not executed, is a programming error and must surface instead
        # of silently producing empty labels.
        bboxes = []
        points = []
        labels = []
        plates = []
        angles = []
    info = {"filename": filename, "bboxes": bboxes, "points": points,
            "labels": labels, "plates": plates, "angles": angles}
    return info

In [32]:
def save_plate_licenses(image, bboxes, plates, out_dir, name=""):
    """
    Crop every bounding box out of ``image`` and store each crop as a JPEG.

    Each crop is written to ``out_dir`` as "<plate>_<index>_<id>.jpg", where
    <plate> is the matching entry of ``plates``, <index> is the zero-padded
    position of the crop and <id> is the value of
    ``file_utils.get_time(format="p")``.

    :param image: image to crop from
    :param bboxes: boxes handed to ``image_utils.get_bboxes_crop``
    :param plates: plate text per box, indexed in parallel with the crops
    :param out_dir: output directory
    :param name: unused
    :return: None
    """
    plate_crops = image_utils.get_bboxes_crop(image, bboxes)
    for idx, crop in enumerate(plate_crops):
        plate_text = plates[idx]
        stamp = file_utils.get_time(format="p")
        crop_name = "{}_{:0=3d}_{}.jpg".format(plate_text, idx, stamp)
        file = os.path.join(out_dir, crop_name)
        # presumably creates the parent directory of `file` - TODO confirm
        file_utils.create_file_path(file)
        # Encode in memory and dump the bytes ourselves instead of calling
        # cv2.imwrite (presumably because the file name contains Chinese
        # characters - TODO confirm).
        encoded = cv2.imencode('.jpg', crop)[1]
        encoded.tofile(file)

In [18]:
# Sanity check: parse the annotation encoded in one CCPD2020 file name.
img_path = ('F:/CCPD2020/ccpd_green/test/'
            '01-90_265-231&522_405&574-405&571_235&574_231&523_403&522-0_0_3_1_28_29_30_30-134-56.jpg')
img_info = parser_annotations(image_file=img_path)
print(img_info)

{'filename': '01-90_265-231&522_405&574-405&571_235&574_231&523_403&522-0_0_3_1_28_29_30_30-134-56.jpg', 'bboxes': [[231, 522, 405, 574]], 'points': [array([[405, 571],
       [235, 574],
       [231, 523],
       [403, 522]])], 'labels': ['plate'], 'plates': ['皖ADB4566'], 'angles': [['90', '265']]}


In [28]:
# Crop the plate of the sample image and store it in the relative "plates" directory.
save_plate_licenses(image=cv2.imread(img_path), bboxes=img_info['bboxes'],
                    plates=img_info['plates'], out_dir='plates')

皖ADB4566


# 获取车牌图片

In [34]:
# Crop the plates of every image under F:/CCPD2020/ccpd_green/test
img_dir = 'F:/CCPD2020/ccpd_green/test'
img_list = file_utils.get_all_files(img_dir)
for img in tqdm(img_list):
    save_path = 'F:/CCPD2020/ccpd_green_plate/test'
    img_info = parser_annotations(image_file=img)
    save_plate_licenses(image=cv2.imread(img), bboxes=img_info['bboxes'],
                        plates=img_info['plates'], out_dir=save_path)

100%|██████████| 5006/5006 [03:38<00:00, 22.93it/s]


In [37]:
# Crop the plates of every image listed in F:/CCPD2019/splits/train.txt
with open('F:/CCPD2019/splits/train.txt', 'r') as f:
    lines = f.readlines()
    for line in tqdm(lines):
        line = line.strip()  # drop the trailing newline
        save_path = 'F:/CCPD2019/ccpd_plate/train'
        # Entries in the split file are joined onto the dataset root.
        img_path = os.path.join('F:/CCPD2019/', line)
        img_info = parser_annotations(image_file=img_path)
        save_plate_licenses(image=cv2.imread(img_path), bboxes=img_info['bboxes'],
                            plates=img_info['plates'], out_dir=save_path)

100%|██████████| 100000/100000 [1:58:44<00:00, 14.04it/s]  


In [38]:
# Crop the plates of every image listed in F:/CCPD2019/splits/val.txt
with open('F:/CCPD2019/splits/val.txt', 'r') as f:
    lines = f.readlines()
    for line in tqdm(lines):
        line = line.strip()  # drop the trailing newline
        save_path = 'F:/CCPD2019/ccpd_plate/val'
        # Entries in the split file are joined onto the dataset root.
        img_path = os.path.join('F:/CCPD2019/', line)
        img_info = parser_annotations(image_file=img_path)
        save_plate_licenses(image=cv2.imread(img_path), bboxes=img_info['bboxes'],
                            plates=img_info['plates'], out_dir=save_path)

100%|██████████| 99996/99996 [1:13:45<00:00, 22.60it/s]  


In [39]:
# Crop the plates of every image listed in F:/CCPD2019/splits/test.txt
with open('F:/CCPD2019/splits/test.txt', 'r') as f:
    lines = f.readlines()
    for line in tqdm(lines):
        line = line.strip()  # drop the trailing newline
        save_path = 'F:/CCPD2019/ccpd_plate/test'
        # Entries in the split file are joined onto the dataset root.
        img_path = os.path.join('F:/CCPD2019/', line)
        img_info = parser_annotations(image_file=img_path)
        save_plate_licenses(image=cv2.imread(img_path), bboxes=img_info['bboxes'],
                            plates=img_info['plates'], out_dir=save_path)

100%|██████████| 141982/141982 [49:37<00:00, 47.69it/s]  


# 生成车牌识别标签

In [46]:
# Build the text-recognition label file for the plate crops under
# F:/CCPD2019/ccpd_plate_imgs/train
img_dir = 'F:/CCPD2019/ccpd_plate_imgs/train'
img_list = file_utils.get_all_files(img_dir)
metainfo = {'dataset_type': 'TextRecogDataset', 'task_name': 'textrecog'}
data_list = []
for img_path in tqdm(img_list):
    # The label is the part of the file name before the first "_".
    text = os.path.basename(img_path).split('_')[0]
    data_list.append({'instances': [{"text": text}], 'img_path': img_path})
train_json = {'metainfo': metainfo, 'data_list': data_list}
# ensure_ascii=False writes the Chinese plate characters verbatim, so the
# file encoding is pinned to UTF-8 instead of the platform default.
with open('F:/CCPD2019/textrecog_train.json', "w", encoding="utf-8") as f:
    json.dump(train_json, f, ensure_ascii=False)

100%|██████████| 100000/100000 [00:02<00:00, 41753.70it/s]


In [47]:
# Build the text-recognition label file for the plate crops under
# F:/CCPD2019/ccpd_plate_imgs/val
img_dir = 'F:/CCPD2019/ccpd_plate_imgs/val'
img_list = file_utils.get_all_files(img_dir)
metainfo = {'dataset_type': 'TextRecogDataset', 'task_name': 'textrecog'}
data_list = []
for img_path in tqdm(img_list):
    # The label is the part of the file name before the first "_".
    text = os.path.basename(img_path).split('_')[0]
    data_list.append({'instances': [{"text": text}], 'img_path': img_path})
train_json = {'metainfo': metainfo, 'data_list': data_list}
# ensure_ascii=False writes the Chinese plate characters verbatim, so the
# file encoding is pinned to UTF-8 instead of the platform default.
with open('F:/CCPD2019/textrecog_val.json', "w", encoding="utf-8") as f:
    json.dump(train_json, f, ensure_ascii=False)

100%|██████████| 99996/99996 [00:01<00:00, 61763.66it/s]


In [48]:
# Build the text-recognition label file for the plate crops under
# F:/CCPD2019/ccpd_plate_imgs/test
img_dir = 'F:/CCPD2019/ccpd_plate_imgs/test'
img_list = file_utils.get_all_files(img_dir)
metainfo = {'dataset_type': 'TextRecogDataset', 'task_name': 'textrecog'}
data_list = []
for img_path in tqdm(img_list):
    # The label is the part of the file name before the first "_".
    text = os.path.basename(img_path).split('_')[0]
    data_list.append({'instances': [{"text": text}], 'img_path': img_path})
train_json = {'metainfo': metainfo, 'data_list': data_list}
# ensure_ascii=False writes the Chinese plate characters verbatim, so the
# file encoding is pinned to UTF-8 instead of the platform default.
with open('F:/CCPD2019/textrecog_test.json', "w", encoding="utf-8") as f:
    json.dump(train_json, f, ensure_ascii=False)

100%|██████████| 141982/141982 [00:02<00:00, 59656.35it/s]


In [3]:
# Build the text-recognition label file for the plate crops under
# F:/CCPD2020/ccpd_green_plate/train
img_dir = 'F:/CCPD2020/ccpd_green_plate/train'
img_list = file_utils.get_all_files(img_dir)
metainfo = {'dataset_type': 'TextRecogDataset', 'task_name': 'textrecog'}
data_list = []
for img_path in tqdm(img_list):
    # The label is the part of the file name before the first "_".
    text = os.path.basename(img_path).split('_')[0]
    data_list.append({'instances': [{"text": text}], 'img_path': img_path})
train_json = {'metainfo': metainfo, 'data_list': data_list}
# ensure_ascii=False writes the Chinese plate characters verbatim, so the
# file encoding is pinned to UTF-8 instead of the platform default.
with open('F:/CCPD2020/textrecog_train.json', "w", encoding="utf-8") as f:
    json.dump(train_json, f, ensure_ascii=False)

100%|██████████| 5769/5769 [00:00<00:00, 58865.31it/s]


In [4]:
# Build the text-recognition label file for the plate crops under
# F:/CCPD2020/ccpd_green_plate/val
img_dir = 'F:/CCPD2020/ccpd_green_plate/val'
img_list = file_utils.get_all_files(img_dir)
metainfo = {'dataset_type': 'TextRecogDataset', 'task_name': 'textrecog'}
data_list = []
for img_path in tqdm(img_list):
    # The label is the part of the file name before the first "_".
    text = os.path.basename(img_path).split('_')[0]
    data_list.append({'instances': [{"text": text}], 'img_path': img_path})
train_json = {'metainfo': metainfo, 'data_list': data_list}
# ensure_ascii=False writes the Chinese plate characters verbatim, so the
# file encoding is pinned to UTF-8 instead of the platform default.
with open('F:/CCPD2020/textrecog_val.json', "w", encoding="utf-8") as f:
    json.dump(train_json, f, ensure_ascii=False)

100%|██████████| 1001/1001 [00:00<00:00, 50047.66it/s]


In [None]:
# Build the text-recognition label file for the plate crops under
# F:/CCPD2020/ccpd_green_plate/test
img_dir = 'F:/CCPD2020/ccpd_green_plate/test'
img_list = file_utils.get_all_files(img_dir)
metainfo = {'dataset_type': 'TextRecogDataset', 'task_name': 'textrecog'}
data_list = []
for img_path in tqdm(img_list):
    # The label is the part of the file name before the first "_".
    text = os.path.basename(img_path).split('_')[0]
    data_list.append({'instances': [{"text": text}], 'img_path': img_path})
train_json = {'metainfo': metainfo, 'data_list': data_list}
# ensure_ascii=False writes the Chinese plate characters verbatim, so the
# file encoding is pinned to UTF-8 instead of the platform default.
with open('F:/CCPD2020/textrecog_test.json', "w", encoding="utf-8") as f:
    json.dump(train_json, f, ensure_ascii=False)

In [12]:
import json
# 读取 ccpd_green/train 目录下的所有图片
# 利用 parser_annotations 函数解析图片文件名，得到标注信息
def ccpd2coco(image_dir):
    """
    Convert the CCPD images of one directory into a COCO-style JSON file.

    Every entry returned by ``os.listdir(image_dir)`` is parsed with
    ``parser_annotations``; the parsed records are turned into a COCO
    dictionary by ``convert_annotations`` and dumped as JSON.

    The output is written to the current working directory and named after
    the last component of ``image_dir`` (e.g. "ccpd_green/train" ->
    "train.json"). This works for any path depth and for paths with no "/".

    :param image_dir: directory containing the CCPD images
    :return: None
    """
    image_files = os.listdir(image_dir)
    annotations = [parser_annotations(image_file) for image_file in tqdm(image_files)]
    # Convert the parsed records to the COCO layout.
    coco = convert_annotations(annotations, image_dir)
    # abspath also normalizes the path, so a trailing separator does not
    # produce an empty base name.
    split_name = os.path.basename(os.path.abspath(image_dir))
    with open(split_name + ".json", "w") as f:
        json.dump(coco, f)
def convert_annotations(annotations, image_dir):
    """
    Build a COCO-style dictionary from parsed CCPD annotation records.

    :param annotations: iterable of dicts with at least the keys "filename"
                        and "bboxes"; each box is [x1, y1, x2, y2]
    :param image_dir: directory the file names are relative to; every image
                      is read from here to obtain its height and width
    :return: dict with the keys "images", "annotations" and "categories".
             There is a single category "plate" with id 1, boxes are stored
             as [x, y, width, height], and image ids and annotation ids are
             consecutive integers starting at 1.
    """
    coco = {"images": [], "annotations": [], "categories": []}
    coco["categories"].append({"supercategory": "none", "id": 1, "name": "plate"})

    img_id = 1
    annotation_id = 1
    for info in annotations:
        filename = info["filename"]
        img = cv2.imread(os.path.join(image_dir, filename))
        if img is None:
            # cv2.imread returns None for anything it cannot decode (non-image
            # files, sub-directories, ...); skip it instead of crashing on
            # `img.shape`.
            print("Skip unreadable image: {}".format(filename))
            continue
        coco["images"].append({
            "file_name": filename,
            "height": img.shape[0],
            "width": img.shape[1],
            "id": img_id,
        })

        for x1, y1, x2, y2 in info["bboxes"]:
            w = x2 - x1
            h = y2 - y1
            coco["annotations"].append({
                # A running counter gives unique integer ids, which a
                # time-derived id cannot guarantee.
                "id": annotation_id,
                "segmentation": [],
                "iscrowd": 0,
                "image_id": img_id,
                "bbox": [x1, y1, w, h],
                "area": w * h,
                "category_id": 1,
            })
            annotation_id += 1
        img_id += 1
    return coco

In [None]:
# Export COCO annotation files for the ccpd_green train and val directories.
ccpd2coco(image_dir='ccpd_green/train')
ccpd2coco(image_dir='ccpd_green/val')