Add list_from_file and list_to_file (open-mmlab#226)
* Add list_from_file and list_to_file

Signed-off-by: lizz <lizz@sensetime.com>

* Add tests for list_to_file and list_from_file

* more

* Fix tests
innerlee committed May 24, 2021
1 parent cdab0dc commit 0b360a5
Showing 15 changed files with 261 additions and 200 deletions.
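The change follows one pattern across the touched files: ad-hoc reading via open(...).readlines() or mmcv.list_from_file is swapped for the new mmocr.utils.list_from_file, and hand-written write loops for list_to_file. A minimal before/after sketch (the annotation file path is illustrative):

# before: manual file handling; readlines() keeps the trailing line endings
with open('ann_file.txt') as f:
    lines = f.readlines()

# after: one call, trailing '\r'/'\n' stripped, encoding configurable
from mmocr.utils import list_from_file
lines = list_from_file('ann_file.txt', encoding='utf-8')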
7 changes: 3 additions & 4 deletions mmocr/datasets/kie_dataset.py
@@ -1,15 +1,14 @@
import copy
from os import path as osp

-import mmcv
import numpy as np
import torch

-import mmocr.utils as utils
from mmdet.datasets.builder import DATASETS
from mmocr.core import compute_f1_score
from mmocr.datasets.base_dataset import BaseDataset
from mmocr.datasets.pipelines import sort_vertex8
+from mmocr.utils import is_type_list, list_from_file


@DATASETS.register_module()
@@ -52,7 +51,7 @@ def __init__(self,
'': 0,
**{
line.rstrip('\r\n'): ind
-for ind, line in enumerate(mmcv.list_from_file(dict_file), 1)
+for ind, line in enumerate(list_from_file(dict_file), 1)
}
}
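For illustration, assuming a hypothetical dict_file with the two lines "key" and "value", the mapping built above resolves as follows (the variable name is made up):

from mmocr.utils import list_from_file

# hypothetical dict.txt containing the two lines 'key' and 'value'
node_labels = {
    '': 0,
    **{line.rstrip('\r\n'): ind
       for ind, line in enumerate(list_from_file('dict.txt'), 1)}
}
# -> {'': 0, 'key': 1, 'value': 2}
# the empty string is reserved for index 0; file lines are enumerated from 1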

@@ -79,7 +78,7 @@ def _parse_anno_info(self, annotations):
box_num * (box_num + 1).
"""

-assert utils.is_type_list(annotations, dict)
+assert is_type_list(annotations, dict)
assert len(annotations) > 0, 'Please remove data with empty annotation'
assert 'box' in annotations[0]
assert 'text' in annotations[0]
5 changes: 2 additions & 3 deletions mmocr/datasets/utils/loader.py
@@ -1,8 +1,7 @@
import os.path as osp

-import mmcv

from mmocr.datasets.builder import LOADERS, build_parser
+from mmocr.utils import list_from_file


@LOADERS.register_module()
@@ -60,7 +59,7 @@ class HardDiskLoader(Loader):
"""

def _load(self, ann_file):
-return mmcv.list_from_file(ann_file)
+return list_from_file(ann_file)


@LOADERS.register_module()
9 changes: 4 additions & 5 deletions mmocr/models/kie/extractors/sdmgr.py
@@ -8,6 +8,7 @@
from mmdet.models.builder import DETECTORS, build_roi_extractor
from mmdet.models.detectors import SingleStageDetector
from mmocr.core import imshow_edge_node
+from mmocr.utils import list_from_file


@DETECTORS.register_module()
@@ -126,11 +127,9 @@ def show_result(self,

idx_to_cls = {}
if self.class_list is not None:
-with open(self.class_list, 'r') as fr:
-for line in fr:
-line = line.strip().split()
-class_idx, class_label = line
-idx_to_cls[class_idx] = class_label
+for line in list_from_file(self.class_list):
+class_idx, class_label = line.strip().split()
+idx_to_cls[class_idx] = class_label

# if out_file specified, do not show image in window
if out_file is not None:
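A sketch of the input the new loop expects; the one-pair-per-line format is inferred from the unpacking above, and the file contents are purely illustrative:

from mmocr.utils import list_from_file

# hypothetical class_list.txt:
#   0 ignore
#   1 key
#   2 value
idx_to_cls = {}
for line in list_from_file('class_list.txt'):
    class_idx, class_label = line.strip().split()
    idx_to_cls[class_idx] = class_label
# -> {'0': 'ignore', '1': 'key', '2': 'value'}; the indices stay as strings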
10 changes: 5 additions & 5 deletions mmocr/models/textrecog/convertors/base.py
@@ -1,4 +1,5 @@
from mmocr.models.builder import CONVERTORS
+from mmocr.utils import list_from_file


@CONVERTORS.register_module()
@@ -27,11 +28,10 @@ def __init__(self, dict_type='DICT90', dict_file=None, dict_list=None):
assert dict_list is None or isinstance(dict_list, list)
self.idx2char = []
if dict_file is not None:
-with open(dict_file, encoding='utf-8') as fr:
-for line in fr:
-line = line.strip()
-if line != '':
-self.idx2char.append(line)
+for line in list_from_file(dict_file):
+line = line.strip()
+if line != '':
+self.idx2char.append(line)
elif dict_list is not None:
self.idx2char = dict_list
else:
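Behaviour is preserved here: the old open() call passed encoding='utf-8' explicitly, which is also list_from_file's default, and the strip()/empty-check still drops blank lines. A small sketch with a hypothetical character dict file:

from mmocr.utils import list_from_file

idx2char = []
# hypothetical charset.txt containing the lines 'a', '' (blank) and 'b'
for line in list_from_file('charset.txt'):
    line = line.strip()
    if line != '':
        idx2char.append(line)
# -> ['a', 'b']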
4 changes: 3 additions & 1 deletion mmocr/utils/__init__.py
@@ -4,6 +4,7 @@
is_none_or_type, is_type_list, valid_boundary)
from .collect_env import collect_env
from .data_convert_util import convert_annotations
+from .fileio import list_from_file, list_to_file
from .img_util import drop_orientation, is_not_png
from .lmdb_util import lmdb_converter
from .logger import get_root_logger
@@ -12,5 +13,6 @@
'Registry', 'build_from_cfg', 'get_root_logger', 'collect_env',
'is_3dlist', 'is_ndarray_list', 'is_type_list', 'is_none_or_type',
'equal_len', 'is_2dlist', 'valid_boundary', 'lmdb_converter',
-'drop_orientation', 'convert_annotations', 'is_not_png'
+'drop_orientation', 'convert_annotations', 'is_not_png', 'list_to_file',
+'list_from_file'
]
31 changes: 31 additions & 0 deletions mmocr/utils/fileio.py
@@ -0,0 +1,31 @@
def list_to_file(filename, lines):
    """Write a list of strings to a text file.

    Args:
        filename (str): The output filename. It will be created/overwritten.
        lines (list(str)): Data to be written.
    """
    with open(filename, 'w', encoding='utf-8') as fw:
        for line in lines:
            fw.write(f'{line}\n')


def list_from_file(filename, encoding='utf-8'):
    """Load a text file and parse the content as a list of strings. The
    trailing "\\r" and "\\n" of each line will be removed.

    Note:
        This will be replaced by mmcv's version after it supports encoding.

    Args:
        filename (str): Filename.
        encoding (str): Encoding used to open the file. Default utf-8.

    Returns:
        list[str]: A list of strings.
    """
    item_list = []
    with open(filename, 'r', encoding=encoding) as f:
        for line in f:
            item_list.append(line.rstrip('\n\r'))
    return item_list
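A round-trip sketch of the two new helpers (the temporary path is illustrative):

from mmocr.utils import list_from_file, list_to_file

list_to_file('/tmp/demo.txt', ['hello', '世界'])
assert list_from_file('/tmp/demo.txt') == ['hello', '世界']

# non-string items are accepted on write because every line goes through an
# f-string; they come back as strings
list_to_file('/tmp/demo.txt', [1, 2.5])
assert list_from_file('/tmp/demo.txt') == ['1', '2.5']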
9 changes: 5 additions & 4 deletions mmocr/utils/lmdb_util.py
@@ -5,11 +5,12 @@

import lmdb

+from mmocr.utils import list_from_file

-def lmdb_converter(img_list, output, batch_size=1000, coding='utf-8'):
-# read img_list
-with open(img_list) as f:
-lines = f.readlines()

+def lmdb_converter(img_list_file, output, batch_size=1000, coding='utf-8'):
+# read img_list_file
+lines = list_from_file(img_list_file)

# create lmdb database
if Path(output).is_dir():
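With the signature shown above, a minimal call only needs the two paths (both hypothetical here); the list file is now read through list_from_file rather than raw readlines():

from mmocr.utils import lmdb_converter

lmdb_converter('train_img_list.txt', 'train_lmdb/', batch_size=1000)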
46 changes: 46 additions & 0 deletions tests/test_utils/test_textio.py
@@ -0,0 +1,46 @@
import tempfile

from mmocr.utils import list_from_file, list_to_file

lists = [
    [],
    [' '],
    ['\t'],
    ['a'],
    [1],
    [1.],
    ['a', 'b'],
    ['a', 1, 1.],
    [1, 1., 'a'],
    ['啊', '啊啊'],
    ['選択', 'noël', 'Информацией', 'ÄÆä'],
]


def test_list_to_file():
    with tempfile.TemporaryDirectory() as tmpdirname:
        for i, lines in enumerate(lists):
            filename = f'{tmpdirname}/{i}.txt'
            list_to_file(filename, lines)
            lines2 = [
                line.rstrip('\r\n')
                for line in open(filename, 'r', encoding='utf-8').readlines()
            ]
            lines = list(map(str, lines))
            assert len(lines) == len(lines2)
            assert all(line1 == line2 for line1, line2 in zip(lines, lines2))


def test_list_from_file():
    with tempfile.TemporaryDirectory() as tmpdirname:
        for encoding in ['utf-8', 'utf-8-sig']:
            for lineend in ['\n', '\r\n']:
                for i, lines in enumerate(lists):
                    filename = f'{tmpdirname}/{i}.txt'
                    with open(filename, 'w', encoding=encoding) as f:
                        f.writelines(f'{line}{lineend}' for line in lines)
                    lines2 = list_from_file(filename, encoding=encoding)
                    lines = list(map(str, lines))
                    assert len(lines) == len(lines2)
                    assert all(line1 == line2
                               for line1, line2 in zip(lines, lines2))
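Besides the plain round trip, the second test pins down two behaviours: the trailing '\n' or '\r\n' is stripped on read, and a UTF-8 BOM is absorbed when the matching encoding is passed. A minimal sketch (path illustrative):

from mmocr.utils import list_from_file

with open('/tmp/bom.txt', 'w', encoding='utf-8-sig') as f:
    f.write('abc\r\n')  # utf-8-sig prepends a BOM on write
assert list_from_file('/tmp/bom.txt', encoding='utf-8-sig') == ['abc']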
32 changes: 14 additions & 18 deletions tools/data/textdet/coco_to_line_dict.py
@@ -1,16 +1,13 @@
import argparse
-import codecs
import json

+import mmcv

-def read_json(fpath):
-with codecs.open(fpath, 'r', 'utf-8') as f:
-obj = json.load(f)
-return obj
+from mmocr.utils import list_to_file


def parse_coco_json(in_path):
-json_obj = read_json(in_path)
+json_obj = mmcv.load(in_path)
image_infos = json_obj['images']
annotations = json_obj['annotations']
imgid2imgname = {}
@@ -35,18 +32,17 @@ def parse_coco_json(in_path):


def gen_line_dict_file(out_path, imgid2imgname, imgid2anno):
-# import pdb; pdb.set_trace()
-with codecs.open(out_path, 'w', 'utf-8') as fw:
-for key, value in imgid2imgname.items():
-if key in imgid2anno:
-anno = imgid2anno[key]
-line_dict = {}
-line_dict['file_name'] = value['file_name']
-line_dict['height'] = value['height']
-line_dict['width'] = value['width']
-line_dict['annotations'] = anno
-line_dict_str = json.dumps(line_dict)
-fw.write(line_dict_str + '\n')
+lines = []
+for key, value in imgid2imgname.items():
+if key in imgid2anno:
+anno = imgid2anno[key]
+line_dict = {}
+line_dict['file_name'] = value['file_name']
+line_dict['height'] = value['height']
+line_dict['width'] = value['width']
+line_dict['annotations'] = anno
+lines.append(json.dumps(line_dict))
+list_to_file(out_path, lines)
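The output format is unchanged: one JSON object per line, now assembled in memory and written once with list_to_file. An illustrative record (field values are made up):

import json
from mmocr.utils import list_to_file

line_dict = {
    'file_name': 'img_1.jpg',
    'height': 720,
    'width': 1280,
    'annotations': [],  # per-image annotation dicts gathered from the COCO json
}
list_to_file('train_label.txt', [json.dumps(line_dict)])
# the file now holds: {"file_name": "img_1.jpg", "height": 720, "width": 1280, "annotations": []}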


def parse_args():
8 changes: 3 additions & 5 deletions tools/data/textdet/ctw1500_converter.py
@@ -8,7 +8,8 @@
import numpy as np
from shapely.geometry import Polygon

-from mmocr.utils import convert_annotations, drop_orientation, is_not_png
+from mmocr.utils import (convert_annotations, drop_orientation, is_not_png,
+list_from_file)


def collect_files(img_dir, gt_dir, split):
@@ -84,11 +85,8 @@ def collect_annotations(files, split, nproc=1):


def load_txt_info(gt_file, img_info):
-with open(gt_file) as f:
-gt_list = f.readlines()

anno_info = []
-for line in gt_list:
+for line in list_from_file(gt_file):
# each line has one polygon (n vertices) and one text.
# e.g., 695,885,866,888,867,1146,696,1143,####Latin 9
line = line.strip()
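Going by the example quoted in the comment, each ground-truth record is a comma-separated polygon followed by '####' and a text field. A hedged parsing sketch of that single line (the converter's own parsing continues below the excerpt shown here):

line = '695,885,866,888,867,1146,696,1143,####Latin 9'
coords_str, text = line.split('####')
vertices = [int(v) for v in coords_str.strip(',').split(',')]
# vertices -> [695, 885, 866, 888, 867, 1146, 696, 1143]; text -> 'Latin 9'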
9 changes: 4 additions & 5 deletions tools/data/textdet/icdar_converter.py
@@ -7,7 +7,8 @@
import numpy as np
from shapely.geometry import Polygon

-from mmocr.utils import convert_annotations, drop_orientation, is_not_png
+from mmocr.utils import (convert_annotations, drop_orientation, is_not_png,
+list_from_file)


def collect_files(img_dir, gt_dir):
@@ -96,11 +97,9 @@ def load_img_info(files, dataset):
assert img.shape[0:2] == img_color.shape[0:2]

if dataset == 'icdar2017':
-with open(gt_file) as f:
-gt_list = f.readlines()
+gt_list = list_from_file(gt_file)
elif dataset == 'icdar2015':
-with open(gt_file, mode='r', encoding='utf-8-sig') as f:
-gt_list = f.readlines()
+gt_list = list_from_file(gt_file, encoding='utf-8-sig')
else:
raise NotImplementedError(f'Not support {dataset}')

