diff --git a/nlp_metrics/Makefile b/nlp_metrics/Makefile new file mode 100644 index 0000000..ab5e3d2 --- /dev/null +++ b/nlp_metrics/Makefile @@ -0,0 +1,9 @@ +all: + # install pycocotools locally + python setup.py build_ext --inplace + rm -rf build + +install: + # install pycocotools to the Python site-packages + python setup.py build_ext install + rm -rf build \ No newline at end of file diff --git a/nlp_metrics/README.md b/nlp_metrics/README.md index 0129a69..854f734 100644 --- a/nlp_metrics/README.md +++ b/nlp_metrics/README.md @@ -9,21 +9,22 @@ the [COCO Caption Evaluation](https://github.com/tylin/coco-caption) library, an - python (tested 2.7/3.6) ## Files ## - -- evals.py: The file includes MIMICEavlCap and COCOEavlCap class. +- eval: The file includes MIMICEavlCap and COCOEavlCap class. - tokenizer: Python wrapper of Stanford CoreNLP PTBTokenizer -- coco: pycocotools from [cocodataset](https://github.com/cocodataset) / [cocoapi](https://github.com/cocodataset/cocoapi) - bleu: Bleu evalutation codes - meteor: Meteor evaluation codes - rouge: Rouge-L evaluation codes - cider: CIDEr evaluation codes - spice: SPICE evaluation codes +- pycocotools and common: Adapted from [COCO Dataset API](https://github.com/cocodataset/cocoapi) ## Setup ## - - You will first need to download the [Stanford CoreNLP 3.6.0](http://stanfordnlp.github.io/CoreNLP/index.html) code and models for use by SPICE. To do this, run either: - ``python get_stanford_models.py`` - ``./get_stanford_models.sh`` + +- Run ``make`` in this directory to build dependencies for ``pycocotools``. Requires ``cython>=0.27.3`` + ## Notes ## - SPICE will try to create a cache of parsed sentences in ./spice/cache/. This dramatically speeds up repeated evaluations. - Without altering this code, use the environment variables ``SPICE_CACHE_DIR`` and ``SPICE_TEMP_DIR`` to set the cache directory. @@ -32,7 +33,6 @@ the [COCO Caption Evaluation](https://github.com/tylin/coco-caption) library, an ## References ## - - [Microsoft COCO Captions: Data Collection and Evaluation Server](http://arxiv.org/abs/1504.00325) - PTBTokenizer: We use the [Stanford Tokenizer](http://nlp.stanford.edu/software/tokenizer.shtml) which is included in [Stanford CoreNLP 3.4.1](http://nlp.stanford.edu/software/corenlp.shtml). - BLEU: [BLEU: a Method for Automatic Evaluation of Machine Translation](http://www.aclweb.org/anthology/P02-1040.pdf) diff --git a/nlp_metrics/coco.py b/nlp_metrics/coco.py deleted file mode 100644 index 2fc3e9e..0000000 --- a/nlp_metrics/coco.py +++ /dev/null @@ -1,372 +0,0 @@ -from __future__ import print_function - -__author__ = 'tylin' -__version__ = '1.0.1' -# Interface for accessing the Microsoft COCO dataset. - -# Microsoft COCO is a large image dataset designed for object detection, -# segmentation, and caption generation. pycocotools is a Python API that -# assists in loading, parsing and visualizing the annotations in COCO. -# Please visit http://mscoco.org/ for more information on COCO, including -# for the data, paper, and tutorials. The exact format of the annotations -# is also described on the COCO website. For example usage of the pycocotools -# please see pycocotools_demo.ipynb. In addition to this API, please download both -# the COCO images and annotations in order to run the demo. - -# An alternative to using the API is to load the annotations directly -# into Python dictionary -# Using the API provides additional utility functions. Note that this API -# supports both *instance* and *caption* annotations. In the case of -# captions not all functions are defined (e.g. categories are undefined). - -# The following API functions are defined: -# COCO - COCO api class that loads COCO annotation file and prepare data structures. -# decodeMask - Decode binary mask M encoded via run-length encoding. -# encodeMask - Encode binary mask M using run-length encoding. -# getAnnIds - Get ann ids that satisfy given filter conditions. -# getCatIds - Get cat ids that satisfy given filter conditions. -# getImgIds - Get img ids that satisfy given filter conditions. -# loadAnns - Load anns with the specified ids. -# loadCats - Load cats with the specified ids. -# loadImgs - Load imgs with the specified ids. -# segToMask - Convert polygon segmentation to binary mask. -# showAnns - Display the specified annotations. -# loadRes - Load result file and create result api object. -# Throughout the API "ann"=annotation, "cat"=category, and "img"=image. -# Help on each functions can be accessed by: "help COCO>function". - -# See also COCO>decodeMask, -# COCO>encodeMask, COCO>getAnnIds, COCO>getCatIds, -# COCO>getImgIds, COCO>loadAnns, COCO>loadCats, -# COCO>loadImgs, COCO>segToMask, COCO>showAnns - -# Microsoft COCO Toolbox. Version 1.0 -# Data, paper, and tutorials available at: http://mscoco.org/ -# Code written by Piotr Dollar and Tsung-Yi Lin, 2014. -# Licensed under the Simplified BSD License [see bsd.txt] -from builtins import int - -import json -import datetime -import matplotlib.pyplot as plt -from matplotlib.collections import PatchCollection -from matplotlib.patches import Polygon -import numpy as np -from skimage.draw import polygon -import copy - -class COCO: - def __init__(self, annotation_file=None): - """ - Constructor of Microsoft COCO helper class for reading and visualizing annotations. - :param annotation_file (str): location of annotation file - :param image_folder (str): location to the folder that hosts images. - :return: - """ - # load dataset - self.dataset = {} - self.anns = [] - self.imgToAnns = {} - self.catToImgs = {} - self.imgs = [] - self.cats = [] - if not annotation_file == None: - print('loading annotations into memory...') - time_t = datetime.datetime.utcnow() - dataset = json.load(open(annotation_file, 'r')) - print(datetime.datetime.utcnow() - time_t) - self.dataset = dataset - self.createIndex() - - def createIndex(self): - # create index - print('creating index...') - imgToAnns = {ann['image_id']: [] for ann in self.dataset['annotations']} - anns = {ann['id']: [] for ann in self.dataset['annotations']} - for ann in self.dataset['annotations']: - imgToAnns[ann['image_id']] += [ann] - anns[ann['id']] = ann - - imgs = {im['id']: {} for im in self.dataset['images']} - for img in self.dataset['images']: - imgs[img['id']] = img - - cats = [] - catToImgs = [] - if self.dataset['type'] == 'instances': - cats = {cat['id']: [] for cat in self.dataset['categories']} - for cat in self.dataset['categories']: - cats[cat['id']] = cat - catToImgs = {cat['id']: [] for cat in self.dataset['categories']} - for ann in self.dataset['annotations']: - catToImgs[ann['category_id']] += [ann['image_id']] - - print('index created!') - - # create class members - self.anns = anns - self.imgToAnns = imgToAnns - self.catToImgs = catToImgs - self.imgs = imgs - self.cats = cats - - def info(self): - """ - Print information about the annotation file. - :return: - """ - for key, value in self.datset['info'].items(): - print('%s: %s'%(key, value)) - - def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None): - """ - Get ann ids that satisfy given filter conditions. default skips that filter - :param imgIds (int array) : get anns for given imgs - catIds (int array) : get anns for given cats - areaRng (float array) : get anns for given area range (e.g. [0 inf]) - iscrowd (boolean) : get anns for given crowd label (False or True) - :return: ids (int array) : integer array of ann ids - """ - imgIds = imgIds if type(imgIds) == list else [imgIds] - catIds = catIds if type(catIds) == list else [catIds] - - if len(imgIds) == len(catIds) == len(areaRng) == 0: - anns = self.dataset['annotations'] - else: - if not len(imgIds) == 0: - anns = sum([self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns],[]) - else: - anns = self.dataset['annotations'] - anns = anns if len(catIds) == 0 else [ann for ann in anns if ann['category_id'] in catIds] - anns = anns if len(areaRng) == 0 else [ann for ann in anns if ann['area'] > areaRng[0] and ann['area'] < areaRng[1]] - if self.dataset['type'] == 'instances': - if not iscrowd == None: - ids = [ann['id'] for ann in anns if ann['iscrowd'] == iscrowd] - else: - ids = [ann['id'] for ann in anns] - else: - ids = [ann['id'] for ann in anns] - return ids - - def getCatIds(self, catNms=[], supNms=[], catIds=[]): - """ - filtering parameters. default skips that filter. - :param catNms (str array) : get cats for given cat names - :param supNms (str array) : get cats for given supercategory names - :param catIds (int array) : get cats for given cat ids - :return: ids (int array) : integer array of cat ids - """ - catNms = catNms if type(catNms) == list else [catNms] - supNms = supNms if type(supNms) == list else [supNms] - catIds = catIds if type(catIds) == list else [catIds] - - if len(catNms) == len(supNms) == len(catIds) == 0: - cats = self.dataset['categories'] - else: - cats = self.dataset['categories'] - cats = cats if len(catNms) == 0 else [cat for cat in cats if cat['name'] in catNms] - cats = cats if len(supNms) == 0 else [cat for cat in cats if cat['supercategory'] in supNms] - cats = cats if len(catIds) == 0 else [cat for cat in cats if cat['id'] in catIds] - ids = [cat['id'] for cat in cats] - return ids - - def getImgIds(self, imgIds=[], catIds=[]): - ''' - Get img ids that satisfy given filter conditions. - :param imgIds (int array) : get imgs for given ids - :param catIds (int array) : get imgs with all given cats - :return: ids (int array) : integer array of img ids - ''' - imgIds = imgIds if type(imgIds) == list else [imgIds] - catIds = catIds if type(catIds) == list else [catIds] - - if len(imgIds) == len(catIds) == 0: - ids = self.imgs.keys() - else: - ids = set(imgIds) - for catId in catIds: - if len(ids) == 0: - ids = set(self.catToImgs[catId]) - else: - ids &= set(self.catToImgs[catId]) - return list(ids) - - def loadAnns(self, ids=[]): - """ - Load anns with the specified ids. - :param ids (int array) : integer ids specifying anns - :return: anns (object array) : loaded ann objects - """ - if type(ids) == list: - return [self.anns[id_] for id_ in ids] - elif isinstance(ids, int): - return [self.anns[ids]] - - def loadCats(self, ids=[]): - """ - Load cats with the specified ids. - :param ids (int array) : integer ids specifying cats - :return: cats (object array) : loaded cat objects - """ - if type(ids) == list: - return [self.cats[id_] for id_ in ids] - elif isinstance(ids, int): - return [self.cats[ids]] - - def loadImgs(self, ids=[]): - """ - Load anns with the specified ids. - :param ids (int array) : integer ids specifying img - :return: imgs (object array) : loaded img objects - """ - if type(ids) == list: - return [self.imgs[id_] for id_ in ids] - elif isinstance(ids, int): - return [self.imgs[ids]] - - def showAnns(self, anns): - """ - Display the specified annotations. - :param anns (array of object): annotations to display - :return: None - """ - if len(anns) == 0: - return 0 - if self.dataset['type'] == 'instances': - ax = plt.gca() - polygons = [] - color = [] - for ann in anns: - c = np.random.random((1, 3)).tolist()[0] - if type(ann['segmentation']) == list: - # polygon - for seg in ann['segmentation']: - poly = np.array(seg).reshape((len(seg)//2, 2)) - polygons.append(Polygon(poly, True,alpha=0.4)) - color.append(c) - else: - # mask - mask = COCO.decodeMask(ann['segmentation']) - img = np.ones( (mask.shape[0], mask.shape[1], 3) ) - if ann['iscrowd'] == 1: - color_mask = np.array([2.0,166.0,101.0])/255 - if ann['iscrowd'] == 0: - color_mask = np.random.random((1, 3)).tolist()[0] - for i in range(3): - img[:,:,i] = color_mask[i] - ax.imshow(np.dstack( (img, mask*0.5) )) - p = PatchCollection(polygons, facecolors=color, edgecolors=(0,0,0,1), linewidths=3, alpha=0.4) - ax.add_collection(p) - if self.dataset['type'] == 'captions': - for ann in anns: - print(ann['caption']) - - def loadRes(self, resFile): - """ - Load result file and return a result api object. - :param resFile (str) : file name of result file - :return: res (obj) : result api object - """ - res = COCO() - res.dataset['images'] = [img for img in self.dataset['images']] - res.dataset['info'] = copy.deepcopy(self.dataset['info']) - res.dataset['type'] = copy.deepcopy(self.dataset['type']) - res.dataset['licenses'] = copy.deepcopy(self.dataset['licenses']) - - print('Loading and preparing results... ') - time_t = datetime.datetime.utcnow() - anns = json.load(open(resFile)) - assert type(anns) == list, 'results in not an array of objects' - annsImgIds = [ann['image_id'] for ann in anns] - assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \ - 'Results do not correspond to current coco set' - if 'caption' in anns[0]: - imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns]) - res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds] - for id_, ann in enumerate(anns): - ann['id'] = id_ - elif 'bbox' in anns[0] and not anns[0]['bbox'] == []: - res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) - for id_, ann in enumerate(anns): - bb = ann['bbox'] - x1, x2, y1, y2 = [bb[0], bb[0]+bb[2], bb[1], bb[1]+bb[3]] - ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]] - ann['area'] = bb[2]*bb[3] - ann['id'] = id_ - ann['iscrowd'] = 0 - elif 'segmentation' in anns[0]: - res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) - for id_, ann in enumerate(anns): - ann['area']=sum(ann['segmentation']['counts'][2:-1:2]) - ann['bbox'] = [] - ann['id'] = id_ - ann['iscrowd'] = 0 - print('DONE (t=%0.2fs)'%((datetime.datetime.utcnow() - time_t).total_seconds())) - - res.dataset['annotations'] = anns - res.createIndex() - return res - - - @staticmethod - def decodeMask(R): - """ - Decode binary mask M encoded via run-length encoding. - :param R (object RLE) : run-length encoding of binary mask - :return: M (bool 2D array) : decoded binary mask - """ - N = len(R['counts']) - M = np.zeros( (R['size'][0]*R['size'][1], )) - n = 0 - val = 1 - for pos in range(N): - val = not val - for c in range(R['counts'][pos]): - R['counts'][pos] - M[n] = val - n += 1 - return M.reshape((R['size']), order='F') - - @staticmethod - def encodeMask(M): - """ - Encode binary mask M using run-length encoding. - :param M (bool 2D array) : binary mask to encode - :return: R (object RLE) : run-length encoding of binary mask - """ - [h, w] = M.shape - M = M.flatten(order='F') - N = len(M) - counts_list = [] - pos = 0 - # counts - counts_list.append(1) - diffs = np.logical_xor(M[0:N-1], M[1:N]) - for diff in diffs: - if diff: - pos +=1 - counts_list.append(1) - else: - counts_list[pos] += 1 - # if array starts from 1. start with 0 counts for 0 - if M[0] == 1: - counts_list = [0] + counts_list - return {'size': [h, w], - 'counts': counts_list , - } - - @staticmethod - def segToMask( S, h, w ): - """ - Convert polygon segmentation to binary mask. - :param S (float array) : polygon segmentation mask - :param h (int) : target mask height - :param w (int) : target mask width - :return: M (bool 2D array) : binary mask - """ - M = np.zeros((h,w), dtype=np.bool) - for s in S: - N = len(s) - rr, cc = polygon(np.array(s[1:N:2]), np.array(s[0:N:2])) # (y, x) - M[rr, cc] = 1 - return M diff --git a/nlp_metrics/common/gason.cpp b/nlp_metrics/common/gason.cpp new file mode 100644 index 0000000..0f2c00e --- /dev/null +++ b/nlp_metrics/common/gason.cpp @@ -0,0 +1,335 @@ +// https://github.com/vivkin/gason - pulled January 10, 2016 +#include "gason.h" +#include + +#define JSON_ZONE_SIZE 4096 +#define JSON_STACK_SIZE 32 + +const char *jsonStrError(int err) { + switch (err) { +#define XX(no, str) \ + case JSON_##no: \ + return str; + JSON_ERRNO_MAP(XX) +#undef XX + default: + return "unknown"; + } +} + +void *JsonAllocator::allocate(size_t size) { + size = (size + 7) & ~7; + + if (head && head->used + size <= JSON_ZONE_SIZE) { + char *p = (char *)head + head->used; + head->used += size; + return p; + } + + size_t allocSize = sizeof(Zone) + size; + Zone *zone = (Zone *)malloc(allocSize <= JSON_ZONE_SIZE ? JSON_ZONE_SIZE : allocSize); + if (zone == nullptr) + return nullptr; + zone->used = allocSize; + if (allocSize <= JSON_ZONE_SIZE || head == nullptr) { + zone->next = head; + head = zone; + } else { + zone->next = head->next; + head->next = zone; + } + return (char *)zone + sizeof(Zone); +} + +void JsonAllocator::deallocate() { + while (head) { + Zone *next = head->next; + free(head); + head = next; + } +} + +static inline bool isspace(char c) { + return c == ' ' || (c >= '\t' && c <= '\r'); +} + +static inline bool isdelim(char c) { + return c == ',' || c == ':' || c == ']' || c == '}' || isspace(c) || !c; +} + +static inline bool isdigit(char c) { + return c >= '0' && c <= '9'; +} + +static inline bool isxdigit(char c) { + return (c >= '0' && c <= '9') || ((c & ~' ') >= 'A' && (c & ~' ') <= 'F'); +} + +static inline int char2int(char c) { + if (c <= '9') + return c - '0'; + return (c & ~' ') - 'A' + 10; +} + +static double string2double(char *s, char **endptr) { + char ch = *s; + if (ch == '-') + ++s; + + double result = 0; + while (isdigit(*s)) + result = (result * 10) + (*s++ - '0'); + + if (*s == '.') { + ++s; + + double fraction = 1; + while (isdigit(*s)) { + fraction *= 0.1; + result += (*s++ - '0') * fraction; + } + } + + if (*s == 'e' || *s == 'E') { + ++s; + + double base = 10; + if (*s == '+') + ++s; + else if (*s == '-') { + ++s; + base = 0.1; + } + + unsigned int exponent = 0; + while (isdigit(*s)) + exponent = (exponent * 10) + (*s++ - '0'); + + double power = 1; + for (; exponent; exponent >>= 1, base *= base) + if (exponent & 1) + power *= base; + + result *= power; + } + + *endptr = s; + return ch == '-' ? -result : result; +} + +static inline JsonNode *insertAfter(JsonNode *tail, JsonNode *node) { + if (!tail) + return node->next = node; + node->next = tail->next; + tail->next = node; + return node; +} + +static inline JsonValue listToValue(JsonTag tag, JsonNode *tail) { + if (tail) { + auto head = tail->next; + tail->next = nullptr; + return JsonValue(tag, head); + } + return JsonValue(tag, nullptr); +} + +int jsonParse(char *s, char **endptr, JsonValue *value, JsonAllocator &allocator) { + JsonNode *tails[JSON_STACK_SIZE]; + JsonTag tags[JSON_STACK_SIZE]; + char *keys[JSON_STACK_SIZE]; + JsonValue o; + int pos = -1; + bool separator = true; + JsonNode *node; + *endptr = s; + + while (*s) { + while (isspace(*s)) { + ++s; + if (!*s) break; + } + *endptr = s++; + switch (**endptr) { + case '-': + if (!isdigit(*s) && *s != '.') { + *endptr = s; + return JSON_BAD_NUMBER; + } + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + o = JsonValue(string2double(*endptr, &s)); + if (!isdelim(*s)) { + *endptr = s; + return JSON_BAD_NUMBER; + } + break; + case '"': + o = JsonValue(JSON_STRING, s); + for (char *it = s; *s; ++it, ++s) { + int c = *it = *s; + if (c == '\\') { + c = *++s; + switch (c) { + case '\\': + case '"': + case '/': + *it = c; + break; + case 'b': + *it = '\b'; + break; + case 'f': + *it = '\f'; + break; + case 'n': + *it = '\n'; + break; + case 'r': + *it = '\r'; + break; + case 't': + *it = '\t'; + break; + case 'u': + c = 0; + for (int i = 0; i < 4; ++i) { + if (isxdigit(*++s)) { + c = c * 16 + char2int(*s); + } else { + *endptr = s; + return JSON_BAD_STRING; + } + } + if (c < 0x80) { + *it = c; + } else if (c < 0x800) { + *it++ = 0xC0 | (c >> 6); + *it = 0x80 | (c & 0x3F); + } else { + *it++ = 0xE0 | (c >> 12); + *it++ = 0x80 | ((c >> 6) & 0x3F); + *it = 0x80 | (c & 0x3F); + } + break; + default: + *endptr = s; + return JSON_BAD_STRING; + } + } else if ((unsigned int)c < ' ' || c == '\x7F') { + *endptr = s; + return JSON_BAD_STRING; + } else if (c == '"') { + *it = 0; + ++s; + break; + } + } + if (!isdelim(*s)) { + *endptr = s; + return JSON_BAD_STRING; + } + break; + case 't': + if (!(s[0] == 'r' && s[1] == 'u' && s[2] == 'e' && isdelim(s[3]))) + return JSON_BAD_IDENTIFIER; + o = JsonValue(JSON_TRUE); + s += 3; + break; + case 'f': + if (!(s[0] == 'a' && s[1] == 'l' && s[2] == 's' && s[3] == 'e' && isdelim(s[4]))) + return JSON_BAD_IDENTIFIER; + o = JsonValue(JSON_FALSE); + s += 4; + break; + case 'n': + if (!(s[0] == 'u' && s[1] == 'l' && s[2] == 'l' && isdelim(s[3]))) + return JSON_BAD_IDENTIFIER; + o = JsonValue(JSON_NULL); + s += 3; + break; + case ']': + if (pos == -1) + return JSON_STACK_UNDERFLOW; + if (tags[pos] != JSON_ARRAY) + return JSON_MISMATCH_BRACKET; + o = listToValue(JSON_ARRAY, tails[pos--]); + break; + case '}': + if (pos == -1) + return JSON_STACK_UNDERFLOW; + if (tags[pos] != JSON_OBJECT) + return JSON_MISMATCH_BRACKET; + if (keys[pos] != nullptr) + return JSON_UNEXPECTED_CHARACTER; + o = listToValue(JSON_OBJECT, tails[pos--]); + break; + case '[': + if (++pos == JSON_STACK_SIZE) + return JSON_STACK_OVERFLOW; + tails[pos] = nullptr; + tags[pos] = JSON_ARRAY; + keys[pos] = nullptr; + separator = true; + continue; + case '{': + if (++pos == JSON_STACK_SIZE) + return JSON_STACK_OVERFLOW; + tails[pos] = nullptr; + tags[pos] = JSON_OBJECT; + keys[pos] = nullptr; + separator = true; + continue; + case ':': + if (separator || keys[pos] == nullptr) + return JSON_UNEXPECTED_CHARACTER; + separator = true; + continue; + case ',': + if (separator || keys[pos] != nullptr) + return JSON_UNEXPECTED_CHARACTER; + separator = true; + continue; + case '\0': + continue; + default: + return JSON_UNEXPECTED_CHARACTER; + } + + separator = false; + + if (pos == -1) { + *endptr = s; + *value = o; + return JSON_OK; + } + + if (tags[pos] == JSON_OBJECT) { + if (!keys[pos]) { + if (o.getTag() != JSON_STRING) + return JSON_UNQUOTED_KEY; + keys[pos] = o.toString(); + continue; + } + if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode))) == nullptr) + return JSON_ALLOCATION_FAILURE; + tails[pos] = insertAfter(tails[pos], node); + tails[pos]->key = keys[pos]; + keys[pos] = nullptr; + } else { + if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode) - sizeof(char *))) == nullptr) + return JSON_ALLOCATION_FAILURE; + tails[pos] = insertAfter(tails[pos], node); + } + tails[pos]->value = o; + } + return JSON_BREAKING_BAD; +} diff --git a/nlp_metrics/common/gason.h b/nlp_metrics/common/gason.h new file mode 100644 index 0000000..2e728d9 --- /dev/null +++ b/nlp_metrics/common/gason.h @@ -0,0 +1,136 @@ +// https://github.com/vivkin/gason - pulled January 10, 2016 +#pragma once + +#include +#include +#include + +enum JsonTag { + JSON_NUMBER = 0, + JSON_STRING, + JSON_ARRAY, + JSON_OBJECT, + JSON_TRUE, + JSON_FALSE, + JSON_NULL = 0xF +}; + +struct JsonNode; + +#define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL +#define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL +#define JSON_VALUE_TAG_MASK 0xF +#define JSON_VALUE_TAG_SHIFT 47 + +union JsonValue { + uint64_t ival; + double fval; + + JsonValue(double x) + : fval(x) { + } + JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) { + assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK); + ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload; + } + bool isDouble() const { + return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK; + } + JsonTag getTag() const { + return isDouble() ? JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK); + } + uint64_t getPayload() const { + assert(!isDouble()); + return ival & JSON_VALUE_PAYLOAD_MASK; + } + double toNumber() const { + assert(getTag() == JSON_NUMBER); + return fval; + } + char *toString() const { + assert(getTag() == JSON_STRING); + return (char *)getPayload(); + } + JsonNode *toNode() const { + assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT); + return (JsonNode *)getPayload(); + } +}; + +struct JsonNode { + JsonValue value; + JsonNode *next; + char *key; +}; + +struct JsonIterator { + JsonNode *p; + + void operator++() { + p = p->next; + } + bool operator!=(const JsonIterator &x) const { + return p != x.p; + } + JsonNode *operator*() const { + return p; + } + JsonNode *operator->() const { + return p; + } +}; + +inline JsonIterator begin(JsonValue o) { + return JsonIterator{o.toNode()}; +} +inline JsonIterator end(JsonValue) { + return JsonIterator{nullptr}; +} + +#define JSON_ERRNO_MAP(XX) \ + XX(OK, "ok") \ + XX(BAD_NUMBER, "bad number") \ + XX(BAD_STRING, "bad string") \ + XX(BAD_IDENTIFIER, "bad identifier") \ + XX(STACK_OVERFLOW, "stack overflow") \ + XX(STACK_UNDERFLOW, "stack underflow") \ + XX(MISMATCH_BRACKET, "mismatch bracket") \ + XX(UNEXPECTED_CHARACTER, "unexpected character") \ + XX(UNQUOTED_KEY, "unquoted key") \ + XX(BREAKING_BAD, "breaking bad") \ + XX(ALLOCATION_FAILURE, "allocation failure") + +enum JsonErrno { +#define XX(no, str) JSON_##no, + JSON_ERRNO_MAP(XX) +#undef XX +}; + +const char *jsonStrError(int err); + +class JsonAllocator { + struct Zone { + Zone *next; + size_t used; + } *head = nullptr; + +public: + JsonAllocator() = default; + JsonAllocator(const JsonAllocator &) = delete; + JsonAllocator &operator=(const JsonAllocator &) = delete; + JsonAllocator(JsonAllocator &&x) : head(x.head) { + x.head = nullptr; + } + JsonAllocator &operator=(JsonAllocator &&x) { + head = x.head; + x.head = nullptr; + return *this; + } + ~JsonAllocator() { + deallocate(); + } + void *allocate(size_t size); + void deallocate(); +}; + +int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator); diff --git a/nlp_metrics/common/maskApi.c b/nlp_metrics/common/maskApi.c new file mode 100644 index 0000000..917dc28 --- /dev/null +++ b/nlp_metrics/common/maskApi.c @@ -0,0 +1,231 @@ +/************************************************************************** +* Microsoft COCO Toolbox. version 2.0 +* Data, paper, and tutorials available at: http://mscoco.org/ +* Code written by Piotr Dollar and Tsung-Yi Lin, 2015. +* Licensed under the Simplified BSD License [see coco/license.txt] +**************************************************************************/ +#include "maskApi.h" +#include +#include + +uint umin( uint a, uint b ) { return (ab) ? a : b; } + +void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) { + R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m); + siz j; if(cnts) for(j=0; jcnts[j]=cnts[j]; +} + +void rleFree( RLE *R ) { + free(R->cnts); R->cnts=0; +} + +void rlesInit( RLE **R, siz n ) { + siz i; *R = (RLE*) malloc(sizeof(RLE)*n); + for(i=0; i0 ) { + c=umin(ca,cb); cc+=c; ct=0; + ca-=c; if(!ca && a0) { + crowd=iscrowd!=NULL && iscrowd[g]; + if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; } + siz ka, kb, a, b; uint c, ca, cb, ct, i, u; int va, vb; + ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0; + cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1; + while( ct>0 ) { + c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0; + ca-=c; if(!ca && athr) keep[j]=0; + } + } +} + +void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) { + double h, w, i, u, ga, da; siz g, d; int crowd; + for( g=0; gthr) keep[j]=0; + } + } +} + +void rleToBbox( const RLE *R, BB bb, siz n ) { + siz i; for( i=0; id?1:c=dy && xs>xe) || (dxye); + if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; } + s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy; + if(dx>=dy) for( d=0; d<=dx; d++ ) { + t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++; + } else for( d=0; d<=dy; d++ ) { + t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++; + } + } + /* get points along y-boundary and downsample */ + free(x); free(y); k=m; m=0; double xd, yd; + x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k); + for( j=1; jw-1 ) continue; + yd=(double)(v[j]h) yd=h; yd=ceil(yd); + x[m]=(int) xd; y[m]=(int) yd; m++; + } + /* compute rle encoding given y-boundary points */ + k=m; a=malloc(sizeof(uint)*(k+1)); + for( j=0; j0) b[m++]=a[j++]; else { + j++; if(jm, p=0; long x; int more; + char *s=malloc(sizeof(char)*m*6); + for( i=0; icnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1; + while( more ) { + char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0; + if(more) c |= 0x20; c+=48; s[p++]=c; + } + } + s[p]=0; return s; +} + +void rleFrString( RLE *R, char *s, siz h, siz w ) { + siz m=0, p=0, k; long x; int more; uint *cnts; + while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0; + while( s[p] ) { + x=0; k=0; more=1; + while( more ) { + char c=s[p]-48; x |= (c & 0x1f) << 5*k; + more = c & 0x20; p++; k++; + if(!more && (c & 0x10)) x |= -1 << 5*k; + } + if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x; + } + rleInit(R,h,w,m,cnts); free(cnts); +} diff --git a/nlp_metrics/common/maskApi.h b/nlp_metrics/common/maskApi.h new file mode 100644 index 0000000..ebc7892 --- /dev/null +++ b/nlp_metrics/common/maskApi.h @@ -0,0 +1,60 @@ +/************************************************************************** +* Microsoft COCO Toolbox. version 2.0 +* Data, paper, and tutorials available at: http://mscoco.org/ +* Code written by Piotr Dollar and Tsung-Yi Lin, 2015. +* Licensed under the Simplified BSD License [see coco/license.txt] +**************************************************************************/ +#pragma once + +typedef unsigned int uint; +typedef unsigned long siz; +typedef unsigned char byte; +typedef double* BB; +typedef struct { siz h, w, m; uint *cnts; } RLE; + +/* Initialize/destroy RLE. */ +void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); +void rleFree( RLE *R ); + +/* Initialize/destroy RLE array. */ +void rlesInit( RLE **R, siz n ); +void rlesFree( RLE **R, siz n ); + +/* Encode binary masks using RLE. */ +void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); + +/* Decode binary masks encoded via RLE. */ +void rleDecode( const RLE *R, byte *mask, siz n ); + +/* Compute union or intersection of encoded masks. */ +void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); + +/* Compute area of encoded masks. */ +void rleArea( const RLE *R, siz n, uint *a ); + +/* Compute intersection over union between masks. */ +void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); + +/* Compute non-maximum suppression between bounding masks */ +void rleNms( RLE *dt, siz n, uint *keep, double thr ); + +/* Compute intersection over union between bounding boxes. */ +void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); + +/* Compute non-maximum suppression between bounding boxes */ +void bbNms( BB dt, siz n, uint *keep, double thr ); + +/* Get bounding boxes surrounding encoded masks. */ +void rleToBbox( const RLE *R, BB bb, siz n ); + +/* Convert bounding boxes to encoded masks. */ +void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); + +/* Convert polygon to encoded mask. */ +void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); + +/* Get compressed string representation of encoded mask. */ +char* rleToString( const RLE *R ); + +/* Convert from compressed string representation of encoded mask. */ +void rleFrString( RLE *R, char *s, siz h, siz w ); diff --git a/nlp_metrics/pycocotools/__init__.py b/nlp_metrics/pycocotools/__init__.py new file mode 100644 index 0000000..3f7d85b --- /dev/null +++ b/nlp_metrics/pycocotools/__init__.py @@ -0,0 +1 @@ +__author__ = 'tylin' diff --git a/nlp_metrics/pycocotools/_mask.pyx b/nlp_metrics/pycocotools/_mask.pyx new file mode 100644 index 0000000..d065837 --- /dev/null +++ b/nlp_metrics/pycocotools/_mask.pyx @@ -0,0 +1,308 @@ +# distutils: language = c +# distutils: sources = ../common/maskApi.c + +#************************************************************************** +# Microsoft COCO Toolbox. version 2.0 +# Data, paper, and tutorials available at: http://mscoco.org/ +# Code written by Piotr Dollar and Tsung-Yi Lin, 2015. +# Licensed under the Simplified BSD License [see coco/license.txt] +#************************************************************************** + +__author__ = 'tsungyi' + +import sys +PYTHON_VERSION = sys.version_info[0] + +# import both Python-level and C-level symbols of Numpy +# the API uses Numpy to interface C and Python +import numpy as np +cimport numpy as np +from libc.stdlib cimport malloc, free + +# intialized Numpy. must do. +np.import_array() + +# import numpy C function +# we use PyArray_ENABLEFLAGS to make Numpy ndarray responsible to memoery management +cdef extern from "numpy/arrayobject.h": + void PyArray_ENABLEFLAGS(np.ndarray arr, int flags) + +# Declare the prototype of the C functions in MaskApi.h +cdef extern from "maskApi.h": + ctypedef unsigned int uint + ctypedef unsigned long siz + ctypedef unsigned char byte + ctypedef double* BB + ctypedef struct RLE: + siz h, + siz w, + siz m, + uint* cnts, + void rlesInit( RLE **R, siz n ) + void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n ) + void rleDecode( const RLE *R, byte *mask, siz n ) + void rleMerge( const RLE *R, RLE *M, siz n, int intersect ) + void rleArea( const RLE *R, siz n, uint *a ) + void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) + void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) + void rleToBbox( const RLE *R, BB bb, siz n ) + void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ) + void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ) + char* rleToString( const RLE *R ) + void rleFrString( RLE *R, char *s, siz h, siz w ) + +# python class to wrap RLE array in C +# the class handles the memory allocation and deallocation +cdef class RLEs: + cdef RLE *_R + cdef siz _n + + def __cinit__(self, siz n =0): + rlesInit(&self._R, n) + self._n = n + + # free the RLE array here + def __dealloc__(self): + if self._R is not NULL: + for i in range(self._n): + free(self._R[i].cnts) + free(self._R) + def __getattr__(self, key): + if key == 'n': + return self._n + raise AttributeError(key) + +# python class to wrap Mask array in C +# the class handles the memory allocation and deallocation +cdef class Masks: + cdef byte *_mask + cdef siz _h + cdef siz _w + cdef siz _n + + def __cinit__(self, h, w, n): + self._mask = malloc(h*w*n* sizeof(byte)) + self._h = h + self._w = w + self._n = n + # def __dealloc__(self): + # the memory management of _mask has been passed to np.ndarray + # it doesn't need to be freed here + + # called when passing into np.array() and return an np.ndarray in column-major order + def __array__(self): + cdef np.npy_intp shape[1] + shape[0] = self._h*self._w*self._n + # Create a 1D array, and reshape it to fortran/Matlab column-major array + ndarray = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT8, self._mask).reshape((self._h, self._w, self._n), order='F') + # The _mask allocated by Masks is now handled by ndarray + PyArray_ENABLEFLAGS(ndarray, np.NPY_OWNDATA) + return ndarray + +# internal conversion from Python RLEs object to compressed RLE format +def _toString(RLEs Rs): + cdef siz n = Rs.n + cdef bytes py_string + cdef char* c_string + objs = [] + for i in range(n): + c_string = rleToString( &Rs._R[i] ) + py_string = c_string + objs.append({ + 'size': [Rs._R[i].h, Rs._R[i].w], + 'counts': py_string + }) + free(c_string) + return objs + +# internal conversion from compressed RLE format to Python RLEs object +def _frString(rleObjs): + cdef siz n = len(rleObjs) + Rs = RLEs(n) + cdef bytes py_string + cdef char* c_string + for i, obj in enumerate(rleObjs): + if PYTHON_VERSION == 2: + py_string = str(obj['counts']).encode('utf8') + elif PYTHON_VERSION == 3: + py_string = str.encode(obj['counts']) if type(obj['counts']) == str else obj['counts'] + else: + raise Exception('Python version must be 2 or 3') + c_string = py_string + rleFrString( &Rs._R[i], c_string, obj['size'][0], obj['size'][1] ) + return Rs + +# encode mask to RLEs objects +# list of RLE string can be generated by RLEs member function +def encode(np.ndarray[np.uint8_t, ndim=3, mode='fortran'] mask): + h, w, n = mask.shape[0], mask.shape[1], mask.shape[2] + cdef RLEs Rs = RLEs(n) + rleEncode(Rs._R,mask.data,h,w,n) + objs = _toString(Rs) + return objs + +# decode mask from compressed list of RLE string or RLEs object +def decode(rleObjs): + cdef RLEs Rs = _frString(rleObjs) + h, w, n = Rs._R[0].h, Rs._R[0].w, Rs._n + masks = Masks(h, w, n) + rleDecode(Rs._R, masks._mask, n); + return np.array(masks) + +def merge(rleObjs, intersect=0): + cdef RLEs Rs = _frString(rleObjs) + cdef RLEs R = RLEs(1) + rleMerge(Rs._R, R._R, Rs._n, intersect) + obj = _toString(R)[0] + return obj + +def area(rleObjs): + cdef RLEs Rs = _frString(rleObjs) + cdef uint* _a = malloc(Rs._n* sizeof(uint)) + rleArea(Rs._R, Rs._n, _a) + cdef np.npy_intp shape[1] + shape[0] = Rs._n + a = np.array((Rs._n, ), dtype=np.uint8) + a = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT32, _a) + PyArray_ENABLEFLAGS(a, np.NPY_OWNDATA) + return a + +# iou computation. support function overload (RLEs-RLEs and bbox-bbox). +def iou( dt, gt, pyiscrowd ): + def _preproc(objs): + if len(objs) == 0: + return objs + if type(objs) == np.ndarray: + if len(objs.shape) == 1: + objs = objs.reshape((objs[0], 1)) + # check if it's Nx4 bbox + if not len(objs.shape) == 2 or not objs.shape[1] == 4: + raise Exception('numpy ndarray input is only for *bounding boxes* and should have Nx4 dimension') + objs = objs.astype(np.double) + elif type(objs) == list: + # check if list is in box format and convert it to np.ndarray + isbox = np.all(np.array([(len(obj)==4) and ((type(obj)==list) or (type(obj)==np.ndarray)) for obj in objs])) + isrle = np.all(np.array([type(obj) == dict for obj in objs])) + if isbox: + objs = np.array(objs, dtype=np.double) + if len(objs.shape) == 1: + objs = objs.reshape((1,objs.shape[0])) + elif isrle: + objs = _frString(objs) + else: + raise Exception('list input can be bounding box (Nx4) or RLEs ([RLE])') + else: + raise Exception('unrecognized type. The following type: RLEs (rle), np.ndarray (box), and list (box) are supported.') + return objs + def _rleIou(RLEs dt, RLEs gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): + rleIou( dt._R, gt._R, m, n, iscrowd.data, _iou.data ) + def _bbIou(np.ndarray[np.double_t, ndim=2] dt, np.ndarray[np.double_t, ndim=2] gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou): + bbIou( dt.data, gt.data, m, n, iscrowd.data, _iou.data ) + def _len(obj): + cdef siz N = 0 + if type(obj) == RLEs: + N = obj.n + elif len(obj)==0: + pass + elif type(obj) == np.ndarray: + N = obj.shape[0] + return N + # convert iscrowd to numpy array + cdef np.ndarray[np.uint8_t, ndim=1] iscrowd = np.array(pyiscrowd, dtype=np.uint8) + # simple type checking + cdef siz m, n + dt = _preproc(dt) + gt = _preproc(gt) + m = _len(dt) + n = _len(gt) + if m == 0 or n == 0: + return [] + if not type(dt) == type(gt): + raise Exception('The dt and gt should have the same data type, either RLEs, list or np.ndarray') + + # define local variables + cdef double* _iou = 0 + cdef np.npy_intp shape[1] + # check type and assign iou function + if type(dt) == RLEs: + _iouFun = _rleIou + elif type(dt) == np.ndarray: + _iouFun = _bbIou + else: + raise Exception('input data type not allowed.') + _iou = malloc(m*n* sizeof(double)) + iou = np.zeros((m*n, ), dtype=np.double) + shape[0] = m*n + iou = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _iou) + PyArray_ENABLEFLAGS(iou, np.NPY_OWNDATA) + _iouFun(dt, gt, iscrowd, m, n, iou) + return iou.reshape((m,n), order='F') + +def toBbox( rleObjs ): + cdef RLEs Rs = _frString(rleObjs) + cdef siz n = Rs.n + cdef BB _bb = malloc(4*n* sizeof(double)) + rleToBbox( Rs._R, _bb, n ) + cdef np.npy_intp shape[1] + shape[0] = 4*n + bb = np.array((1,4*n), dtype=np.double) + bb = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _bb).reshape((n, 4)) + PyArray_ENABLEFLAGS(bb, np.NPY_OWNDATA) + return bb + +def frBbox(np.ndarray[np.double_t, ndim=2] bb, siz h, siz w ): + cdef siz n = bb.shape[0] + Rs = RLEs(n) + rleFrBbox( Rs._R, bb.data, h, w, n ) + objs = _toString(Rs) + return objs + +def frPoly( poly, siz h, siz w ): + cdef np.ndarray[np.double_t, ndim=1] np_poly + n = len(poly) + Rs = RLEs(n) + for i, p in enumerate(poly): + np_poly = np.array(p, dtype=np.double, order='F') + rleFrPoly( &Rs._R[i], np_poly.data, int(len(p)/2), h, w ) + objs = _toString(Rs) + return objs + +def frUncompressedRLE(ucRles, siz h, siz w): + cdef np.ndarray[np.uint32_t, ndim=1] cnts + cdef RLE R + cdef uint *data + n = len(ucRles) + objs = [] + for i in range(n): + Rs = RLEs(1) + cnts = np.array(ucRles[i]['counts'], dtype=np.uint32) + # time for malloc can be saved here but it's fine + data = malloc(len(cnts)* sizeof(uint)) + for j in range(len(cnts)): + data[j] = cnts[j] + R = RLE(ucRles[i]['size'][0], ucRles[i]['size'][1], len(cnts), data) + Rs._R[0] = R + objs.append(_toString(Rs)[0]) + return objs + +def frPyObjects(pyobj, h, w): + # encode rle from a list of python objects + if type(pyobj) == np.ndarray: + objs = frBbox(pyobj, h, w) + elif type(pyobj) == list and len(pyobj[0]) == 4: + objs = frBbox(pyobj, h, w) + elif type(pyobj) == list and len(pyobj[0]) > 4: + objs = frPoly(pyobj, h, w) + elif type(pyobj) == list and type(pyobj[0]) == dict \ + and 'counts' in pyobj[0] and 'size' in pyobj[0]: + objs = frUncompressedRLE(pyobj, h, w) + # encode rle from single python object + elif type(pyobj) == list and len(pyobj) == 4: + objs = frBbox([pyobj], h, w)[0] + elif type(pyobj) == list and len(pyobj) > 4: + objs = frPoly([pyobj], h, w)[0] + elif type(pyobj) == dict and 'counts' in pyobj and 'size' in pyobj: + objs = frUncompressedRLE([pyobj], h, w)[0] + else: + raise Exception('input type is not supported.') + return objs diff --git a/nlp_metrics/pycocotools/coco.py b/nlp_metrics/pycocotools/coco.py new file mode 100644 index 0000000..fecbde9 --- /dev/null +++ b/nlp_metrics/pycocotools/coco.py @@ -0,0 +1,441 @@ +__author__ = 'tylin' +__version__ = '2.0' +# Interface for accessing the Microsoft COCO dataset. + +# Microsoft COCO is a large image dataset designed for object detection, +# segmentation, and caption generation. pycocotools is a Python API that +# assists in loading, parsing and visualizing the annotations in COCO. +# Please visit http://mscoco.org/ for more information on COCO, including +# for the data, paper, and tutorials. The exact format of the annotations +# is also described on the COCO website. For example usage of the pycocotools +# please see pycocotools_demo.ipynb. In addition to this API, please download both +# the COCO images and annotations in order to run the demo. + +# An alternative to using the API is to load the annotations directly +# into Python dictionary +# Using the API provides additional utility functions. Note that this API +# supports both *instance* and *caption* annotations. In the case of +# captions not all functions are defined (e.g. categories are undefined). + +# The following API functions are defined: +# COCO - COCO api class that loads COCO annotation file and prepare data structures. +# decodeMask - Decode binary mask M encoded via run-length encoding. +# encodeMask - Encode binary mask M using run-length encoding. +# getAnnIds - Get ann ids that satisfy given filter conditions. +# getCatIds - Get cat ids that satisfy given filter conditions. +# getImgIds - Get img ids that satisfy given filter conditions. +# loadAnns - Load anns with the specified ids. +# loadCats - Load cats with the specified ids. +# loadImgs - Load imgs with the specified ids. +# annToMask - Convert segmentation in an annotation to binary mask. +# showAnns - Display the specified annotations. +# loadRes - Load algorithm results and create API for accessing them. +# download - Download COCO images from mscoco.org server. +# Throughout the API "ann"=annotation, "cat"=category, and "img"=image. +# Help on each functions can be accessed by: "help COCO>function". + +# See also COCO>decodeMask, +# COCO>encodeMask, COCO>getAnnIds, COCO>getCatIds, +# COCO>getImgIds, COCO>loadAnns, COCO>loadCats, +# COCO>loadImgs, COCO>annToMask, COCO>showAnns + +# Microsoft COCO Toolbox. version 2.0 +# Data, paper, and tutorials available at: http://mscoco.org/ +# Code written by Piotr Dollar and Tsung-Yi Lin, 2014. +# Licensed under the Simplified BSD License [see bsd.txt] + +import json +import time +import matplotlib.pyplot as plt +from matplotlib.collections import PatchCollection +from matplotlib.patches import Polygon +import numpy as np +import copy +import itertools +from nlp_metrics.pycocotools import mask as maskUtils +import os +from collections import defaultdict +import sys +PYTHON_VERSION = sys.version_info[0] +if PYTHON_VERSION == 2: + from urllib import urlretrieve +elif PYTHON_VERSION == 3: + from urllib.request import urlretrieve + + +def _isArrayLike(obj): + return hasattr(obj, '__iter__') and hasattr(obj, '__len__') + + +class COCO: + def __init__(self, annotation_file=None): + """ + Constructor of Microsoft COCO helper class for reading and visualizing annotations. + :param annotation_file (str): location of annotation file + :param image_folder (str): location to the folder that hosts images. + :return: + """ + # load dataset + self.dataset,self.anns,self.cats,self.imgs = dict(),dict(),dict(),dict() + self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list) + if not annotation_file == None: + print('loading annotations into memory...') + tic = time.time() + dataset = json.load(open(annotation_file, 'r')) + assert type(dataset)==dict, 'annotation file format {} not supported'.format(type(dataset)) + print('Done (t={:0.2f}s)'.format(time.time()- tic)) + self.dataset = dataset + self.createIndex() + + def createIndex(self): + # create index + print('creating index...') + anns, cats, imgs = {}, {}, {} + imgToAnns,catToImgs = defaultdict(list),defaultdict(list) + if 'annotations' in self.dataset: + for ann in self.dataset['annotations']: + imgToAnns[ann['image_id']].append(ann) + anns[ann['id']] = ann + + if 'images' in self.dataset: + for img in self.dataset['images']: + imgs[img['id']] = img + + if 'categories' in self.dataset: + for cat in self.dataset['categories']: + cats[cat['id']] = cat + + if 'annotations' in self.dataset and 'categories' in self.dataset: + for ann in self.dataset['annotations']: + catToImgs[ann['category_id']].append(ann['image_id']) + + print('index created!') + + # create class members + self.anns = anns + self.imgToAnns = imgToAnns + self.catToImgs = catToImgs + self.imgs = imgs + self.cats = cats + + def info(self): + """ + Print information about the annotation file. + :return: + """ + for key, value in self.dataset['info'].items(): + print('{}: {}'.format(key, value)) + + def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None): + """ + Get ann ids that satisfy given filter conditions. default skips that filter + :param imgIds (int array) : get anns for given imgs + catIds (int array) : get anns for given cats + areaRng (float array) : get anns for given area range (e.g. [0 inf]) + iscrowd (boolean) : get anns for given crowd label (False or True) + :return: ids (int array) : integer array of ann ids + """ + imgIds = imgIds if _isArrayLike(imgIds) else [imgIds] + catIds = catIds if _isArrayLike(catIds) else [catIds] + + if len(imgIds) == len(catIds) == len(areaRng) == 0: + anns = self.dataset['annotations'] + else: + if not len(imgIds) == 0: + lists = [self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns] + anns = list(itertools.chain.from_iterable(lists)) + else: + anns = self.dataset['annotations'] + anns = anns if len(catIds) == 0 else [ann for ann in anns if ann['category_id'] in catIds] + anns = anns if len(areaRng) == 0 else [ann for ann in anns if ann['area'] > areaRng[0] and ann['area'] < areaRng[1]] + if not iscrowd == None: + ids = [ann['id'] for ann in anns if ann['iscrowd'] == iscrowd] + else: + ids = [ann['id'] for ann in anns] + return ids + + def getCatIds(self, catNms=[], supNms=[], catIds=[]): + """ + filtering parameters. default skips that filter. + :param catNms (str array) : get cats for given cat names + :param supNms (str array) : get cats for given supercategory names + :param catIds (int array) : get cats for given cat ids + :return: ids (int array) : integer array of cat ids + """ + catNms = catNms if _isArrayLike(catNms) else [catNms] + supNms = supNms if _isArrayLike(supNms) else [supNms] + catIds = catIds if _isArrayLike(catIds) else [catIds] + + if len(catNms) == len(supNms) == len(catIds) == 0: + cats = self.dataset['categories'] + else: + cats = self.dataset['categories'] + cats = cats if len(catNms) == 0 else [cat for cat in cats if cat['name'] in catNms] + cats = cats if len(supNms) == 0 else [cat for cat in cats if cat['supercategory'] in supNms] + cats = cats if len(catIds) == 0 else [cat for cat in cats if cat['id'] in catIds] + ids = [cat['id'] for cat in cats] + return ids + + def getImgIds(self, imgIds=[], catIds=[]): + ''' + Get img ids that satisfy given filter conditions. + :param imgIds (int array) : get imgs for given ids + :param catIds (int array) : get imgs with all given cats + :return: ids (int array) : integer array of img ids + ''' + imgIds = imgIds if _isArrayLike(imgIds) else [imgIds] + catIds = catIds if _isArrayLike(catIds) else [catIds] + + if len(imgIds) == len(catIds) == 0: + ids = self.imgs.keys() + else: + ids = set(imgIds) + for i, catId in enumerate(catIds): + if i == 0 and len(ids) == 0: + ids = set(self.catToImgs[catId]) + else: + ids &= set(self.catToImgs[catId]) + return list(ids) + + def loadAnns(self, ids=[]): + """ + Load anns with the specified ids. + :param ids (int array) : integer ids specifying anns + :return: anns (object array) : loaded ann objects + """ + if _isArrayLike(ids): + return [self.anns[id] for id in ids] + elif type(ids) == int: + return [self.anns[ids]] + + def loadCats(self, ids=[]): + """ + Load cats with the specified ids. + :param ids (int array) : integer ids specifying cats + :return: cats (object array) : loaded cat objects + """ + if _isArrayLike(ids): + return [self.cats[id] for id in ids] + elif type(ids) == int: + return [self.cats[ids]] + + def loadImgs(self, ids=[]): + """ + Load anns with the specified ids. + :param ids (int array) : integer ids specifying img + :return: imgs (object array) : loaded img objects + """ + if _isArrayLike(ids): + return [self.imgs[id] for id in ids] + elif type(ids) == int: + return [self.imgs[ids]] + + def showAnns(self, anns, draw_bbox=False): + """ + Display the specified annotations. + :param anns (array of object): annotations to display + :return: None + """ + if len(anns) == 0: + return 0 + if 'segmentation' in anns[0] or 'keypoints' in anns[0]: + datasetType = 'instances' + elif 'caption' in anns[0]: + datasetType = 'captions' + else: + raise Exception('datasetType not supported') + if datasetType == 'instances': + ax = plt.gca() + ax.set_autoscale_on(False) + polygons = [] + color = [] + for ann in anns: + c = (np.random.random((1, 3))*0.6+0.4).tolist()[0] + if 'segmentation' in ann: + if type(ann['segmentation']) == list: + # polygon + for seg in ann['segmentation']: + poly = np.array(seg).reshape((int(len(seg)/2), 2)) + polygons.append(Polygon(poly)) + color.append(c) + else: + # mask + t = self.imgs[ann['image_id']] + if type(ann['segmentation']['counts']) == list: + rle = maskUtils.frPyObjects([ann['segmentation']], t['height'], t['width']) + else: + rle = [ann['segmentation']] + m = maskUtils.decode(rle) + img = np.ones( (m.shape[0], m.shape[1], 3) ) + if ann['iscrowd'] == 1: + color_mask = np.array([2.0,166.0,101.0])/255 + if ann['iscrowd'] == 0: + color_mask = np.random.random((1, 3)).tolist()[0] + for i in range(3): + img[:,:,i] = color_mask[i] + ax.imshow(np.dstack( (img, m*0.5) )) + if 'keypoints' in ann and type(ann['keypoints']) == list: + # turn skeleton into zero-based index + sks = np.array(self.loadCats(ann['category_id'])[0]['skeleton'])-1 + kp = np.array(ann['keypoints']) + x = kp[0::3] + y = kp[1::3] + v = kp[2::3] + for sk in sks: + if np.all(v[sk]>0): + plt.plot(x[sk],y[sk], linewidth=3, color=c) + plt.plot(x[v>0], y[v>0],'o',markersize=8, markerfacecolor=c, markeredgecolor='k',markeredgewidth=2) + plt.plot(x[v>1], y[v>1],'o',markersize=8, markerfacecolor=c, markeredgecolor=c, markeredgewidth=2) + + if draw_bbox: + [bbox_x, bbox_y, bbox_w, bbox_h] = ann['bbox'] + poly = [[bbox_x, bbox_y], [bbox_x, bbox_y+bbox_h], [bbox_x+bbox_w, bbox_y+bbox_h], [bbox_x+bbox_w, bbox_y]] + np_poly = np.array(poly).reshape((4,2)) + polygons.append(Polygon(np_poly)) + color.append(c) + + p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.4) + ax.add_collection(p) + p = PatchCollection(polygons, facecolor='none', edgecolors=color, linewidths=2) + ax.add_collection(p) + elif datasetType == 'captions': + for ann in anns: + print(ann['caption']) + + def loadRes(self, resFile): + """ + Load result file and return a result api object. + :param resFile (str) : file name of result file + :return: res (obj) : result api object + """ + res = COCO() + res.dataset['images'] = [img for img in self.dataset['images']] + + print('Loading and preparing results...') + tic = time.time() + if type(resFile) == str or (PYTHON_VERSION == 2 and type(resFile) == unicode): + anns = json.load(open(resFile)) + elif type(resFile) == np.ndarray: + anns = self.loadNumpyAnnotations(resFile) + else: + anns = resFile + assert type(anns) == list, 'results in not an array of objects' + annsImgIds = [ann['image_id'] for ann in anns] + assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \ + 'Results do not correspond to current coco set' + if 'caption' in anns[0]: + imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns]) + res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds] + for id, ann in enumerate(anns): + ann['id'] = id+1 + elif 'bbox' in anns[0] and not anns[0]['bbox'] == []: + res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) + for id, ann in enumerate(anns): + bb = ann['bbox'] + x1, x2, y1, y2 = [bb[0], bb[0]+bb[2], bb[1], bb[1]+bb[3]] + if not 'segmentation' in ann: + ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]] + ann['area'] = bb[2]*bb[3] + ann['id'] = id+1 + ann['iscrowd'] = 0 + elif 'segmentation' in anns[0]: + res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) + for id, ann in enumerate(anns): + # now only support compressed RLE format as segmentation results + ann['area'] = maskUtils.area(ann['segmentation']) + if not 'bbox' in ann: + ann['bbox'] = maskUtils.toBbox(ann['segmentation']) + ann['id'] = id+1 + ann['iscrowd'] = 0 + elif 'keypoints' in anns[0]: + res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) + for id, ann in enumerate(anns): + s = ann['keypoints'] + x = s[0::3] + y = s[1::3] + x0,x1,y0,y1 = np.min(x), np.max(x), np.min(y), np.max(y) + ann['area'] = (x1-x0)*(y1-y0) + ann['id'] = id + 1 + ann['bbox'] = [x0,y0,x1-x0,y1-y0] + print('DONE (t={:0.2f}s)'.format(time.time()- tic)) + + res.dataset['annotations'] = anns + res.createIndex() + return res + + def download(self, tarDir = None, imgIds = [] ): + ''' + Download COCO images from mscoco.org server. + :param tarDir (str): COCO results directory name + imgIds (list): images to be downloaded + :return: + ''' + if tarDir is None: + print('Please specify target directory') + return -1 + if len(imgIds) == 0: + imgs = self.imgs.values() + else: + imgs = self.loadImgs(imgIds) + N = len(imgs) + if not os.path.exists(tarDir): + os.makedirs(tarDir) + for i, img in enumerate(imgs): + tic = time.time() + fname = os.path.join(tarDir, img['file_name']) + if not os.path.exists(fname): + urlretrieve(img['coco_url'], fname) + print('downloaded {}/{} images (t={:0.1f}s)'.format(i, N, time.time()- tic)) + + def loadNumpyAnnotations(self, data): + """ + Convert result data from a numpy array [Nx7] where each row contains {imageID,x1,y1,w,h,score,class} + :param data (numpy.ndarray) + :return: annotations (python nested list) + """ + print('Converting ndarray to lists...') + assert(type(data) == np.ndarray) + print(data.shape) + assert(data.shape[1] == 7) + N = data.shape[0] + ann = [] + for i in range(N): + if i % 1000000 == 0: + print('{}/{}'.format(i,N)) + ann += [{ + 'image_id' : int(data[i, 0]), + 'bbox' : [ data[i, 1], data[i, 2], data[i, 3], data[i, 4] ], + 'score' : data[i, 5], + 'category_id': int(data[i, 6]), + }] + return ann + + def annToRLE(self, ann): + """ + Convert annotation which can be polygons, uncompressed RLE to RLE. + :return: binary mask (numpy 2D array) + """ + t = self.imgs[ann['image_id']] + h, w = t['height'], t['width'] + segm = ann['segmentation'] + if type(segm) == list: + # polygon -- a single object might consist of multiple parts + # we merge all parts into one mask rle code + rles = maskUtils.frPyObjects(segm, h, w) + rle = maskUtils.merge(rles) + elif type(segm['counts']) == list: + # uncompressed RLE + rle = maskUtils.frPyObjects(segm, h, w) + else: + # rle + rle = ann['segmentation'] + return rle + + def annToMask(self, ann): + """ + Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. + :return: binary mask (numpy 2D array) + """ + rle = self.annToRLE(ann) + m = maskUtils.decode(rle) + return m \ No newline at end of file diff --git a/nlp_metrics/pycocotools/cocoeval.py b/nlp_metrics/pycocotools/cocoeval.py new file mode 100644 index 0000000..f4e3dec --- /dev/null +++ b/nlp_metrics/pycocotools/cocoeval.py @@ -0,0 +1,534 @@ +__author__ = 'tsungyi' + +import numpy as np +import datetime +import time +from collections import defaultdict +from . import mask as maskUtils +import copy + +class COCOeval: + # Interface for evaluating detection on the Microsoft COCO dataset. + # + # The usage for CocoEval is as follows: + # cocoGt=..., cocoDt=... # load dataset and results + # E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object + # E.params.recThrs = ...; # set parameters as desired + # E.evaluate(); # run per image evaluation + # E.accumulate(); # accumulate per image results + # E.summarize(); # display summary metrics of results + # For example usage see evalDemo.m and http://mscoco.org/. + # + # The evaluation parameters are as follows (defaults in brackets): + # imgIds - [all] N img ids to use for evaluation + # catIds - [all] K cat ids to use for evaluation + # iouThrs - [.5:.05:.95] T=10 IoU thresholds for evaluation + # recThrs - [0:.01:1] R=101 recall thresholds for evaluation + # areaRng - [...] A=4 object area ranges for evaluation + # maxDets - [1 10 100] M=3 thresholds on max detections per image + # iouType - ['segm'] set iouType to 'segm', 'bbox' or 'keypoints' + # iouType replaced the now DEPRECATED useSegm parameter. + # useCats - [1] if true use category labels for evaluation + # Note: if useCats=0 category labels are ignored as in proposal scoring. + # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified. + # + # evaluate(): evaluates detections on every image and every category and + # concats the results into the "evalImgs" with fields: + # dtIds - [1xD] id for each of the D detections (dt) + # gtIds - [1xG] id for each of the G ground truths (gt) + # dtMatches - [TxD] matching gt id at each IoU or 0 + # gtMatches - [TxG] matching dt id at each IoU or 0 + # dtScores - [1xD] confidence of each dt + # gtIgnore - [1xG] ignore flag for each gt + # dtIgnore - [TxD] ignore flag for each dt at each IoU + # + # accumulate(): accumulates the per-image, per-category evaluation + # results in "evalImgs" into the dictionary "eval" with fields: + # params - parameters used for evaluation + # date - date evaluation was performed + # counts - [T,R,K,A,M] parameter dimensions (see above) + # precision - [TxRxKxAxM] precision for every evaluation setting + # recall - [TxKxAxM] max recall for every evaluation setting + # Note: precision and recall==-1 for settings with no gt objects. + # + # See also coco, mask, pycocoDemo, pycocoEvalDemo + # + # Microsoft COCO Toolbox. version 2.0 + # Data, paper, and tutorials available at: http://mscoco.org/ + # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. + # Licensed under the Simplified BSD License [see coco/license.txt] + def __init__(self, cocoGt=None, cocoDt=None, iouType='segm'): + ''' + Initialize CocoEval using coco APIs for gt and dt + :param cocoGt: coco object with ground truth annotations + :param cocoDt: coco object with detection results + :return: None + ''' + if not iouType: + print('iouType not specified. use default iouType segm') + self.cocoGt = cocoGt # ground truth COCO API + self.cocoDt = cocoDt # detections COCO API + self.evalImgs = defaultdict(list) # per-image per-category evaluation results [KxAxI] elements + self.eval = {} # accumulated evaluation results + self._gts = defaultdict(list) # gt for evaluation + self._dts = defaultdict(list) # dt for evaluation + self.params = Params(iouType=iouType) # parameters + self._paramsEval = {} # parameters for evaluation + self.stats = [] # result summarization + self.ious = {} # ious between all gts and dts + if not cocoGt is None: + self.params.imgIds = sorted(cocoGt.getImgIds()) + self.params.catIds = sorted(cocoGt.getCatIds()) + + + def _prepare(self): + ''' + Prepare ._gts and ._dts for evaluation based on params + :return: None + ''' + def _toMask(anns, coco): + # modify ann['segmentation'] by reference + for ann in anns: + rle = coco.annToRLE(ann) + ann['segmentation'] = rle + p = self.params + if p.useCats: + gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) + dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)) + else: + gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds)) + dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds)) + + # convert ground truth to mask if iouType == 'segm' + if p.iouType == 'segm': + _toMask(gts, self.cocoGt) + _toMask(dts, self.cocoDt) + # set ignore flag + for gt in gts: + gt['ignore'] = gt['ignore'] if 'ignore' in gt else 0 + gt['ignore'] = 'iscrowd' in gt and gt['iscrowd'] + if p.iouType == 'keypoints': + gt['ignore'] = (gt['num_keypoints'] == 0) or gt['ignore'] + self._gts = defaultdict(list) # gt for evaluation + self._dts = defaultdict(list) # dt for evaluation + for gt in gts: + self._gts[gt['image_id'], gt['category_id']].append(gt) + for dt in dts: + self._dts[dt['image_id'], dt['category_id']].append(dt) + self.evalImgs = defaultdict(list) # per-image per-category evaluation results + self.eval = {} # accumulated evaluation results + + def evaluate(self): + ''' + Run per image evaluation on given images and store results (a list of dict) in self.evalImgs + :return: None + ''' + tic = time.time() + print('Running per image evaluation...') + p = self.params + # add backward compatibility if useSegm is specified in params + if not p.useSegm is None: + p.iouType = 'segm' if p.useSegm == 1 else 'bbox' + print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType)) + print('Evaluate annotation type *{}*'.format(p.iouType)) + p.imgIds = list(np.unique(p.imgIds)) + if p.useCats: + p.catIds = list(np.unique(p.catIds)) + p.maxDets = sorted(p.maxDets) + self.params=p + + self._prepare() + # loop through images, area range, max detection number + catIds = p.catIds if p.useCats else [-1] + + if p.iouType == 'segm' or p.iouType == 'bbox': + computeIoU = self.computeIoU + elif p.iouType == 'keypoints': + computeIoU = self.computeOks + self.ious = {(imgId, catId): computeIoU(imgId, catId) \ + for imgId in p.imgIds + for catId in catIds} + + evaluateImg = self.evaluateImg + maxDet = p.maxDets[-1] + self.evalImgs = [evaluateImg(imgId, catId, areaRng, maxDet) + for catId in catIds + for areaRng in p.areaRng + for imgId in p.imgIds + ] + self._paramsEval = copy.deepcopy(self.params) + toc = time.time() + print('DONE (t={:0.2f}s).'.format(toc-tic)) + + def computeIoU(self, imgId, catId): + p = self.params + if p.useCats: + gt = self._gts[imgId,catId] + dt = self._dts[imgId,catId] + else: + gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]] + dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]] + if len(gt) == 0 and len(dt) ==0: + return [] + inds = np.argsort([-d['score'] for d in dt], kind='mergesort') + dt = [dt[i] for i in inds] + if len(dt) > p.maxDets[-1]: + dt=dt[0:p.maxDets[-1]] + + if p.iouType == 'segm': + g = [g['segmentation'] for g in gt] + d = [d['segmentation'] for d in dt] + elif p.iouType == 'bbox': + g = [g['bbox'] for g in gt] + d = [d['bbox'] for d in dt] + else: + raise Exception('unknown iouType for iou computation') + + # compute iou between each dt and gt region + iscrowd = [int(o['iscrowd']) for o in gt] + ious = maskUtils.iou(d,g,iscrowd) + return ious + + def computeOks(self, imgId, catId): + p = self.params + # dimention here should be Nxm + gts = self._gts[imgId, catId] + dts = self._dts[imgId, catId] + inds = np.argsort([-d['score'] for d in dts], kind='mergesort') + dts = [dts[i] for i in inds] + if len(dts) > p.maxDets[-1]: + dts = dts[0:p.maxDets[-1]] + # if len(gts) == 0 and len(dts) == 0: + if len(gts) == 0 or len(dts) == 0: + return [] + ious = np.zeros((len(dts), len(gts))) + sigmas = p.kpt_oks_sigmas + vars = (sigmas * 2)**2 + k = len(sigmas) + # compute oks between each detection and ground truth object + for j, gt in enumerate(gts): + # create bounds for ignore regions(double the gt bbox) + g = np.array(gt['keypoints']) + xg = g[0::3]; yg = g[1::3]; vg = g[2::3] + k1 = np.count_nonzero(vg > 0) + bb = gt['bbox'] + x0 = bb[0] - bb[2]; x1 = bb[0] + bb[2] * 2 + y0 = bb[1] - bb[3]; y1 = bb[1] + bb[3] * 2 + for i, dt in enumerate(dts): + d = np.array(dt['keypoints']) + xd = d[0::3]; yd = d[1::3] + if k1>0: + # measure the per-keypoint distance if keypoints visible + dx = xd - xg + dy = yd - yg + else: + # measure minimum distance to keypoints in (x0,y0) & (x1,y1) + z = np.zeros((k)) + dx = np.max((z, x0-xd),axis=0)+np.max((z, xd-x1),axis=0) + dy = np.max((z, y0-yd),axis=0)+np.max((z, yd-y1),axis=0) + e = (dx**2 + dy**2) / vars / (gt['area']+np.spacing(1)) / 2 + if k1 > 0: + e=e[vg > 0] + ious[i, j] = np.sum(np.exp(-e)) / e.shape[0] + return ious + + def evaluateImg(self, imgId, catId, aRng, maxDet): + ''' + perform evaluation for single category and image + :return: dict (single image results) + ''' + p = self.params + if p.useCats: + gt = self._gts[imgId,catId] + dt = self._dts[imgId,catId] + else: + gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]] + dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]] + if len(gt) == 0 and len(dt) ==0: + return None + + for g in gt: + if g['ignore'] or (g['area']aRng[1]): + g['_ignore'] = 1 + else: + g['_ignore'] = 0 + + # sort dt highest score first, sort gt ignore last + gtind = np.argsort([g['_ignore'] for g in gt], kind='mergesort') + gt = [gt[i] for i in gtind] + dtind = np.argsort([-d['score'] for d in dt], kind='mergesort') + dt = [dt[i] for i in dtind[0:maxDet]] + iscrowd = [int(o['iscrowd']) for o in gt] + # load computed ious + ious = self.ious[imgId, catId][:, gtind] if len(self.ious[imgId, catId]) > 0 else self.ious[imgId, catId] + + T = len(p.iouThrs) + G = len(gt) + D = len(dt) + gtm = np.zeros((T,G)) + dtm = np.zeros((T,D)) + gtIg = np.array([g['_ignore'] for g in gt]) + dtIg = np.zeros((T,D)) + if not len(ious)==0: + for tind, t in enumerate(p.iouThrs): + for dind, d in enumerate(dt): + # information about best match so far (m=-1 -> unmatched) + iou = min([t,1-1e-10]) + m = -1 + for gind, g in enumerate(gt): + # if this gt already matched, and not a crowd, continue + if gtm[tind,gind]>0 and not iscrowd[gind]: + continue + # if dt matched to reg gt, and on ignore gt, stop + if m>-1 and gtIg[m]==0 and gtIg[gind]==1: + break + # continue to next gt unless better match made + if ious[dind,gind] < iou: + continue + # if match successful and best so far, store appropriately + iou=ious[dind,gind] + m=gind + # if match made store id of match for both dt and gt + if m ==-1: + continue + dtIg[tind,dind] = gtIg[m] + dtm[tind,dind] = gt[m]['id'] + gtm[tind,m] = d['id'] + # set unmatched detections outside of area range to ignore + a = np.array([d['area']aRng[1] for d in dt]).reshape((1, len(dt))) + dtIg = np.logical_or(dtIg, np.logical_and(dtm==0, np.repeat(a,T,0))) + # store results for given image and category + return { + 'image_id': imgId, + 'category_id': catId, + 'aRng': aRng, + 'maxDet': maxDet, + 'dtIds': [d['id'] for d in dt], + 'gtIds': [g['id'] for g in gt], + 'dtMatches': dtm, + 'gtMatches': gtm, + 'dtScores': [d['score'] for d in dt], + 'gtIgnore': gtIg, + 'dtIgnore': dtIg, + } + + def accumulate(self, p = None): + ''' + Accumulate per image evaluation results and store the result in self.eval + :param p: input params for evaluation + :return: None + ''' + print('Accumulating evaluation results...') + tic = time.time() + if not self.evalImgs: + print('Please run evaluate() first') + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories + recall = -np.ones((T,K,A,M)) + scores = -np.ones((T,R,K,A,M)) + + # create dictionary for future indexing + _pe = self._paramsEval + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0*A0*I0 + for a, a0 in enumerate(a_list): + Na = a0*I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if len(E) == 0: + continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. + inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] + dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) + if npig == 0: + continue + tps = np.logical_and( dtm, np.logical_not(dtIg) ) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp+tp+np.spacing(1)) + q = np.zeros((R,)) + ss = np.zeros((R,)) + + if nd: + recall[t,k,a,m] = rc[-1] + else: + recall[t,k,a,m] = 0 + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist(); q = q.tolist() + + for i in range(nd-1, 0, -1): + if pr[i] > pr[i-1]: + pr[i-1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + ss[ri] = dtScoresSorted[pi] + except: + pass + precision[t,:,k,a,m] = np.array(q) + scores[t,:,k,a,m] = np.array(ss) + self.eval = { + 'params': p, + 'counts': [T, R, K, A, M], + 'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'), + 'precision': precision, + 'recall': recall, + 'scores': scores, + } + toc = time.time() + print('DONE (t={:0.2f}s).'.format( toc-tic)) + + def summarize(self): + ''' + Compute and display summary metrics for evaluation results. + Note this functin can *only* be applied on the default parameter setting + ''' + def _summarize( ap=1, iouThr=None, areaRng='all', maxDets=100 ): + p = self.params + iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}' + titleStr = 'Average Precision' if ap == 1 else 'Average Recall' + typeStr = '(AP)' if ap==1 else '(AR)' + iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \ + if iouThr is None else '{:0.2f}'.format(iouThr) + + aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng] + mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets] + if ap == 1: + # dimension of precision: [TxRxKxAxM] + s = self.eval['precision'] + # IoU + if iouThr is not None: + t = np.where(iouThr == p.iouThrs)[0] + s = s[t] + s = s[:,:,:,aind,mind] + else: + # dimension of recall: [TxKxAxM] + s = self.eval['recall'] + if iouThr is not None: + t = np.where(iouThr == p.iouThrs)[0] + s = s[t] + s = s[:,:,aind,mind] + if len(s[s>-1])==0: + mean_s = -1 + else: + mean_s = np.mean(s[s>-1]) + print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)) + return mean_s + def _summarizeDets(): + stats = np.zeros((12,)) + stats[0] = _summarize(1) + stats[1] = _summarize(1, iouThr=.5, maxDets=self.params.maxDets[2]) + stats[2] = _summarize(1, iouThr=.75, maxDets=self.params.maxDets[2]) + stats[3] = _summarize(1, areaRng='small', maxDets=self.params.maxDets[2]) + stats[4] = _summarize(1, areaRng='medium', maxDets=self.params.maxDets[2]) + stats[5] = _summarize(1, areaRng='large', maxDets=self.params.maxDets[2]) + stats[6] = _summarize(0, maxDets=self.params.maxDets[0]) + stats[7] = _summarize(0, maxDets=self.params.maxDets[1]) + stats[8] = _summarize(0, maxDets=self.params.maxDets[2]) + stats[9] = _summarize(0, areaRng='small', maxDets=self.params.maxDets[2]) + stats[10] = _summarize(0, areaRng='medium', maxDets=self.params.maxDets[2]) + stats[11] = _summarize(0, areaRng='large', maxDets=self.params.maxDets[2]) + return stats + def _summarizeKps(): + stats = np.zeros((10,)) + stats[0] = _summarize(1, maxDets=20) + stats[1] = _summarize(1, maxDets=20, iouThr=.5) + stats[2] = _summarize(1, maxDets=20, iouThr=.75) + stats[3] = _summarize(1, maxDets=20, areaRng='medium') + stats[4] = _summarize(1, maxDets=20, areaRng='large') + stats[5] = _summarize(0, maxDets=20) + stats[6] = _summarize(0, maxDets=20, iouThr=.5) + stats[7] = _summarize(0, maxDets=20, iouThr=.75) + stats[8] = _summarize(0, maxDets=20, areaRng='medium') + stats[9] = _summarize(0, maxDets=20, areaRng='large') + return stats + if not self.eval: + raise Exception('Please run accumulate() first') + iouType = self.params.iouType + if iouType == 'segm' or iouType == 'bbox': + summarize = _summarizeDets + elif iouType == 'keypoints': + summarize = _summarizeKps + self.stats = summarize() + + def __str__(self): + self.summarize() + +class Params: + ''' + Params for coco evaluation api + ''' + def setDetParams(self): + self.imgIds = [] + self.catIds = [] + # np.arange causes trouble. the data point on arange is slightly larger than the true value + self.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True) + self.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, endpoint=True) + self.maxDets = [1, 10, 100] + self.areaRng = [[0 ** 2, 1e5 ** 2], [0 ** 2, 32 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]] + self.areaRngLbl = ['all', 'small', 'medium', 'large'] + self.useCats = 1 + + def setKpParams(self): + self.imgIds = [] + self.catIds = [] + # np.arange causes trouble. the data point on arange is slightly larger than the true value + self.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True) + self.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, endpoint=True) + self.maxDets = [20] + self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]] + self.areaRngLbl = ['all', 'medium', 'large'] + self.useCats = 1 + self.kpt_oks_sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62,.62, 1.07, 1.07, .87, .87, .89, .89])/10.0 + + def __init__(self, iouType='segm'): + if iouType == 'segm' or iouType == 'bbox': + self.setDetParams() + elif iouType == 'keypoints': + self.setKpParams() + else: + raise Exception('iouType not supported') + self.iouType = iouType + # useSegm is deprecated + self.useSegm = None diff --git a/nlp_metrics/pycocotools/mask.py b/nlp_metrics/pycocotools/mask.py new file mode 100644 index 0000000..b5493fe --- /dev/null +++ b/nlp_metrics/pycocotools/mask.py @@ -0,0 +1,103 @@ +__author__ = 'tsungyi' + +import nlp_metrics.pycocotools._mask as _mask + +# Interface for manipulating masks stored in RLE format. +# +# RLE is a simple yet efficient format for storing binary masks. RLE +# first divides a vector (or vectorized image) into a series of piecewise +# constant regions and then for each piece simply stores the length of +# that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would +# be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] +# (note that the odd counts are always the numbers of zeros). Instead of +# storing the counts directly, additional compression is achieved with a +# variable bitrate representation based on a common scheme called LEB128. +# +# Compression is greatest given large piecewise constant regions. +# Specifically, the size of the RLE is proportional to the number of +# *boundaries* in M (or for an image the number of boundaries in the y +# direction). Assuming fairly simple shapes, the RLE representation is +# O(sqrt(n)) where n is number of pixels in the object. Hence space usage +# is substantially lower, especially for large simple objects (large n). +# +# Many common operations on masks can be computed directly using the RLE +# (without need for decoding). This includes computations such as area, +# union, intersection, etc. All of these operations are linear in the +# size of the RLE, in other words they are O(sqrt(n)) where n is the area +# of the object. Computing these operations on the original mask is O(n). +# Thus, using the RLE can result in substantial computational savings. +# +# The following API functions are defined: +# encode - Encode binary masks using RLE. +# decode - Decode binary masks encoded via RLE. +# merge - Compute union or intersection of encoded masks. +# iou - Compute intersection over union between masks. +# area - Compute area of encoded masks. +# toBbox - Get bounding boxes surrounding encoded masks. +# frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. +# +# Usage: +# Rs = encode( masks ) +# masks = decode( Rs ) +# R = merge( Rs, intersect=false ) +# o = iou( dt, gt, iscrowd ) +# a = area( Rs ) +# bbs = toBbox( Rs ) +# Rs = frPyObjects( [pyObjects], h, w ) +# +# In the API the following formats are used: +# Rs - [dict] Run-length encoding of binary masks +# R - dict Run-length encoding of binary mask +# masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) +# iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore +# bbs - [nx4] Bounding box(es) stored as [x y w h] +# poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) +# dt,gt - May be either bounding boxes or encoded masks +# Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). +# +# Finally, a note about the intersection over union (iou) computation. +# The standard iou of a ground truth (gt) and detected (dt) object is +# iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) +# For "crowd" regions, we use a modified criteria. If a gt object is +# marked as "iscrowd", we allow a dt to match any subregion of the gt. +# Choosing gt' in the crowd gt that best matches the dt can be done using +# gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing +# iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) +# For crowd gt regions we use this modified criteria above for the iou. +# +# To compile run "python setup.py build_ext --inplace" +# Please do not contact us for help with compiling. +# +# Microsoft COCO Toolbox. version 2.0 +# Data, paper, and tutorials available at: http://mscoco.org/ +# Code written by Piotr Dollar and Tsung-Yi Lin, 2015. +# Licensed under the Simplified BSD License [see coco/license.txt] + +iou = _mask.iou +merge = _mask.merge +frPyObjects = _mask.frPyObjects + +def encode(bimask): + if len(bimask.shape) == 3: + return _mask.encode(bimask) + elif len(bimask.shape) == 2: + h, w = bimask.shape + return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] + +def decode(rleObjs): + if type(rleObjs) == list: + return _mask.decode(rleObjs) + else: + return _mask.decode([rleObjs])[:,:,0] + +def area(rleObjs): + if type(rleObjs) == list: + return _mask.area(rleObjs) + else: + return _mask.area([rleObjs])[0] + +def toBbox(rleObjs): + if type(rleObjs) == list: + return _mask.toBbox(rleObjs) + else: + return _mask.toBbox([rleObjs])[0] \ No newline at end of file diff --git a/nlp_metrics/setup.py b/nlp_metrics/setup.py new file mode 100644 index 0000000..0267835 --- /dev/null +++ b/nlp_metrics/setup.py @@ -0,0 +1,27 @@ +from setuptools import setup, Extension +import numpy as np + +# To compile and install locally run "python setup.py build_ext --inplace" +# To install library to Python site-packages run "python setup.py build_ext install" + +ext_modules = [ + Extension( + 'pycocotools._mask', + sources=['common/maskApi.c', 'pycocotools/_mask.pyx'], + include_dirs = [np.get_include(), 'common'], + extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'], + ) +] + +setup( + name='pycocotools', + packages=['pycocotools'], + package_dir = {'pycocotools': 'pycocotools'}, + install_requires=[ + 'setuptools>=18.0', + 'cython>=0.27.3', + 'matplotlib>=2.1.0' + ], + version='2.0', + ext_modules= ext_modules +)