diff --git a/nlp_metrics/Makefile b/nlp_metrics/Makefile
new file mode 100644
index 0000000..ab5e3d2
--- /dev/null
+++ b/nlp_metrics/Makefile
@@ -0,0 +1,9 @@
+all:
+    # install pycocotools locally
+	python setup.py build_ext --inplace
+	rm -rf build
+
+install:
+	# install pycocotools to the Python site-packages
+	python setup.py build_ext install
+	rm -rf build
\ No newline at end of file
diff --git a/nlp_metrics/README.md b/nlp_metrics/README.md
index 0129a69..854f734 100644
--- a/nlp_metrics/README.md
+++ b/nlp_metrics/README.md
@@ -9,21 +9,22 @@ the [COCO Caption Evaluation](https://github.com/tylin/coco-caption) library, an
 - python (tested 2.7/3.6)
 
 ## Files ##
-
-- evals.py: The file includes MIMICEavlCap and COCOEavlCap class.
+- eval: The file includes MIMICEavlCap and COCOEavlCap class.
 - tokenizer: Python wrapper of Stanford CoreNLP PTBTokenizer
-- coco: pycocotools from [cocodataset](https://github.com/cocodataset) / [cocoapi](https://github.com/cocodataset/cocoapi)
 - bleu: Bleu evalutation codes
 - meteor: Meteor evaluation codes
 - rouge: Rouge-L evaluation codes
 - cider: CIDEr evaluation codes
 - spice: SPICE evaluation codes
+- pycocotools and common: Adapted from [COCO Dataset API](https://github.com/cocodataset/cocoapi)
 
 ## Setup ##
-
 - You will first need to download the [Stanford CoreNLP 3.6.0](http://stanfordnlp.github.io/CoreNLP/index.html) code and models for use by SPICE. To do this, run either: 
     - ``python get_stanford_models.py``
     - ``./get_stanford_models.sh``
+    
+- Run ``make`` in this directory to build dependencies for ``pycocotools``. Requires ``cython>=0.27.3``
+    
 ## Notes ##
 - SPICE will try to create a cache of parsed sentences in ./spice/cache/. This dramatically speeds up repeated evaluations. 
     - Without altering this code, use the environment variables ``SPICE_CACHE_DIR`` and ``SPICE_TEMP_DIR`` to set the cache directory.
@@ -32,7 +33,6 @@ the [COCO Caption Evaluation](https://github.com/tylin/coco-caption) library, an
 
 
 ## References ##
-
 - [Microsoft COCO Captions: Data Collection and Evaluation Server](http://arxiv.org/abs/1504.00325)
 - PTBTokenizer: We use the [Stanford Tokenizer](http://nlp.stanford.edu/software/tokenizer.shtml) which is included in [Stanford CoreNLP 3.4.1](http://nlp.stanford.edu/software/corenlp.shtml).
 - BLEU: [BLEU: a Method for Automatic Evaluation of Machine Translation](http://www.aclweb.org/anthology/P02-1040.pdf)
diff --git a/nlp_metrics/coco.py b/nlp_metrics/coco.py
deleted file mode 100644
index 2fc3e9e..0000000
--- a/nlp_metrics/coco.py
+++ /dev/null
@@ -1,372 +0,0 @@
-from __future__ import print_function
-
-__author__ = 'tylin'
-__version__ = '1.0.1'
-# Interface for accessing the Microsoft COCO dataset.
-
-# Microsoft COCO is a large image dataset designed for object detection,
-# segmentation, and caption generation. pycocotools is a Python API that
-# assists in loading, parsing and visualizing the annotations in COCO.
-# Please visit http://mscoco.org/ for more information on COCO, including
-# for the data, paper, and tutorials. The exact format of the annotations
-# is also described on the COCO website. For example usage of the pycocotools
-# please see pycocotools_demo.ipynb. In addition to this API, please download both
-# the COCO images and annotations in order to run the demo.
-
-# An alternative to using the API is to load the annotations directly
-# into Python dictionary
-# Using the API provides additional utility functions. Note that this API
-# supports both *instance* and *caption* annotations. In the case of
-# captions not all functions are defined (e.g. categories are undefined).
-
-# The following API functions are defined:
-#  COCO       - COCO api class that loads COCO annotation file and prepare data structures.
-#  decodeMask - Decode binary mask M encoded via run-length encoding.
-#  encodeMask - Encode binary mask M using run-length encoding.
-#  getAnnIds  - Get ann ids that satisfy given filter conditions.
-#  getCatIds  - Get cat ids that satisfy given filter conditions.
-#  getImgIds  - Get img ids that satisfy given filter conditions.
-#  loadAnns   - Load anns with the specified ids.
-#  loadCats   - Load cats with the specified ids.
-#  loadImgs   - Load imgs with the specified ids.
-#  segToMask  - Convert polygon segmentation to binary mask.
-#  showAnns   - Display the specified annotations.
-#  loadRes    - Load result file and create result api object.
-# Throughout the API "ann"=annotation, "cat"=category, and "img"=image.
-# Help on each functions can be accessed by: "help COCO>function".
-
-# See also COCO>decodeMask,
-# COCO>encodeMask, COCO>getAnnIds, COCO>getCatIds,
-# COCO>getImgIds, COCO>loadAnns, COCO>loadCats,
-# COCO>loadImgs, COCO>segToMask, COCO>showAnns
-
-# Microsoft COCO Toolbox.      Version 1.0
-# Data, paper, and tutorials available at:  http://mscoco.org/
-# Code written by Piotr Dollar and Tsung-Yi Lin, 2014.
-# Licensed under the Simplified BSD License [see bsd.txt]
-from builtins import int
-
-import json
-import datetime
-import matplotlib.pyplot as plt
-from matplotlib.collections import PatchCollection
-from matplotlib.patches import Polygon
-import numpy as np
-from skimage.draw import polygon
-import copy
-
-class COCO:
-    def __init__(self, annotation_file=None):
-        """
-        Constructor of Microsoft COCO helper class for reading and visualizing annotations.
-        :param annotation_file (str): location of annotation file
-        :param image_folder (str): location to the folder that hosts images.
-        :return:
-        """
-        # load dataset
-        self.dataset = {}
-        self.anns = []
-        self.imgToAnns = {}
-        self.catToImgs = {}
-        self.imgs = []
-        self.cats = []
-        if not annotation_file == None:
-            print('loading annotations into memory...')
-            time_t = datetime.datetime.utcnow()
-            dataset = json.load(open(annotation_file, 'r'))
-            print(datetime.datetime.utcnow() - time_t)
-            self.dataset = dataset
-            self.createIndex()
-
-    def createIndex(self):
-        # create index
-        print('creating index...')
-        imgToAnns = {ann['image_id']: [] for ann in self.dataset['annotations']}
-        anns =      {ann['id']:       [] for ann in self.dataset['annotations']}
-        for ann in self.dataset['annotations']:
-            imgToAnns[ann['image_id']] += [ann]
-            anns[ann['id']] = ann
-
-        imgs      = {im['id']: {} for im in self.dataset['images']}
-        for img in self.dataset['images']:
-            imgs[img['id']] = img
-
-        cats = []
-        catToImgs = []
-        if self.dataset['type'] == 'instances':
-            cats = {cat['id']: [] for cat in self.dataset['categories']}
-            for cat in self.dataset['categories']:
-                cats[cat['id']] = cat
-            catToImgs = {cat['id']: [] for cat in self.dataset['categories']}
-            for ann in self.dataset['annotations']:
-                catToImgs[ann['category_id']] += [ann['image_id']]
-
-        print('index created!')
-
-        # create class members
-        self.anns = anns
-        self.imgToAnns = imgToAnns
-        self.catToImgs = catToImgs
-        self.imgs = imgs
-        self.cats = cats
-
-    def info(self):
-        """
-        Print information about the annotation file.
-        :return:
-        """
-        for key, value in self.datset['info'].items():
-            print('%s: %s'%(key, value))
-
-    def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None):
-        """
-        Get ann ids that satisfy given filter conditions. default skips that filter
-        :param imgIds  (int array)     : get anns for given imgs
-               catIds  (int array)     : get anns for given cats
-               areaRng (float array)   : get anns for given area range (e.g. [0 inf])
-               iscrowd (boolean)       : get anns for given crowd label (False or True)
-        :return: ids (int array)       : integer array of ann ids
-        """
-        imgIds = imgIds if type(imgIds) == list else [imgIds]
-        catIds = catIds if type(catIds) == list else [catIds]
-
-        if len(imgIds) == len(catIds) == len(areaRng) == 0:
-            anns = self.dataset['annotations']
-        else:
-            if not len(imgIds) == 0:
-                anns = sum([self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns],[])
-            else:
-                anns = self.dataset['annotations']
-            anns = anns if len(catIds)  == 0 else [ann for ann in anns if ann['category_id'] in catIds]
-            anns = anns if len(areaRng) == 0 else [ann for ann in anns if ann['area'] > areaRng[0] and ann['area'] < areaRng[1]]
-        if self.dataset['type'] == 'instances':
-            if not iscrowd == None:
-                ids = [ann['id'] for ann in anns if ann['iscrowd'] == iscrowd]
-            else:
-                ids = [ann['id'] for ann in anns]
-        else:
-            ids = [ann['id'] for ann in anns]
-        return ids
-
-    def getCatIds(self, catNms=[], supNms=[], catIds=[]):
-        """
-        filtering parameters. default skips that filter.
-        :param catNms (str array)  : get cats for given cat names
-        :param supNms (str array)  : get cats for given supercategory names
-        :param catIds (int array)  : get cats for given cat ids
-        :return: ids (int array)   : integer array of cat ids
-        """
-        catNms = catNms if type(catNms) == list else [catNms]
-        supNms = supNms if type(supNms) == list else [supNms]
-        catIds = catIds if type(catIds) == list else [catIds]
-
-        if len(catNms) == len(supNms) == len(catIds) == 0:
-            cats = self.dataset['categories']
-        else:
-            cats = self.dataset['categories']
-            cats = cats if len(catNms) == 0 else [cat for cat in cats if cat['name']          in catNms]
-            cats = cats if len(supNms) == 0 else [cat for cat in cats if cat['supercategory'] in supNms]
-            cats = cats if len(catIds) == 0 else [cat for cat in cats if cat['id']            in catIds]
-        ids = [cat['id'] for cat in cats]
-        return ids
-
-    def getImgIds(self, imgIds=[], catIds=[]):
-        '''
-        Get img ids that satisfy given filter conditions.
-        :param imgIds (int array) : get imgs for given ids
-        :param catIds (int array) : get imgs with all given cats
-        :return: ids (int array)  : integer array of img ids
-        '''
-        imgIds = imgIds if type(imgIds) == list else [imgIds]
-        catIds = catIds if type(catIds) == list else [catIds]
-
-        if len(imgIds) == len(catIds) == 0:
-            ids = self.imgs.keys()
-        else:
-            ids = set(imgIds)
-            for catId in catIds:
-                if len(ids) == 0:
-                    ids = set(self.catToImgs[catId])
-                else:
-                    ids &= set(self.catToImgs[catId])
-        return list(ids)
-
-    def loadAnns(self, ids=[]):
-        """
-        Load anns with the specified ids.
-        :param ids (int array)       : integer ids specifying anns
-        :return: anns (object array) : loaded ann objects
-        """
-        if type(ids) == list:
-            return [self.anns[id_] for id_ in ids]
-        elif isinstance(ids, int):
-            return [self.anns[ids]]
-
-    def loadCats(self, ids=[]):
-        """
-        Load cats with the specified ids.
-        :param ids (int array)       : integer ids specifying cats
-        :return: cats (object array) : loaded cat objects
-        """
-        if type(ids) == list:
-            return [self.cats[id_] for id_ in ids]
-        elif isinstance(ids, int):
-            return [self.cats[ids]]
-
-    def loadImgs(self, ids=[]):
-        """
-        Load anns with the specified ids.
-        :param ids (int array)       : integer ids specifying img
-        :return: imgs (object array) : loaded img objects
-        """
-        if type(ids) == list:
-            return [self.imgs[id_] for id_ in ids]
-        elif isinstance(ids, int):
-            return [self.imgs[ids]]
-
-    def showAnns(self, anns):
-        """
-        Display the specified annotations.
-        :param anns (array of object): annotations to display
-        :return: None
-        """
-        if len(anns) == 0:
-            return 0
-        if self.dataset['type'] == 'instances':
-            ax = plt.gca()
-            polygons = []
-            color = []
-            for ann in anns:
-                c = np.random.random((1, 3)).tolist()[0]
-                if type(ann['segmentation']) == list:
-                    # polygon
-                    for seg in ann['segmentation']:
-                        poly = np.array(seg).reshape((len(seg)//2, 2))
-                        polygons.append(Polygon(poly, True,alpha=0.4))
-                        color.append(c)
-                else:
-                    # mask
-                    mask = COCO.decodeMask(ann['segmentation'])
-                    img = np.ones( (mask.shape[0], mask.shape[1], 3) )
-                    if ann['iscrowd'] == 1:
-                        color_mask = np.array([2.0,166.0,101.0])/255
-                    if ann['iscrowd'] == 0:
-                        color_mask = np.random.random((1, 3)).tolist()[0]
-                    for i in range(3):
-                        img[:,:,i] = color_mask[i]
-                    ax.imshow(np.dstack( (img, mask*0.5) ))
-            p = PatchCollection(polygons, facecolors=color, edgecolors=(0,0,0,1), linewidths=3, alpha=0.4)
-            ax.add_collection(p)
-        if self.dataset['type'] == 'captions':
-            for ann in anns:
-                print(ann['caption'])
-
-    def loadRes(self, resFile):
-        """
-        Load result file and return a result api object.
-        :param   resFile (str)     : file name of result file
-        :return: res (obj)         : result api object
-        """
-        res = COCO()
-        res.dataset['images'] = [img for img in self.dataset['images']]
-        res.dataset['info'] = copy.deepcopy(self.dataset['info'])
-        res.dataset['type'] = copy.deepcopy(self.dataset['type'])
-        res.dataset['licenses'] = copy.deepcopy(self.dataset['licenses'])
-
-        print('Loading and preparing results...     ')
-        time_t = datetime.datetime.utcnow()
-        anns    = json.load(open(resFile))
-        assert type(anns) == list, 'results in not an array of objects'
-        annsImgIds = [ann['image_id'] for ann in anns]
-        assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
-               'Results do not correspond to current coco set'
-        if 'caption' in anns[0]:
-            imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])
-            res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
-            for id_, ann in enumerate(anns):
-                ann['id'] = id_
-        elif 'bbox' in anns[0] and not anns[0]['bbox'] == []:
-            res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
-            for id_, ann in enumerate(anns):
-                bb = ann['bbox']
-                x1, x2, y1, y2 = [bb[0], bb[0]+bb[2], bb[1], bb[1]+bb[3]]
-                ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
-                ann['area'] = bb[2]*bb[3]
-                ann['id'] = id_
-                ann['iscrowd'] = 0
-        elif 'segmentation' in anns[0]:
-            res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
-            for id_, ann in enumerate(anns):
-                ann['area']=sum(ann['segmentation']['counts'][2:-1:2])
-                ann['bbox'] = []
-                ann['id'] = id_
-                ann['iscrowd'] = 0
-        print('DONE (t=%0.2fs)'%((datetime.datetime.utcnow() - time_t).total_seconds()))
-
-        res.dataset['annotations'] = anns
-        res.createIndex()
-        return res
-
-
-    @staticmethod
-    def decodeMask(R):
-        """
-        Decode binary mask M encoded via run-length encoding.
-        :param   R (object RLE)    : run-length encoding of binary mask
-        :return: M (bool 2D array) : decoded binary mask
-        """
-        N = len(R['counts'])
-        M = np.zeros( (R['size'][0]*R['size'][1], ))
-        n = 0
-        val = 1
-        for pos in range(N):
-            val = not val
-            for c in range(R['counts'][pos]):
-                R['counts'][pos]
-                M[n] = val
-                n += 1
-        return M.reshape((R['size']), order='F')
-
-    @staticmethod
-    def encodeMask(M):
-        """
-        Encode binary mask M using run-length encoding.
-        :param   M (bool 2D array)  : binary mask to encode
-        :return: R (object RLE)     : run-length encoding of binary mask
-        """
-        [h, w] = M.shape
-        M = M.flatten(order='F')
-        N = len(M)
-        counts_list = []
-        pos = 0
-        # counts
-        counts_list.append(1)
-        diffs = np.logical_xor(M[0:N-1], M[1:N])
-        for diff in diffs:
-            if diff:
-                pos +=1
-                counts_list.append(1)
-            else:
-                counts_list[pos] += 1
-        # if array starts from 1. start with 0 counts for 0
-        if M[0] == 1:
-            counts_list = [0] + counts_list
-        return {'size':      [h, w],
-               'counts':    counts_list ,
-               }
-
-    @staticmethod
-    def segToMask( S, h, w ):
-         """
-         Convert polygon segmentation to binary mask.
-         :param   S (float array)   : polygon segmentation mask
-         :param   h (int)           : target mask height
-         :param   w (int)           : target mask width
-         :return: M (bool 2D array) : binary mask
-         """
-         M = np.zeros((h,w), dtype=np.bool)
-         for s in S:
-             N = len(s)
-             rr, cc = polygon(np.array(s[1:N:2]), np.array(s[0:N:2])) # (y, x)
-             M[rr, cc] = 1
-         return M
diff --git a/nlp_metrics/common/gason.cpp b/nlp_metrics/common/gason.cpp
new file mode 100644
index 0000000..0f2c00e
--- /dev/null
+++ b/nlp_metrics/common/gason.cpp
@@ -0,0 +1,335 @@
+// https://github.com/vivkin/gason - pulled January 10, 2016
+#include "gason.h"
+#include <stdlib.h>
+
+#define JSON_ZONE_SIZE 4096
+#define JSON_STACK_SIZE 32
+
+const char *jsonStrError(int err) {
+    switch (err) {
+#define XX(no, str) \
+    case JSON_##no: \
+        return str;
+        JSON_ERRNO_MAP(XX)
+#undef XX
+    default:
+        return "unknown";
+    }
+}
+
+void *JsonAllocator::allocate(size_t size) {
+    size = (size + 7) & ~7;
+
+    if (head && head->used + size <= JSON_ZONE_SIZE) {
+        char *p = (char *)head + head->used;
+        head->used += size;
+        return p;
+    }
+
+    size_t allocSize = sizeof(Zone) + size;
+    Zone *zone = (Zone *)malloc(allocSize <= JSON_ZONE_SIZE ? JSON_ZONE_SIZE : allocSize);
+    if (zone == nullptr)
+        return nullptr;
+    zone->used = allocSize;
+    if (allocSize <= JSON_ZONE_SIZE || head == nullptr) {
+        zone->next = head;
+        head = zone;
+    } else {
+        zone->next = head->next;
+        head->next = zone;
+    }
+    return (char *)zone + sizeof(Zone);
+}
+
+void JsonAllocator::deallocate() {
+    while (head) {
+        Zone *next = head->next;
+        free(head);
+        head = next;
+    }
+}
+
+static inline bool isspace(char c) {
+    return c == ' ' || (c >= '\t' && c <= '\r');
+}
+
+static inline bool isdelim(char c) {
+    return c == ',' || c == ':' || c == ']' || c == '}' || isspace(c) || !c;
+}
+
+static inline bool isdigit(char c) {
+    return c >= '0' && c <= '9';
+}
+
+static inline bool isxdigit(char c) {
+    return (c >= '0' && c <= '9') || ((c & ~' ') >= 'A' && (c & ~' ') <= 'F');
+}
+
+static inline int char2int(char c) {
+    if (c <= '9')
+        return c - '0';
+    return (c & ~' ') - 'A' + 10;
+}
+
+static double string2double(char *s, char **endptr) {
+    char ch = *s;
+    if (ch == '-')
+        ++s;
+
+    double result = 0;
+    while (isdigit(*s))
+        result = (result * 10) + (*s++ - '0');
+
+    if (*s == '.') {
+        ++s;
+
+        double fraction = 1;
+        while (isdigit(*s)) {
+            fraction *= 0.1;
+            result += (*s++ - '0') * fraction;
+        }
+    }
+
+    if (*s == 'e' || *s == 'E') {
+        ++s;
+
+        double base = 10;
+        if (*s == '+')
+            ++s;
+        else if (*s == '-') {
+            ++s;
+            base = 0.1;
+        }
+
+        unsigned int exponent = 0;
+        while (isdigit(*s))
+            exponent = (exponent * 10) + (*s++ - '0');
+
+        double power = 1;
+        for (; exponent; exponent >>= 1, base *= base)
+            if (exponent & 1)
+                power *= base;
+
+        result *= power;
+    }
+
+    *endptr = s;
+    return ch == '-' ? -result : result;
+}
+
+static inline JsonNode *insertAfter(JsonNode *tail, JsonNode *node) {
+    if (!tail)
+        return node->next = node;
+    node->next = tail->next;
+    tail->next = node;
+    return node;
+}
+
+static inline JsonValue listToValue(JsonTag tag, JsonNode *tail) {
+    if (tail) {
+        auto head = tail->next;
+        tail->next = nullptr;
+        return JsonValue(tag, head);
+    }
+    return JsonValue(tag, nullptr);
+}
+
+int jsonParse(char *s, char **endptr, JsonValue *value, JsonAllocator &allocator) {
+    JsonNode *tails[JSON_STACK_SIZE];
+    JsonTag tags[JSON_STACK_SIZE];
+    char *keys[JSON_STACK_SIZE];
+    JsonValue o;
+    int pos = -1;
+    bool separator = true;
+    JsonNode *node;
+    *endptr = s;
+
+    while (*s) {
+        while (isspace(*s)) {
+            ++s;
+            if (!*s) break;
+        }
+        *endptr = s++;
+        switch (**endptr) {
+        case '-':
+            if (!isdigit(*s) && *s != '.') {
+                *endptr = s;
+                return JSON_BAD_NUMBER;
+            }
+        case '0':
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7':
+        case '8':
+        case '9':
+            o = JsonValue(string2double(*endptr, &s));
+            if (!isdelim(*s)) {
+                *endptr = s;
+                return JSON_BAD_NUMBER;
+            }
+            break;
+        case '"':
+            o = JsonValue(JSON_STRING, s);
+            for (char *it = s; *s; ++it, ++s) {
+                int c = *it = *s;
+                if (c == '\\') {
+                    c = *++s;
+                    switch (c) {
+                    case '\\':
+                    case '"':
+                    case '/':
+                        *it = c;
+                        break;
+                    case 'b':
+                        *it = '\b';
+                        break;
+                    case 'f':
+                        *it = '\f';
+                        break;
+                    case 'n':
+                        *it = '\n';
+                        break;
+                    case 'r':
+                        *it = '\r';
+                        break;
+                    case 't':
+                        *it = '\t';
+                        break;
+                    case 'u':
+                        c = 0;
+                        for (int i = 0; i < 4; ++i) {
+                            if (isxdigit(*++s)) {
+                                c = c * 16 + char2int(*s);
+                            } else {
+                                *endptr = s;
+                                return JSON_BAD_STRING;
+                            }
+                        }
+                        if (c < 0x80) {
+                            *it = c;
+                        } else if (c < 0x800) {
+                            *it++ = 0xC0 | (c >> 6);
+                            *it = 0x80 | (c & 0x3F);
+                        } else {
+                            *it++ = 0xE0 | (c >> 12);
+                            *it++ = 0x80 | ((c >> 6) & 0x3F);
+                            *it = 0x80 | (c & 0x3F);
+                        }
+                        break;
+                    default:
+                        *endptr = s;
+                        return JSON_BAD_STRING;
+                    }
+                } else if ((unsigned int)c < ' ' || c == '\x7F') {
+                    *endptr = s;
+                    return JSON_BAD_STRING;
+                } else if (c == '"') {
+                    *it = 0;
+                    ++s;
+                    break;
+                }
+            }
+            if (!isdelim(*s)) {
+                *endptr = s;
+                return JSON_BAD_STRING;
+            }
+            break;
+        case 't':
+            if (!(s[0] == 'r' && s[1] == 'u' && s[2] == 'e' && isdelim(s[3])))
+                return JSON_BAD_IDENTIFIER;
+            o = JsonValue(JSON_TRUE);
+            s += 3;
+            break;
+        case 'f':
+            if (!(s[0] == 'a' && s[1] == 'l' && s[2] == 's' && s[3] == 'e' && isdelim(s[4])))
+                return JSON_BAD_IDENTIFIER;
+            o = JsonValue(JSON_FALSE);
+            s += 4;
+            break;
+        case 'n':
+            if (!(s[0] == 'u' && s[1] == 'l' && s[2] == 'l' && isdelim(s[3])))
+                return JSON_BAD_IDENTIFIER;
+            o = JsonValue(JSON_NULL);
+            s += 3;
+            break;
+        case ']':
+            if (pos == -1)
+                return JSON_STACK_UNDERFLOW;
+            if (tags[pos] != JSON_ARRAY)
+                return JSON_MISMATCH_BRACKET;
+            o = listToValue(JSON_ARRAY, tails[pos--]);
+            break;
+        case '}':
+            if (pos == -1)
+                return JSON_STACK_UNDERFLOW;
+            if (tags[pos] != JSON_OBJECT)
+                return JSON_MISMATCH_BRACKET;
+            if (keys[pos] != nullptr)
+                return JSON_UNEXPECTED_CHARACTER;
+            o = listToValue(JSON_OBJECT, tails[pos--]);
+            break;
+        case '[':
+            if (++pos == JSON_STACK_SIZE)
+                return JSON_STACK_OVERFLOW;
+            tails[pos] = nullptr;
+            tags[pos] = JSON_ARRAY;
+            keys[pos] = nullptr;
+            separator = true;
+            continue;
+        case '{':
+            if (++pos == JSON_STACK_SIZE)
+                return JSON_STACK_OVERFLOW;
+            tails[pos] = nullptr;
+            tags[pos] = JSON_OBJECT;
+            keys[pos] = nullptr;
+            separator = true;
+            continue;
+        case ':':
+            if (separator || keys[pos] == nullptr)
+                return JSON_UNEXPECTED_CHARACTER;
+            separator = true;
+            continue;
+        case ',':
+            if (separator || keys[pos] != nullptr)
+                return JSON_UNEXPECTED_CHARACTER;
+            separator = true;
+            continue;
+        case '\0':
+            continue;
+        default:
+            return JSON_UNEXPECTED_CHARACTER;
+        }
+
+        separator = false;
+
+        if (pos == -1) {
+            *endptr = s;
+            *value = o;
+            return JSON_OK;
+        }
+
+        if (tags[pos] == JSON_OBJECT) {
+            if (!keys[pos]) {
+                if (o.getTag() != JSON_STRING)
+                    return JSON_UNQUOTED_KEY;
+                keys[pos] = o.toString();
+                continue;
+            }
+            if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode))) == nullptr)
+                return JSON_ALLOCATION_FAILURE;
+            tails[pos] = insertAfter(tails[pos], node);
+            tails[pos]->key = keys[pos];
+            keys[pos] = nullptr;
+        } else {
+            if ((node = (JsonNode *) allocator.allocate(sizeof(JsonNode) - sizeof(char *))) == nullptr)
+                return JSON_ALLOCATION_FAILURE;
+            tails[pos] = insertAfter(tails[pos], node);
+        }
+        tails[pos]->value = o;
+    }
+    return JSON_BREAKING_BAD;
+}
diff --git a/nlp_metrics/common/gason.h b/nlp_metrics/common/gason.h
new file mode 100644
index 0000000..2e728d9
--- /dev/null
+++ b/nlp_metrics/common/gason.h
@@ -0,0 +1,136 @@
+// https://github.com/vivkin/gason - pulled January 10, 2016
+#pragma once
+
+#include <stdint.h>
+#include <stddef.h>
+#include <assert.h>
+
+enum JsonTag {
+    JSON_NUMBER = 0,
+    JSON_STRING,
+    JSON_ARRAY,
+    JSON_OBJECT,
+    JSON_TRUE,
+    JSON_FALSE,
+    JSON_NULL = 0xF
+};
+
+struct JsonNode;
+
+#define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL
+#define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL
+#define JSON_VALUE_TAG_MASK 0xF
+#define JSON_VALUE_TAG_SHIFT 47
+
+union JsonValue {
+    uint64_t ival;
+    double fval;
+
+    JsonValue(double x)
+        : fval(x) {
+    }
+    JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) {
+        assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK);
+        ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload;
+    }
+    bool isDouble() const {
+        return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK;
+    }
+    JsonTag getTag() const {
+        return isDouble() ? JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK);
+    }
+    uint64_t getPayload() const {
+        assert(!isDouble());
+        return ival & JSON_VALUE_PAYLOAD_MASK;
+    }
+    double toNumber() const {
+        assert(getTag() == JSON_NUMBER);
+        return fval;
+    }
+    char *toString() const {
+        assert(getTag() == JSON_STRING);
+        return (char *)getPayload();
+    }
+    JsonNode *toNode() const {
+        assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT);
+        return (JsonNode *)getPayload();
+    }
+};
+
+struct JsonNode {
+    JsonValue value;
+    JsonNode *next;
+    char *key;
+};
+
+struct JsonIterator {
+    JsonNode *p;
+
+    void operator++() {
+        p = p->next;
+    }
+    bool operator!=(const JsonIterator &x) const {
+        return p != x.p;
+    }
+    JsonNode *operator*() const {
+        return p;
+    }
+    JsonNode *operator->() const {
+        return p;
+    }
+};
+
+inline JsonIterator begin(JsonValue o) {
+    return JsonIterator{o.toNode()};
+}
+inline JsonIterator end(JsonValue) {
+    return JsonIterator{nullptr};
+}
+
+#define JSON_ERRNO_MAP(XX)                           \
+    XX(OK, "ok")                                     \
+    XX(BAD_NUMBER, "bad number")                     \
+    XX(BAD_STRING, "bad string")                     \
+    XX(BAD_IDENTIFIER, "bad identifier")             \
+    XX(STACK_OVERFLOW, "stack overflow")             \
+    XX(STACK_UNDERFLOW, "stack underflow")           \
+    XX(MISMATCH_BRACKET, "mismatch bracket")         \
+    XX(UNEXPECTED_CHARACTER, "unexpected character") \
+    XX(UNQUOTED_KEY, "unquoted key")                 \
+    XX(BREAKING_BAD, "breaking bad")                 \
+    XX(ALLOCATION_FAILURE, "allocation failure")
+
+enum JsonErrno {
+#define XX(no, str) JSON_##no,
+    JSON_ERRNO_MAP(XX)
+#undef XX
+};
+
+const char *jsonStrError(int err);
+
+class JsonAllocator {
+    struct Zone {
+        Zone *next;
+        size_t used;
+    } *head = nullptr;
+
+public:
+    JsonAllocator() = default;
+    JsonAllocator(const JsonAllocator &) = delete;
+    JsonAllocator &operator=(const JsonAllocator &) = delete;
+    JsonAllocator(JsonAllocator &&x) : head(x.head) {
+        x.head = nullptr;
+    }
+    JsonAllocator &operator=(JsonAllocator &&x) {
+        head = x.head;
+        x.head = nullptr;
+        return *this;
+    }
+    ~JsonAllocator() {
+        deallocate();
+    }
+    void *allocate(size_t size);
+    void deallocate();
+};
+
+int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator);
diff --git a/nlp_metrics/common/maskApi.c b/nlp_metrics/common/maskApi.c
new file mode 100644
index 0000000..917dc28
--- /dev/null
+++ b/nlp_metrics/common/maskApi.c
@@ -0,0 +1,231 @@
+/**************************************************************************
+* Microsoft COCO Toolbox.      version 2.0
+* Data, paper, and tutorials available at:  http://mscoco.org/
+* Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
+* Licensed under the Simplified BSD License [see coco/license.txt]
+**************************************************************************/
+#include "maskApi.h"
+#include <math.h>
+#include <stdlib.h>
+
+uint umin( uint a, uint b ) { return (a<b) ? a : b; }
+uint umax( uint a, uint b ) { return (a>b) ? a : b; }
+
+void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ) {
+  R->h=h; R->w=w; R->m=m; R->cnts=(m==0)?0:malloc(sizeof(uint)*m);
+  siz j; if(cnts) for(j=0; j<m; j++) R->cnts[j]=cnts[j];
+}
+
+void rleFree( RLE *R ) {
+  free(R->cnts); R->cnts=0;
+}
+
+void rlesInit( RLE **R, siz n ) {
+  siz i; *R = (RLE*) malloc(sizeof(RLE)*n);
+  for(i=0; i<n; i++) rleInit((*R)+i,0,0,0,0);
+}
+
+void rlesFree( RLE **R, siz n ) {
+  siz i; for(i=0; i<n; i++) rleFree((*R)+i); free(*R); *R=0;
+}
+
+void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n ) {
+  siz i, j, k, a=w*h; uint c, *cnts; byte p;
+  cnts = malloc(sizeof(uint)*(a+1));
+  for(i=0; i<n; i++) {
+    const byte *T=M+a*i; k=0; p=0; c=0;
+    for(j=0; j<a; j++) { if(T[j]!=p) { cnts[k++]=c; c=0; p=T[j]; } c++; }
+    cnts[k++]=c; rleInit(R+i,h,w,k,cnts);
+  }
+  free(cnts);
+}
+
+void rleDecode( const RLE *R, byte *M, siz n ) {
+  siz i, j, k; for( i=0; i<n; i++ ) {
+    byte v=0; for( j=0; j<R[i].m; j++ ) {
+      for( k=0; k<R[i].cnts[j]; k++ ) *(M++)=v; v=!v; }}
+}
+
+void rleMerge( const RLE *R, RLE *M, siz n, int intersect ) {
+  uint *cnts, c, ca, cb, cc, ct; int v, va, vb, vp;
+  siz i, a, b, h=R[0].h, w=R[0].w, m=R[0].m; RLE A, B;
+  if(n==0) { rleInit(M,0,0,0,0); return; }
+  if(n==1) { rleInit(M,h,w,m,R[0].cnts); return; }
+  cnts = malloc(sizeof(uint)*(h*w+1));
+  for( a=0; a<m; a++ ) cnts[a]=R[0].cnts[a];
+  for( i=1; i<n; i++ ) {
+    B=R[i]; if(B.h!=h||B.w!=w) { h=w=m=0; break; }
+    rleInit(&A,h,w,m,cnts); ca=A.cnts[0]; cb=B.cnts[0];
+    v=va=vb=0; m=0; a=b=1; cc=0; ct=1;
+    while( ct>0 ) {
+      c=umin(ca,cb); cc+=c; ct=0;
+      ca-=c; if(!ca && a<A.m) { ca=A.cnts[a++]; va=!va; } ct+=ca;
+      cb-=c; if(!cb && b<B.m) { cb=B.cnts[b++]; vb=!vb; } ct+=cb;
+      vp=v; if(intersect) v=va&&vb; else v=va||vb;
+      if( v!=vp||ct==0 ) { cnts[m++]=cc; cc=0; }
+    }
+    rleFree(&A);
+  }
+  rleInit(M,h,w,m,cnts); free(cnts);
+}
+
+void rleArea( const RLE *R, siz n, uint *a ) {
+  siz i, j; for( i=0; i<n; i++ ) {
+    a[i]=0; for( j=1; j<R[i].m; j+=2 ) a[i]+=R[i].cnts[j]; }
+}
+
+void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ) {
+  siz g, d; BB db, gb; int crowd;
+  db=malloc(sizeof(double)*m*4); rleToBbox(dt,db,m);
+  gb=malloc(sizeof(double)*n*4); rleToBbox(gt,gb,n);
+  bbIou(db,gb,m,n,iscrowd,o); free(db); free(gb);
+  for( g=0; g<n; g++ ) for( d=0; d<m; d++ ) if(o[g*m+d]>0) {
+    crowd=iscrowd!=NULL && iscrowd[g];
+    if(dt[d].h!=gt[g].h || dt[d].w!=gt[g].w) { o[g*m+d]=-1; continue; }
+    siz ka, kb, a, b; uint c, ca, cb, ct, i, u; int va, vb;
+    ca=dt[d].cnts[0]; ka=dt[d].m; va=vb=0;
+    cb=gt[g].cnts[0]; kb=gt[g].m; a=b=1; i=u=0; ct=1;
+    while( ct>0 ) {
+      c=umin(ca,cb); if(va||vb) { u+=c; if(va&&vb) i+=c; } ct=0;
+      ca-=c; if(!ca && a<ka) { ca=dt[d].cnts[a++]; va=!va; } ct+=ca;
+      cb-=c; if(!cb && b<kb) { cb=gt[g].cnts[b++]; vb=!vb; } ct+=cb;
+    }
+    if(i==0) u=1; else if(crowd) rleArea(dt+d,1,&u);
+    o[g*m+d] = (double)i/(double)u;
+  }
+}
+
+void rleNms( RLE *dt, siz n, uint *keep, double thr ) {
+  siz i, j; double u;
+  for( i=0; i<n; i++ ) keep[i]=1;
+  for( i=0; i<n; i++ ) if(keep[i]) {
+    for( j=i+1; j<n; j++ ) if(keep[j]) {
+      rleIou(dt+i,dt+j,1,1,0,&u);
+      if(u>thr) keep[j]=0;
+    }
+  }
+}
+
+void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ) {
+  double h, w, i, u, ga, da; siz g, d; int crowd;
+  for( g=0; g<n; g++ ) {
+    BB G=gt+g*4; ga=G[2]*G[3]; crowd=iscrowd!=NULL && iscrowd[g];
+    for( d=0; d<m; d++ ) {
+      BB D=dt+d*4; da=D[2]*D[3]; o[g*m+d]=0;
+      w=fmin(D[2]+D[0],G[2]+G[0])-fmax(D[0],G[0]); if(w<=0) continue;
+      h=fmin(D[3]+D[1],G[3]+G[1])-fmax(D[1],G[1]); if(h<=0) continue;
+      i=w*h; u = crowd ? da : da+ga-i; o[g*m+d]=i/u;
+    }
+  }
+}
+
+void bbNms( BB dt, siz n, uint *keep, double thr ) {
+  siz i, j; double u;
+  for( i=0; i<n; i++ ) keep[i]=1;
+  for( i=0; i<n; i++ ) if(keep[i]) {
+    for( j=i+1; j<n; j++ ) if(keep[j]) {
+      bbIou(dt+i*4,dt+j*4,1,1,0,&u);
+      if(u>thr) keep[j]=0;
+    }
+  }
+}
+
+void rleToBbox( const RLE *R, BB bb, siz n ) {
+  siz i; for( i=0; i<n; i++ ) {
+    uint h, w, x, y, xs, ys, xe, ye, xp, cc, t; siz j, m;
+    h=(uint)R[i].h; w=(uint)R[i].w; m=R[i].m;
+    m=((siz)(m/2))*2; xs=w; ys=h; xe=ye=0; cc=0;
+    if(m==0) { bb[4*i+0]=bb[4*i+1]=bb[4*i+2]=bb[4*i+3]=0; continue; }
+    for( j=0; j<m; j++ ) {
+      cc+=R[i].cnts[j]; t=cc-j%2; y=t%h; x=(t-y)/h;
+      if(j%2==0) xp=x; else if(xp<x) { ys=0; ye=h-1; }
+      xs=umin(xs,x); xe=umax(xe,x); ys=umin(ys,y); ye=umax(ye,y);
+    }
+    bb[4*i+0]=xs; bb[4*i+2]=xe-xs+1;
+    bb[4*i+1]=ys; bb[4*i+3]=ye-ys+1;
+  }
+}
+
+void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ) {
+  siz i; for( i=0; i<n; i++ ) {
+    double xs=bb[4*i+0], xe=xs+bb[4*i+2];
+    double ys=bb[4*i+1], ye=ys+bb[4*i+3];
+    double xy[8] = {xs,ys,xs,ye,xe,ye,xe,ys};
+    rleFrPoly( R+i, xy, 4, h, w );
+  }
+}
+
+int uintCompare(const void *a, const void *b) {
+  uint c=*((uint*)a), d=*((uint*)b); return c>d?1:c<d?-1:0;
+}
+
+void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ) {
+  /* upsample and get discrete points densely along entire boundary */
+  siz j, m=0; double scale=5; int *x, *y, *u, *v; uint *a, *b;
+  x=malloc(sizeof(int)*(k+1)); y=malloc(sizeof(int)*(k+1));
+  for(j=0; j<k; j++) x[j]=(int)(scale*xy[j*2+0]+.5); x[k]=x[0];
+  for(j=0; j<k; j++) y[j]=(int)(scale*xy[j*2+1]+.5); y[k]=y[0];
+  for(j=0; j<k; j++) m+=umax(abs(x[j]-x[j+1]),abs(y[j]-y[j+1]))+1;
+  u=malloc(sizeof(int)*m); v=malloc(sizeof(int)*m); m=0;
+  for( j=0; j<k; j++ ) {
+    int xs=x[j], xe=x[j+1], ys=y[j], ye=y[j+1], dx, dy, t, d;
+    int flip; double s; dx=abs(xe-xs); dy=abs(ys-ye);
+    flip = (dx>=dy && xs>xe) || (dx<dy && ys>ye);
+    if(flip) { t=xs; xs=xe; xe=t; t=ys; ys=ye; ye=t; }
+    s = dx>=dy ? (double)(ye-ys)/dx : (double)(xe-xs)/dy;
+    if(dx>=dy) for( d=0; d<=dx; d++ ) {
+      t=flip?dx-d:d; u[m]=t+xs; v[m]=(int)(ys+s*t+.5); m++;
+    } else for( d=0; d<=dy; d++ ) {
+      t=flip?dy-d:d; v[m]=t+ys; u[m]=(int)(xs+s*t+.5); m++;
+    }
+  }
+  /* get points along y-boundary and downsample */
+  free(x); free(y); k=m; m=0; double xd, yd;
+  x=malloc(sizeof(int)*k); y=malloc(sizeof(int)*k);
+  for( j=1; j<k; j++ ) if(u[j]!=u[j-1]) {
+    xd=(double)(u[j]<u[j-1]?u[j]:u[j]-1); xd=(xd+.5)/scale-.5;
+    if( floor(xd)!=xd || xd<0 || xd>w-1 ) continue;
+    yd=(double)(v[j]<v[j-1]?v[j]:v[j-1]); yd=(yd+.5)/scale-.5;
+    if(yd<0) yd=0; else if(yd>h) yd=h; yd=ceil(yd);
+    x[m]=(int) xd; y[m]=(int) yd; m++;
+  }
+  /* compute rle encoding given y-boundary points */
+  k=m; a=malloc(sizeof(uint)*(k+1));
+  for( j=0; j<k; j++ ) a[j]=(uint)(x[j]*(int)(h)+y[j]);
+  a[k++]=(uint)(h*w); free(u); free(v); free(x); free(y);
+  qsort(a,k,sizeof(uint),uintCompare); uint p=0;
+  for( j=0; j<k; j++ ) { uint t=a[j]; a[j]-=p; p=t; }
+  b=malloc(sizeof(uint)*k); j=m=0; b[m++]=a[j++];
+  while(j<k) if(a[j]>0) b[m++]=a[j++]; else {
+    j++; if(j<k) b[m-1]+=a[j++]; }
+  rleInit(R,h,w,m,b); free(a); free(b);
+}
+
+char* rleToString( const RLE *R ) {
+  /* Similar to LEB128 but using 6 bits/char and ascii chars 48-111. */
+  siz i, m=R->m, p=0; long x; int more;
+  char *s=malloc(sizeof(char)*m*6);
+  for( i=0; i<m; i++ ) {
+    x=(long) R->cnts[i]; if(i>2) x-=(long) R->cnts[i-2]; more=1;
+    while( more ) {
+      char c=x & 0x1f; x >>= 5; more=(c & 0x10) ? x!=-1 : x!=0;
+      if(more) c |= 0x20; c+=48; s[p++]=c;
+    }
+  }
+  s[p]=0; return s;
+}
+
+void rleFrString( RLE *R, char *s, siz h, siz w ) {
+  siz m=0, p=0, k; long x; int more; uint *cnts;
+  while( s[m] ) m++; cnts=malloc(sizeof(uint)*m); m=0;
+  while( s[p] ) {
+    x=0; k=0; more=1;
+    while( more ) {
+      char c=s[p]-48; x |= (c & 0x1f) << 5*k;
+      more = c & 0x20; p++; k++;
+      if(!more && (c & 0x10)) x |= -1 << 5*k;
+    }
+    if(m>2) x+=(long) cnts[m-2]; cnts[m++]=(uint) x;
+  }
+  rleInit(R,h,w,m,cnts); free(cnts);
+}
diff --git a/nlp_metrics/common/maskApi.h b/nlp_metrics/common/maskApi.h
new file mode 100644
index 0000000..ebc7892
--- /dev/null
+++ b/nlp_metrics/common/maskApi.h
@@ -0,0 +1,60 @@
+/**************************************************************************
+* Microsoft COCO Toolbox.      version 2.0
+* Data, paper, and tutorials available at:  http://mscoco.org/
+* Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
+* Licensed under the Simplified BSD License [see coco/license.txt]
+**************************************************************************/
+#pragma once
+
+typedef unsigned int uint;
+typedef unsigned long siz;
+typedef unsigned char byte;
+typedef double* BB;
+typedef struct { siz h, w, m; uint *cnts; } RLE;
+
+/* Initialize/destroy RLE. */
+void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts );
+void rleFree( RLE *R );
+
+/* Initialize/destroy RLE array. */
+void rlesInit( RLE **R, siz n );
+void rlesFree( RLE **R, siz n );
+
+/* Encode binary masks using RLE. */
+void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n );
+
+/* Decode binary masks encoded via RLE. */
+void rleDecode( const RLE *R, byte *mask, siz n );
+
+/* Compute union or intersection of encoded masks. */
+void rleMerge( const RLE *R, RLE *M, siz n, int intersect );
+
+/* Compute area of encoded masks. */
+void rleArea( const RLE *R, siz n, uint *a );
+
+/* Compute intersection over union between masks. */
+void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o );
+
+/* Compute non-maximum suppression between bounding masks */
+void rleNms( RLE *dt, siz n, uint *keep, double thr );
+
+/* Compute intersection over union between bounding boxes. */
+void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o );
+
+/* Compute non-maximum suppression between bounding boxes */
+void bbNms( BB dt, siz n, uint *keep, double thr );
+
+/* Get bounding boxes surrounding encoded masks. */
+void rleToBbox( const RLE *R, BB bb, siz n );
+
+/* Convert bounding boxes to encoded masks. */
+void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n );
+
+/* Convert polygon to encoded mask. */
+void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w );
+
+/* Get compressed string representation of encoded mask. */
+char* rleToString( const RLE *R );
+
+/* Convert from compressed string representation of encoded mask. */
+void rleFrString( RLE *R, char *s, siz h, siz w );
diff --git a/nlp_metrics/pycocotools/__init__.py b/nlp_metrics/pycocotools/__init__.py
new file mode 100644
index 0000000..3f7d85b
--- /dev/null
+++ b/nlp_metrics/pycocotools/__init__.py
@@ -0,0 +1 @@
+__author__ = 'tylin'
diff --git a/nlp_metrics/pycocotools/_mask.pyx b/nlp_metrics/pycocotools/_mask.pyx
new file mode 100644
index 0000000..d065837
--- /dev/null
+++ b/nlp_metrics/pycocotools/_mask.pyx
@@ -0,0 +1,308 @@
+# distutils: language = c
+# distutils: sources = ../common/maskApi.c
+
+#**************************************************************************
+# Microsoft COCO Toolbox.      version 2.0
+# Data, paper, and tutorials available at:  http://mscoco.org/
+# Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
+# Licensed under the Simplified BSD License [see coco/license.txt]
+#**************************************************************************
+
+__author__ = 'tsungyi'
+
+import sys
+PYTHON_VERSION = sys.version_info[0]
+
+# import both Python-level and C-level symbols of Numpy
+# the API uses Numpy to interface C and Python
+import numpy as np
+cimport numpy as np
+from libc.stdlib cimport malloc, free
+
+# intialized Numpy. must do.
+np.import_array()
+
+# import numpy C function
+# we use PyArray_ENABLEFLAGS to make Numpy ndarray responsible to memoery management
+cdef extern from "numpy/arrayobject.h":
+    void PyArray_ENABLEFLAGS(np.ndarray arr, int flags)
+
+# Declare the prototype of the C functions in MaskApi.h
+cdef extern from "maskApi.h":
+    ctypedef unsigned int uint
+    ctypedef unsigned long siz
+    ctypedef unsigned char byte
+    ctypedef double* BB
+    ctypedef struct RLE:
+        siz h,
+        siz w,
+        siz m,
+        uint* cnts,
+    void rlesInit( RLE **R, siz n )
+    void rleEncode( RLE *R, const byte *M, siz h, siz w, siz n )
+    void rleDecode( const RLE *R, byte *mask, siz n )
+    void rleMerge( const RLE *R, RLE *M, siz n, int intersect )
+    void rleArea( const RLE *R, siz n, uint *a )
+    void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o )
+    void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o )
+    void rleToBbox( const RLE *R, BB bb, siz n )
+    void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n )
+    void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w )
+    char* rleToString( const RLE *R )
+    void rleFrString( RLE *R, char *s, siz h, siz w )
+
+# python class to wrap RLE array in C
+# the class handles the memory allocation and deallocation
+cdef class RLEs:
+    cdef RLE *_R
+    cdef siz _n
+
+    def __cinit__(self, siz n =0):
+        rlesInit(&self._R, n)
+        self._n = n
+
+    # free the RLE array here
+    def __dealloc__(self):
+        if self._R is not NULL:
+            for i in range(self._n):
+                free(self._R[i].cnts)
+            free(self._R)
+    def __getattr__(self, key):
+        if key == 'n':
+            return self._n
+        raise AttributeError(key)
+
+# python class to wrap Mask array in C
+# the class handles the memory allocation and deallocation
+cdef class Masks:
+    cdef byte *_mask
+    cdef siz _h
+    cdef siz _w
+    cdef siz _n
+
+    def __cinit__(self, h, w, n):
+        self._mask = <byte*> malloc(h*w*n* sizeof(byte))
+        self._h = h
+        self._w = w
+        self._n = n
+    # def __dealloc__(self):
+        # the memory management of _mask has been passed to np.ndarray
+        # it doesn't need to be freed here
+
+    # called when passing into np.array() and return an np.ndarray in column-major order
+    def __array__(self):
+        cdef np.npy_intp shape[1]
+        shape[0] = <np.npy_intp> self._h*self._w*self._n
+        # Create a 1D array, and reshape it to fortran/Matlab column-major array
+        ndarray = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT8, self._mask).reshape((self._h, self._w, self._n), order='F')
+        # The _mask allocated by Masks is now handled by ndarray
+        PyArray_ENABLEFLAGS(ndarray, np.NPY_OWNDATA)
+        return ndarray
+
+# internal conversion from Python RLEs object to compressed RLE format
+def _toString(RLEs Rs):
+    cdef siz n = Rs.n
+    cdef bytes py_string
+    cdef char* c_string
+    objs = []
+    for i in range(n):
+        c_string = rleToString( <RLE*> &Rs._R[i] )
+        py_string = c_string
+        objs.append({
+            'size': [Rs._R[i].h, Rs._R[i].w],
+            'counts': py_string
+        })
+        free(c_string)
+    return objs
+
+# internal conversion from compressed RLE format to Python RLEs object
+def _frString(rleObjs):
+    cdef siz n = len(rleObjs)
+    Rs = RLEs(n)
+    cdef bytes py_string
+    cdef char* c_string
+    for i, obj in enumerate(rleObjs):
+        if PYTHON_VERSION == 2:
+            py_string = str(obj['counts']).encode('utf8')
+        elif PYTHON_VERSION == 3:
+            py_string = str.encode(obj['counts']) if type(obj['counts']) == str else obj['counts']
+        else:
+            raise Exception('Python version must be 2 or 3')
+        c_string = py_string
+        rleFrString( <RLE*> &Rs._R[i], <char*> c_string, obj['size'][0], obj['size'][1] )
+    return Rs
+
+# encode mask to RLEs objects
+# list of RLE string can be generated by RLEs member function
+def encode(np.ndarray[np.uint8_t, ndim=3, mode='fortran'] mask):
+    h, w, n = mask.shape[0], mask.shape[1], mask.shape[2]
+    cdef RLEs Rs = RLEs(n)
+    rleEncode(Rs._R,<byte*>mask.data,h,w,n)
+    objs = _toString(Rs)
+    return objs
+
+# decode mask from compressed list of RLE string or RLEs object
+def decode(rleObjs):
+    cdef RLEs Rs = _frString(rleObjs)
+    h, w, n = Rs._R[0].h, Rs._R[0].w, Rs._n
+    masks = Masks(h, w, n)
+    rleDecode(<RLE*>Rs._R, masks._mask, n);
+    return np.array(masks)
+
+def merge(rleObjs, intersect=0):
+    cdef RLEs Rs = _frString(rleObjs)
+    cdef RLEs R = RLEs(1)
+    rleMerge(<RLE*>Rs._R, <RLE*> R._R, <siz> Rs._n, intersect)
+    obj = _toString(R)[0]
+    return obj
+
+def area(rleObjs):
+    cdef RLEs Rs = _frString(rleObjs)
+    cdef uint* _a = <uint*> malloc(Rs._n* sizeof(uint))
+    rleArea(Rs._R, Rs._n, _a)
+    cdef np.npy_intp shape[1]
+    shape[0] = <np.npy_intp> Rs._n
+    a = np.array((Rs._n, ), dtype=np.uint8)
+    a = np.PyArray_SimpleNewFromData(1, shape, np.NPY_UINT32, _a)
+    PyArray_ENABLEFLAGS(a, np.NPY_OWNDATA)
+    return a
+
+# iou computation. support function overload (RLEs-RLEs and bbox-bbox).
+def iou( dt, gt, pyiscrowd ):
+    def _preproc(objs):
+        if len(objs) == 0:
+            return objs
+        if type(objs) == np.ndarray:
+            if len(objs.shape) == 1:
+                objs = objs.reshape((objs[0], 1))
+            # check if it's Nx4 bbox
+            if not len(objs.shape) == 2 or not objs.shape[1] == 4:
+                raise Exception('numpy ndarray input is only for *bounding boxes* and should have Nx4 dimension')
+            objs = objs.astype(np.double)
+        elif type(objs) == list:
+            # check if list is in box format and convert it to np.ndarray
+            isbox = np.all(np.array([(len(obj)==4) and ((type(obj)==list) or (type(obj)==np.ndarray)) for obj in objs]))
+            isrle = np.all(np.array([type(obj) == dict for obj in objs]))
+            if isbox:
+                objs = np.array(objs, dtype=np.double)
+                if len(objs.shape) == 1:
+                    objs = objs.reshape((1,objs.shape[0]))
+            elif isrle:
+                objs = _frString(objs)
+            else:
+                raise Exception('list input can be bounding box (Nx4) or RLEs ([RLE])')
+        else:
+            raise Exception('unrecognized type.  The following type: RLEs (rle), np.ndarray (box), and list (box) are supported.')
+        return objs
+    def _rleIou(RLEs dt, RLEs gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t,  ndim=1] _iou):
+        rleIou( <RLE*> dt._R, <RLE*> gt._R, m, n, <byte*> iscrowd.data, <double*> _iou.data )
+    def _bbIou(np.ndarray[np.double_t, ndim=2] dt, np.ndarray[np.double_t, ndim=2] gt, np.ndarray[np.uint8_t, ndim=1] iscrowd, siz m, siz n, np.ndarray[np.double_t, ndim=1] _iou):
+        bbIou( <BB> dt.data, <BB> gt.data, m, n, <byte*> iscrowd.data, <double*>_iou.data )
+    def _len(obj):
+        cdef siz N = 0
+        if type(obj) == RLEs:
+            N = obj.n
+        elif len(obj)==0:
+            pass
+        elif type(obj) == np.ndarray:
+            N = obj.shape[0]
+        return N
+    # convert iscrowd to numpy array
+    cdef np.ndarray[np.uint8_t, ndim=1] iscrowd = np.array(pyiscrowd, dtype=np.uint8)
+    # simple type checking
+    cdef siz m, n
+    dt = _preproc(dt)
+    gt = _preproc(gt)
+    m = _len(dt)
+    n = _len(gt)
+    if m == 0 or n == 0:
+        return []
+    if not type(dt) == type(gt):
+        raise Exception('The dt and gt should have the same data type, either RLEs, list or np.ndarray')
+
+    # define local variables
+    cdef double* _iou = <double*> 0
+    cdef np.npy_intp shape[1]
+    # check type and assign iou function
+    if type(dt) == RLEs:
+        _iouFun = _rleIou
+    elif type(dt) == np.ndarray:
+        _iouFun = _bbIou
+    else:
+        raise Exception('input data type not allowed.')
+    _iou = <double*> malloc(m*n* sizeof(double))
+    iou = np.zeros((m*n, ), dtype=np.double)
+    shape[0] = <np.npy_intp> m*n
+    iou = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _iou)
+    PyArray_ENABLEFLAGS(iou, np.NPY_OWNDATA)
+    _iouFun(dt, gt, iscrowd, m, n, iou)
+    return iou.reshape((m,n), order='F')
+
+def toBbox( rleObjs ):
+    cdef RLEs Rs = _frString(rleObjs)
+    cdef siz n = Rs.n
+    cdef BB _bb = <BB> malloc(4*n* sizeof(double))
+    rleToBbox( <const RLE*> Rs._R, _bb, n )
+    cdef np.npy_intp shape[1]
+    shape[0] = <np.npy_intp> 4*n
+    bb = np.array((1,4*n), dtype=np.double)
+    bb = np.PyArray_SimpleNewFromData(1, shape, np.NPY_DOUBLE, _bb).reshape((n, 4))
+    PyArray_ENABLEFLAGS(bb, np.NPY_OWNDATA)
+    return bb
+
+def frBbox(np.ndarray[np.double_t, ndim=2] bb, siz h, siz w ):
+    cdef siz n = bb.shape[0]
+    Rs = RLEs(n)
+    rleFrBbox( <RLE*> Rs._R, <const BB> bb.data, h, w, n )
+    objs = _toString(Rs)
+    return objs
+
+def frPoly( poly, siz h, siz w ):
+    cdef np.ndarray[np.double_t, ndim=1] np_poly
+    n = len(poly)
+    Rs = RLEs(n)
+    for i, p in enumerate(poly):
+        np_poly = np.array(p, dtype=np.double, order='F')
+        rleFrPoly( <RLE*>&Rs._R[i], <const double*> np_poly.data, int(len(p)/2), h, w )
+    objs = _toString(Rs)
+    return objs
+
+def frUncompressedRLE(ucRles, siz h, siz w):
+    cdef np.ndarray[np.uint32_t, ndim=1] cnts
+    cdef RLE R
+    cdef uint *data
+    n = len(ucRles)
+    objs = []
+    for i in range(n):
+        Rs = RLEs(1)
+        cnts = np.array(ucRles[i]['counts'], dtype=np.uint32)
+        # time for malloc can be saved here but it's fine
+        data = <uint*> malloc(len(cnts)* sizeof(uint))
+        for j in range(len(cnts)):
+            data[j] = <uint> cnts[j]
+        R = RLE(ucRles[i]['size'][0], ucRles[i]['size'][1], len(cnts), <uint*> data)
+        Rs._R[0] = R
+        objs.append(_toString(Rs)[0])
+    return objs
+
+def frPyObjects(pyobj, h, w):
+    # encode rle from a list of python objects
+    if type(pyobj) == np.ndarray:
+        objs = frBbox(pyobj, h, w)
+    elif type(pyobj) == list and len(pyobj[0]) == 4:
+        objs = frBbox(pyobj, h, w)
+    elif type(pyobj) == list and len(pyobj[0]) > 4:
+        objs = frPoly(pyobj, h, w)
+    elif type(pyobj) == list and type(pyobj[0]) == dict \
+        and 'counts' in pyobj[0] and 'size' in pyobj[0]:
+        objs = frUncompressedRLE(pyobj, h, w)
+    # encode rle from single python object
+    elif type(pyobj) == list and len(pyobj) == 4:
+        objs = frBbox([pyobj], h, w)[0]
+    elif type(pyobj) == list and len(pyobj) > 4:
+        objs = frPoly([pyobj], h, w)[0]
+    elif type(pyobj) == dict and 'counts' in pyobj and 'size' in pyobj:
+        objs = frUncompressedRLE([pyobj], h, w)[0]
+    else:
+        raise Exception('input type is not supported.')
+    return objs
diff --git a/nlp_metrics/pycocotools/coco.py b/nlp_metrics/pycocotools/coco.py
new file mode 100644
index 0000000..fecbde9
--- /dev/null
+++ b/nlp_metrics/pycocotools/coco.py
@@ -0,0 +1,441 @@
+__author__ = 'tylin'
+__version__ = '2.0'
+# Interface for accessing the Microsoft COCO dataset.
+
+# Microsoft COCO is a large image dataset designed for object detection,
+# segmentation, and caption generation. pycocotools is a Python API that
+# assists in loading, parsing and visualizing the annotations in COCO.
+# Please visit http://mscoco.org/ for more information on COCO, including
+# for the data, paper, and tutorials. The exact format of the annotations
+# is also described on the COCO website. For example usage of the pycocotools
+# please see pycocotools_demo.ipynb. In addition to this API, please download both
+# the COCO images and annotations in order to run the demo.
+
+# An alternative to using the API is to load the annotations directly
+# into Python dictionary
+# Using the API provides additional utility functions. Note that this API
+# supports both *instance* and *caption* annotations. In the case of
+# captions not all functions are defined (e.g. categories are undefined).
+
+# The following API functions are defined:
+#  COCO       - COCO api class that loads COCO annotation file and prepare data structures.
+#  decodeMask - Decode binary mask M encoded via run-length encoding.
+#  encodeMask - Encode binary mask M using run-length encoding.
+#  getAnnIds  - Get ann ids that satisfy given filter conditions.
+#  getCatIds  - Get cat ids that satisfy given filter conditions.
+#  getImgIds  - Get img ids that satisfy given filter conditions.
+#  loadAnns   - Load anns with the specified ids.
+#  loadCats   - Load cats with the specified ids.
+#  loadImgs   - Load imgs with the specified ids.
+#  annToMask  - Convert segmentation in an annotation to binary mask.
+#  showAnns   - Display the specified annotations.
+#  loadRes    - Load algorithm results and create API for accessing them.
+#  download   - Download COCO images from mscoco.org server.
+# Throughout the API "ann"=annotation, "cat"=category, and "img"=image.
+# Help on each functions can be accessed by: "help COCO>function".
+
+# See also COCO>decodeMask,
+# COCO>encodeMask, COCO>getAnnIds, COCO>getCatIds,
+# COCO>getImgIds, COCO>loadAnns, COCO>loadCats,
+# COCO>loadImgs, COCO>annToMask, COCO>showAnns
+
+# Microsoft COCO Toolbox.      version 2.0
+# Data, paper, and tutorials available at:  http://mscoco.org/
+# Code written by Piotr Dollar and Tsung-Yi Lin, 2014.
+# Licensed under the Simplified BSD License [see bsd.txt]
+
+import json
+import time
+import matplotlib.pyplot as plt
+from matplotlib.collections import PatchCollection
+from matplotlib.patches import Polygon
+import numpy as np
+import copy
+import itertools
+from nlp_metrics.pycocotools import mask as maskUtils
+import os
+from collections import defaultdict
+import sys
+PYTHON_VERSION = sys.version_info[0]
+if PYTHON_VERSION == 2:
+    from urllib import urlretrieve
+elif PYTHON_VERSION == 3:
+    from urllib.request import urlretrieve
+
+
+def _isArrayLike(obj):
+    return hasattr(obj, '__iter__') and hasattr(obj, '__len__')
+
+
+class COCO:
+    def __init__(self, annotation_file=None):
+        """
+        Constructor of Microsoft COCO helper class for reading and visualizing annotations.
+        :param annotation_file (str): location of annotation file
+        :param image_folder (str): location to the folder that hosts images.
+        :return:
+        """
+        # load dataset
+        self.dataset,self.anns,self.cats,self.imgs = dict(),dict(),dict(),dict()
+        self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list)
+        if not annotation_file == None:
+            print('loading annotations into memory...')
+            tic = time.time()
+            dataset = json.load(open(annotation_file, 'r'))
+            assert type(dataset)==dict, 'annotation file format {} not supported'.format(type(dataset))
+            print('Done (t={:0.2f}s)'.format(time.time()- tic))
+            self.dataset = dataset
+            self.createIndex()
+
+    def createIndex(self):
+        # create index
+        print('creating index...')
+        anns, cats, imgs = {}, {}, {}
+        imgToAnns,catToImgs = defaultdict(list),defaultdict(list)
+        if 'annotations' in self.dataset:
+            for ann in self.dataset['annotations']:
+                imgToAnns[ann['image_id']].append(ann)
+                anns[ann['id']] = ann
+
+        if 'images' in self.dataset:
+            for img in self.dataset['images']:
+                imgs[img['id']] = img
+
+        if 'categories' in self.dataset:
+            for cat in self.dataset['categories']:
+                cats[cat['id']] = cat
+
+        if 'annotations' in self.dataset and 'categories' in self.dataset:
+            for ann in self.dataset['annotations']:
+                catToImgs[ann['category_id']].append(ann['image_id'])
+
+        print('index created!')
+
+        # create class members
+        self.anns = anns
+        self.imgToAnns = imgToAnns
+        self.catToImgs = catToImgs
+        self.imgs = imgs
+        self.cats = cats
+
+    def info(self):
+        """
+        Print information about the annotation file.
+        :return:
+        """
+        for key, value in self.dataset['info'].items():
+            print('{}: {}'.format(key, value))
+
+    def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None):
+        """
+        Get ann ids that satisfy given filter conditions. default skips that filter
+        :param imgIds  (int array)     : get anns for given imgs
+               catIds  (int array)     : get anns for given cats
+               areaRng (float array)   : get anns for given area range (e.g. [0 inf])
+               iscrowd (boolean)       : get anns for given crowd label (False or True)
+        :return: ids (int array)       : integer array of ann ids
+        """
+        imgIds = imgIds if _isArrayLike(imgIds) else [imgIds]
+        catIds = catIds if _isArrayLike(catIds) else [catIds]
+
+        if len(imgIds) == len(catIds) == len(areaRng) == 0:
+            anns = self.dataset['annotations']
+        else:
+            if not len(imgIds) == 0:
+                lists = [self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns]
+                anns = list(itertools.chain.from_iterable(lists))
+            else:
+                anns = self.dataset['annotations']
+            anns = anns if len(catIds)  == 0 else [ann for ann in anns if ann['category_id'] in catIds]
+            anns = anns if len(areaRng) == 0 else [ann for ann in anns if ann['area'] > areaRng[0] and ann['area'] < areaRng[1]]
+        if not iscrowd == None:
+            ids = [ann['id'] for ann in anns if ann['iscrowd'] == iscrowd]
+        else:
+            ids = [ann['id'] for ann in anns]
+        return ids
+
+    def getCatIds(self, catNms=[], supNms=[], catIds=[]):
+        """
+        filtering parameters. default skips that filter.
+        :param catNms (str array)  : get cats for given cat names
+        :param supNms (str array)  : get cats for given supercategory names
+        :param catIds (int array)  : get cats for given cat ids
+        :return: ids (int array)   : integer array of cat ids
+        """
+        catNms = catNms if _isArrayLike(catNms) else [catNms]
+        supNms = supNms if _isArrayLike(supNms) else [supNms]
+        catIds = catIds if _isArrayLike(catIds) else [catIds]
+
+        if len(catNms) == len(supNms) == len(catIds) == 0:
+            cats = self.dataset['categories']
+        else:
+            cats = self.dataset['categories']
+            cats = cats if len(catNms) == 0 else [cat for cat in cats if cat['name']          in catNms]
+            cats = cats if len(supNms) == 0 else [cat for cat in cats if cat['supercategory'] in supNms]
+            cats = cats if len(catIds) == 0 else [cat for cat in cats if cat['id']            in catIds]
+        ids = [cat['id'] for cat in cats]
+        return ids
+
+    def getImgIds(self, imgIds=[], catIds=[]):
+        '''
+        Get img ids that satisfy given filter conditions.
+        :param imgIds (int array) : get imgs for given ids
+        :param catIds (int array) : get imgs with all given cats
+        :return: ids (int array)  : integer array of img ids
+        '''
+        imgIds = imgIds if _isArrayLike(imgIds) else [imgIds]
+        catIds = catIds if _isArrayLike(catIds) else [catIds]
+
+        if len(imgIds) == len(catIds) == 0:
+            ids = self.imgs.keys()
+        else:
+            ids = set(imgIds)
+            for i, catId in enumerate(catIds):
+                if i == 0 and len(ids) == 0:
+                    ids = set(self.catToImgs[catId])
+                else:
+                    ids &= set(self.catToImgs[catId])
+        return list(ids)
+
+    def loadAnns(self, ids=[]):
+        """
+        Load anns with the specified ids.
+        :param ids (int array)       : integer ids specifying anns
+        :return: anns (object array) : loaded ann objects
+        """
+        if _isArrayLike(ids):
+            return [self.anns[id] for id in ids]
+        elif type(ids) == int:
+            return [self.anns[ids]]
+
+    def loadCats(self, ids=[]):
+        """
+        Load cats with the specified ids.
+        :param ids (int array)       : integer ids specifying cats
+        :return: cats (object array) : loaded cat objects
+        """
+        if _isArrayLike(ids):
+            return [self.cats[id] for id in ids]
+        elif type(ids) == int:
+            return [self.cats[ids]]
+
+    def loadImgs(self, ids=[]):
+        """
+        Load anns with the specified ids.
+        :param ids (int array)       : integer ids specifying img
+        :return: imgs (object array) : loaded img objects
+        """
+        if _isArrayLike(ids):
+            return [self.imgs[id] for id in ids]
+        elif type(ids) == int:
+            return [self.imgs[ids]]
+
+    def showAnns(self, anns, draw_bbox=False):
+        """
+        Display the specified annotations.
+        :param anns (array of object): annotations to display
+        :return: None
+        """
+        if len(anns) == 0:
+            return 0
+        if 'segmentation' in anns[0] or 'keypoints' in anns[0]:
+            datasetType = 'instances'
+        elif 'caption' in anns[0]:
+            datasetType = 'captions'
+        else:
+            raise Exception('datasetType not supported')
+        if datasetType == 'instances':
+            ax = plt.gca()
+            ax.set_autoscale_on(False)
+            polygons = []
+            color = []
+            for ann in anns:
+                c = (np.random.random((1, 3))*0.6+0.4).tolist()[0]
+                if 'segmentation' in ann:
+                    if type(ann['segmentation']) == list:
+                        # polygon
+                        for seg in ann['segmentation']:
+                            poly = np.array(seg).reshape((int(len(seg)/2), 2))
+                            polygons.append(Polygon(poly))
+                            color.append(c)
+                    else:
+                        # mask
+                        t = self.imgs[ann['image_id']]
+                        if type(ann['segmentation']['counts']) == list:
+                            rle = maskUtils.frPyObjects([ann['segmentation']], t['height'], t['width'])
+                        else:
+                            rle = [ann['segmentation']]
+                        m = maskUtils.decode(rle)
+                        img = np.ones( (m.shape[0], m.shape[1], 3) )
+                        if ann['iscrowd'] == 1:
+                            color_mask = np.array([2.0,166.0,101.0])/255
+                        if ann['iscrowd'] == 0:
+                            color_mask = np.random.random((1, 3)).tolist()[0]
+                        for i in range(3):
+                            img[:,:,i] = color_mask[i]
+                        ax.imshow(np.dstack( (img, m*0.5) ))
+                if 'keypoints' in ann and type(ann['keypoints']) == list:
+                    # turn skeleton into zero-based index
+                    sks = np.array(self.loadCats(ann['category_id'])[0]['skeleton'])-1
+                    kp = np.array(ann['keypoints'])
+                    x = kp[0::3]
+                    y = kp[1::3]
+                    v = kp[2::3]
+                    for sk in sks:
+                        if np.all(v[sk]>0):
+                            plt.plot(x[sk],y[sk], linewidth=3, color=c)
+                    plt.plot(x[v>0], y[v>0],'o',markersize=8, markerfacecolor=c, markeredgecolor='k',markeredgewidth=2)
+                    plt.plot(x[v>1], y[v>1],'o',markersize=8, markerfacecolor=c, markeredgecolor=c, markeredgewidth=2)
+
+                if draw_bbox:
+                    [bbox_x, bbox_y, bbox_w, bbox_h] = ann['bbox']
+                    poly = [[bbox_x, bbox_y], [bbox_x, bbox_y+bbox_h], [bbox_x+bbox_w, bbox_y+bbox_h], [bbox_x+bbox_w, bbox_y]]
+                    np_poly = np.array(poly).reshape((4,2))
+                    polygons.append(Polygon(np_poly))
+                    color.append(c)
+
+            p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.4)
+            ax.add_collection(p)
+            p = PatchCollection(polygons, facecolor='none', edgecolors=color, linewidths=2)
+            ax.add_collection(p)
+        elif datasetType == 'captions':
+            for ann in anns:
+                print(ann['caption'])
+
+    def loadRes(self, resFile):
+        """
+        Load result file and return a result api object.
+        :param   resFile (str)     : file name of result file
+        :return: res (obj)         : result api object
+        """
+        res = COCO()
+        res.dataset['images'] = [img for img in self.dataset['images']]
+
+        print('Loading and preparing results...')
+        tic = time.time()
+        if type(resFile) == str or (PYTHON_VERSION == 2 and type(resFile) == unicode):
+            anns = json.load(open(resFile))
+        elif type(resFile) == np.ndarray:
+            anns = self.loadNumpyAnnotations(resFile)
+        else:
+            anns = resFile
+        assert type(anns) == list, 'results in not an array of objects'
+        annsImgIds = [ann['image_id'] for ann in anns]
+        assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \
+               'Results do not correspond to current coco set'
+        if 'caption' in anns[0]:
+            imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns])
+            res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
+            for id, ann in enumerate(anns):
+                ann['id'] = id+1
+        elif 'bbox' in anns[0] and not anns[0]['bbox'] == []:
+            res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
+            for id, ann in enumerate(anns):
+                bb = ann['bbox']
+                x1, x2, y1, y2 = [bb[0], bb[0]+bb[2], bb[1], bb[1]+bb[3]]
+                if not 'segmentation' in ann:
+                    ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
+                ann['area'] = bb[2]*bb[3]
+                ann['id'] = id+1
+                ann['iscrowd'] = 0
+        elif 'segmentation' in anns[0]:
+            res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
+            for id, ann in enumerate(anns):
+                # now only support compressed RLE format as segmentation results
+                ann['area'] = maskUtils.area(ann['segmentation'])
+                if not 'bbox' in ann:
+                    ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
+                ann['id'] = id+1
+                ann['iscrowd'] = 0
+        elif 'keypoints' in anns[0]:
+            res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
+            for id, ann in enumerate(anns):
+                s = ann['keypoints']
+                x = s[0::3]
+                y = s[1::3]
+                x0,x1,y0,y1 = np.min(x), np.max(x), np.min(y), np.max(y)
+                ann['area'] = (x1-x0)*(y1-y0)
+                ann['id'] = id + 1
+                ann['bbox'] = [x0,y0,x1-x0,y1-y0]
+        print('DONE (t={:0.2f}s)'.format(time.time()- tic))
+
+        res.dataset['annotations'] = anns
+        res.createIndex()
+        return res
+
+    def download(self, tarDir = None, imgIds = [] ):
+        '''
+        Download COCO images from mscoco.org server.
+        :param tarDir (str): COCO results directory name
+               imgIds (list): images to be downloaded
+        :return:
+        '''
+        if tarDir is None:
+            print('Please specify target directory')
+            return -1
+        if len(imgIds) == 0:
+            imgs = self.imgs.values()
+        else:
+            imgs = self.loadImgs(imgIds)
+        N = len(imgs)
+        if not os.path.exists(tarDir):
+            os.makedirs(tarDir)
+        for i, img in enumerate(imgs):
+            tic = time.time()
+            fname = os.path.join(tarDir, img['file_name'])
+            if not os.path.exists(fname):
+                urlretrieve(img['coco_url'], fname)
+            print('downloaded {}/{} images (t={:0.1f}s)'.format(i, N, time.time()- tic))
+
+    def loadNumpyAnnotations(self, data):
+        """
+        Convert result data from a numpy array [Nx7] where each row contains {imageID,x1,y1,w,h,score,class}
+        :param  data (numpy.ndarray)
+        :return: annotations (python nested list)
+        """
+        print('Converting ndarray to lists...')
+        assert(type(data) == np.ndarray)
+        print(data.shape)
+        assert(data.shape[1] == 7)
+        N = data.shape[0]
+        ann = []
+        for i in range(N):
+            if i % 1000000 == 0:
+                print('{}/{}'.format(i,N))
+            ann += [{
+                'image_id'  : int(data[i, 0]),
+                'bbox'  : [ data[i, 1], data[i, 2], data[i, 3], data[i, 4] ],
+                'score' : data[i, 5],
+                'category_id': int(data[i, 6]),
+                }]
+        return ann
+
+    def annToRLE(self, ann):
+        """
+        Convert annotation which can be polygons, uncompressed RLE to RLE.
+        :return: binary mask (numpy 2D array)
+        """
+        t = self.imgs[ann['image_id']]
+        h, w = t['height'], t['width']
+        segm = ann['segmentation']
+        if type(segm) == list:
+            # polygon -- a single object might consist of multiple parts
+            # we merge all parts into one mask rle code
+            rles = maskUtils.frPyObjects(segm, h, w)
+            rle = maskUtils.merge(rles)
+        elif type(segm['counts']) == list:
+            # uncompressed RLE
+            rle = maskUtils.frPyObjects(segm, h, w)
+        else:
+            # rle
+            rle = ann['segmentation']
+        return rle
+
+    def annToMask(self, ann):
+        """
+        Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
+        :return: binary mask (numpy 2D array)
+        """
+        rle = self.annToRLE(ann)
+        m = maskUtils.decode(rle)
+        return m
\ No newline at end of file
diff --git a/nlp_metrics/pycocotools/cocoeval.py b/nlp_metrics/pycocotools/cocoeval.py
new file mode 100644
index 0000000..f4e3dec
--- /dev/null
+++ b/nlp_metrics/pycocotools/cocoeval.py
@@ -0,0 +1,534 @@
+__author__ = 'tsungyi'
+
+import numpy as np
+import datetime
+import time
+from collections import defaultdict
+from . import mask as maskUtils
+import copy
+
+class COCOeval:
+    # Interface for evaluating detection on the Microsoft COCO dataset.
+    #
+    # The usage for CocoEval is as follows:
+    #  cocoGt=..., cocoDt=...       # load dataset and results
+    #  E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object
+    #  E.params.recThrs = ...;      # set parameters as desired
+    #  E.evaluate();                # run per image evaluation
+    #  E.accumulate();              # accumulate per image results
+    #  E.summarize();               # display summary metrics of results
+    # For example usage see evalDemo.m and http://mscoco.org/.
+    #
+    # The evaluation parameters are as follows (defaults in brackets):
+    #  imgIds     - [all] N img ids to use for evaluation
+    #  catIds     - [all] K cat ids to use for evaluation
+    #  iouThrs    - [.5:.05:.95] T=10 IoU thresholds for evaluation
+    #  recThrs    - [0:.01:1] R=101 recall thresholds for evaluation
+    #  areaRng    - [...] A=4 object area ranges for evaluation
+    #  maxDets    - [1 10 100] M=3 thresholds on max detections per image
+    #  iouType    - ['segm'] set iouType to 'segm', 'bbox' or 'keypoints'
+    #  iouType replaced the now DEPRECATED useSegm parameter.
+    #  useCats    - [1] if true use category labels for evaluation
+    # Note: if useCats=0 category labels are ignored as in proposal scoring.
+    # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified.
+    #
+    # evaluate(): evaluates detections on every image and every category and
+    # concats the results into the "evalImgs" with fields:
+    #  dtIds      - [1xD] id for each of the D detections (dt)
+    #  gtIds      - [1xG] id for each of the G ground truths (gt)
+    #  dtMatches  - [TxD] matching gt id at each IoU or 0
+    #  gtMatches  - [TxG] matching dt id at each IoU or 0
+    #  dtScores   - [1xD] confidence of each dt
+    #  gtIgnore   - [1xG] ignore flag for each gt
+    #  dtIgnore   - [TxD] ignore flag for each dt at each IoU
+    #
+    # accumulate(): accumulates the per-image, per-category evaluation
+    # results in "evalImgs" into the dictionary "eval" with fields:
+    #  params     - parameters used for evaluation
+    #  date       - date evaluation was performed
+    #  counts     - [T,R,K,A,M] parameter dimensions (see above)
+    #  precision  - [TxRxKxAxM] precision for every evaluation setting
+    #  recall     - [TxKxAxM] max recall for every evaluation setting
+    # Note: precision and recall==-1 for settings with no gt objects.
+    #
+    # See also coco, mask, pycocoDemo, pycocoEvalDemo
+    #
+    # Microsoft COCO Toolbox.      version 2.0
+    # Data, paper, and tutorials available at:  http://mscoco.org/
+    # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
+    # Licensed under the Simplified BSD License [see coco/license.txt]
+    def __init__(self, cocoGt=None, cocoDt=None, iouType='segm'):
+        '''
+        Initialize CocoEval using coco APIs for gt and dt
+        :param cocoGt: coco object with ground truth annotations
+        :param cocoDt: coco object with detection results
+        :return: None
+        '''
+        if not iouType:
+            print('iouType not specified. use default iouType segm')
+        self.cocoGt   = cocoGt              # ground truth COCO API
+        self.cocoDt   = cocoDt              # detections COCO API
+        self.evalImgs = defaultdict(list)   # per-image per-category evaluation results [KxAxI] elements
+        self.eval     = {}                  # accumulated evaluation results
+        self._gts = defaultdict(list)       # gt for evaluation
+        self._dts = defaultdict(list)       # dt for evaluation
+        self.params = Params(iouType=iouType) # parameters
+        self._paramsEval = {}               # parameters for evaluation
+        self.stats = []                     # result summarization
+        self.ious = {}                      # ious between all gts and dts
+        if not cocoGt is None:
+            self.params.imgIds = sorted(cocoGt.getImgIds())
+            self.params.catIds = sorted(cocoGt.getCatIds())
+
+
+    def _prepare(self):
+        '''
+        Prepare ._gts and ._dts for evaluation based on params
+        :return: None
+        '''
+        def _toMask(anns, coco):
+            # modify ann['segmentation'] by reference
+            for ann in anns:
+                rle = coco.annToRLE(ann)
+                ann['segmentation'] = rle
+        p = self.params
+        if p.useCats:
+            gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
+            dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
+        else:
+            gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds))
+            dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds))
+
+        # convert ground truth to mask if iouType == 'segm'
+        if p.iouType == 'segm':
+            _toMask(gts, self.cocoGt)
+            _toMask(dts, self.cocoDt)
+        # set ignore flag
+        for gt in gts:
+            gt['ignore'] = gt['ignore'] if 'ignore' in gt else 0
+            gt['ignore'] = 'iscrowd' in gt and gt['iscrowd']
+            if p.iouType == 'keypoints':
+                gt['ignore'] = (gt['num_keypoints'] == 0) or gt['ignore']
+        self._gts = defaultdict(list)       # gt for evaluation
+        self._dts = defaultdict(list)       # dt for evaluation
+        for gt in gts:
+            self._gts[gt['image_id'], gt['category_id']].append(gt)
+        for dt in dts:
+            self._dts[dt['image_id'], dt['category_id']].append(dt)
+        self.evalImgs = defaultdict(list)   # per-image per-category evaluation results
+        self.eval     = {}                  # accumulated evaluation results
+
+    def evaluate(self):
+        '''
+        Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
+        :return: None
+        '''
+        tic = time.time()
+        print('Running per image evaluation...')
+        p = self.params
+        # add backward compatibility if useSegm is specified in params
+        if not p.useSegm is None:
+            p.iouType = 'segm' if p.useSegm == 1 else 'bbox'
+            print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType))
+        print('Evaluate annotation type *{}*'.format(p.iouType))
+        p.imgIds = list(np.unique(p.imgIds))
+        if p.useCats:
+            p.catIds = list(np.unique(p.catIds))
+        p.maxDets = sorted(p.maxDets)
+        self.params=p
+
+        self._prepare()
+        # loop through images, area range, max detection number
+        catIds = p.catIds if p.useCats else [-1]
+
+        if p.iouType == 'segm' or p.iouType == 'bbox':
+            computeIoU = self.computeIoU
+        elif p.iouType == 'keypoints':
+            computeIoU = self.computeOks
+        self.ious = {(imgId, catId): computeIoU(imgId, catId) \
+                        for imgId in p.imgIds
+                        for catId in catIds}
+
+        evaluateImg = self.evaluateImg
+        maxDet = p.maxDets[-1]
+        self.evalImgs = [evaluateImg(imgId, catId, areaRng, maxDet)
+                 for catId in catIds
+                 for areaRng in p.areaRng
+                 for imgId in p.imgIds
+             ]
+        self._paramsEval = copy.deepcopy(self.params)
+        toc = time.time()
+        print('DONE (t={:0.2f}s).'.format(toc-tic))
+
+    def computeIoU(self, imgId, catId):
+        p = self.params
+        if p.useCats:
+            gt = self._gts[imgId,catId]
+            dt = self._dts[imgId,catId]
+        else:
+            gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]]
+            dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]]
+        if len(gt) == 0 and len(dt) ==0:
+            return []
+        inds = np.argsort([-d['score'] for d in dt], kind='mergesort')
+        dt = [dt[i] for i in inds]
+        if len(dt) > p.maxDets[-1]:
+            dt=dt[0:p.maxDets[-1]]
+
+        if p.iouType == 'segm':
+            g = [g['segmentation'] for g in gt]
+            d = [d['segmentation'] for d in dt]
+        elif p.iouType == 'bbox':
+            g = [g['bbox'] for g in gt]
+            d = [d['bbox'] for d in dt]
+        else:
+            raise Exception('unknown iouType for iou computation')
+
+        # compute iou between each dt and gt region
+        iscrowd = [int(o['iscrowd']) for o in gt]
+        ious = maskUtils.iou(d,g,iscrowd)
+        return ious
+
+    def computeOks(self, imgId, catId):
+        p = self.params
+        # dimention here should be Nxm
+        gts = self._gts[imgId, catId]
+        dts = self._dts[imgId, catId]
+        inds = np.argsort([-d['score'] for d in dts], kind='mergesort')
+        dts = [dts[i] for i in inds]
+        if len(dts) > p.maxDets[-1]:
+            dts = dts[0:p.maxDets[-1]]
+        # if len(gts) == 0 and len(dts) == 0:
+        if len(gts) == 0 or len(dts) == 0:
+            return []
+        ious = np.zeros((len(dts), len(gts)))
+        sigmas = p.kpt_oks_sigmas
+        vars = (sigmas * 2)**2
+        k = len(sigmas)
+        # compute oks between each detection and ground truth object
+        for j, gt in enumerate(gts):
+            # create bounds for ignore regions(double the gt bbox)
+            g = np.array(gt['keypoints'])
+            xg = g[0::3]; yg = g[1::3]; vg = g[2::3]
+            k1 = np.count_nonzero(vg > 0)
+            bb = gt['bbox']
+            x0 = bb[0] - bb[2]; x1 = bb[0] + bb[2] * 2
+            y0 = bb[1] - bb[3]; y1 = bb[1] + bb[3] * 2
+            for i, dt in enumerate(dts):
+                d = np.array(dt['keypoints'])
+                xd = d[0::3]; yd = d[1::3]
+                if k1>0:
+                    # measure the per-keypoint distance if keypoints visible
+                    dx = xd - xg
+                    dy = yd - yg
+                else:
+                    # measure minimum distance to keypoints in (x0,y0) & (x1,y1)
+                    z = np.zeros((k))
+                    dx = np.max((z, x0-xd),axis=0)+np.max((z, xd-x1),axis=0)
+                    dy = np.max((z, y0-yd),axis=0)+np.max((z, yd-y1),axis=0)
+                e = (dx**2 + dy**2) / vars / (gt['area']+np.spacing(1)) / 2
+                if k1 > 0:
+                    e=e[vg > 0]
+                ious[i, j] = np.sum(np.exp(-e)) / e.shape[0]
+        return ious
+
+    def evaluateImg(self, imgId, catId, aRng, maxDet):
+        '''
+        perform evaluation for single category and image
+        :return: dict (single image results)
+        '''
+        p = self.params
+        if p.useCats:
+            gt = self._gts[imgId,catId]
+            dt = self._dts[imgId,catId]
+        else:
+            gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]]
+            dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]]
+        if len(gt) == 0 and len(dt) ==0:
+            return None
+
+        for g in gt:
+            if g['ignore'] or (g['area']<aRng[0] or g['area']>aRng[1]):
+                g['_ignore'] = 1
+            else:
+                g['_ignore'] = 0
+
+        # sort dt highest score first, sort gt ignore last
+        gtind = np.argsort([g['_ignore'] for g in gt], kind='mergesort')
+        gt = [gt[i] for i in gtind]
+        dtind = np.argsort([-d['score'] for d in dt], kind='mergesort')
+        dt = [dt[i] for i in dtind[0:maxDet]]
+        iscrowd = [int(o['iscrowd']) for o in gt]
+        # load computed ious
+        ious = self.ious[imgId, catId][:, gtind] if len(self.ious[imgId, catId]) > 0 else self.ious[imgId, catId]
+
+        T = len(p.iouThrs)
+        G = len(gt)
+        D = len(dt)
+        gtm  = np.zeros((T,G))
+        dtm  = np.zeros((T,D))
+        gtIg = np.array([g['_ignore'] for g in gt])
+        dtIg = np.zeros((T,D))
+        if not len(ious)==0:
+            for tind, t in enumerate(p.iouThrs):
+                for dind, d in enumerate(dt):
+                    # information about best match so far (m=-1 -> unmatched)
+                    iou = min([t,1-1e-10])
+                    m   = -1
+                    for gind, g in enumerate(gt):
+                        # if this gt already matched, and not a crowd, continue
+                        if gtm[tind,gind]>0 and not iscrowd[gind]:
+                            continue
+                        # if dt matched to reg gt, and on ignore gt, stop
+                        if m>-1 and gtIg[m]==0 and gtIg[gind]==1:
+                            break
+                        # continue to next gt unless better match made
+                        if ious[dind,gind] < iou:
+                            continue
+                        # if match successful and best so far, store appropriately
+                        iou=ious[dind,gind]
+                        m=gind
+                    # if match made store id of match for both dt and gt
+                    if m ==-1:
+                        continue
+                    dtIg[tind,dind] = gtIg[m]
+                    dtm[tind,dind]  = gt[m]['id']
+                    gtm[tind,m]     = d['id']
+        # set unmatched detections outside of area range to ignore
+        a = np.array([d['area']<aRng[0] or d['area']>aRng[1] for d in dt]).reshape((1, len(dt)))
+        dtIg = np.logical_or(dtIg, np.logical_and(dtm==0, np.repeat(a,T,0)))
+        # store results for given image and category
+        return {
+                'image_id':     imgId,
+                'category_id':  catId,
+                'aRng':         aRng,
+                'maxDet':       maxDet,
+                'dtIds':        [d['id'] for d in dt],
+                'gtIds':        [g['id'] for g in gt],
+                'dtMatches':    dtm,
+                'gtMatches':    gtm,
+                'dtScores':     [d['score'] for d in dt],
+                'gtIgnore':     gtIg,
+                'dtIgnore':     dtIg,
+            }
+
+    def accumulate(self, p = None):
+        '''
+        Accumulate per image evaluation results and store the result in self.eval
+        :param p: input params for evaluation
+        :return: None
+        '''
+        print('Accumulating evaluation results...')
+        tic = time.time()
+        if not self.evalImgs:
+            print('Please run evaluate() first')
+        # allows input customized parameters
+        if p is None:
+            p = self.params
+        p.catIds = p.catIds if p.useCats == 1 else [-1]
+        T           = len(p.iouThrs)
+        R           = len(p.recThrs)
+        K           = len(p.catIds) if p.useCats else 1
+        A           = len(p.areaRng)
+        M           = len(p.maxDets)
+        precision   = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories
+        recall      = -np.ones((T,K,A,M))
+        scores      = -np.ones((T,R,K,A,M))
+
+        # create dictionary for future indexing
+        _pe = self._paramsEval
+        catIds = _pe.catIds if _pe.useCats else [-1]
+        setK = set(catIds)
+        setA = set(map(tuple, _pe.areaRng))
+        setM = set(_pe.maxDets)
+        setI = set(_pe.imgIds)
+        # get inds to evaluate
+        k_list = [n for n, k in enumerate(p.catIds)  if k in setK]
+        m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
+        a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA]
+        i_list = [n for n, i in enumerate(p.imgIds)  if i in setI]
+        I0 = len(_pe.imgIds)
+        A0 = len(_pe.areaRng)
+        # retrieve E at each category, area range, and max number of detections
+        for k, k0 in enumerate(k_list):
+            Nk = k0*A0*I0
+            for a, a0 in enumerate(a_list):
+                Na = a0*I0
+                for m, maxDet in enumerate(m_list):
+                    E = [self.evalImgs[Nk + Na + i] for i in i_list]
+                    E = [e for e in E if not e is None]
+                    if len(E) == 0:
+                        continue
+                    dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E])
+
+                    # different sorting method generates slightly different results.
+                    # mergesort is used to be consistent as Matlab implementation.
+                    inds = np.argsort(-dtScores, kind='mergesort')
+                    dtScoresSorted = dtScores[inds]
+
+                    dtm  = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds]
+                    dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet]  for e in E], axis=1)[:,inds]
+                    gtIg = np.concatenate([e['gtIgnore'] for e in E])
+                    npig = np.count_nonzero(gtIg==0 )
+                    if npig == 0:
+                        continue
+                    tps = np.logical_and(               dtm,  np.logical_not(dtIg) )
+                    fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) )
+
+                    tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float)
+                    fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float)
+                    for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
+                        tp = np.array(tp)
+                        fp = np.array(fp)
+                        nd = len(tp)
+                        rc = tp / npig
+                        pr = tp / (fp+tp+np.spacing(1))
+                        q  = np.zeros((R,))
+                        ss = np.zeros((R,))
+
+                        if nd:
+                            recall[t,k,a,m] = rc[-1]
+                        else:
+                            recall[t,k,a,m] = 0
+
+                        # numpy is slow without cython optimization for accessing elements
+                        # use python array gets significant speed improvement
+                        pr = pr.tolist(); q = q.tolist()
+
+                        for i in range(nd-1, 0, -1):
+                            if pr[i] > pr[i-1]:
+                                pr[i-1] = pr[i]
+
+                        inds = np.searchsorted(rc, p.recThrs, side='left')
+                        try:
+                            for ri, pi in enumerate(inds):
+                                q[ri] = pr[pi]
+                                ss[ri] = dtScoresSorted[pi]
+                        except:
+                            pass
+                        precision[t,:,k,a,m] = np.array(q)
+                        scores[t,:,k,a,m] = np.array(ss)
+        self.eval = {
+            'params': p,
+            'counts': [T, R, K, A, M],
+            'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+            'precision': precision,
+            'recall':   recall,
+            'scores': scores,
+        }
+        toc = time.time()
+        print('DONE (t={:0.2f}s).'.format( toc-tic))
+
+    def summarize(self):
+        '''
+        Compute and display summary metrics for evaluation results.
+        Note this functin can *only* be applied on the default parameter setting
+        '''
+        def _summarize( ap=1, iouThr=None, areaRng='all', maxDets=100 ):
+            p = self.params
+            iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
+            titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
+            typeStr = '(AP)' if ap==1 else '(AR)'
+            iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
+                if iouThr is None else '{:0.2f}'.format(iouThr)
+
+            aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
+            mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
+            if ap == 1:
+                # dimension of precision: [TxRxKxAxM]
+                s = self.eval['precision']
+                # IoU
+                if iouThr is not None:
+                    t = np.where(iouThr == p.iouThrs)[0]
+                    s = s[t]
+                s = s[:,:,:,aind,mind]
+            else:
+                # dimension of recall: [TxKxAxM]
+                s = self.eval['recall']
+                if iouThr is not None:
+                    t = np.where(iouThr == p.iouThrs)[0]
+                    s = s[t]
+                s = s[:,:,aind,mind]
+            if len(s[s>-1])==0:
+                mean_s = -1
+            else:
+                mean_s = np.mean(s[s>-1])
+            print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s))
+            return mean_s
+        def _summarizeDets():
+            stats = np.zeros((12,))
+            stats[0] = _summarize(1)
+            stats[1] = _summarize(1, iouThr=.5, maxDets=self.params.maxDets[2])
+            stats[2] = _summarize(1, iouThr=.75, maxDets=self.params.maxDets[2])
+            stats[3] = _summarize(1, areaRng='small', maxDets=self.params.maxDets[2])
+            stats[4] = _summarize(1, areaRng='medium', maxDets=self.params.maxDets[2])
+            stats[5] = _summarize(1, areaRng='large', maxDets=self.params.maxDets[2])
+            stats[6] = _summarize(0, maxDets=self.params.maxDets[0])
+            stats[7] = _summarize(0, maxDets=self.params.maxDets[1])
+            stats[8] = _summarize(0, maxDets=self.params.maxDets[2])
+            stats[9] = _summarize(0, areaRng='small', maxDets=self.params.maxDets[2])
+            stats[10] = _summarize(0, areaRng='medium', maxDets=self.params.maxDets[2])
+            stats[11] = _summarize(0, areaRng='large', maxDets=self.params.maxDets[2])
+            return stats
+        def _summarizeKps():
+            stats = np.zeros((10,))
+            stats[0] = _summarize(1, maxDets=20)
+            stats[1] = _summarize(1, maxDets=20, iouThr=.5)
+            stats[2] = _summarize(1, maxDets=20, iouThr=.75)
+            stats[3] = _summarize(1, maxDets=20, areaRng='medium')
+            stats[4] = _summarize(1, maxDets=20, areaRng='large')
+            stats[5] = _summarize(0, maxDets=20)
+            stats[6] = _summarize(0, maxDets=20, iouThr=.5)
+            stats[7] = _summarize(0, maxDets=20, iouThr=.75)
+            stats[8] = _summarize(0, maxDets=20, areaRng='medium')
+            stats[9] = _summarize(0, maxDets=20, areaRng='large')
+            return stats
+        if not self.eval:
+            raise Exception('Please run accumulate() first')
+        iouType = self.params.iouType
+        if iouType == 'segm' or iouType == 'bbox':
+            summarize = _summarizeDets
+        elif iouType == 'keypoints':
+            summarize = _summarizeKps
+        self.stats = summarize()
+
+    def __str__(self):
+        self.summarize()
+
+class Params:
+    '''
+    Params for coco evaluation api
+    '''
+    def setDetParams(self):
+        self.imgIds = []
+        self.catIds = []
+        # np.arange causes trouble.  the data point on arange is slightly larger than the true value
+        self.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
+        self.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, endpoint=True)
+        self.maxDets = [1, 10, 100]
+        self.areaRng = [[0 ** 2, 1e5 ** 2], [0 ** 2, 32 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
+        self.areaRngLbl = ['all', 'small', 'medium', 'large']
+        self.useCats = 1
+
+    def setKpParams(self):
+        self.imgIds = []
+        self.catIds = []
+        # np.arange causes trouble.  the data point on arange is slightly larger than the true value
+        self.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
+        self.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, endpoint=True)
+        self.maxDets = [20]
+        self.areaRng = [[0 ** 2, 1e5 ** 2], [32 ** 2, 96 ** 2], [96 ** 2, 1e5 ** 2]]
+        self.areaRngLbl = ['all', 'medium', 'large']
+        self.useCats = 1
+        self.kpt_oks_sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62,.62, 1.07, 1.07, .87, .87, .89, .89])/10.0
+
+    def __init__(self, iouType='segm'):
+        if iouType == 'segm' or iouType == 'bbox':
+            self.setDetParams()
+        elif iouType == 'keypoints':
+            self.setKpParams()
+        else:
+            raise Exception('iouType not supported')
+        self.iouType = iouType
+        # useSegm is deprecated
+        self.useSegm = None
diff --git a/nlp_metrics/pycocotools/mask.py b/nlp_metrics/pycocotools/mask.py
new file mode 100644
index 0000000..b5493fe
--- /dev/null
+++ b/nlp_metrics/pycocotools/mask.py
@@ -0,0 +1,103 @@
+__author__ = 'tsungyi'
+
+import nlp_metrics.pycocotools._mask as _mask
+
+# Interface for manipulating masks stored in RLE format.
+#
+# RLE is a simple yet efficient format for storing binary masks. RLE
+# first divides a vector (or vectorized image) into a series of piecewise
+# constant regions and then for each piece simply stores the length of
+# that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
+# be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
+# (note that the odd counts are always the numbers of zeros). Instead of
+# storing the counts directly, additional compression is achieved with a
+# variable bitrate representation based on a common scheme called LEB128.
+#
+# Compression is greatest given large piecewise constant regions.
+# Specifically, the size of the RLE is proportional to the number of
+# *boundaries* in M (or for an image the number of boundaries in the y
+# direction). Assuming fairly simple shapes, the RLE representation is
+# O(sqrt(n)) where n is number of pixels in the object. Hence space usage
+# is substantially lower, especially for large simple objects (large n).
+#
+# Many common operations on masks can be computed directly using the RLE
+# (without need for decoding). This includes computations such as area,
+# union, intersection, etc. All of these operations are linear in the
+# size of the RLE, in other words they are O(sqrt(n)) where n is the area
+# of the object. Computing these operations on the original mask is O(n).
+# Thus, using the RLE can result in substantial computational savings.
+#
+# The following API functions are defined:
+#  encode         - Encode binary masks using RLE.
+#  decode         - Decode binary masks encoded via RLE.
+#  merge          - Compute union or intersection of encoded masks.
+#  iou            - Compute intersection over union between masks.
+#  area           - Compute area of encoded masks.
+#  toBbox         - Get bounding boxes surrounding encoded masks.
+#  frPyObjects    - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
+#
+# Usage:
+#  Rs     = encode( masks )
+#  masks  = decode( Rs )
+#  R      = merge( Rs, intersect=false )
+#  o      = iou( dt, gt, iscrowd )
+#  a      = area( Rs )
+#  bbs    = toBbox( Rs )
+#  Rs     = frPyObjects( [pyObjects], h, w )
+#
+# In the API the following formats are used:
+#  Rs      - [dict] Run-length encoding of binary masks
+#  R       - dict Run-length encoding of binary mask
+#  masks   - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
+#  iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
+#  bbs     - [nx4] Bounding box(es) stored as [x y w h]
+#  poly    - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
+#  dt,gt   - May be either bounding boxes or encoded masks
+# Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
+#
+# Finally, a note about the intersection over union (iou) computation.
+# The standard iou of a ground truth (gt) and detected (dt) object is
+#  iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
+# For "crowd" regions, we use a modified criteria. If a gt object is
+# marked as "iscrowd", we allow a dt to match any subregion of the gt.
+# Choosing gt' in the crowd gt that best matches the dt can be done using
+# gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
+#  iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
+# For crowd gt regions we use this modified criteria above for the iou.
+#
+# To compile run "python setup.py build_ext --inplace"
+# Please do not contact us for help with compiling.
+#
+# Microsoft COCO Toolbox.      version 2.0
+# Data, paper, and tutorials available at:  http://mscoco.org/
+# Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
+# Licensed under the Simplified BSD License [see coco/license.txt]
+
+iou         = _mask.iou
+merge       = _mask.merge
+frPyObjects = _mask.frPyObjects
+
+def encode(bimask):
+    if len(bimask.shape) == 3:
+        return _mask.encode(bimask)
+    elif len(bimask.shape) == 2:
+        h, w = bimask.shape
+        return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0]
+
+def decode(rleObjs):
+    if type(rleObjs) == list:
+        return _mask.decode(rleObjs)
+    else:
+        return _mask.decode([rleObjs])[:,:,0]
+
+def area(rleObjs):
+    if type(rleObjs) == list:
+        return _mask.area(rleObjs)
+    else:
+        return _mask.area([rleObjs])[0]
+
+def toBbox(rleObjs):
+    if type(rleObjs) == list:
+        return _mask.toBbox(rleObjs)
+    else:
+        return _mask.toBbox([rleObjs])[0]
\ No newline at end of file
diff --git a/nlp_metrics/setup.py b/nlp_metrics/setup.py
new file mode 100644
index 0000000..0267835
--- /dev/null
+++ b/nlp_metrics/setup.py
@@ -0,0 +1,27 @@
+from setuptools import setup, Extension
+import numpy as np
+
+# To compile and install locally run "python setup.py build_ext --inplace"
+# To install library to Python site-packages run "python setup.py build_ext install"
+
+ext_modules = [
+    Extension(
+        'pycocotools._mask',
+        sources=['common/maskApi.c', 'pycocotools/_mask.pyx'],
+        include_dirs = [np.get_include(), 'common'],
+        extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'],
+    )
+]
+
+setup(
+    name='pycocotools',
+    packages=['pycocotools'],
+    package_dir = {'pycocotools': 'pycocotools'},
+    install_requires=[
+        'setuptools>=18.0',
+        'cython>=0.27.3',
+        'matplotlib>=2.1.0'
+    ],
+    version='2.0',
+    ext_modules= ext_modules
+)