Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
345 changed files
with
587,846 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
# Byte-compiled / optimized / DLL files | ||
__pycache__/ | ||
*.py[cod] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
__author__ = 'rama' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
"""
Convert a .mat file with candidate-sentence structures into two JSON files
(suffixes 'A' and 'B') in the COCO-style list-of-dicts format:
    [{'image_id': ..., 'caption': ...}, ...]

Created on : 5/18/15 3:27 PM by rama
"""

import scipy.io as io
import os
import json

# Input .mat file and the common stem for the two output .json files.
pathToMat = '/Users/rama/Research/data/pyCider/'
matfile = 'pascal_cands.mat'
jsonfile = 'pascal_cands'

# 'cands' is a 1xN struct array; each entry carries an image path and its
# candidate sentences.
data = io.loadmat(os.path.join(pathToMat, matfile))
refs = list(data['cands'][0])

A = []  # captions at sentence index 1
B = []  # all other captions

for image in refs:
    for sentences in image[1]:
        for i, sent in enumerate(sentences):
            sent_struct = {}
            # image[0][0] is the full image path; keep only the file name.
            imname = str(image[0][0]).split('/')[-1]
            sent_struct['image_id'] = imname
            # Sentences may end with a backslash-delimited marker; when a
            # backslash is present, drop everything after the last one.
            string_sent = sent[0].strip().split('\\')
            if len(string_sent) == 1:
                sent_struct['caption'] = string_sent[0]
            else:
                sent_struct['caption'] = ' '.join(string_sent[:-1])
            # The second sentence (index 1) goes to split A, the rest to B.
            if i == 1:
                A.append(sent_struct)
            else:
                B.append(sent_struct)

with open(os.path.join(pathToMat, jsonfile + 'A.json'), 'w') as outfile:
    json.dump(A, outfile)

with open(os.path.join(pathToMat, jsonfile + 'B.json'), 'w') as outfile:
    json.dump(B, outfile)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
""" | ||
Load the reference and candidate json files, which are to be evaluated using CIDEr. | ||
Reference file: list of dict('image_id': image_id, 'caption': caption). | ||
Candidate file: list of dict('image_id': image_id, 'caption': caption). | ||
""" | ||
import json | ||
import os | ||
from collections import defaultdict | ||
|
||
class LoadData():
    """Load reference and candidate caption files for CIDEr evaluation.

    Both files are JSON lists of dicts with keys 'image_id' and 'caption'.
    """

    def __init__(self, path):
        # Directory containing the reference and candidate json files.
        self.pathToData = path

    def readJson(self, refname, candname):
        """Read reference and candidate captions from json files.

        :param refname: file name of the reference json (list of dicts
            with 'image_id' and 'caption' keys).
        :param candname: file name of the candidate json, same format.
        :returns: tuple (gts, res) where gts maps each image_id to a list
            of {'caption': ...} dicts and res is the candidate list
            exactly as loaded.
        """
        path_to_ref_file = os.path.join(self.pathToData, refname)
        path_to_cand_file = os.path.join(self.pathToData, candname)

        # Use context managers so the handles are closed deterministically
        # (the original json.loads(open(...).read()) leaked them), and let
        # json.load read the stream directly.
        with open(path_to_ref_file, 'r') as ref_file:
            ref_list = json.load(ref_file)
        with open(path_to_cand_file, 'r') as cand_file:
            cand_list = json.load(cand_file)

        # Group the reference captions by image id.
        gts = defaultdict(list)
        for entry in ref_list:
            gts[entry['image_id']].append({"caption": entry['caption']})

        return gts, cand_list
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
Consensus-based Image Description Evaluation (CIDEr Code) | ||
=================== | ||
|
||
Evaluation code for CIDEr metric. Provides CIDEr as well as | ||
CIDEr-D (CIDEr Defended) which is more robust to gaming effects. | ||
|
||
## Important Note ## | ||
CIDEr by default (with idf parameter set to "corpus" mode) computes IDF values using the reference sentences provided. Thus, the CIDEr score for a reference dataset with only 1 image will be zero. When evaluating using one (or a few) images, set idf to "coco-val-df" instead, which uses IDF from the MSCOCO Validation Dataset for reliable results.
|
||
## Requirements ## | ||
- java 1.8.0 | ||
- python 2.7 | ||
|
||
For running the ipython notebook file, update your Ipython to [Jupyter](https://jupyter.org/) | ||
|
||
## Files ## | ||
./ | ||
- cidereval.py (demo script) | ||
|
||
./PyDataFormat | ||
- loadData.py (load the json files for references and candidates) | ||
|
||
- {$result\_file}.json (file with the CIDEr and CIDEr-D scores) | ||
|
||
./pycocoevalcap: The folder where all evaluation codes are stored. | ||
- evals.py: Performs tokenization and runs both the metrics | ||
- tokenizer: Python wrapper of Stanford CoreNLP PTBTokenizer | ||
- cider: CIDEr evaluation codes | ||
- ciderD: CIDEr-D evaluation codes | ||
|
||
## Instructions ## | ||
1. Edit the params.json file to contain path to reference and candidate json files, and the result file where the scores are stored<sup>\*</sup>. | ||
2. Set the "idf" value in params.json to "corpus" if not evaluating on a single image/instance. Set the "idf" value to "coco-val-df" if evaluating on a single image. In this case IDF values from the MSCOCO dataset are used. If using some other corpus, get the document frequencies into a similar format as "coco-val-df", and put them in the data/ folder as a pickle file. Then set mode to the name of the document frequency file (without the '.p' extension). | ||
3. Sample json reference and candidate files are pascal50S.json and pascal_candsB.json | ||
4. CIDEr scores are stored in "scores" variable: scores['CIDEr'] -> CIDEr scores, scores['CIDErD'] -> CIDEr-D scores | ||
|
||
<sup>*</sup>Even when evaluating with independent candidate/references (for eg. when using "coco-val-df"), put multiple candidate and reference entries into the same json files. This is much faster than having separate candidate and reference files and calling the evaluation code separately on each candidate/reference file. | ||
## References ## | ||
|
||
- PTBTokenizer: We use the [Stanford Tokenizer](http://nlp.stanford.edu/software/tokenizer.shtml) which is included in [Stanford CoreNLP 3.4.1](http://nlp.stanford.edu/software/corenlp.shtml). | ||
- CIDEr: [CIDEr: Consensus-based Image Description Evaluation](http://arxiv.org/pdf/1411.5726.pdf)
|
||
## Developers ## | ||
- Ramakrishna Vedantam (Virginia Tech)
|
||
## Acknowledgments ## | ||
- MS COCO Caption Evaluation Team |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# demo script for running CIDEr\n", | ||
"from PyDataFormat.loadData import LoadData\n", | ||
"import pdb\n", | ||
"import json\n", | ||
"from pyciderevalcap.eval import CIDErEvalCap as ciderEval\n", | ||
"from collections import defaultdict\n", | ||
"\n", | ||
"pathToData = './data/'\n", | ||
"\n", | ||
"refName = 'pascal50S.json'\n", | ||
"candName = 'pascal_candsB.json'\n", | ||
"\n", | ||
"result_file = 'results.json'" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 13, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# load reference and candidate sentences\n", | ||
"loadDat = LoadData(pathToData)\n", | ||
"gts, res = loadDat.readJson(refName, candName)\n", | ||
"\n", | ||
"#res = res[:100]\n", | ||
"#gts = {img['image_id']: gts[img['image_id']] for img in res}" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 15, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"from pyciderevalcap.ciderD.ciderD import CiderD\n", | ||
"from pyciderevalcap.cider.cider import Cider\n", | ||
"from pyciderevalcap.tokenizer.ptbtokenizer import PTBTokenizer\n", | ||
"tokenizer = PTBTokenizer('gts')\n", | ||
"_gts = tokenizer.tokenize(gts)\n", | ||
"tokenizer = PTBTokenizer('res')\n", | ||
"_res = tokenizer.tokenize(res)\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"scorer = Cider(df='coco-val')\n", | ||
"scorerD = CiderD(df='coco-val')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 16, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"0.535560513246\n", | ||
"0.448542862876\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"score, scores = scorer.compute_score(_gts, _res)\n", | ||
"scoreD, scoresD = scorerD.compute_score(_gts, _res)\n", | ||
"print score\n", | ||
"print scoreD" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 2", | ||
"language": "python", | ||
"name": "python2" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 2 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython2", | ||
"version": "2.7.11" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 0 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# coding: utf-8
"""
Demo script for running CIDEr.

Reads file paths and the idf mode from params.json, loads the reference
and candidate captions, scores the candidates with CIDEr / CIDEr-D, and
dumps the per-candidate scores to the configured result file.
"""

# In[1]:

# demo script for running CIDEr
import json
from pydataformat.loadData import LoadData
from pyciderevalcap.eval import CIDErEvalCap as ciderEval

# load the configuration file (closed promptly via the context manager)
with open('params.json', 'r') as config_file:
    config = json.load(config_file)

pathToData = config['pathToData']  # directory holding the json files
refName = config['refName']        # reference captions file name
candName = config['candName']      # candidate captions file name
resultFile = config['resultFile']  # output file for the scores
df_mode = config['idf']            # 'corpus' or 'coco-val-df'

# Print the parameters (parenthesized form prints identically under
# Python 2 and stays valid under Python 3)
print("Running CIDEr with the following settings")
print("*****************************")
print("Reference File:%s" % (refName))
print("Candidate File:%s" % (candName))
print("Result File:%s" % (resultFile))
print("IDF:%s" % (df_mode))
print("*****************************")


# In[2]:

# load reference and candidate sentences
loadDat = LoadData(pathToData)
gts, res = loadDat.readJson(refName, candName)


# In[3]:

# calculate cider scores
scorer = ciderEval(gts, res, df_mode)
# scores: dict of list with key = metric and value = score given to each
# candidate
scores = scorer.evaluate()


# In[7]:

# scores['CIDEr'] contains CIDEr scores in a list for each candidate
# scores['CIDErD'] contains CIDEr-D scores in a list for each candidate

with open(resultFile, 'w') as outfile:
    json.dump(scores, outfile)
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Git LFS file not shown
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
Copyright (c) 2015, Xinlei Chen, Hao Fang, Tsung-Yi Lin, and Ramakrishna Vedantam | ||
All rights reserved. | ||
|
||
Redistribution and use in source and binary forms, with or without | ||
modification, are permitted provided that the following conditions are met: | ||
|
||
1. Redistributions of source code must retain the above copyright notice, this | ||
list of conditions and the following disclaimer. | ||
2. Redistributions in binary form must reproduce the above copyright notice, | ||
this list of conditions and the following disclaimer in the documentation | ||
and/or other materials provided with the distribution. | ||
|
||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | ||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | ||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR | ||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | ||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | ||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | ||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
|
||
The views and conclusions contained in the software and documentation are those | ||
of the authors and should not be interpreted as representing official policies, | ||
either expressed or implied, of the FreeBSD Project. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
{ | ||
"pathToData" : "data/", | ||
"refName" : "pascal50S.json", | ||
"candName" : "pascal_candsB.json", | ||
"resultFile" : "results.json", | ||
"idf" : "coco-val-df" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
__author__ = 'tylin' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
__author__ = 'tylin' |
Oops, something went wrong.