Skip to content

Commit

Permalink
update xnli download link
Browse files Browse the repository at this point in the history
  • Loading branch information
lhoestq committed Oct 1, 2020
1 parent 1483a62 commit 7c30148
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 2 deletions.
2 changes: 1 addition & 1 deletion datasets/xnli/dataset_infos.json
@@ -1 +1 @@
{"plain_text": {"description": "XNLI is a subset of a few thousand examples from MNLI which has been translated\ninto a 14 different languages (some low-ish resource). As with MNLI, the goal is\nto predict textual entailment (does sentence A imply/contradict/neither sentence\nB) and is a classification task (given two sentences, predict one of three\nlabels).\n", "citation": "@InProceedings{conneau2018xnli,\n author = \"Conneau, Alexis\n and Rinott, Ruty\n and Lample, Guillaume\n and Williams, Adina\n and Bowman, Samuel R.\n and Schwenk, Holger\n and Stoyanov, Veselin\",\n title = \"XNLI: Evaluating Cross-lingual Sentence Representations\",\n booktitle = \"Proceedings of the 2018 Conference on Empirical Methods\n in Natural Language Processing\",\n year = \"2018\",\n publisher = \"Association for Computational Linguistics\",\n location = \"Brussels, Belgium\",\n}", "homepage": "https://www.nyu.edu/projects/bowman/xnli/", "license": "", "features": {"premise": {"languages": ["ar", "bg", "de", "el", "en", "es", "fr", "hi", "ru", "sw", "th", "tr", "ur", "vi", "zh"], "id": null, "_type": "Translation"}, "hypothesis": {"languages": ["ar", "bg", "de", "el", "en", "es", "fr", "hi", "ru", "sw", "th", "tr", "ur", "vi", "zh"], "num_languages": 15, "id": null, "_type": "TranslationVariableLanguages"}, "label": {"num_classes": 3, "names": ["entailment", "neutral", "contradiction"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "supervised_keys": null, "builder_name": "xnli", "config_name": "plain_text", "version": {"version_str": "1.0.0", "description": "", "datasets_version_to_prepare": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 19419463, "num_examples": 5010, "dataset_name": "xnli"}, "validation": {"name": "validation", "num_bytes": 9582145, "num_examples": 2490, "dataset_name": "xnli"}}, "download_checksums": {"https://www.nyu.edu/projects/bowman/xnli/XNLI-1.0.zip": {"num_bytes": 17865352, "checksum": 
"4ba1d5e1afdb7161f0f23c66dc787802ccfa8a25a3ddd3b165a35e50df346ab1"}}, "download_size": 17865352, "dataset_size": 29001608, "size_in_bytes": 46866960}}
{"plain_text": {"description": "XNLI is a subset of a few thousand examples from MNLI which has been translated\ninto a 14 different languages (some low-ish resource). As with MNLI, the goal is\nto predict textual entailment (does sentence A imply/contradict/neither sentence\nB) and is a classification task (given two sentences, predict one of three\nlabels).\n", "citation": "@InProceedings{conneau2018xnli,\n author = {Conneau, Alexis\n and Rinott, Ruty\n and Lample, Guillaume\n and Williams, Adina\n and Bowman, Samuel R.\n and Schwenk, Holger\n and Stoyanov, Veselin},\n title = {XNLI: Evaluating Cross-lingual Sentence Representations},\n booktitle = {Proceedings of the 2018 Conference on Empirical Methods\n in Natural Language Processing},\n year = {2018},\n publisher = {Association for Computational Linguistics},\n location = {Brussels, Belgium},\n}", "homepage": "https://www.nyu.edu/projects/bowman/xnli/", "license": "", "features": {"premise": {"languages": ["ar", "bg", "de", "el", "en", "es", "fr", "hi", "ru", "sw", "th", "tr", "ur", "vi", "zh"], "id": null, "_type": "Translation"}, "hypothesis": {"languages": ["ar", "bg", "de", "el", "en", "es", "fr", "hi", "ru", "sw", "th", "tr", "ur", "vi", "zh"], "num_languages": 15, "id": null, "_type": "TranslationVariableLanguages"}, "label": {"num_classes": 3, "names": ["entailment", "neutral", "contradiction"], "names_file": null, "id": null, "_type": "ClassLabel"}}, "post_processed": null, "supervised_keys": null, "builder_name": "xnli", "config_name": "plain_text", "version": {"version_str": "1.0.0", "description": "", "major": 1, "minor": 0, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 19387508, "num_examples": 5010, "dataset_name": "xnli"}, "validation": {"name": "validation", "num_bytes": 9566255, "num_examples": 2490, "dataset_name": "xnli"}}, "download_checksums": {"https://cims.nyu.edu/~sbowman/xnli/XNLI-1.0.zip": {"num_bytes": 17865352, "checksum": 
"4ba1d5e1afdb7161f0f23c66dc787802ccfa8a25a3ddd3b165a35e50df346ab1"}}, "download_size": 17865352, "post_processing_size": null, "dataset_size": 28953763, "size_in_bytes": 46819115}}
2 changes: 1 addition & 1 deletion datasets/xnli/xnli.py
Expand Up @@ -52,7 +52,7 @@
labels).
"""

-_DATA_URL = "https://www.nyu.edu/projects/bowman/xnli/XNLI-1.0.zip"
+_DATA_URL = "https://cims.nyu.edu/~sbowman/xnli/XNLI-1.0.zip"

_LANGUAGES = ("ar", "bg", "de", "el", "en", "es", "fr", "hi", "ru", "sw", "th", "tr", "ur", "vi", "zh")

Expand Down

1 comment on commit 7c30148

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Show benchmarks

PyArrow==0.17.1

Show updated benchmarks!

Benchmark: benchmark_array_xd.json

metric read_batch_formatted_as_numpy after write_array2d read_batch_formatted_as_numpy after write_flattened_sequence read_batch_formatted_as_numpy after write_nested_sequence read_batch_unformated after write_array2d read_batch_unformated after write_flattened_sequence read_batch_unformated after write_nested_sequence read_col_formatted_as_numpy after write_array2d read_col_formatted_as_numpy after write_flattened_sequence read_col_formatted_as_numpy after write_nested_sequence read_col_unformated after write_array2d read_col_unformated after write_flattened_sequence read_col_unformated after write_nested_sequence read_formatted_as_numpy after write_array2d read_formatted_as_numpy after write_flattened_sequence read_formatted_as_numpy after write_nested_sequence read_unformated after write_array2d read_unformated after write_flattened_sequence read_unformated after write_nested_sequence write_array2d write_flattened_sequence write_nested_sequence
new / old (diff) 0.019832 / 0.011353 (0.008479) 0.017231 / 0.011008 (0.006223) 0.047087 / 0.038508 (0.008579) 0.035292 / 0.023109 (0.012183) 0.237494 / 0.275898 (-0.038404) 0.249294 / 0.323480 (-0.074186) 0.010947 / 0.007986 (0.002961) 0.004352 / 0.004328 (0.000024) 0.007051 / 0.004250 (0.002801) 0.051796 / 0.037052 (0.014744) 0.223390 / 0.258489 (-0.035099) 0.242283 / 0.293841 (-0.051558) 0.173081 / 0.128546 (0.044535) 0.138366 / 0.075646 (0.062719) 0.499264 / 0.419271 (0.079992) 0.540261 / 0.043533 (0.496728) 0.232428 / 0.255139 (-0.022711) 0.252359 / 0.283200 (-0.030841) 0.089724 / 0.141683 (-0.051959) 1.961042 / 1.452155 (0.508888) 2.125505 / 1.492716 (0.632789)

Benchmark: benchmark_indices_mapping.json

metric select shard shuffle sort train_test_split
new / old (diff) 0.043068 / 0.037411 (0.005657) 0.024464 / 0.014526 (0.009938) 0.075750 / 0.176557 (-0.100806) 0.175593 / 0.737135 (-0.561542) 0.206376 / 0.296338 (-0.089963)

Benchmark: benchmark_iterating.json

metric read 5000 read 50000 read_batch 50000 10 read_batch 50000 100 read_batch 50000 1000 read_formatted numpy 5000 read_formatted pandas 5000 read_formatted tensorflow 5000 read_formatted torch 5000 read_formatted_batch numpy 5000 10 read_formatted_batch numpy 5000 1000 shuffled read 5000 shuffled read 50000 shuffled read_batch 50000 10 shuffled read_batch 50000 100 shuffled read_batch 50000 1000 shuffled read_formatted numpy 5000 shuffled read_formatted_batch numpy 5000 10 shuffled read_formatted_batch numpy 5000 1000
new / old (diff) 0.226956 / 0.215209 (0.011747) 2.230286 / 2.077655 (0.152631) 1.320853 / 1.504120 (-0.183267) 1.175802 / 1.541195 (-0.365392) 1.273995 / 1.468490 (-0.194495) 7.466949 / 4.584777 (2.882172) 6.333737 / 3.745712 (2.588025) 8.973548 / 5.269862 (3.703687) 7.766758 / 4.565676 (3.201081) 0.755498 / 0.424275 (0.331223) 0.012226 / 0.007607 (0.004619) 0.257134 / 0.226044 (0.031090) 2.646225 / 2.268929 (0.377297) 1.855319 / 55.444624 (-53.589305) 1.668225 / 6.876477 (-5.208252) 1.726153 / 2.142072 (-0.415919) 7.583909 / 4.805227 (2.778682) 9.157329 / 6.500664 (2.656665) 9.676143 / 0.075469 (9.600674)

Benchmark: benchmark_map_filter.json

metric filter map fast-tokenizer batched map identity map identity batched map no-op batched map no-op batched numpy map no-op batched pandas map no-op batched pytorch map no-op batched tensorflow
new / old (diff) 15.416203 / 1.841788 (13.574415) 16.109855 / 8.074308 (8.035547) 16.678082 / 10.191392 (6.486690) 0.530979 / 0.680424 (-0.149445) 0.354116 / 0.534201 (-0.180085) 0.898286 / 0.579283 (0.319003) 0.671001 / 0.434364 (0.236638) 0.867661 / 0.540337 (0.327324) 1.792642 / 1.386936 (0.405706)
PyArrow==1.0
Show updated benchmarks!

Benchmark: benchmark_array_xd.json

metric read_batch_formatted_as_numpy after write_array2d read_batch_formatted_as_numpy after write_flattened_sequence read_batch_formatted_as_numpy after write_nested_sequence read_batch_unformated after write_array2d read_batch_unformated after write_flattened_sequence read_batch_unformated after write_nested_sequence read_col_formatted_as_numpy after write_array2d read_col_formatted_as_numpy after write_flattened_sequence read_col_formatted_as_numpy after write_nested_sequence read_col_unformated after write_array2d read_col_unformated after write_flattened_sequence read_col_unformated after write_nested_sequence read_formatted_as_numpy after write_array2d read_formatted_as_numpy after write_flattened_sequence read_formatted_as_numpy after write_nested_sequence read_unformated after write_array2d read_unformated after write_flattened_sequence read_unformated after write_nested_sequence write_array2d write_flattened_sequence write_nested_sequence
new / old (diff) 0.020126 / 0.011353 (0.008773) 0.017420 / 0.011008 (0.006412) 0.049561 / 0.038508 (0.011053) 0.036107 / 0.023109 (0.012997) 0.370378 / 0.275898 (0.094480) 0.383482 / 0.323480 (0.060002) 0.010253 / 0.007986 (0.002268) 0.005225 / 0.004328 (0.000896) 0.007148 / 0.004250 (0.002897) 0.050978 / 0.037052 (0.013926) 0.378928 / 0.258489 (0.120439) 0.408279 / 0.293841 (0.114439) 0.174574 / 0.128546 (0.046028) 0.135921 / 0.075646 (0.060275) 0.476219 / 0.419271 (0.056947) 0.446881 / 0.043533 (0.403348) 0.371180 / 0.255139 (0.116041) 0.384560 / 0.283200 (0.101360) 0.098056 / 0.141683 (-0.043627) 1.925365 / 1.452155 (0.473210) 1.990367 / 1.492716 (0.497650)

Benchmark: benchmark_indices_mapping.json

metric select shard shuffle sort train_test_split
new / old (diff) 0.047642 / 0.037411 (0.010231) 0.022424 / 0.014526 (0.007898) 0.026886 / 0.176557 (-0.149670) 0.082371 / 0.737135 (-0.654765) 0.029589 / 0.296338 (-0.266750)

Benchmark: benchmark_iterating.json

metric read 5000 read 50000 read_batch 50000 10 read_batch 50000 100 read_batch 50000 1000 read_formatted numpy 5000 read_formatted pandas 5000 read_formatted tensorflow 5000 read_formatted torch 5000 read_formatted_batch numpy 5000 10 read_formatted_batch numpy 5000 1000 shuffled read 5000 shuffled read 50000 shuffled read_batch 50000 10 shuffled read_batch 50000 100 shuffled read_batch 50000 1000 shuffled read_formatted numpy 5000 shuffled read_formatted_batch numpy 5000 10 shuffled read_formatted_batch numpy 5000 1000
new / old (diff) 0.273399 / 0.215209 (0.058190) 2.742843 / 2.077655 (0.665188) 1.915601 / 1.504120 (0.411481) 1.867363 / 1.541195 (0.326169) 1.962267 / 1.468490 (0.493777) 7.360884 / 4.584777 (2.776108) 6.188338 / 3.745712 (2.442626) 8.776072 / 5.269862 (3.506211) 7.611771 / 4.565676 (3.046095) 0.733264 / 0.424275 (0.308989) 0.012672 / 0.007607 (0.005064) 0.310271 / 0.226044 (0.084226) 3.394254 / 2.268929 (1.125325) 2.440167 / 55.444624 (-53.004458) 2.308685 / 6.876477 (-4.567791) 2.490930 / 2.142072 (0.348858) 7.509937 / 4.805227 (2.704709) 6.809656 / 6.500664 (0.308991) 7.793635 / 0.075469 (7.718166)

Benchmark: benchmark_map_filter.json

metric filter map fast-tokenizer batched map identity map identity batched map no-op batched map no-op batched numpy map no-op batched pandas map no-op batched pytorch map no-op batched tensorflow
new / old (diff) 15.495662 / 1.841788 (13.653874) 14.724263 / 8.074308 (6.649954) 16.656450 / 10.191392 (6.465058) 1.250428 / 0.680424 (0.570004) 0.655058 / 0.534201 (0.120857) 0.901903 / 0.579283 (0.322620) 0.668427 / 0.434364 (0.234063) 0.837419 / 0.540337 (0.297082) 1.761518 / 1.386936 (0.374582)

Please sign in to comment.