datasets/race/dataset_infos.json

{"high": {"description": "Race is a large-scale reading comprehension dataset with more than 28,000 passages and nearly 100,000 questions. The\n dataset is collected from English examinations in China, which are designed for middle school and high school students.\nThe dataset can be served as the training and test sets for machine comprehension.\n\n", "citation": "@article{lai2017large,\n    title={RACE: Large-scale ReAding Comprehension Dataset From Examinations},\n    author={Lai, Guokun and Xie, Qizhe and Liu, Hanxiao and Yang, Yiming and Hovy, Eduard},\n    journal={arXiv preprint arXiv:1704.04683},\n    year={2017}\n}\n", "homepage": "http://www.cs.cmu.edu/~glai1/data/race/", "license": "", "features": {"example_id": {"dtype": "string", "id": null, "_type": "Value"}, "article": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "options": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "builder_name": "race", "config_name": "high", "version": {"version_str": "0.1.0", "description": null, "major": 0, "minor": 1, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 6989121, "num_examples": 3498, "dataset_name": "race"}, "train": {"name": "train", "num_bytes": 126243396, "num_examples": 62445, "dataset_name": "race"}, "validation": {"name": "validation", "num_bytes": 6885287, "num_examples": 3451, "dataset_name": "race"}}, "download_checksums": {"http://www.cs.cmu.edu/~glai1/data/race/RACE.tar.gz": {"num_bytes": 25443609, "checksum": "b2769cc9fdc5c546a693300eb9a966cec6870bd349fbc44ed5225f8ad33006e5"}}, "download_size": 25443609, "post_processing_size": null, "dataset_size": 140117804, "size_in_bytes": 165561413}, "middle": {"description": "Race is a large-scale reading comprehension dataset with more than 28,000 passages and nearly 100,000 questions. The\n dataset is collected from English examinations in China, which are designed for middle school and high school students.\nThe dataset can be served as the training and test sets for machine comprehension.\n\n", "citation": "@article{lai2017large,\n    title={RACE: Large-scale ReAding Comprehension Dataset From Examinations},\n    author={Lai, Guokun and Xie, Qizhe and Liu, Hanxiao and Yang, Yiming and Hovy, Eduard},\n    journal={arXiv preprint arXiv:1704.04683},\n    year={2017}\n}\n", "homepage": "http://www.cs.cmu.edu/~glai1/data/race/", "license": "", "features": {"example_id": {"dtype": "string", "id": null, "_type": "Value"}, "article": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "options": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "builder_name": "race", "config_name": "middle", "version": {"version_str": "0.1.0", "description": null, "major": 0, "minor": 1, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 1786297, "num_examples": 1436, "dataset_name": "race"}, "train": {"name": "train", "num_bytes": 31065322, "num_examples": 25421, "dataset_name": "race"}, "validation": {"name": "validation", "num_bytes": 1761937, "num_examples": 1436, "dataset_name": "race"}}, "download_checksums": {"http://www.cs.cmu.edu/~glai1/data/race/RACE.tar.gz": {"num_bytes": 25443609, "checksum": "b2769cc9fdc5c546a693300eb9a966cec6870bd349fbc44ed5225f8ad33006e5"}}, "download_size": 25443609, "post_processing_size": null, "dataset_size": 34613556, "size_in_bytes": 60057165}, "all": {"description": "Race is a large-scale reading comprehension dataset with more than 28,000 passages and nearly 100,000 questions. The\n dataset is collected from English examinations in China, which are designed for middle school and high school students.\nThe dataset can be served as the training and test sets for machine comprehension.\n\n", "citation": "@article{lai2017large,\n    title={RACE: Large-scale ReAding Comprehension Dataset From Examinations},\n    author={Lai, Guokun and Xie, Qizhe and Liu, Hanxiao and Yang, Yiming and Hovy, Eduard},\n    journal={arXiv preprint arXiv:1704.04683},\n    year={2017}\n}\n", "homepage": "http://www.cs.cmu.edu/~glai1/data/race/", "license": "", "features": {"example_id": {"dtype": "string", "id": null, "_type": "Value"}, "article": {"dtype": "string", "id": null, "_type": "Value"}, "answer": {"dtype": "string", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "options": {"feature": {"dtype": "string", "id": null, "_type": "Value"}, "length": -1, "id": null, "_type": "Sequence"}}, "post_processed": null, "supervised_keys": null, "builder_name": "race", "config_name": "all", "version": {"version_str": "0.1.0", "description": null, "major": 0, "minor": 1, "patch": 0}, "splits": {"test": {"name": "test", "num_bytes": 8775394, "num_examples": 4934, "dataset_name": "race"}, "train": {"name": "train", "num_bytes": 157308694, "num_examples": 87866, "dataset_name": "race"}, "validation": {"name": "validation", "num_bytes": 8647200, "num_examples": 4887, "dataset_name": "race"}}, "download_checksums": {"http://www.cs.cmu.edu/~glai1/data/race/RACE.tar.gz": {"num_bytes": 25443609, "checksum": "b2769cc9fdc5c546a693300eb9a966cec6870bd349fbc44ed5225f8ad33006e5"}}, "download_size": 25443609, "post_processing_size": null, "dataset_size": 174731288, "size_in_bytes": 200174897}}