forked from huggingface/datasets
/
test_features.py
26 lines (22 loc) 路 1.24 KB
/
test_features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from unittest import TestCase
from nlp.arrow_dataset import Dataset
from nlp.features import Features, Sequence, Value
class FeaturesTest(TestCase):
def test_from_arrow_schema_simple(self):
data = {"a": [{"b": {"c": "text"}}] * 10, "foo": [1] * 10}
original_features = Features({"a": {"b": {"c": Value("string")}}, "foo": Value("int64")})
dset = Dataset.from_dict(data, features=original_features)
new_features = dset.features
new_dset = Dataset.from_dict(data, features=new_features)
self.assertEqual(original_features.type, new_features.type)
self.assertDictEqual(dset[0], new_dset[0])
self.assertDictEqual(dset[:], new_dset[:])
def test_from_arrow_schema_with_sequence(self):
data = {"a": [{"b": {"c": ["text"]}}] * 10, "foo": [1] * 10}
original_features = Features({"a": {"b": Sequence({"c": Value("string")})}, "foo": Value("int64")})
dset = Dataset.from_dict(data, features=original_features)
new_features = dset.features
new_dset = Dataset.from_dict(data, features=new_features)
self.assertEqual(original_features.type, new_features.type)
self.assertDictEqual(dset[0], new_dset[0])
self.assertDictEqual(dset[:], new_dset[:])