Skip to content

Commit

Permalink
Update tasks.json
Browse files Browse the repository at this point in the history
  • Loading branch information
sayakpaul committed Dec 6, 2022
1 parent d66a5f3 commit 1073b3a
Showing 1 changed file with 19 additions and 11 deletions.
30 changes: 19 additions & 11 deletions src/datasets/utils/resources/tasks.json
Expand Up @@ -21,6 +21,9 @@
"dialogue-generation"
]
},
"depth-estimation": {
"type": "cv"
},
"feature-extraction": {
"type": "multimodal"
},
Expand Down Expand Up @@ -94,21 +97,23 @@
"type": "nlp"
},
"table-to-text": {
"type": "nlp",
"subtasks": [
"rdf-to-text"
]
"type": "nlp"
},
"tabular-classification": {
"type": "structured",
"type": "tabular",
"subtasks": [
"tabular-multi-class-classification",
"tabular-multi-label-classification",
"tabular-multi-label-classification"
]
},
"tabular-regression": {
"type": "tabular",
"subtasks": [
"tabular-single-column-regression"
]
},
"tabular-to-text": {
"type": "structured",
"type": "tabular",
"subtasks": [
"rdf-to-text"
]
Expand All @@ -122,6 +127,7 @@
"intent-classification",
"multi-class-classification",
"multi-label-classification",
"multi-input-text-classification",
"natural-language-inference",
"semantic-similarity-classification",
"sentiment-classification",
Expand All @@ -130,8 +136,7 @@
"sentiment-scoring",
"sentiment-analysis",
"hate-speech-detection",
"text-scoring",
"multi-input-text-classification"
"text-scoring"
]
},
"text-generation": {
Expand Down Expand Up @@ -169,7 +174,7 @@
]
},
"time-series-forecasting": {
"type": "structured",
"type": "tabular",
"subtasks": [
"univariate-time-series-forecasting",
"multivariate-time-series-forecasting"
Expand All @@ -192,6 +197,9 @@
"unconditional-image-generation": {
"type": "cv"
},
"video-classification": {
"type": "cv"
},
"visual-question-answering": {
"type": "multimodal",
"subtasks": [
Expand All @@ -207,4 +215,4 @@
"zero-shot-image-classification": {
"type": "cv"
}
}
}

1 comment on commit 1073b3a

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Show benchmarks

PyArrow==6.0.0

Show updated benchmarks!

Benchmark: benchmark_array_xd.json

| metric | read_batch_formatted_as_numpy after write_array2d | read_batch_formatted_as_numpy after write_flattened_sequence | read_batch_formatted_as_numpy after write_nested_sequence | read_batch_unformated after write_array2d | read_batch_unformated after write_flattened_sequence | read_batch_unformated after write_nested_sequence | read_col_formatted_as_numpy after write_array2d | read_col_formatted_as_numpy after write_flattened_sequence | read_col_formatted_as_numpy after write_nested_sequence | read_col_unformated after write_array2d | read_col_unformated after write_flattened_sequence | read_col_unformated after write_nested_sequence | read_formatted_as_numpy after write_array2d | read_formatted_as_numpy after write_flattened_sequence | read_formatted_as_numpy after write_nested_sequence | read_unformated after write_array2d | read_unformated after write_flattened_sequence | read_unformated after write_nested_sequence | write_array2d | write_flattened_sequence | write_nested_sequence |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| new / old (diff) | 0.009976 / 0.011353 (-0.001377) | 0.005432 / 0.011008 (-0.005576) | 0.098557 / 0.038508 (0.060049) | 0.037544 / 0.023109 (0.014435) | 0.298748 / 0.275898 (0.022850) | 0.369764 / 0.323480 (0.046284) | 0.008486 / 0.007986 (0.000500) | 0.005907 / 0.004328 (0.001578) | 0.074523 / 0.004250 (0.070272) | 0.048328 / 0.037052 (0.011276) | 0.319795 / 0.258489 (0.061306) | 0.346578 / 0.293841 (0.052737) | 0.042827 / 0.128546 (-0.085719) | 0.015495 / 0.075646 (-0.060151) | 0.337914 / 0.419271 (-0.081358) | 0.051295 / 0.043533 (0.007762) | 0.295941 / 0.255139 (0.040802) | 0.318943 / 0.283200 (0.035743) | 0.110222 / 0.141683 (-0.031461) | 1.484829 / 1.452155 (0.032674) | 1.537215 / 1.492716 (0.044499) |

Benchmark: benchmark_getitem_100B.json

| metric | get_batch_of_1024_random_rows | get_batch_of_1024_rows | get_first_row | get_last_row |
|---|---|---|---|---|
| new / old (diff) | 0.270793 / 0.018006 (0.252786) | 0.602351 / 0.000490 (0.601861) | 0.001601 / 0.000200 (0.001401) | 0.000141 / 0.000054 (0.000087) |

Benchmark: benchmark_indices_mapping.json

| metric | select | shard | shuffle | sort | train_test_split |
|---|---|---|---|---|---|
| new / old (diff) | 0.028719 / 0.037411 (-0.008692) | 0.117576 / 0.014526 (0.103050) | 0.121485 / 0.176557 (-0.055072) | 0.160075 / 0.737135 (-0.577061) | 0.130005 / 0.296338 (-0.166333) |

Benchmark: benchmark_iterating.json

| metric | read 5000 | read 50000 | read_batch 50000 10 | read_batch 50000 100 | read_batch 50000 1000 | read_formatted numpy 5000 | read_formatted pandas 5000 | read_formatted tensorflow 5000 | read_formatted torch 5000 | read_formatted_batch numpy 5000 10 | read_formatted_batch numpy 5000 1000 | shuffled read 5000 | shuffled read 50000 | shuffled read_batch 50000 10 | shuffled read_batch 50000 100 | shuffled read_batch 50000 1000 | shuffled read_formatted numpy 5000 | shuffled read_formatted_batch numpy 5000 10 | shuffled read_formatted_batch numpy 5000 1000 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| new / old (diff) | 0.398835 / 0.215209 (0.183626) | 3.975890 / 2.077655 (1.898235) | 1.787883 / 1.504120 (0.283763) | 1.600817 / 1.541195 (0.059623) | 1.735005 / 1.468490 (0.266515) | 0.698867 / 4.584777 (-3.885910) | 3.864428 / 3.745712 (0.118716) | 3.496830 / 5.269862 (-1.773032) | 1.872485 / 4.565676 (-2.693192) | 0.084826 / 0.424275 (-0.339449) | 0.012081 / 0.007607 (0.004474) | 0.524143 / 0.226044 (0.298099) | 5.185905 / 2.268929 (2.916977) | 2.258710 / 55.444624 (-53.185915) | 1.935393 / 6.876477 (-4.941084) | 2.179126 / 2.142072 (0.037053) | 0.851923 / 4.805227 (-3.953304) | 0.166850 / 6.500664 (-6.333814) | 0.065182 / 0.075469 (-0.010287) |

Benchmark: benchmark_map_filter.json

| metric | filter | map fast-tokenizer batched | map identity | map identity batched | map no-op batched | map no-op batched numpy | map no-op batched pandas | map no-op batched pytorch | map no-op batched tensorflow |
|---|---|---|---|---|---|---|---|---|---|
| new / old (diff) | 1.444284 / 1.841788 (-0.397504) | 15.471922 / 8.074308 (7.397613) | 25.028469 / 10.191392 (14.837077) | 0.804477 / 0.680424 (0.124054) | 0.527627 / 0.534201 (-0.006574) | 0.444077 / 0.579283 (-0.135206) | 0.435348 / 0.434364 (0.000984) | 0.278365 / 0.540337 (-0.261972) | 0.274994 / 1.386936 (-1.111942) |

PyArrow==latest

Show updated benchmarks!

Benchmark: benchmark_array_xd.json

| metric | read_batch_formatted_as_numpy after write_array2d | read_batch_formatted_as_numpy after write_flattened_sequence | read_batch_formatted_as_numpy after write_nested_sequence | read_batch_unformated after write_array2d | read_batch_unformated after write_flattened_sequence | read_batch_unformated after write_nested_sequence | read_col_formatted_as_numpy after write_array2d | read_col_formatted_as_numpy after write_flattened_sequence | read_col_formatted_as_numpy after write_nested_sequence | read_col_unformated after write_array2d | read_col_unformated after write_flattened_sequence | read_col_unformated after write_nested_sequence | read_formatted_as_numpy after write_array2d | read_formatted_as_numpy after write_flattened_sequence | read_formatted_as_numpy after write_nested_sequence | read_unformated after write_array2d | read_unformated after write_flattened_sequence | read_unformated after write_nested_sequence | write_array2d | write_flattened_sequence | write_nested_sequence |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| new / old (diff) | 0.008114 / 0.011353 (-0.003239) | 0.005519 / 0.011008 (-0.005489) | 0.097036 / 0.038508 (0.058528) | 0.035853 / 0.023109 (0.012744) | 0.385319 / 0.275898 (0.109421) | 0.420184 / 0.323480 (0.096704) | 0.007001 / 0.007986 (-0.000985) | 0.004360 / 0.004328 (0.000031) | 0.073143 / 0.004250 (0.068892) | 0.045098 / 0.037052 (0.008045) | 0.387562 / 0.258489 (0.129073) | 0.433838 / 0.293841 (0.139997) | 0.037722 / 0.128546 (-0.090825) | 0.012764 / 0.075646 (-0.062882) | 0.353599 / 0.419271 (-0.065672) | 0.050028 / 0.043533 (0.006495) | 0.375590 / 0.255139 (0.120451) | 0.397023 / 0.283200 (0.113824) | 0.111397 / 0.141683 (-0.030286) | 1.466185 / 1.452155 (0.014030) | 1.591916 / 1.492716 (0.099200) |

Benchmark: benchmark_getitem_100B.json

| metric | get_batch_of_1024_random_rows | get_batch_of_1024_rows | get_first_row | get_last_row |
|---|---|---|---|---|
| new / old (diff) | 0.320909 / 0.018006 (0.302903) | 0.542284 / 0.000490 (0.541794) | 0.021301 / 0.000200 (0.021101) | 0.000181 / 0.000054 (0.000126) |

Benchmark: benchmark_indices_mapping.json

| metric | select | shard | shuffle | sort | train_test_split |
|---|---|---|---|---|---|
| new / old (diff) | 0.031132 / 0.037411 (-0.006279) | 0.118363 / 0.014526 (0.103837) | 0.129267 / 0.176557 (-0.047290) | 0.165211 / 0.737135 (-0.571924) | 0.134603 / 0.296338 (-0.161735) |

Benchmark: benchmark_iterating.json

| metric | read 5000 | read 50000 | read_batch 50000 10 | read_batch 50000 100 | read_batch 50000 1000 | read_formatted numpy 5000 | read_formatted pandas 5000 | read_formatted tensorflow 5000 | read_formatted torch 5000 | read_formatted_batch numpy 5000 10 | read_formatted_batch numpy 5000 1000 | shuffled read 5000 | shuffled read 50000 | shuffled read_batch 50000 10 | shuffled read_batch 50000 100 | shuffled read_batch 50000 1000 | shuffled read_formatted numpy 5000 | shuffled read_formatted_batch numpy 5000 10 | shuffled read_formatted_batch numpy 5000 1000 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| new / old (diff) | 0.429611 / 0.215209 (0.214402) | 4.299062 / 2.077655 (2.221407) | 2.131809 / 1.504120 (0.627689) | 1.945787 / 1.541195 (0.404593) | 2.035194 / 1.468490 (0.566704) | 0.691625 / 4.584777 (-3.893152) | 3.857583 / 3.745712 (0.111871) | 2.174195 / 5.269862 (-3.095666) | 1.375138 / 4.565676 (-3.190538) | 0.085697 / 0.424275 (-0.338578) | 0.012355 / 0.007607 (0.004748) | 0.541149 / 0.226044 (0.315104) | 5.402926 / 2.268929 (3.133998) | 2.630309 / 55.444624 (-52.814315) | 2.290538 / 6.876477 (-4.585939) | 2.520786 / 2.142072 (0.378713) | 0.838535 / 4.805227 (-3.966692) | 0.170170 / 6.500664 (-6.330494) | 0.064036 / 0.075469 (-0.011434) |

Benchmark: benchmark_map_filter.json

| metric | filter | map fast-tokenizer batched | map identity | map identity batched | map no-op batched | map no-op batched numpy | map no-op batched pandas | map no-op batched pytorch | map no-op batched tensorflow |
|---|---|---|---|---|---|---|---|---|---|
| new / old (diff) | 1.538513 / 1.841788 (-0.303275) | 15.891108 / 8.074308 (7.816799) | 12.512314 / 10.191392 (2.320922) | 0.936912 / 0.680424 (0.256488) | 0.595258 / 0.534201 (0.061057) | 0.422227 / 0.579283 (-0.157056) | 0.421947 / 0.434364 (-0.012416) | 0.248909 / 0.540337 (-0.291428) | 0.274528 / 1.386936 (-1.112408) |

Please sign in to comment.