From f2c133734f8aca358bf8d036c5d13ba52284462b Mon Sep 17 00:00:00 2001
From: Michael Lappert
Date: Fri, 17 May 2024 12:18:56 +0200
Subject: [PATCH 1/5] add config_name and data_files to datasetcard.md specs

---
 datasetcard.md | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/datasetcard.md b/datasetcard.md
index e961efa18..d71772058 100644
--- a/datasetcard.md
+++ b/datasetcard.md
@@ -33,9 +33,13 @@ task_ids:
 - {subtask_0} # Example: extractive-qa
 - {subtask_1} # Example: multi-class-image-classification
 paperswithcode_id: {paperswithcode_id} # Dataset id on PapersWithCode (from the URL). Example for SQuAD: squad
-configs: # Optional for datasets with multiple configurations like glue.
-- {config_0} # Example for glue: sst2
-- {config_1} # Example for glue: cola
+configs: # Optional for datasets with multiple configurations.
+  - config_name: default
+    data_files:
+      - split: train
+        path: "data.csv"
+      - split: test
+        path: "holdout.csv"
 
 # Optional. This part can be used to store the feature types and size of the dataset to be used in python. This can be automatically generated using the datasets-cli.
 dataset_info:

From c2f3b1ab24f18bb0dc2cbf4df2e996b502587c8d Mon Sep 17 00:00:00 2001
From: Michael <61876623+lappemic@users.noreply.github.com>
Date: Fri, 17 May 2024 16:10:46 +0200
Subject: [PATCH 2/5] Apply suggestions

Co-authored-by: Polina Kazakova
---
 datasetcard.md | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/datasetcard.md b/datasetcard.md
index d71772058..0477b0e12 100644
--- a/datasetcard.md
+++ b/datasetcard.md
@@ -33,13 +33,15 @@ task_ids:
 - {subtask_0} # Example: extractive-qa
 - {subtask_1} # Example: multi-class-image-classification
 paperswithcode_id: {paperswithcode_id} # Dataset id on PapersWithCode (from the URL). Example for SQuAD: squad
-configs: # Optional for datasets with multiple configurations.
-  - config_name: default
+configs: # Optional. This can be used to pass additional parameters to the dataset loader, such as `data_files`, `data_dir`, and any builder-specific parameters
+  - config_name: {config_name_0} # Example: default
     data_files:
-      - split: train
-        path: "data.csv"
-      - split: test
-        path: "holdout.csv"
+      - split: {split_name_0} # Example: train
+        path: {file_path_0} # Example: data.csv
+      - split: {split_name_1} # Example: test
+        path: {file_path_1} # Example: holdout.csv
+  - config_name: {config_name_1} # Example: ...
+    ... #TODO
 
 # Optional. This part can be used to store the feature types and size of the dataset to be used in python. This can be automatically generated using the datasets-cli.
 dataset_info:

From c0a6f6bdc23d03333212b4af20555791c24d2aa3 Mon Sep 17 00:00:00 2001
From: Michael <61876623+lappemic@users.noreply.github.com>
Date: Fri, 17 May 2024 23:02:13 +0200
Subject: [PATCH 3/5] Apply suggestion

Co-authored-by: Quentin Lhoest <42851186+lhoestq@users.noreply.github.com>
---
 datasetcard.md | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/datasetcard.md b/datasetcard.md
index 0477b0e12..e63c37fcb 100644
--- a/datasetcard.md
+++ b/datasetcard.md
@@ -34,14 +34,14 @@ task_ids:
 - {subtask_1} # Example: multi-class-image-classification
 paperswithcode_id: {paperswithcode_id} # Dataset id on PapersWithCode (from the URL). Example for SQuAD: squad
 configs: # Optional. This can be used to pass additional parameters to the dataset loader, such as `data_files`, `data_dir`, and any builder-specific parameters
-  - config_name: {config_name_0} # Example: default
-    data_files:
-      - split: {split_name_0} # Example: train
-        path: {file_path_0} # Example: data.csv
-      - split: {split_name_1} # Example: test
-        path: {file_path_1} # Example: holdout.csv
-  - config_name: {config_name_1} # Example: ...
-    ... #TODO
+- config_name: {config_name_0} # Example: default
+  data_files:
+  - split: {split_name_0} # Example: train
+    path: {file_path_0} # Example: data.csv
+  - split: {split_name_1} # Example: test
+    path: {file_path_1} # Example: holdout.csv
+- config_name: {config_name_1} # Example: ...
+  ... #TODO
 
 # Optional. This part can be used to store the feature types and size of the dataset to be used in python. This can be automatically generated using the datasets-cli.
 dataset_info:

From 7a0c4b2878803fee0f3d397e32471250c2d9c28b Mon Sep 17 00:00:00 2001
From: Michael <61876623+lappemic@users.noreply.github.com>
Date: Fri, 17 May 2024 23:04:47 +0200
Subject: [PATCH 4/5] Remove #TODO

---
 datasetcard.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/datasetcard.md b/datasetcard.md
index e63c37fcb..540db162d 100644
--- a/datasetcard.md
+++ b/datasetcard.md
@@ -41,7 +41,6 @@ configs: # Optional. This can be used to pass additional parameters to the data
   - split: {split_name_1} # Example: test
     path: {file_path_1} # Example: holdout.csv
 - config_name: {config_name_1} # Example: ...
-  ... #TODO
 
 # Optional. This part can be used to store the feature types and size of the dataset to be used in python. This can be automatically generated using the datasets-cli.
 dataset_info:

From 414d4aaa852a3b765b3fad9064824e98c79b2f63 Mon Sep 17 00:00:00 2001
From: Quentin Lhoest <42851186+lhoestq@users.noreply.github.com>
Date: Thu, 23 May 2024 11:48:32 +0200
Subject: [PATCH 5/5] Update datasetcard.md

Co-authored-by: Polina Kazakova
---
 datasetcard.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/datasetcard.md b/datasetcard.md
index 540db162d..44c968741 100644
--- a/datasetcard.md
+++ b/datasetcard.md
@@ -40,7 +40,10 @@ configs: # Optional. This can be used to pass additional parameters to the data
     path: {file_path_0} # Example: data.csv
   - split: {split_name_1} # Example: test
     path: {file_path_1} # Example: holdout.csv
-- config_name: {config_name_1} # Example: ...
+- config_name: {config_name_1} # Example: processed
+  data_files:
+  - split: {split_name_3} # Example: train
+    path: {file_path_3} # Example: data_processed.csv
 
 # Optional. This part can be used to store the feature types and size of the dataset to be used in python. This can be automatically generated using the datasets-cli.
 dataset_info:
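
Below is a minimal sketch of how the `configs` metadata described in these patches is consumed by the `datasets` library once a dataset card carries it. The repository id `username/my_dataset` is a hypothetical placeholder; the config names `default` and `processed` simply mirror the examples used in the template above.

    # Hypothetical usage, assuming a Hub dataset repo whose README.md front matter
    # contains the `configs` block defined in the patched template.
    from datasets import load_dataset

    # Each `config_name` entry becomes a loadable configuration, and `data_files`
    # tells the loader which file(s) back each split of that configuration.
    train_ds = load_dataset("username/my_dataset", "default", split="train")
    test_ds = load_dataset("username/my_dataset", "default", split="test")

    # A second entry, such as the "processed" example added in PATCH 5/5,
    # is selected the same way.
    processed_ds = load_dataset("username/my_dataset", "processed", split="train")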