Skip to content

Commit

Permalink
fix get_dataset (#835)
Browse files Browse the repository at this point in the history
* fix `get_dataset`

* format
  • Loading branch information
VictorSanh committed Oct 28, 2022
1 parent 536339e commit 40ae287
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 22 deletions.
9 changes: 7 additions & 2 deletions promptsource/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,13 +313,18 @@ def show_text(t, width=WIDTH, with_markdown=False):
except OSError as e:
st.error(
f"Some datasets are not handled automatically by `datasets` and require users to download the "
f"dataset manually. This applies to {dataset_key}{f'/{subset_name}' if subset_name is not None else ''}. "
f"\n\nPlease download the raw dataset to `~/.cache/promptsource/{dataset_key}{f'/{subset_name}' if subset_name is not None else ''}`. "
f"dataset manually. It is possibly the case for {dataset_key}{f'/{subset_name}' if subset_name is not None else ''}. "
f"\n\nIf so, please download the raw dataset to `~/.cache/promptsource/{dataset_key}{f'/{subset_name}' if subset_name is not None else ''}`. "
f"\n\nYou can choose another cache directory by overriding `PROMPTSOURCE_MANUAL_DATASET_DIR` environment "
f"variable and downloading raw dataset to `$PROMPTSOURCE_MANUAL_DATASET_DIR/{dataset_key}{f'/{subset_name}' if subset_name is not None else ''}`"
f"\n\nOriginal error:\n{str(e)}"
)
st.stop()
except Exception as e:
st.error(
f"An error occurred while loading the dataset {dataset_key}{f'/{subset_name}' if subset_name is not None else ''}. "
f"\n\nOriginal error:\n{str(e)}"
)

splits = list(dataset.keys())
index = 0
Expand Down
32 changes: 12 additions & 20 deletions promptsource/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,33 +46,25 @@ def get_dataset_builder(path, conf=None):

def get_dataset(path, conf=None):
"Get a dataset from name and conf."
builder_instance = get_dataset_builder(path, conf)
if builder_instance.manual_download_instructions is None and builder_instance.info.size_in_bytes is not None:
builder_instance.download_and_prepare()
return builder_instance.as_dataset()
else:
return load_dataset(path, conf)


def load_dataset(dataset_name, subset_name):
try:
return datasets.load_dataset(dataset_name, subset_name)
return datasets.load_dataset(path, conf)
except datasets.builder.ManualDownloadError:
cache_root_dir = (
os.environ["PROMPTSOURCE_MANUAL_DATASET_DIR"]
if "PROMPTSOURCE_MANUAL_DATASET_DIR" in os.environ
else DEFAULT_PROMPTSOURCE_CACHE_HOME
)
data_dir = (
f"{cache_root_dir}/{dataset_name}"
if subset_name is None
else f"{cache_root_dir}/{dataset_name}/{subset_name}"
)
return datasets.load_dataset(
dataset_name,
subset_name,
data_dir=data_dir,
)
data_dir = f"{cache_root_dir}/{path}" if conf is None else f"{cache_root_dir}/{path}/{conf}"
try:
return datasets.load_dataset(
path,
conf,
data_dir=data_dir,
)
except Exception as err:
raise err
except Exception as err:
raise err


def get_dataset_confs(path):
Expand Down

0 comments on commit 40ae287

Please sign in to comment.