Merge pull request #112 from franchuterivera/refactor_development_ADD…

…_109 Pre fetch openml data for pytest
automl · Mar 1, 2021 · 55ec853 · 55ec853
2 parents ffc1620 + 1602933
commit 55ec853
Show file tree

Hide file tree

Showing 2 changed files with 39 additions and 0 deletions.
diff --git a/setup.py b/setup.py
@@ -47,6 +47,7 @@
             "codecov",
             "pep8",
             "mypy",
+            "openml"
         ],
         "examples": [
             "matplotlib",

diff --git a/test/conftest.py b/test/conftest.py
@@ -8,6 +8,8 @@
 
 import numpy as np
 
+import openml
+
 import pandas as pd
 
 import pytest
@@ -23,6 +25,42 @@
 from autoPyTorch.utils.pipeline import get_dataset_requirements
 
 
+@pytest.fixture(scope="session")
+def callattr_ahead_of_alltests(request):
+    """
+    Session-scoped fixture that prefetches the OpenML resources used by
+    the testing phase.
+
+    It first populates the OpenML cache for every task id listed in
+    ``tasks_used`` and then calls ``fetch_openml`` on the dataset behind
+    each of those tasks. The intent is that an unavailable resource makes
+    the run fail right away instead of waiting for an OpenML fetch
+    timeout later on.
+
+    Args:
+        request: the pytest ``request`` object; it is not used in the body.
+
+    Returns:
+        None. Nothing is handed to the tests; only the calls matter.
+
+    NOTE(review): the decorator shown here does not set ``autouse=True``,
+    so the fixture presumably runs only when something requests it --
+    confirm that this is wired up elsewhere.
+    NOTE(review): no retry logic is visible in this body; any robustness
+    against OpenML API failures would have to come from the called
+    libraries -- confirm.
+    """
+    # OpenML task ids to prefetch; the trailing comment names the dataset.
+    tasks_used = [
+        146818,  # Australian
+        2295,    # cholesterol
+        2075,    # abalone
+        2071,    # adult
+        3,       # kr-vs-kp
+        9981,    # cnae-9
+        146821,  # car
+        146822,  # Segment
+        2,       # anneal
+        53,      # vehicle
+        5136,    # tecator
+        4871,    # sensory
+        4857,    # boston
+        3916,    # kc1
+    ]
+
+    # Populate the cache.
+    # This is meant to make the tests fail immediately rather than
+    # waiting for an OpenML fetch timeout.
+    openml.populate_cache(task_ids=tasks_used)
+    # Also fetch the dataset behind each task through fetch_openml; the
+    # returned (X, y) is discarded -- presumably this warms a local data
+    # cache, TODO confirm.
+    # NOTE(review): fetch_openml is not imported in the visible hunks;
+    # presumably it is imported elsewhere in this file -- verify.
+    for task in tasks_used:
+        fetch_openml(data_id=openml.tasks.get_task(task).dataset_id,
+                     return_X_y=True)
+    return
+
+
 def slugify(text):
     return re.sub(r'[\[\]]+', '-', text.lower())