Merge pull request #42 from WenjieDu/dev

Simplify API names, add the config file for ReadTheDocs
WenjieDu · Oct 17, 2023 · 63f6e69 · 63f6e69
2 parents b90a418 + 745b096
commit 63f6e69
Show file tree

Hide file tree

Showing 7 changed files with 293 additions and 162 deletions.
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
@@ -0,0 +1,29 @@
+# This file is used to help customize the TSDB documentation building process on ReadTheDocs.
+
+version: 2
+
+formats:
+    - htmlzip
+    - pdf
+    - epub
+
+sphinx:
+    configuration: docs/conf.py
+    fail_on_warning: false
+
+build:
+    os: ubuntu-22.04
+
+    tools:
+        python: "3.10"
+
+    jobs:
+        pre_install:
+            - python -m pip install --upgrade pip
+            - pip install pandas numpy scipy scikit-learn
+            - pip install sphinx==6.2.1 docutils==0.19 sphinxcontrib-bibtex==2.1.4 sphinxcontrib-gtagjs sphinx-autodoc-typehints furo==2023.07.26
+
+        post_install:
+            - pip install docutils==0.20
+            # this version fixes issue#102, put it in post_install to avoid being
+            # overwritten by other versions (like 0.19) while installing other packages
diff --git a/README.md b/README.md
@@ -1,8 +1,8 @@
-<a href='https://github.com/WenjieDu/TSDB'><img src="https://pypots.com/figs/pypots_logos/TSDB_logo_FFBG.svg?sanitize=truee" align='right' width='235'/></a>
+<a href='https://github.com/WenjieDu/TSDB'><img src="https://pypots.com/figs/pypots_logos/TSDB_logo_FFBG.svg" align='right' width='235'/></a>
 
 <h2 align="center">Welcome to TSDB</h2>
 
-**<p align='center'>A Python Toolbox to Ease Loading Open-Source Time-Series Datasets</p>**
+*<p align='center'>a Python toolbox to ease loading public time-series datasets</p>*
 
 <p align='center'>
     <a href='https://github.com/WenjieDu/TSDB'>
@@ -43,14 +43,14 @@ TSDB is created to help researchers and engineers get rid of data collecting and
 🤝 If you need TSDB to integrate an open-source dataset or want to add it into TSDB yourself, please feel free to request it by creating an issue or making a PR to merge your code.
 
 🤗 **Please** star this repo to help others notice TSDB if you think it is a useful toolkit.
-**Please** properly [cite TSDB](https://github.com/WenjieDu/TSDB#-citing-tsdbpypots) in your publications
+**Please** properly [cite TSDB and PyPOTS](https://github.com/WenjieDu/TSDB#-citing-tsdbpypots) in your publications
 if it helps with your research. This really means a lot to our open-source research. Thank you!
 
 
 ## ❖ Usage Examples
 TSDB now is available on <a alt='Anaconda' href='https://anaconda.org/conda-forge/tsdb'><img align='center' src='https://img.shields.io/badge/Anaconda--lightgreen?style=social&logo=anaconda'></a>❗️
 
-Install it with `conda install tsdb`, you may need to specify the channel with option `-c conda-forge`
+Install it with `conda install tsdb `, you may need to specify the channel with option `-c conda-forge`
 
 or install from PyPI:
 > pip install tsdb
@@ -62,11 +62,12 @@ or install from source code:
 import tsdb
 
 tsdb.list_available_datasets()  # list all available datasets in TSDB
-data = tsdb.load_dataset('physionet_2012')  # select the dataset you need and load it, TSDB will download, extract, and process it automatically
+data = tsdb.load(
+    'physionet_2012')  # select the dataset you need and load it, TSDB will download, extract, and process it automatically
 tsdb.download_and_extract('physionet_2012', './save_it_here')  # if you need the raw data, use download_and_extract()
-tsdb.list_cached_data()  # datasets you once loaded are cached, and you can check them with list_cached_data()
-tsdb.delete_cached_data(dataset_name='physionet_2012')  # you can delete only one specific dataset and preserve others
-tsdb.delete_cached_data()  # or you can delete all cache with delete_cached_data() to free disk space
+tsdb.list_cache()  # datasets you once loaded are cached, and you can check them with list_cache()
+tsdb.delete_cache(dataset_name='physionet_2012')  # you can delete only one specific dataset and preserve others
+tsdb.delete_cache()  # or you can delete all cache with delete_cache() to free disk space
 ```
 
 That's all. Simple and efficient. Enjoy it! 😃
@@ -92,7 +93,7 @@ please cite PyPOTS project as below and 🌟star this repository to make others
 
 ``` bibtex
 @article{du2023PyPOTS,
-title={{PyPOTS: A Python Toolbox for Data Mining on Partially-Observed Time Series}},
+title={{PyPOTS: a Python toolbox for data mining on Partially-Observed Time Series}},
 author={Wenjie Du},
 year={2023},
 eprint={2305.18811},
@@ -103,11 +104,25 @@ doi={10.48550/arXiv.2305.18811},
 }
 ```
 
+> Wenjie Du. (2023).
+> PyPOTS: a Python toolbox for data mining on Partially-Observed Time Series.
+> arXiv, abs/2305.18811. https://arxiv.org/abs/2305.18811
+
 or
 
+``` bibtex
+@inproceedings{du2023PyPOTS,
+title={{PyPOTS: a Python toolbox for data mining on Partially-Observed Time Series}},
+booktitle={9th SIGKDD workshop on Mining and Learning from Time Series (MiLeTS'23)},
+author={Wenjie Du},
+year={2023},
+url={https://arxiv.org/abs/2305.18811},
+}
+```
+
 > Wenjie Du. (2023).
-> PyPOTS: A Python Toolbox for Data Mining on Partially-Observed Time Series.
-> arXiv, abs/2305.18811. https://doi.org/10.48550/arXiv.2305.18811
+> PyPOTS: a Python toolbox for data mining on Partially-Observed Time Series.
+> In *9th SIGKDD workshop on Mining and Learning from Time Series (MiLeTS'23)*. https://arxiv.org/abs/2305.18811
 
 
 <details>

diff --git a/tests/test_tsdb.py b/tests/test_tsdb.py
@@ -1,13 +1,16 @@
 """
-TSDB test cases
+TSDB unit testing cases.
 """
 
 # Created by Wenjie Du <wenjay.du@gmail.com>
 # License: GPL-v3
+
 import os
 import unittest
 
 import tsdb
+from tsdb.database import DATABASE
+from tsdb.utils.logging import Logger
 
 DATASETS_TO_TEST = [
     "physionet_2012",
@@ -19,28 +22,60 @@
 
 
 class TestTSDB(unittest.TestCase):
+    logger_creator = Logger(name="testing log", logging_level="debug")
+    logger = logger_creator.logger
+
     def test_0_available_datasets(self):
-        all_datasets_in_database = tsdb.list_database()
-        available_datasets = tsdb.list_available_datasets()
+        available_datasets = tsdb.list()
         assert len(available_datasets) > 0
-        assert len(all_datasets_in_database) == len(available_datasets)
+        assert len(DATABASE) == len(available_datasets)
 
     def test_1_downloading_only(self):
         tsdb.download_and_extract("ucr_uea_Wine", "./save_it_here")
         file_list = os.listdir()
         assert len(file_list) > 0
-        tsdb.purge_given_path("save_it_here")
+        tsdb.purge_path("save_it_here")
 
     def test_2_dataset_loading(self):
         for d_ in DATASETS_TO_TEST:
-            data = tsdb.load_dataset(d_)
+            data = tsdb.load(d_)
             assert isinstance(data, dict), f"Loaded dataset {d_} is not a dict."
 
     def test_3_dataset_purging(self):
-        cached_datasets = tsdb.list_cached_data()
+        cached_datasets = tsdb.list_cache()
         assert isinstance(cached_datasets, list)
-        tsdb.delete_cached_data("physionet_2012")  # delete single
-        tsdb.delete_cached_data()  # delete all
+        tsdb.delete_cache("physionet_2012")  # delete single
+        tsdb.delete_cache()  # delete all
+
+    def test_4_logging(self):
+        # different level logging
+        self.logger.debug("debug")
+        self.logger.info("info")
+        self.logger.warning("warning")
+        self.logger.error("error")
+
+        # change logging level
+        self.logger_creator.set_level("info")
+        assert (
+            self.logger.level == 20
+        ), f"the level of logger is {self.logger.level}, not INFO"
+        self.logger_creator.set_level("warning")
+        assert (
+            self.logger.level == 30
+        ), f"the level of logger is {self.logger.level}, not WARNING"
+        self.logger_creator.set_level("error")
+        assert (
+            self.logger.level == 40
+        ), f"the level of logger is {self.logger.level}, not ERROR"
+        self.logger_creator.set_level("debug")
+        assert (
+            self.logger.level == 10
+        ), f"the level of logger is {self.logger.level}, not DEBUG"
+
+        # save log into file
+        self.logger_creator.set_saving_path("test_log", "testing.log")
+        assert os.path.exists("test_log/testing.log")
+        tsdb.purge_path("test_log/testing.log", ignore_errors=True)
 
 
 if __name__ == "__main__":

diff --git a/tsdb/__init__.py b/tsdb/__init__.py
@@ -1,5 +1,5 @@
 """
-tsdb package
+TSDB (Time Series Data Beans): a Python toolbox to ease loading public time-series datasets.
 """
 
 # Created by Wenjie Du <wenjay.du@gmail.com>
@@ -21,38 +21,42 @@
 #
 # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
 # 'X.Y.dev0' is the canonical version of 'X.Y.dev'
-__version__ = "0.1"
+__version__ = "0.1.1"
 
 
-try:
-    from tsdb.data_processing import (
-        list_database,
-        list_available_datasets,
-        window_truncate,
-        download_and_extract,
-        load_dataset,
-        delete_cached_data,
-        purge_given_path,
-        list_cached_data,
-        CACHED_DATASET_DIR,
-        pickle_dump,
-        pickle_load,
-    )
-
-except Exception as e:
-    print(e)
+from tsdb.data_processing import (
+    list,
+    load,
+    download_and_extract,
+    list_cache,
+    delete_cache,
+    purge_path,
+    CACHED_DATASET_DIR,
+    pickle_dump,
+    pickle_load,
+    # below are deprecated functions, import for now, will be removed in v0.2
+    list_database,
+    list_available_datasets,
+    list_cached_data,
+    load_dataset,
+    delete_cached_data,
+)
 
 __all__ = [
     "__version__",
-    "list_database",
-    "list_available_datasets",
-    "window_truncate",
+    "list",
+    "load",
     "download_and_extract",
-    "load_dataset",
-    "delete_cached_data",
-    "purge_given_path",
-    "list_cached_data",
+    "list_cache",
+    "delete_cache",
+    "purge_path",
     "CACHED_DATASET_DIR",
     "pickle_dump",
     "pickle_load",
+    # below are deprecated functions, import for now, will be removed in v0.2
+    "list_database",
+    "list_available_datasets",
+    "list_cached_data",
+    "load_dataset",
+    "delete_cached_data",
 ]