update to use tqdm instead of easydev/joblib

cokelaer · Sep 14, 2022 · 3327912 · 3327912
1 parent d94875b
commit 3327912
Show file tree

Hide file tree

Showing 11 changed files with 57 additions and 82 deletions.
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -6,6 +6,8 @@ on:
       - main
   pull_request:
     branches-ignore: []
+  schedule:
+    - cron: '0 0 * * SUN'
 
 jobs:
   build-linux:
@@ -14,6 +16,7 @@ jobs:
       max-parallel: 5
       matrix:
         python: [3.6, 3.7, 3.8, 3.9]
+      fail-fast: false
 
     steps:
     - uses: actions/checkout@v2

diff --git a/README.rst b/README.rst
@@ -22,6 +22,10 @@ FITTER documentation
 
 Compatible with Python 3.6, 3.7, 3.8, and 3.9
 
+.. image:: https://raw.githubusercontent.com/cokelaer/fitter/main/doc/_static/fitter_256x256.png
+    :target: https://raw.githubusercontent.com/cokelaer/fitter/main/doc/_static/fitter_256x256.png
+
+
 
 What is it ?
 ################
@@ -102,6 +106,8 @@ Changelog
 ========= ==========================================================================
 Version   Description
 ========= ==========================================================================
+1.5.0     * removed easydev; the progress bar is now provided by tqdm
+          * the progress bar from tqdm also removes the need for joblib
 1.4.1     * Update timeout in docs from 10 to 30 seconds by @mpadge in 
             https://github.com/cokelaer/fitter/pull/47
           * Add Kolmogorov-Smirnov goodness-of-fit statistic by @lahdjirayhan in 

diff --git a/doc/_static/fitter_256x256.png b/doc/_static/fitter_256x256.png
diff --git a/doc/_static/fitter_64x64.png b/doc/_static/fitter_64x64.png
diff --git a/doc/_static/fitter_680x680.png b/doc/_static/fitter_680x680.png
diff --git a/doc/conf.py b/doc/conf.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 #
 # Configuration file for the Sphinx documentation builder.
 #
@@ -25,7 +24,7 @@
 version = pkg_resources.require("fitter")[0].version
 
 project = 'fitter'
-copyright = '2019, Thomas Cokelaer'
+copyright = '2019-2022, Thomas Cokelaer'
 author = 'Thomas Cokelaer'
 
 # The short X.Y version
@@ -51,7 +50,6 @@
     'sphinx.ext.coverage',
     'sphinx.ext.doctest',
     'sphinx.ext.todo',
-    #'sphinx.ext.mathjax',
     'sphinx.ext.ifconfig',
     'sphinx.ext.viewcode',
     'matplotlib.sphinxext.plot_directive',
@@ -112,6 +110,8 @@
 #
 # html_sidebars = {}
 
+html_logo = "_static/fitter_256x256.png"
+
 
 # -- Options for HTMLHelp output ---------------------------------------------
 

diff --git a/requirements.txt b/requirements.txt
@@ -1,7 +1,6 @@
-easydev
-numpy
+click
 matplotlib
-scipy>=0.18
+numpy
 pandas
-click
-joblib
+scipy>=0.18
+tqdm
diff --git a/src/fitter/fitter.py b/src/fitter/fitter.py
@@ -1,6 +1,6 @@
 #  This file is part of the fitter software
 #
-#  Copyright (c) 2014
+#  Copyright (c) 2014-2022
 #
 #  File author(s): Thomas Cokelaer <cokelaer@gmail.com>
 #
@@ -27,8 +27,7 @@
 import pandas as pd
 import pylab
 import scipy.stats
-from easydev import Progress
-from joblib import Parallel, delayed
+from tqdm import tqdm
 from scipy.stats import entropy as kl_div, kstest
 
 logger = logging.getLogger(__name__)
@@ -199,17 +198,15 @@ def _init(self):
         self._ks_stat = {}
         self._ks_pval = {}
         self._fit_i = 0  # fit progress
-        self.pb = None
+        #self.pb = None
 
     def _update_data_pdf(self):
         """Recompute the histogram of the current data.

         Stores the histogram values in :attr:`y` and the bin centers in :attr:`x`.
         """
         # np.histogram returns N+1 bin edges for N bins, so we replace the edges
         # by the N midpoints of consecutive edges to match the length of y
         self.y, self.x = np.histogram(self._data, bins=self.bins, density=self._density)
         self.x = [(this + self.x[i + 1]) / 2.0 for i, this in enumerate(self.x[0:-1])]
 
     def _trim_data(self):
-        self._data = self._alldata[
-            np.logical_and(self._alldata >= self._xmin, self._alldata <= self._xmax)
-        ]
+        self._data = self._alldata[np.logical_and(self._alldata >= self._xmin, self._alldata <= self._xmax)]
 
     def _get_xmin(self):
         return self._xmin
@@ -223,9 +220,7 @@ def _set_xmin(self, value):
         self._trim_data()
         self._update_data_pdf()
 
-    xmin = property(
-        _get_xmin, _set_xmin, doc="consider only data above xmin. reset if None"
-    )
+    xmin = property(_get_xmin, _set_xmin, doc="consider only data above xmin. reset if None")
 
     def _get_xmax(self):
         return self._xmax
@@ -239,9 +234,7 @@ def _set_xmax(self, value):
         self._trim_data()
         self._update_data_pdf()
 
-    xmax = property(
-        _get_xmax, _set_xmax, doc="consider only data below xmax. reset if None "
-    )
+    xmax = property(_get_xmax, _set_xmax, doc="consider only data below xmax. reset if None ")
 
     def _load_all_distributions(self):
         """Replace the :attr:`distributions` attribute with all scipy distributions"""
@@ -263,7 +256,7 @@ def hist(self):
         _ = pylab.hist(self._data, bins=self.bins, density=self._density)
         pylab.grid(True)
 
-    def _fit_single_distribution(self, distribution, progress: bool):
+    def _fit_single_distribution(self, distribution):
         try:
             # need a subprocess to check time it takes. If too long, skip it
             dist = eval("scipy.stats." + distribution)
@@ -300,9 +293,7 @@ def _fit_single_distribution(self, distribution, progress: bool):
             dist_fitted = dist(*param)
             ks_stat, ks_pval = kstest(self._data, dist_fitted.cdf)
 
-            logging.info(
-                "Fitted {} distribution with error={})".format(distribution, sq_error)
-            )
+            logging.info("Fitted {} distribution with error={})".format(distribution, sq_error))
 
             # compute some errors now
             self._fitted_errors[distribution] = sq_error
@@ -312,21 +303,17 @@ def _fit_single_distribution(self, distribution, progress: bool):
             self._ks_stat[distribution] = ks_stat
             self._ks_pval[distribution] = ks_pval
         except Exception:  # pragma: no cover
-            logging.warning(
-                "SKIPPED {} distribution (taking more than {} seconds)".format(
-                    distribution, self.timeout
-                )
-            )
+            logging.warning("SKIPPED {} distribution (taking more than {} seconds)".format(distribution, self.timeout))
             # if we cannot compute the error, set it to large values
             self._fitted_errors[distribution] = np.inf
             self._aic[distribution] = np.inf
             self._bic[distribution] = np.inf
             self._kldiv[distribution] = np.inf
-        if progress:
-            self._fit_i += 1
-            self.pb.animate(self._fit_i)
+        #if progress:
+        #    self._fit_i += 1
+        #    #self.pb.animate(self._fit_i)
 
-    def fit(self, amp=1, progress=False, n_jobs=-1):
+    def fit(self, progress=False, n_jobs=-1):
         r"""Loop over distributions and find best parameter to fit the data for each
 
         When a distribution is fitted onto the data, we populate a set of
@@ -344,23 +331,22 @@ def fit(self, amp=1, progress=False, n_jobs=-1):
 
         warnings.filterwarnings("ignore", category=RuntimeWarning)
 
-        if progress:
-            self.pb = Progress(len(self.distributions))
+        from tqdm.contrib.concurrent import thread_map
+
+        result = thread_map(self._fit_single_distribution, self.distributions, max_workers=4, disable=not progress)
+
+        #jobs = (delayed(self._fit_single_distribution)(dist, progress) for dist in self.distributions)
+        #pool = Parallel(n_jobs=n_jobs, backend="threading")
+        #_ = pool(jobs)
 
-        jobs = (
-            delayed(self._fit_single_distribution)(dist, progress)
-            for dist in self.distributions
-        )
-        pool = Parallel(n_jobs=n_jobs, backend="threading")
-        _ = pool(jobs)
         self.df_errors = pd.DataFrame(
             {
                 "sumsquare_error": self._fitted_errors,
                 "aic": self._aic,
                 "bic": self._bic,
                 "kl_div": self._kldiv,
                 "ks_statistic": self._ks_stat,
-                "ks_pvalue": self._ks_pval
+                "ks_pvalue": self._ks_pval,
             }
         )
 
@@ -406,11 +392,7 @@ def get_best(self, method="sumsquare_error"):
         name = self.df_errors.sort_values(method).iloc[0].name
         params = self.fitted_param[name]
         distribution = getattr(scipy.stats, name)
-        param_names = (
-            (distribution.shapes + ", loc, scale").split(", ")
-            if distribution.shapes
-            else ["loc", "scale"]
-        )
+        param_names = (distribution.shapes + ", loc, scale").split(", ") if distribution.shapes else ["loc", "scale"]
 
         param_dict = {}
         for d_key, d_val in zip(param_names, params):
@@ -463,9 +445,7 @@ def suicide(self):  # pragma: no cover
         ended_at = datetime.now()
         diff = ended_at - started_at
 
-        if (
-            it.exc_info[0] is not None
-        ):  # pragma: no cover ;  if there were any exceptions
+        if it.exc_info[0] is not None:  # pragma: no cover ;  if there were any exceptions
             a, b, c = it.exc_info
             raise Exception(a, b, c)  # communicate that to caller
 

diff --git a/src/fitter/histfit.py b/src/fitter/histfit.py
@@ -84,9 +84,7 @@ def __init__(self, data=None, X=None, Y=None, bins=None):
 
             self.N = len(self.X)
             self.guess_mean = self.X[int(self.N / 2)]
-            self.guess_std = sqrt(sum((self.X - mean(self.X)) ** 2) / self.N) / (
-                sqrt(2 * 3.14)
-            )
+            self.guess_std = sqrt(sum((self.X - mean(self.X)) ** 2) / self.N) / (sqrt(2 * 3.14))
             self.guess_amp = 1.0
 
         self.func = self._func_normal
@@ -112,16 +110,10 @@ def fit(
             # 5% error on the data to add errors
             self.E = [scipy.stats.norm.rvs(0, error_rate) for y in self.Y]
             # [scipy.stats.norm.rvs(0, self.std_data * error_rate) for x in range(self.N)]
-            self.result = scipy.optimize.least_squares(
-                self.func, (self.guess_mean, self.guess_std, self.guess_amp)
-            )
+            self.result = scipy.optimize.least_squares(self.func, (self.guess_mean, self.guess_std, self.guess_amp))
 
             mu, sigma, amplitude = self.result["x"]
-            pylab.plot(
-                self.X,
-                amplitude * scipy.stats.norm.pdf(self.X, mu, sigma),
-                **error_kwargs
-            )
+            pylab.plot(self.X, amplitude * scipy.stats.norm.pdf(self.X, mu, sigma), **error_kwargs)
             self.sigmas.append(sigma)
             self.amplitudes.append(amplitude)
             self.mus.append(mu)
@@ -131,11 +123,7 @@ def fit(
         self.amplitude = mean(self.amplitudes)
         self.mu = mean(self.mus)
 
-        pylab.plot(
-            self.X,
-            self.amplitude * scipy.stats.norm.pdf(self.X, self.mu, self.sigma),
-            **fit_kwargs
-        )
+        pylab.plot(self.X, self.amplitude * scipy.stats.norm.pdf(self.X, self.mu, self.sigma), **fit_kwargs)
         if semilogy:
             pylab.semilogy()
         pylab.grid()
@@ -150,18 +138,12 @@ def fit(
         pylab.fill_between(self.X, M - S, M + S, color="gray", alpha=0.5)
         # pylab.plot(self.X, M-S, color="k")
         # pylab.plot(self.X, M+S, color="k")
-        pylab.plot(
-            self.X,
-            self.amplitude * scipy.stats.norm.pdf(self.X, self.mu, self.sigma),
-            **fit_kwargs
-        )
+        pylab.plot(self.X, self.amplitude * scipy.stats.norm.pdf(self.X, self.mu, self.sigma), **fit_kwargs)
         pylab.grid()
 
         return self.mu, self.sigma, self.amplitude
 
     def _func_normal(self, param):
         # amplitude is supposed to be 1./(np.sqrt(2*np.pi)*sigma)* if normalised
         mu, sigma, A = param
-        return sum(
-            (A * scipy.stats.norm.pdf(self.X, mu, sigma) - (self.Y + self.E)) ** 2
-        )
+        return sum((A * scipy.stats.norm.pdf(self.X, mu, sigma) - (self.Y + self.E)) ** 2)
diff --git a/src/fitter/main.py b/src/fitter/main.py
@@ -42,18 +42,14 @@ def main():  # pragma: no cover
 @main.command()
 @click.argument("filename", type=click.STRING)
 @click.option("--column-number", type=click.INT, default=1)
-@click.option(
-    "--delimiter", type=click.STRING, default=",", help="look at the first column"
-)
+@click.option("--delimiter", type=click.STRING, default=",", help="look at the first column")
 @click.option(
     "--distributions",
     type=click.STRING,
     default="gamma,beta",
     help="llist of distribution",
 )
-@click.option(
-    "--tag", type=click.STRING, default="fitter", help="tag to name output files"
-)
+@click.option("--tag", type=click.STRING, default="fitter", help="tag to name output files")
 @click.option("--progress/--no-progress", default=True)
 @click.option("--verbose/--no-verbose", default=True)
 def fitdist(**kwargs):
@@ -94,6 +90,12 @@ def fitdist(**kwargs):
     with open("{}.log".format(tag), "w") as fout:
         fout.write(msg)
 
+@main.command()
+def show_distributions(**kwargs):
+    from fitter import get_distributions
+    print("\n".join(get_distributions()))
+
+
 
 if __name__ == "__main__":  # pragma: no cover
     main()
diff --git a/test/test_main.py b/test/test_main.py
@@ -1,9 +1,9 @@
 import pytest
 from pathlib import Path
-from easydev import TempFile
 import subprocess
 from scipy import stats
 from fitter.main import fitdist
+from fitter.main import show_distributions
 
 @pytest.fixture
 def setup_teardown():
@@ -37,3 +37,6 @@ def test_main_app(setup_teardown):
 
     results = runner.invoke(fitdist, ['test.csv', "--progress", "--column-number", 1])
     assert results.exit_code == 0
+
+    results = runner.invoke(show_distributions, [])
+    assert results.exit_code == 0