Merge pull request #86 from eth-cscs/release-0.6.1

Release 0.6.1
eth-cscs · Jan 21, 2021 · e4622ce · e4622ce
2 parents 506383d + 59c4e48
commit e4622ce
Show file tree

Hide file tree

Showing 15 changed files with 254 additions and 128 deletions.
diff --git a/.gitignore b/.gitignore
@@ -91,3 +91,5 @@ ENV/
 # vim backup files
 #
 **/.*.sw?
+
+.idea/
diff --git a/.travis.yml b/.travis.yml
@@ -40,6 +40,7 @@ jobs:
           python: "3.8"
           env:
             - NO_PYTORCH=true
+            - UNIT_TEST=true
         - stage: deploy
           script:
             - echo "Required dummy override of default 'script' in .travis.yml."

diff --git a/README.md b/README.md
@@ -27,9 +27,9 @@ scientists by providing
 # Documentation
 For more information, check out the
 
-* [Documentation](http://abcpy.readthedocs.io/en/v0.6.0) 
-* [Examples](https://github.com/eth-cscs/abcpy/tree/v0.6.0/examples) directory and
-* [Reference](http://abcpy.readthedocs.io/en/v0.6.0/abcpy.html)
+* [Documentation](http://abcpy.readthedocs.io/en/v0.6.1) 
+* [Examples](https://github.com/eth-cscs/abcpy/tree/v0.6.1/examples) directory and
+* [Reference](http://abcpy.readthedocs.io/en/v0.6.1/abcpy.html)
 
 
 Further, we provide a
@@ -93,6 +93,8 @@ ABCpy for your publication, we would appreciate a citation. You can use
 
 Publications in which ABCpy was applied:
 
+* L. Pacchiardi, R. Dutta. "Score Matched Conditional Exponential Families for Likelihood-Free Inference", 2020, arXiv:2012.10903.
+
 * R. Dutta, K. Zouaoui-Boudjeltia, C. Kotsalos, A. Rousseau, D. Ribeiro de Sousa, J. M. Desmet, 
 A. Van Meerhaeghe, A. Mira, and B. Chopard. "Interpretable pathological test for Cardio-vascular 
 disease: Approximate Bayesian computation with distance learning.", 2020, arXiv:2010.06465.
@@ -117,8 +119,7 @@ Approximate Bayesian Computation to Model a Volcanic Eruption", 2020, Sankhya B,
 * A. Ebert, R. Dutta, P. Wu, K. Mengersen and A. Mira, "Likelihood-Free
   Parameter Estimation for Dynamic Queueing Networks", 2018, arXiv:1804.02526.
 
-* R. Dutta, M. Schoengens, A. Ummadisingu, N. Widerman, J. P.  Onnela, A. Mira, "ABCpy: A
-  High-Performance Computing Perspective to Approximate Bayesian Computation", 2017, arXiv:1711.04694.
+* R. Dutta, M. Schoengens, L. Pacchiardi, A. Ummadisingu, N. Widerman, J. P.  Onnela, A. Mira, "ABCpy:       A High-Performance Computing Perspective to Approximate Bayesian Computation", 2020, arXiv:1711.04694.
 
 ## License
 ABCpy is published under the BSD 3-clause license, see [here](LICENSE).

diff --git a/abcpy/approx_lhd.py b/abcpy/approx_lhd.py
@@ -27,8 +27,8 @@ def __init__(self, statistics_calc):
         raise NotImplemented
 
     @abstractmethod
-    def likelihood(self, y_obs, y_sim):
-        """To be overwritten by any sub-class: should compute the approximate likelihood 
+    def loglikelihood(self, y_obs, y_sim):
+        """To be overwritten by any sub-class: should compute the approximate loglikelihood
         value given the observed data set y_obs and the data set y_sim simulated from
         model set at the parameter value.
 
@@ -42,11 +42,14 @@ def likelihood(self, y_obs, y_sim):
         Returns
         -------
         float
-            Computed approximate likelihood.
+            Computed approximate loglikelihood.
         """
 
         raise NotImplemented
 
+    def likelihood(self, y_obs, y_sim):
+        return np.exp(self.loglikelihood(y_obs, y_sim))
+
 
 class SynLikelihood(Approx_likelihood):
     """This class implements the approximate likelihood function which computes the approximate
@@ -66,7 +69,7 @@ def __init__(self, statistics_calc):
         self.data_set = None
         self.statistics_calc = statistics_calc
 
-    def likelihood(self, y_obs, y_sim):
+    def loglikelihood(self, y_obs, y_sim):
         # print("DEBUG: SynLikelihood.likelihood().")
         if not isinstance(y_obs, list):
             # print("type(y_obs) : ", type(y_obs), " , type(y_sim) : ", type(y_sim))
@@ -97,21 +100,21 @@ def likelihood(self, y_obs, y_sim):
         mean_sim = np.mean(stat_sim, 0)
         lw_cov_, _ = ledoit_wolf(stat_sim)
         robust_precision_sim = np.linalg.inv(lw_cov_)
-        robust_precision_sim_det = np.linalg.det(robust_precision_sim)
+        sign_logdet, robust_precision_sim_logdet = np.linalg.slogdet(robust_precision_sim)  # we do not need sign
         # print("DEBUG: combining.")
         # we may have different observation; loop on those now:
         # likelihoods = np.zeros(self.stat_obs.shape[0])
         # for i, single_stat_obs in enumerate(self.stat_obs):
         #     x_new = np.einsum('i,ij,j->', single_stat_obs - mean_sim, robust_precision_sim, single_stat_obs - mean_sim)
         #     likelihoods[i] = np.exp(-0.5 * x_new)
         # do without for loop:
-        diff = self.stat_obs - mean_sim.reshape(1,-1)
+        diff = self.stat_obs - mean_sim.reshape(1, -1)
         x_news = np.einsum('bi,ij,bj->b', diff, robust_precision_sim, diff)
-        likelihoods = np.exp(-0.5 * x_news)
+        logliks = -0.5 * x_news
         # looks like we are exponentiating the determinant as well, which is wrong;
         # this is however a constant which should not change the algorithms afterwards.
-        factor = pow(np.sqrt((1 / (2 * np.pi)) * robust_precision_sim_det), self.stat_obs.shape[0])
-        return np.prod(likelihoods) * factor  # compute the product of the different likelihoods for each observation
+        logfactor = 0.5 * self.stat_obs.shape[0] * (np.log(1 / (2 * np.pi)) + robust_precision_sim_logdet)
+        return np.sum(logliks) + logfactor  # compute the sum of the different loglikelihoods for each observation
 
 
 class PenLogReg(Approx_likelihood, GraphTools):
@@ -165,10 +168,8 @@ def __init__(self, statistics_calc, model, n_simulate, n_folds=10, max_iter=1000
 
         self.stat_obs = None
         self.data_set = None
-
 
-
-    def likelihood(self, y_obs, y_sim):
+    def loglikelihood(self, y_obs, y_sim):
         if not isinstance(y_obs, list):
             raise TypeError('Observed data is not of allowed types')
 
@@ -206,10 +207,9 @@ def likelihood(self, y_obs, y_sim):
         groups += groups  # duplicate it as groups need to be defined for both datasets
         m = LogitNet(alpha=1, n_splits=self.n_folds, max_iter=self.max_iter, random_state=self.seed, scoring="log_loss")
         m = m.fit(X, y, groups=groups)
-        result = np.exp(-np.sum((m.intercept_+np.sum(np.multiply(m.coef_,self.stat_obs),axis=1)),axis=0))
-
-        return result
+        result = -np.sum((m.intercept_ + np.sum(np.multiply(m.coef_, self.stat_obs), axis=1)), axis=0)
 
+        return result
 
     def _simulate_ref_data(self, rng=np.random.RandomState()):
         """

diff --git a/abcpy/distances.py b/abcpy/distances.py
@@ -268,6 +268,8 @@ class LogReg(Distance):
     """This class implements a distance measure based on the classification
     accuracy [1]. The classification accuracy is calculated between two dataset d1 and d2 using
     logistics regression and return it as a distance. The maximum value of the distance is 1.0.
+    The logistic regression may not converge when using one single sample in each dataset (as for instance by putting
+    n_samples_per_param=1 in an inference routine).
 
     [1] Gutmann, M. U., Dutta, R., Kaski, S., & Corander, J. (2018). Likelihood-free inference via classification.
     Statistics and Computing, 28(2), 411-425.