dpuenteramirez · dpuenteramirez · May 8, 2022 · May 8, 2022 · May 8, 2022 · May 8, 2022
diff --git a/instance_selection/_CNN.py b/instance_selection/_CNN.py
@@ -8,11 +8,10 @@
 import numpy as np
 import pandas as pd
 
-from .utils import transform, delete_multiple_element
+from .utils import delete_multiple_element, transform
 
 
 class CNN:
-
     """
     Hart, P. (1968). The condensed nearest neighbor rule (corresp.). IEEE
     transactions on information theory, 14(3), 515-516.
@@ -54,7 +53,7 @@ def filter(self, samples, y):
         samples = transform(samples, y)
         store_classes, indexes = np.unique(samples.target, return_index=True)
         store_classes = store_classes.tolist()
-        store = [samples['data'][x] for x in indexes]
+        store = [samples["data"][x] for x in indexes]
 
         handbag = []
 
@@ -82,8 +81,8 @@ def filter(self, samples, y):
             delete_multiple_element(handbag, indexes)
         del handbag
         samples = pd.DataFrame(store, columns=self.x_attr)
-        y = pd.DataFrame(np.array(store_classes, dtype=object).flatten().astype(
-            int))
+        y = pd.DataFrame(
+            np.array(store_classes, dtype=object).flatten().astype(int))
 
         return samples, y
 
@@ -106,4 +105,4 @@ class of the sample in the store that is closest to the sample
         euc = np.array(euc)
         euc_nn = np.amin(euc)
         index_nn = np.ravel(np.where(euc == euc_nn))
-        return store_classes[index_nn[0]]
+        return store_classes[index_nn[0]]
diff --git a/instance_selection/_DROP3.py b/instance_selection/_DROP3.py
@@ -16,7 +16,6 @@
 
 
 class DROP3:
-
     """
     Wilson, D. R., & Martinez, T. R. (2000). Reduction techniques for
     instance-based learning algorithms. Machine learning, 38(3), 257-286.
@@ -63,11 +62,17 @@ def filter(self, samples, y):
         :param y: DataFrame.
         :return: the input dataset with the remaining samples.
         """
-        initial_distances, initial_samples, initial_targets, knn, \
-            samples_info = self._create_variables(samples, y)
-
-        self._find_associates(initial_distances, initial_samples,
-                              initial_targets, knn, samples_info)
+        (
+            initial_distances,
+            initial_samples,
+            initial_targets,
+            knn,
+            samples_info,
+        ) = self._create_variables(samples, y)
+
+        self._find_associates(
+            initial_distances, initial_samples, initial_targets, knn, samples_info
+        )
 
         initial_distances.sort(key=lambda x: x[2], reverse=True)
 
@@ -79,32 +84,37 @@ def filter(self, samples, y):
             with_, without = self._with_without(tuple(x_sample), samples_info)
 
             if without >= with_:
-                initial_distances = initial_distances[:index_x - removed] + \
-                                    initial_distances[index_x - removed + 1:]
+                initial_distances = (
+                    initial_distances[: index_x - removed]
+                    + initial_distances[index_x - removed + 1:]
+                )
                 removed += 1
 
                 for a_associate_of_x in samples_info[(tuple(x_sample))][1]:
                     a_neighs, remaining_samples = self._remove_from_neighs(
-                        a_associate_of_x, initial_distances,
-                        samples_info, x_sample)
+                        a_associate_of_x, initial_distances, samples_info, x_sample
+                    )
 
                     knn = NearestNeighbors(
                         n_neighbors=self.nearest_neighbors + 2,
-                        n_jobs=1, p=self.power_parameter)
+                        n_jobs=1,
+                        p=self.power_parameter,
+                    )
                     knn.fit(remaining_samples)
                     _, neigh_ind = knn.kneighbors([a_associate_of_x])
-                    possible_neighs = [initial_distances[x][0] for x in
-                                       neigh_ind[0]]
+                    possible_neighs = [initial_distances[x][0]
+                                       for x in neigh_ind[0]]
 
-                    self._find_new_neighs(a_associate_of_x, a_neighs,
-                                          possible_neighs, samples_info)
+                    self._find_new_neighs(
+                        a_associate_of_x, a_neighs, possible_neighs, samples_info
+                    )
 
                     new_neigh = a_neighs[-1]
-                    samples_info[tuple(new_neigh)][1].append(
-                        a_associate_of_x)
+                    samples_info[tuple(new_neigh)][1].append(a_associate_of_x)
 
-        samples = pd.DataFrame([x for x, _, _ in initial_distances],
-                               columns=self.x_attr)
+        samples = pd.DataFrame(
+            [x for x, _, _ in initial_distances], columns=self.x_attr
+        )
         y = pd.DataFrame([x for _, x, _ in initial_distances])
 
         return samples, y
@@ -122,23 +132,24 @@ def _create_variables(self, samples, y):
         self.x_attr = samples.keys()
         samples = transform(samples, y)
         s = copy.deepcopy(samples)
-        initial_samples = s['data']
-        initial_targets = s['target']
-        initial_samples, samples_index = np.unique(ar=initial_samples,
-                                                   return_index=True, axis=0)
+        initial_samples = s["data"]
+        initial_targets = s["target"]
+        initial_samples, samples_index = np.unique(
+            ar=initial_samples, return_index=True, axis=0
+        )
         initial_targets = initial_targets[samples_index]
-        knn = NearestNeighbors(n_neighbors=self.nearest_neighbors + 2, n_jobs=1,
-                               p=self.power_parameter)
+        knn = NearestNeighbors(
+            n_neighbors=self.nearest_neighbors + 2, n_jobs=1, p=self.power_parameter
+        )
         knn.fit(initial_samples)
-        samples_info = {tuple(x): [[], [], y] for x, y in zip(initial_samples,
-                                                              initial_targets)}
+        samples_info = {
+            tuple(x): [[], [], y] for x, y in zip(initial_samples, initial_targets)
+        }
         initial_distances = []
-        return initial_distances, initial_samples, initial_targets, knn, \
-            samples_info
+        return initial_distances, initial_samples, initial_targets, knn, samples_info
 
     @staticmethod
-    def _find_new_neighs(a_associate_of_x, a_neighs, possible_neighs,
-                         samples_info):
+    def _find_new_neighs(a_associate_of_x, a_neighs, possible_neighs, samples_info):
         """
         > The function takes a sample, finds its neighbors, and then checks if
         any of the neighbors are not already in the list of neighbors. If
@@ -162,8 +173,9 @@ def _find_new_neighs(a_associate_of_x, a_neighs, possible_neighs,
         samples_info[tuple(a_associate_of_x)][0] = a_neighs
 
     @staticmethod
-    def _remove_from_neighs(a_associate_of_x, initial_distances,
-                            samples_info, x_sample):
+    def _remove_from_neighs(
+        a_associate_of_x, initial_distances, samples_info, x_sample
+    ):
         """
         > It removes the sample `x_sample` from the list of neighbors of
         `a_associate_of_x` and returns the updated list of neighbors of
@@ -191,8 +203,9 @@ def _remove_from_neighs(a_associate_of_x, initial_distances,
         return a_neighs, remaining_samples
 
     @staticmethod
-    def _find_associates(initial_distances, initial_samples, initial_targets,
-                         knn, samples_info):
+    def _find_associates(
+        initial_distances, initial_samples, initial_targets, knn, samples_info
+    ):
         """
         For each sample in the initial set, find the closest sample from the
         other class and store it in the initial_distances list
@@ -245,12 +258,13 @@ def _with_without(x_sample, samples_info):
         associates_targets = [samples_info[tuple(x)][2] for x in x_associates]
         associates_neighs = [samples_info[tuple(x)][0] for x in x_associates]
 
-        for _, a_target, a_neighs in zip(x_associates,
-                                         associates_targets,
-                                         associates_neighs):
+        for _, a_target, a_neighs in zip(
+            x_associates, associates_targets, associates_neighs
+        ):
 
-            neighs_targets = np.ravel(np.array([samples_info[tuple(x)][2] for x
-                                                in a_neighs])).astype(int)
+            neighs_targets = np.ravel(
+                np.array([samples_info[tuple(x)][2] for x in a_neighs])
+            ).astype(int)
             neighs_targets = neighs_targets.tolist()
 
             count = np.bincount(neighs_targets[:-1])
@@ -261,8 +275,9 @@ def _with_without(x_sample, samples_info):
             for index_a, neigh in enumerate(a_neighs):
                 if np.array_equal(neigh, x_sample):
                     break
-            count = np.bincount(neighs_targets[:index_a] + neighs_targets[
-                                                           index_a + 1:])
+            count = np.bincount(
+                neighs_targets[:index_a] + neighs_targets[index_a + 1:]
+            )
             max_class = np.where(count == np.amax(count))[0][0]
             if max_class == a_target:
                 without += 1

diff --git a/instance_selection/_ENN.py b/instance_selection/_ENN.py
@@ -13,7 +13,6 @@
 
 
 class ENN:
-
     """
     Wilson, D. L. (1972). Asymptotic properties of nearest neighbor rules
     using edited data. IEEE Transactions on Systems, Man, and
@@ -62,12 +61,12 @@ def _neighs(self, s_samples, s_targets, index, removed):
         """
         x_sample = s_samples[index - removed]
         x_target = s_targets[index - removed]
-        knn = NearestNeighbors(n_jobs=-1,
-                               n_neighbors=self.nearest_neighbors, p=2)
-        samples_not_x = s_samples[:index - removed] + s_samples[
-                                                      index - removed + 1:]
-        targets_not_x = s_targets[:index - removed] + s_targets[
-                                                      index - removed + 1:]
+        knn = NearestNeighbors(
+            n_jobs=-1, n_neighbors=self.nearest_neighbors, p=2)
+        samples_not_x = s_samples[: index - removed] + \
+            s_samples[index - removed + 1:]
+        targets_not_x = s_targets[: index - removed] + \
+            s_targets[index - removed + 1:]
         knn.fit(samples_not_x)
         _, neigh_ind = knn.kneighbors([x_sample])
 
@@ -88,16 +87,18 @@ def filter(self, samples, y):
         """
         self.x_attr = samples.keys()
         samples = transform(samples, y)
-        size = len(samples['data'])
-        s_samples = list(samples['data'])
-        s_targets = list(samples['target'])
+        size = len(samples["data"])
+        s_samples = list(samples["data"])
+        s_targets = list(samples["target"])
         removed = 0
 
         for index in range(size):
-            _, x_target, targets_not_x, samples_not_x, neigh_ind = \
-                self._neighs(s_samples, s_targets, index, removed)
+            _, x_target, targets_not_x, samples_not_x, neigh_ind = self._neighs(
+                s_samples, s_targets, index, removed
+            )
             y_targets = np.ravel(
-                np.array([targets_not_x[x] for x in neigh_ind[0]])).astype(int)
+                np.array([targets_not_x[x] for x in neigh_ind[0]])
+            ).astype(int)
             count = np.bincount(y_targets)
             max_class = np.where(count == np.amax(count))[0][0]
             if max_class != x_target:
@@ -110,8 +111,7 @@ def filter(self, samples, y):
 
         return samples, y
 
-    def filter_original_complete(self, original, original_y, complete,
-                                 complete_y):
+    def filter_original_complete(self, original, original_y, complete, complete_y):
         """
         Modification of the Wilson Editing algorithm.
 
@@ -129,17 +129,19 @@ def filter_original_complete(self, original, original_y, complete,
         :return: the input dataset with the remaining samples.
         """
         self.x_attr = original.keys()
-        original, complete = transform_original_complete(original, original_y,
-                                                         complete, complete_y)
-        size = len(complete['data'])
-        s_samples = list(complete['data'])
-        s_targets = list(complete['target'])
-        o_samples = list(original['data'])
+        original, complete = transform_original_complete(
+            original, original_y, complete, complete_y
+        )
+        size = len(complete["data"])
+        s_samples = list(complete["data"])
+        s_targets = list(complete["target"])
+        o_samples = list(original["data"])
         removed = 0
 
         for index in range(size):
-            x_sample, x_target, targets_not_x, samples_not_x, neigh_ind = \
-                self._neighs(s_samples, s_targets, index, removed)
+            x_sample, x_target, targets_not_x, samples_not_x, neigh_ind = self._neighs(
+                s_samples, s_targets, index, removed
+            )
             y_targets = [targets_not_x[x] for x in neigh_ind[0]]
             count = np.bincount(np.ravel(y_targets))
             max_class = np.where(count == np.amax(count))[0][0]