In [20]:
from sklearn.neighbors import KDTree
import numpy as np

class ReliefF(object):

    """Feature selection using data-mined expert knowledge.

    Based on the ReliefF algorithm as introduced in:

    Kononenko, Igor et al. Overcoming the myopia of inductive learning algorithms with RELIEFF (1997), Applied Intelligence, 7(1), p39-55

    Attributes
    ----------
    feature_scores: numpy.ndarray {n_features} or None
        Accumulated score of every feature; None until `fit` is called.
    top_features: numpy.ndarray {n_features} or None
        Feature indices ordered from highest to lowest score; None until `fit` is called.
    tree: KDTree or None
        Tree built over the training instances by `fit`.
    n_neighbors: int
        Number of neighbors requested per training instance.

    """

    def __init__(self, n_neighbors=100):
        """Sets up ReliefF to perform feature selection.

        Parameters
        ----------
        n_neighbors: int (default: 100)
            The number of neighbors to consider when assigning feature importance scores.
            More neighbors results in more accurate scores, but takes longer.
            If the training set holds fewer than `n_neighbors + 1` instances,
            `fit` uses every other instance as a neighbor.

        Returns
        -------
        None

        """

        self.feature_scores = None
        self.top_features = None
        self.tree = None
        self.n_neighbors = n_neighbors

    def fit(self, X, y):
        """Computes the feature importance scores from the training data.

        For every training instance and each of its nearest neighbors, a feature
        gains one point when "feature values are equal" agrees with "labels are
        equal" and loses one point otherwise. Feature values are compared with
        exact equality.

        Parameters
        ----------
        X: array-like {n_samples, n_features}
            Training instances to compute the feature importance scores from
        y: array-like {n_samples}
            Training labels

        Returns
        -------
        top_features: numpy.ndarray {n_features}
            Feature indices sorted from the highest to the lowest score
            (also stored in `self.top_features`).

        Raises
        ------
        ValueError
            If X is not 2-dimensional, is empty, or y does not hold one label per row of X.

        """
        # Accept lists, DataFrames and Series: positional indexing below needs ndarrays.
        X = np.asarray(X)
        y = np.asarray(y).ravel()

        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional {n_samples, n_features}")
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape[0] != n_samples:
            raise ValueError("y must contain exactly one label per row of X")

        self.feature_scores = np.zeros(n_features)
        self.tree = KDTree(X)

        # One extra neighbor is requested because the query point is itself part
        # of the tree; the tree rejects k larger than the number of instances.
        k = min(self.n_neighbors + 1, n_samples)
        _, indices = self.tree.query(X, k=k)

        for source_index, candidates in enumerate(indices):
            # Drop the instance itself by index rather than by position: with
            # duplicated rows the first match is not guaranteed to be self.
            neighbors = candidates[candidates != source_index][:k - 1]

            similar_features = X[neighbors] == X[source_index]
            label_match = y[neighbors] == y[source_index]

            # Matching labels reward matching features and punish differing ones;
            # differing labels do the opposite. Both cases reduce to "feature
            # equality agrees with label equality".
            agreement = similar_features == label_match[:, np.newaxis]
            self.feature_scores += np.where(agreement, 1., -1.).sum(axis=0)

        self.top_features = np.argsort(self.feature_scores)[::-1]
        return self.top_features

In [25]:
#working relief
import pandas as pd
import relieff
# `datasets` is needed by load_iris() below; importing it here keeps the cell
# runnable on a fresh kernel instead of relying on an import from another cell.
from sklearn import datasets

# Wine data on the author's machine. The frame is loaded but not used below:
# the wine-based X / Y assignments are commented out in favour of iris.
url = '/home/d19125691/Documents/Experiments/ontologyDCQ/onto-DCQ-FS/datasets/working/1. Wine/wine.data'
df = pd.read_csv(url, index_col=False)
#X = df.iloc[: , 1:]
#Y = df.iloc[:, 0]
iris = datasets.load_iris()
X = iris.data
Y = iris.target
r = ReliefF()

print(r.fit(X,Y)) # Feature indices ranked from most to least important


[3 2 0 1]


array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [10]:
import pandas as pd
import numpy as np
from sklearn.pipeline import make_pipeline
from skrebate import ReliefF
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

# Absolute location of the wine data file on the author's machine.
url = '/home/d19125691/Documents/Experiments/ontologyDCQ/onto-DCQ-FS/datasets/working/1. Wine/wine.data'

# NOTE(review): read_csv treats the first row as a header by default; if
# wine.data has no header row, pass header=None -- confirm against the file.
df = pd.read_csv(filepath_or_buffer=url, index_col=False)

# Column 0 goes to Y, every remaining column goes to X
# (presumably labels and features respectively -- verify against the data).
Y, X = df.iloc[:, 0], df.iloc[:, 1:]

# Selector configured to keep 2 features using 100 neighbors; it is not fitted here.
clf = ReliefF(n_neighbors=100, n_features_to_select=2)

In [14]:
from ReliefF import ReliefF
from ReliefF import ReliefF
import numpy as np
from sklearn import datasets
import pandas as pd

# --- Two-class example: 5 samples, 3 features ---
data = np.array([
    [9, 2, 2],
    [5, 1, 0],
    [9, 3, 2],
    [8, 3, 1],
    [6, 0, 0],
])
target = np.array([0, 0, 1, 1, 1])

# Keep the 2 best features, judging each sample by its single nearest neighbor.
fs = ReliefF(n_features_to_keep=2, n_neighbors=1)
X_train = fs.fit_transform(data, target)
print(X_train)
print("--------------")
# print() applies str() to each argument, so sep="" yields the same text as
# explicit string concatenation.
print("(No. of tuples, No. of Columns before ReliefF) : ", data.shape,
      "\n(No. of tuples , No. of Columns after ReliefF) : ", X_train.shape,
      sep="")


# --- Multi-class example: iris ---
iris = datasets.load_iris()
X, Y = iris.data, iris.target

# Keep the 2 best features using 20 neighbors per sample.
fs = ReliefF(n_features_to_keep=2, n_neighbors=20)
X_train = fs.fit_transform(X, Y)
print("(No. of tuples, No. of Columns before ReliefF) : ", iris.data.shape,
      "\n(No. of tuples, No. of Columns after ReliefF) : ", X_train.shape,
      sep="")

[[2 9]
 [1 5]
 [3 9]
 [3 8]
 [0 6]]
--------------
(No. of tuples, No. of Columns before ReliefF) : (5, 3)
(No. of tuples , No. of Columns after ReliefF) : (5, 2)
(No. of tuples, No. of Columns before ReliefF) : (150, 4)
(No. of tuples, No. of Columns after ReliefF) : (150, 2)


In [15]:
# Display the array last assigned to X_train (the iris fit_transform result
# when the previous cell was run). This prints every row; slice it
# (e.g. X_train[:5]) for a shorter preview.
X_train

array([[0.2, 1.4],
       [0.2, 1.4],
       [0.2, 1.3],
       [0.2, 1.5],
       [0.2, 1.4],
       [0.4, 1.7],
       [0.3, 1.4],
       [0.2, 1.5],
       [0.2, 1.4],
       [0.1, 1.5],
       [0.2, 1.5],
       [0.2, 1.6],
       [0.1, 1.4],
       [0.1, 1.1],
       [0.2, 1.2],
       [0.4, 1.5],
       [0.4, 1.3],
       [0.3, 1.4],
       [0.3, 1.7],
       [0.3, 1.5],
       [0.2, 1.7],
       [0.4, 1.5],
       [0.2, 1. ],
       [0.5, 1.7],
       [0.2, 1.9],
       [0.2, 1.6],
       [0.4, 1.6],
       [0.2, 1.5],
       [0.2, 1.4],
       [0.2, 1.6],
       [0.2, 1.6],
       [0.4, 1.5],
       [0.1, 1.5],
       [0.2, 1.4],
       [0.2, 1.5],
       [0.2, 1.2],
       [0.2, 1.3],
       [0.1, 1.4],
       [0.2, 1.3],
       [0.2, 1.5],
       [0.3, 1.3],
       [0.3, 1.3],
       [0.2, 1.3],
       [0.6, 1.6],
       [0.4, 1.9],
       [0.3, 1.4],
       [0.2, 1.6],
       [0.2, 1.4],
       [0.2, 1.5],
       [0.2, 1.4],
       [1.4, 4.7],
       [1.5, 4.5],
       [1.5,