In [None]:
import warnings
import numpy as np
from depynd.information import mutual_information, conditional_mutual_information
class DI_MRMR():
    """Greedy feature selector driven by mutual-information scores.

    The first feature picked is the one with the largest mutual information
    with the label.  Every further feature is the remaining candidate with
    the largest score ``DR_i * theta`` where

    * ``theta`` is the candidate's mutual information with the label minus
      its mean mutual information with the already selected features, and
    * ``DR_i`` is the mean conditional mutual information between each
      selected feature and the label given the candidate, multiplied by
      ``C_ratio(...) + 1``.

    The information estimates come from ``depynd.information``.
    """

    def __init__(self, feature_num):
        """
        :param feature_num: number of features to select
        """
        self.feature_num = feature_num
        self._selected_features = []

    def fit(self, X, y):
        """
        Select ``feature_num`` columns of ``X`` and remember their indices.

        :param X: a two dimensional numpy array (samples x features)
        :param y: the label, a list or one dimension array
        :return: self
        :raises AssertionError: if ``X`` and ``y`` differ in length
        :raises ValueError: if ``feature_num`` is smaller than 1, ``X`` has
            no columns, or no candidate gets a comparable (non-NaN) score
        """
        # Raised explicitly (instead of ``assert``) so the check survives
        # ``python -O``; the exception type is kept for existing callers.
        if X.shape[0] != len(y):
            raise AssertionError("X and y not in the same length!")
        if self.feature_num < 1:
            raise ValueError(
                "feature_num must be at least 1, got {}".format(self.feature_num))
        if X.shape[1] == 0:
            raise ValueError("X has no feature columns to select from")

        if self.feature_num > X.shape[1]:
            self.feature_num = X.shape[1]
            warnings.warn("The feature_num has to be set less or equal to {}".format(X.shape[1]), UserWarning)

        relevance = self.feature_label_MIs(X, y)
        # Candidates are (column index, mutual information with the label).
        remaining = list(enumerate(relevance))
        selected = [remaining.pop(int(np.argmax(relevance)))]

        while len(selected) < self.feature_num:
            selected_cols = [col for col, _ in selected]
            best_score = float("-inf")
            best_pos = None
            for pos, (cand_col, cand_relevance) in enumerate(remaining):
                cand_values = X[:, cand_col]
                cond_mis = []
                redundancies = []
                for col in selected_cols:
                    cond_mis.append(
                        conditional_mutual_information(X[:, col], y, cand_values))
                    redundancies.append(
                        self.feature_feature_MIs(cand_values, X[:, col]))
                theta = cand_relevance - 1 / len(selected) * sum(redundancies)
                DR_i = (sum(cond_mis) / len(cond_mis)) * (
                    self.C_ratio(X, cand_values, selected_cols, y) + 1)
                score = DR_i * theta
                # ``>=`` keeps the original tie-break: the last of several
                # equally scored candidates wins.
                if score >= best_score:
                    best_score = score
                    best_pos = pos
            if best_pos is None:
                # Only reachable when every score compares False against
                # -inf, i.e. every score is NaN.
                raise ValueError(
                    "could not rank the remaining features: all scores are NaN")
            selected.append(remaining.pop(best_pos))

        self._selected_features = [col for col, _ in selected]
        return self

    def transform(self, X):
        """
        :param X: a two dimensional numpy array
        :return: the columns of ``X`` chosen by the last ``fit`` call
        """
        return X[:, self._selected_features]

    def fit_transform(self, X, y):
        """Run ``fit(X, y)`` and return ``transform(X)``."""
        self.fit(X, y)
        return self.transform(X)

    def feature_feature_MIs(self, x, y):
        """Return the mutual information between the two vectors."""
        return mutual_information(x, y)

    def feature_label_MIs(self, arr, y):
        """
        :param arr: a two dimensional numpy array
        :param y: the label
        :return: list with the mutual information of each column and ``y``
        """
        _, n_features = arr.shape
        return [mutual_information(arr[:, i], y) for i in range(n_features)]

    def important_features(self):
        """Return the column indices chosen by the last ``fit`` call."""
        return self._selected_features

    def C_ratio(self, X, x, selected_y, c):
        """
        Normalised change in the information ``x`` carries about ``c`` once
        the selected features are conditioned on.

        :param X: the full data array
        :param x: values of the candidate feature
        :param selected_y: column indices of the already selected features
        :param c: the label
        :return: ``2 * (mean_j I(x; c | X[:, j]) - I(x; c)) / (H(x) + H(c))``,
            or ``0.0`` when the entropy sum is zero
        """
        cond_mis = [conditional_mutual_information(x, c, X[:, col])
                    for col in selected_y]
        gain = sum(cond_mis) / len(cond_mis) - mutual_information(x, c)
        total_entropy = self.get_entropy(x) + self.get_entropy(c)
        if total_entropy == 0:
            # Both x and c are constant: there is nothing to normalise, and
            # dividing would only produce NaN/inf.
            return 0.0
        return 2 * (gain / total_entropy)

    def get_entropy(self, data_df):
        """
        :param data_df: array-like of discrete values
        :return: Shannon entropy (base 2) of the empirical value
            distribution, ``0.0`` for empty input
        """
        values = np.asarray(data_df)
        if values.size == 0:
            return 0.0
        # One sort-based pass replaces the per-value counting loop.
        _, counts = np.unique(values, return_counts=True)
        probs = counts / values.size
        # ``0.0 - ...`` avoids returning a negative zero for constant input.
        return 0.0 - np.sum(probs * np.log2(probs))