# Feature Droper

A transformer object that delete columns, according to the selected features

> from mlearner.preprocessing import FeatureDroper

## Example 1 - Droper features

Use the `fit` method to fit the column droped (e.g., the training dataset) to a new `FeatureDroper` object. Then, call the `transform` method on the same dataframe to apply drop.

In [2]:
import pandas as pd
from droper import FeatureDropper
X_train = pd.DataFrame({"a": [0, 1], "b": [10, 11], "c": [20, 21]})
fd = FeatureDropper(drop=["a", "b"]).fit(X_train)
fd.transform(X_train)

Unnamed: 0,c
0,20
1,21


## API

In [22]:
with open('../../api_modules/mlearner.preprocessing/FeatureDropper.md', 'r') as f:
    print(f.read())

FileNotFoundError: [Errno 2] No such file or directory: '../../api_modules/mlearner.preprocessing/FeatureDropper.md'

In [26]:
set(X_train.columns)

{'a', 'b', 'c'}

In [28]:
set(["a", "b"])

{'a', 'b'}

In [33]:
X_train.columns.tolist()

['a', 'b', 'c']

In [52]:
lista = ["a", "d", "g", "c"]
for i in lis:
    if i not in X_train.columns.tolist():
        print("NO")



NO


In [53]:
[i for i in lista if i not in X_train.columns.tolist()]

['d', 'g']

In [73]:
import numpy as np
ok = [2, 2, 2, float('nan'), 4, 5]
ok
# np.median(ok)

[2, 2, 2, nan, 4, 5]

In [85]:
data_median = pd.DataFrame({"a": [2, 2, 2, 2, 4, 5], "c": ["OK", 2, 2, 2 ,4, 5]})
data_median

Unnamed: 0,a,c
0,2,OK
1,2,2
2,2,2
3,2,2
4,4,4
5,5,5


In [78]:
col = ["a"]

In [81]:
 isinstance(col, list)

True

In [103]:
data = pd.DataFrame({"a": [2, 2, 2, float('nan'), 4, 5], "b": ["OK", 2, 2, 2, 4, 5]})
data1 = pd.DataFrame({"a": [2, 5, 2, 5, 4, 5], "b": [2, 2, 2, 2, 4, 5]})


In [119]:
data.mean()

a    3.0
dtype: float64

In [92]:
_lista = [i for i in ["a", "b"] if i not in data.columns.tolist()]
_lista

[]

In [107]:
_types = data1.dtypes

In [133]:
data1

Unnamed: 0,a,b
0,2,2
1,5,2
2,2,2
3,5,2
4,4,4
5,5,5


In [139]:
from sklearn.preprocessing import Imputer




array([[2., 2.],
       [5., 2.],
       [2., 2.],
       [5., 2.],
       [4., 4.],
       [5., 5.]])

In [10]:
data = pd.DataFrame({"a": [2, 2, 2, float('nan'), 4, 5], "b": ["OK", "OK", "OK", float('nan'), "NOK", "OK"]})
data

Unnamed: 0,a,b
0,2.0,OK
1,2.0,OK
2,2.0,OK
3,,
4,4.0,NOK
5,5.0,OK


In [14]:
data["b"] = data["b"].fillna(method="ffill")
data

Unnamed: 0,a,b
0,2.0,OK
1,2.0,OK
2,2.0,OK
3,,OK
4,4.0,NOK
5,5.0,OK


In [16]:
from sklearn.base import BaseEstimator, TransformerMixin
class FillNaTransformer_forward(BaseEstimator, TransformerMixin):

    """This transformer handles missing values closer forward.

    Attributes
    ----------
    columns: list of columns to transformer [n_columns]

    Examples
    --------
    For usage examples, please see
    https://jaisenbe58r.github.io/MLearner/user_guide/preprocessing/FillNaTransformer_forward/

    """
    def __init__(self, columns=None):
        """Init replace missing values."""
        if columns is not None:
            if isinstance(columns, list) or isinstance(columns, tuple):
                self.columns = columns
            else:
                raise NameError("Invalid type {}".format(type(columns)))
        else:
            self.columns = columns

    def fit(self, X, y=None, **fit_params):
        """Gets the columns to make a replace missing values.

        Parameters
        ----------
        X : {Dataframe}, shape = [n_samples, n_features]
            Dataframe, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        --------
        self

        """
        if not isinstance(X, pd.core.frame.DataFrame):
            raise NameError("Invalid type {}".format(type(X)))

        if self.columns is None:
            self.columns = X.columns

        _lista = [i for i in self.columns if i not in X.columns.tolist()]
        if len(_lista) > 0:
            raise NameError("The columns {} no exist in Dataframe".format(_lista))
        self._fitted = True
        return self

    def transform(self, X):
        """this transformer handles missing values.

        Parameters
        ----------
        X : {Dataframe}, shape = [n_samples, n_features]
            Dataframe of samples, where n_samples is the number of samples and
            n_features is the number of features.

        Returns
        -------
        X_transform : {Dataframe}, shape = [n_samples, n_features]
            A copy of the input Dataframe with the columns replaced.

        """
        if not hasattr(self, "_fitted"):
            raise AttributeError("FillNaTransformer_backward has not been fitted, yet.")

        if not isinstance(X, pd.core.frame.DataFrame):
            raise NameError("Invalid type {}".format(type(X)))

        X[self.columns] = X[self.columns].fillna(method="ffill")
        return X


In [20]:
ft1 = FillNaTransformer_forward(columns=["a"])
ft1.fit(data)
ft1.transform(data)

Unnamed: 0,a,b
0,2.0,OK
1,2.0,OK
2,2.0,OK
3,2.0,OK
4,4.0,NOK
5,5.0,OK


In [21]:
ft1 = FillNaTransformer_forward(columns=["b"])
ft1.fit(data)
ft1.transform(data)

Unnamed: 0,a,b
0,2.0,OK
1,2.0,OK
2,2.0,OK
3,2.0,OK
4,4.0,NOK
5,5.0,OK
