In [1]:
import numpy as np
import sklearn
from sklearn.exceptions import NotFittedError
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline

In [2]:
class MyTransformer(sklearn.base.TransformerMixin, sklearn.base.BaseEstimator):
    """Transformer that keeps only the first ``k_first`` columns of ``X``.

    Parameters
    ----------
    k_first : int, default=1
        Number of leading columns to keep. A value larger than the number of
        columns seen in ``fit`` keeps every column; ``0`` keeps none.

    Attributes
    ----------
    cols : list of bool or None
        Column mask built by ``fit`` (``True`` marks a kept column).
        ``None`` until ``fit`` has been called.
    """

    def __init__(self, k_first=1):
        super().__init__()
        self.k_first = k_first
        self.cols = None

    def fit(self, X, y=None, **fit_params):
        """Build the boolean column mask from the width of ``X``.

        Parameters
        ----------
        X : 2-D array-like exposing ``shape``
            Only ``X.shape[1]`` (the number of columns) is read.
        y : ignored
            Optional so that ``fit(X)`` and ``fit_transform(X)`` work without
            labels; the value is never used.
        **fit_params : ignored

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``k_first`` is negative.
        """
        # A negative count would otherwise yield a mask longer than the
        # column count and only fail later, inside ``transform``.
        if self.k_first < 0:
            raise ValueError(
                "k_first must be non-negative, got {!r}".format(self.k_first)
            )
        n_features = X.shape[1]
        n_kept = min(self.k_first, n_features)
        self.cols = [True] * n_kept + [False] * (n_features - n_kept)
        return self

    def transform(self, X):
        """Return the columns of ``X`` selected by the fitted mask.

        ``X`` must support ``X[:, mask]`` indexing with a list of booleans
        (e.g. a 2-D numpy array).

        Raises
        ------
        NotFittedError
            If ``fit`` has not been called yet.
        """
        # Without this guard ``X[:, None]`` would silently insert a new axis
        # instead of selecting columns.
        if self.cols is None:
            raise NotFittedError(
                "This {} instance is not fitted yet. Call 'fit' before using "
                "'transform'.".format(type(self).__name__)
            )
        return X[:, self.cols]

In [3]:
# Toy data set: four samples with three integer features each.
X = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
        [10, 5, 12],
    ]
)
# One binary class label per sample (row of X).
y = np.array([1, 0, 1, 0])

In [4]:
# Display the label vector to confirm its contents.
y

array([1, 0, 1, 0])

In [5]:
# Two column selectors: t1 keeps the first two features, t2 only the first.
t1, t2 = MyTransformer(k_first=2), MyTransformer(k_first=1)

In [6]:
# Fit each selector on the data, then project X onto the columns it keeps.
t1.fit(X, y)
Z1 = t1.transform(X)

t2.fit(X, y)
Z2 = t2.transform(X)

In [7]:
from sklearn.linear_model import LogisticRegression

In [8]:
# Train one classifier per feature view (Z1: two columns, Z2: one column),
# each with its own regularisation setting C.
logit1 = LogisticRegression(C=3).fit(Z1, y)

logit2 = LogisticRegression(C=10)
# Kept as the cell's final expression so the fitted estimator is displayed.
logit2.fit(Z2, y)

LogisticRegression(C=10, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [9]:
# Predictions of logit1 on Z1, the same features it was fitted on.
logit1.predict(Z1)

array([1, 1, 1, 0])

In [10]:
# Predictions of logit2 on Z2, the same features it was fitted on.
logit2.predict(Z2)

array([1, 1, 0, 0])

In [11]:
# Training-set accuracy of logit1: scored on the same Z1/y used for fitting,
# so this is not a held-out estimate.
accuracy_score(y, logit1.predict(Z1))

0.75

In [12]:
# Training-set accuracy of logit2: scored on the same Z2/y used for fitting,
# so this is not a held-out estimate.
accuracy_score(y, logit2.predict(Z2))

0.5

In [13]:
from sklearn.ensemble import VotingClassifier

In [14]:
# Chain selector t1 with classifier logit1 so the pair accepts the raw X.
t_log_1 = Pipeline(steps=[('t', t1), ('c', logit1)])

In [15]:
# Chain selector t2 with classifier logit2 so the pair accepts the raw X.
t_log_2 = Pipeline(steps=[('t', t2), ('c', logit2)])

In [16]:
# Ensemble of both pipelines with equal weights; the voting mode is left at
# VotingClassifier's default.
hard = VotingClassifier(
    estimators=[
        ('a', t_log_1),
        ('b', t_log_2),
    ],
    weights=[1.0, 1.0],
)

In [17]:
# Fit the ensemble on the raw feature matrix; each member pipeline applies its
# own column selector before its classifier.
hard.fit(X,y)

VotingClassifier(estimators=[('a',
                              Pipeline(memory=None,
                                       steps=[('t', MyTransformer(k_first=2)),
                                              ('c',
                                               LogisticRegression(C=3,
                                                                  class_weight=None,
                                                                  dual=False,
                                                                  fit_intercept=True,
                                                                  intercept_scaling=1,
                                                                  l1_ratio=None,
                                                                  max_iter=100,
                                                                  multi_class='auto',
                                                                  n_jobs=None,
                                                 

In [18]:
# Accuracy of the ensemble on the same X/y it was fitted on (training data).
accuracy_score(y, hard.predict(X))

0.5

In [19]:
# Ensemble predictions for the training samples.
hard.predict(X)

array([1, 1, 0, 0])