In [61]:
import numpy as np 
import pandas as pd
import sklearn 
from sklearn.datasets import load_iris  as li 
from sklearn.metrics import accuracy_score 
from sklearn.preprocessing import StandardScaler 
from itertools import combinations
from sklearn.model_selection import train_test_split 

In [62]:
class SBS:
    """Sequential Backward Selection: greedy one-at-a-time feature elimination.

    Starting from every column of ``X``, each round scores all subsets obtained
    by dropping exactly one of the currently selected features, keeps the
    best-scoring subset, and repeats until ``k_features`` columns remain.

    Parameters
    ----------
    k_features : int, default=2
        Number of features to keep. Must be at least 1. When it is not smaller
        than the number of columns of ``X``, no feature is removed.
    estimator : object, default=None
        Model exposing ``fit(X, y)`` and ``score(X, y)``. Required by ``fit``.
        It is never fitted itself; every evaluation works on a fresh copy.
    test_size : float or int, default=0.2
        Forwarded to ``train_test_split`` for the internal hold-out split.
    random_state : int or None, default=1
        Forwarded to ``train_test_split`` for the internal hold-out split.

    Attributes
    ----------
    indices_ : tuple of int
        Column indices of the finally selected features.
    subsets_ : list of tuple of int
        Subset kept after each round; ``subsets_[0]`` is the full feature set.
    scores_ : list
        Hold-out score of the corresponding entry of ``subsets_``.
    k_feature_score :
        Score of the final subset (``scores_[-1]``).
    scoring : callable
        Always ``accuracy_score``. Kept as an attribute for compatibility;
        ``calc_score`` relies on ``estimator.score`` and does not consult it.
    """

    def __init__(self, k_features=2, estimator=None, test_size=0.2, random_state=1):
        self.k_features = k_features
        self.scoring = accuracy_score
        self.estimator = estimator
        self.test_size = test_size
        self.random_state = random_state

    def fit(self, X, y):
        """Run the backward elimination on ``(X, y)`` and return ``self``.

        ``X`` must be two-dimensional and support ``X[:, columns]`` indexing.

        Raises
        ------
        ValueError
            If no estimator was supplied or ``k_features`` is smaller than 1.
        """
        # Fail early with a clear message instead of deep inside the search loop.
        if self.estimator is None:
            raise ValueError(
                "SBS needs an estimator providing fit() and score(); got None."
            )
        if self.k_features < 1:
            raise ValueError(
                f"k_features must be at least 1; got {self.k_features!r}."
            )

        _, dim = X.shape

        # Candidates are compared on a hold-out split carved out of the given data.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=self.test_size, random_state=self.random_state
        )

        # Baseline: the full feature set.
        self.indices_ = tuple(range(dim))
        self.subsets_ = [self.indices_]
        self.scores_ = [
            self.calc_score(X_train, y_train, X_test, y_test, self.indices_)
        ]

        while dim > self.k_features:
            # Every subset that drops exactly one currently selected feature.
            candidates = list(combinations(self.indices_, r=dim - 1))
            scores = [
                self.calc_score(X_train, y_train, X_test, y_test, subset)
                for subset in candidates
            ]

            # np.argmax returns the first maximum, so ties favour the earliest candidate.
            best = int(np.argmax(scores))
            self.indices_ = candidates[best]
            self.subsets_.append(self.indices_)
            self.scores_.append(scores[best])
            dim -= 1

        self.k_feature_score = self.scores_[-1]
        return self

    def transform(self, X):
        """Return ``X`` restricted to the columns selected by ``fit``."""
        return X[:, list(self.indices_)]

    def calc_score(self, X_train, y_train, X_test, y_test, p):
        """Fit a fresh copy of the estimator on columns ``p`` and return its test score."""
        # Imported locally so this class does not depend on an extra top-level import.
        from sklearn.base import clone

        columns = list(p)
        # Work on a clone so the caller's estimator is not left fitted on an
        # arbitrary candidate subset; safe=False falls back to a deep copy for
        # objects that are not scikit-learn estimators.
        model = clone(self.estimator, safe=False)
        model.fit(X_train[:, columns], y_train)
        return model.score(X_test[:, columns], y_test)


In [63]:
# Load the iris feature matrix and class labels in a single call
# (the dataset was previously loaded twice, once per attribute).
X, y = li(return_X_y=True)

# Hold out 20% of the samples; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Fit the scaler on the training split only and reuse its statistics for the
# test split, so no information from the test samples enters the scaling.
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

In [64]:
# Display the class labels of the training split created above.
y_train

array([1, 2, 1, 2, 2, 0, 1, 0, 1, 2, 2, 0, 2, 2, 1, 2, 0, 0, 0, 1, 0, 0,
       2, 2, 2, 2, 2, 1, 2, 1, 0, 2, 2, 0, 0, 2, 0, 2, 2, 1, 1, 2, 2, 0,
       1, 1, 2, 1, 2, 1, 0, 0, 0, 2, 0, 1, 2, 2, 0, 0, 1, 0, 2, 1, 2, 2,
       1, 2, 2, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 2, 2, 2, 0, 0, 1, 0, 2, 0,
       2, 2, 0, 2, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 2, 0,
       0, 2, 1, 2, 1, 2, 2, 1, 2, 0])

In [65]:
from sklearn.linear_model import LogisticRegression

# One-vs-rest logistic regression: the model scored on each candidate feature subset.
# NOTE(review): recent scikit-learn releases deprecate `multi_class`; confirm
# against the installed version.
lr = LogisticRegression(
    C=10,
    solver='lbfgs',
    multi_class='ovr',
)

# Eliminate features one at a time, all the way down to a single feature, so
# that `subsets_` records the subset kept at every size.
sbs = SBS(estimator=lr, k_features=1)
sbs.fit(X_train_std, y_train)

<__main__.SBS at 0x22f1abb4bd0>

In [66]:
# subsets_[i] is the subset kept after i elimination rounds; starting from the
# four standardized features, index 2 is therefore the two-feature subset.
best_k_feature = sbs.subsets_[2]

# Training matrix restricted to those two columns.
X_train_std[:, best_k_feature]

array([[ 0.31553662,  0.44767531],
       [ 2.2449325 ,  1.29769171],
       [-0.2873996 ,  0.05100098],
       [ 0.67729835,  1.01435291],
       [-0.04622511,  0.73101411],
       [-0.64916132, -1.30902526],
       [-0.40798684,  0.10766874],
       [-0.76974857, -1.36569302],
       [ 0.79788559,  0.44767531],
       [ 1.03906007,  1.12768843],
       [ 1.15964732,  0.95768515],
       [-0.89033581, -1.36569302],
       [ 0.19494938,  0.67434635],
       [ 0.5567111 ,  1.01435291],
       [ 0.91847283,  0.44767531],
       [ 2.2449325 ,  1.75103379],
       [-0.16681235, -1.19568974],
       [-1.01092305, -1.36569302],
       [-1.25209754, -1.36569302],
       [-0.76974857,  0.05100098],
       [-0.89033581, -1.30902526],
       [-0.40798684, -1.42236078],
       [ 2.2449325 ,  1.63769827],
       [ 1.28023456,  0.90101739],
       [ 1.76258353,  1.41102723],
       [ 0.67729835,  1.01435291],
       [ 0.43612386,  0.56101083],
       [ 0.19494938,  0.39100755],
       [ 2.2449325 ,

In [67]:
# Feature-index subsets kept by SBS, from the full set down to k_features.
sbs.subsets_

[(0, 1, 2, 3), (0, 1, 2), (0, 2), (2,)]