In [14]:
from sklearn.metrics import  accuracy_score
from sklearn.datasets import load_iris
from itertools import combinations
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [15]:
iris = datasets.load_iris()
X1=iris.data
y1=iris.target
data= pd.DataFrame(iris.data)
data.columns = iris.feature_names
data['class'] = iris.target

# import warnings filter
from warnings import simplefilter
# ignore all future warnings
simplefilter(action='ignore', category=FutureWarning)

In [16]:
#data.head()
#y = data['class']
#X = data.drop('class', axis = 1)

In [17]:
class logisticregression():
    def __init__(self,train_data,train_labels,lr=0.01,batch_size=None,epoch=10,print_every = 10):
        dummy_once = np.ones((len(train_data),1))
        self.train_data = np.hstack((dummy_once,train_data))
        self.train_labels = train_labels
        
        self.params = np.zeros((len(self.train_data[0]),1))
        
        self.lr = lr
        self.epoch = epoch
        self.batch_size = batch_size
        self.print_every = print_every
        
    def sigmoid(self,x):
        return 1/(1+np.exp(-x))
    
    def cost(self,y,y_pred):
        return -np.mean(y*np.log(y_pred)+(1-y)*np.log(1-y_pred))
    
    def gradient(self,y,y_pred,x):
        return np.dot(x.T,(y_pred-y))
    
    def train(self):
        for i in range(self.epoch):
            y_pred = self.sigmoid(np.dot(self.train_data,self.params))
            loss = self.cost(self.train_labels,y_pred)
            
            gra = self.gradient(self.train_labels,y_pred,self.train_data)
            
            self.params -= self.lr*gra
            
            if self.print_every:
                if i%self.print_every == 0 or i == self.epoch-1:
                    print('Epoch : {}  Loss: {}'.format(i,loss))
    def predict(self,test_data):
        result = self.sigmoid(np.dot(test_data,self.params[1:])+self.params[0])
        result[result >= 0.5 ] = 1
        result[result < 0.5 ] = 0
        return result
    
    def evaluate(self,test_data,labels):
        accuracy = accuracy_score(self.predict(test_data),labels)
        return accuracy

In [49]:
log=LogisticRegression()

In [54]:
def cross_validate(data,k=5):
    #divide data into k folds
    data_splitted = np.vsplit(data,k)
    aggrigate_result = []
    for i in range(len(data_splitted)):
        train = []
        test = []
        items = [j for j in range(len(data_splitted)) if j !=i ]
        test = data_splitted[i]
        for item in items:
            if len(train) == 0:
                train = data_splitted[item]
            else:
                train = np.concatenate((train,data_splitted[item]), axis=0)
            
        logistic = logisticregression(train[:,:-1],train[:,-1:],epoch=10,print_every=None)
        logistic.train()
        
        result = logistic.evaluate(test[:,:-1],test[:,-1:])
        aggrigate_result.append(result)
        
    return aggrigate_result

In [36]:
def get_combinations(items,number):
    comb_list = []
    for c in combinations(items, number):
        c = list(c)
        c.sort()
        comb_list.append(c)
        
    return comb_list

In [53]:
get_combinations([1,2,3,4],2)

[[1, 2], [1, 3], [1, 4], [2, 3], [2, 4], [3, 4]]

In [52]:
def wrapper_forward(data,names,target_name,feature_count=2,cross_val_k = 5):
    classes = get_combinations(names,feature_count)
    wrapper_output = {}
    
    for i in classes:
        cols = np.concatenate((i,target_name),axis=None)
        sub_data = data[cols].values
        result = cross_validate(sub_data,cross_val_k)
        average = sum(result)/len(result)
        
        wrapper_output[str(i)]=average
        
    return wrapper_output

In [38]:
data.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [39]:
data = load_iris()

data = pd.concat((pd.DataFrame(data['data']),pd.DataFrame(data['target'])),axis=1)
data.columns= ['sepal length (cm)','sepal width (cm)','petal length (cm)','petal width (cm)','target']
data = data[(data['target']==0) | (data['target']==1)]
data_np = data.values
np.random.shuffle(data_np)

In [40]:
data.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [41]:
columns = list(data.columns)
columns.remove('target')

In [45]:
wrapper_forward(data,columns,['target'],2,10)

{"['sepal length (cm)', 'sepal width (cm)']": 0.9,
 "['petal length (cm)', 'sepal length (cm)']": 0.57,
 "['petal width (cm)', 'sepal length (cm)']": 0.7,
 "['petal length (cm)', 'sepal width (cm)']": 1.0,
 "['petal width (cm)', 'sepal width (cm)']": 1.0,
 "['petal length (cm)', 'petal width (cm)']": 0.97}