In [1]:
from sklearn import datasets
iris = datasets.load_iris()
X = iris.data
y = iris.target

In [38]:
from sklearn.model_selection import train_test_split
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import cross_val_score , StratifiedKFold
from sklearn import metrics
from sklearn.model_selection import learning_curve
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.cross_validation import KFold

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state =42)

In [4]:
classifiers = []

logreg_tuned_parameters = [{'C': np.logspace(-1, 2, 4),'penalty':['l1','l2']}]
classifiers.append(["Logistic Regression", LogisticRegression(), logreg_tuned_parameters])

svm_tuned_parameters = [{'kernel': ['linear','rbf'],
                             'C': np.logspace(-1, 2, 4),
                             'gamma': np.logspace(-4, 0, 5)}]
classifiers.append(["SVM", SVC(), svm_tuned_parameters])
 
rf_tuned_parameters = [{"criterion": ["gini"],}]                
classifiers.append(["RandomForest", RandomForestClassifier(n_jobs=-1), rf_tuned_parameters])

knn_tuned_parameters = [{"n_neighbors": [1, 3, 5, 10, 20]}]
classifiers.append(["kNN", KNeighborsClassifier(),knn_tuned_parameters])

In [121]:
def gsCV_accuracy(name,classifier, params, train, target):
 
    print (name+":")
    gs= GridSearchCV(classifier, params, n_jobs=-1, cv=5,scoring="accuracy")
    gs.fit(train, target)
    #print (gs.best_params_, gs.best_score_)
    
    predict = gs.best_estimator_.predict(train)
    print(metrics.classification_report(target,predict))
    print(metrics.confusion_matrix(target, predict))
    print(cross_val_score(gs.best_estimator_, train,target,cv= 5).mean())


In [122]:
for i in range(len(classifiers)):
    gsCV_accuracy(classifiers[i][0],classifiers[i][1], classifiers[i][2], X_train, y_train)
    

Logistic Regression:
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        31
          1       0.97      0.95      0.96        37
          2       0.95      0.97      0.96        37

avg / total       0.97      0.97      0.97       105

[[31  0  0]
 [ 0 35  2]
 [ 0  1 36]]
0.953122529644
SVM:
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        31
          1       1.00      0.95      0.97        37
          2       0.95      1.00      0.97        37

avg / total       0.98      0.98      0.98       105

[[31  0  0]
 [ 0 35  2]
 [ 0  0 37]]
0.970909090909
RandomForest:
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        31
          1       0.95      1.00      0.97        37
          2       1.00      0.95      0.97        37

avg / total       0.98      0.98      0.98       105

[[31  0  0]
 [ 0 37  0]
 [ 0  2 35]]
0.934031620553


In [126]:
class Stacking(object):

    def __init__(self, seed, n_fold, base_learners, meta_learner):
        self.seed = seed
        self.n_fold = n_fold
        self.base_learners = base_learners
        self.meta_learner = meta_learner
        self.T = len(base_learners) # num of base learners

    def generateBaseLearner(self, X_tr, y_tr, X_te, y_te):
    
        n1 = X_tr.shape[0]
        n2 = X_te.shape[0]

        kf = KFold(n1, n_folds= self.n_fold, random_state= self.seed)

        #constructing data for meta learner
        meta_train = np.zeros((n1, self.T))
        meta_test = np.zeros((n2, self.T))

        for i, clf in enumerate(self.base_learners):
            meta_test_i = np.zeros((n2, self.n_fold))
            for j, (train_index, test_index) in enumerate(kf):
                X_train = X_tr[train_index]
                y_train = y_tr[train_index]
                X_holdout = X_tr[test_index]
                y_holdout = y_tr[test_index]
           
                clf[1].fit(X_train, y_train)
                y_pred = clf[1].predict(X_holdout)[:]
                
                print 'Base Learner:%s accuracy = %s' % (clf[0], metrics.accuracy_score(y_holdout, y_pred))
                # filling predicted X_holdout into meta training set
                meta_train[test_index, i] = y_pred
                meta_test_i[:, j] = clf[1].predict(X_te)[:]
            
            meta_test[:, i] = meta_test_i.mean(1)
        
        self.meta_learner.fit(meta_train, y_tr)
        y_result_pred = self.meta_learner.predict(meta_test)
        print metrics.classification_report(y_te, y_result_pred)
        print 'Final accuracy = %s' % (metrics.accuracy_score(y_te, y_result_pred))
        return y_result_pred
    
  

In [124]:
#baseLearner Default

lg = LogisticRegression(random_state= 42)
svm = SVC(random_state= 42)
rf = RandomForestClassifier( random_state= 42, n_jobs=-1)
knn = KNeighborsClassifier()
base_learner = [['SVM', svm], ['Random Forest', rf], ['KNN',knn]]

lg2 = LogisticRegression(penalty = 'l1', C = 10 ,random_state= 42)
svm2 = SVC(kernel= 'rbf', C= 100.0, gamma= 0.01,random_state= 42)
rf2 = RandomForestClassifier( criterion = 'gini',random_state= 42, n_jobs=-1)
knn2 = KNeighborsClassifier(n_neighbors = 1)
base_learner2 = [['SVM', svm2], ['Random Forest', rf2], ['KNN',knn2]]


base_learner3 = [['SVM', svm], ['Logistic regression', lg], ['KNN',knn]]

In [127]:
stackingD = Stacking(42, 3, base_learner, lg)

stackingD.generateBaseLearner(X_train, y_train, X_test, y_test) 

Base Learner:SVM accuracy = 0.971428571429
Base Learner:SVM accuracy = 0.885714285714
Base Learner:SVM accuracy = 0.971428571429
Base Learner:Random Forest accuracy = 0.971428571429
Base Learner:Random Forest accuracy = 0.885714285714
Base Learner:Random Forest accuracy = 0.971428571429
Base Learner:KNN accuracy = 0.914285714286
Base Learner:KNN accuracy = 0.914285714286
Base Learner:KNN accuracy = 0.971428571429
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        19
          1       1.00      1.00      1.00        13
          2       1.00      1.00      1.00        13

avg / total       1.00      1.00      1.00        45

Final accuracy = 1.0


array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2, 0,
       2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1, 0, 0])

In [128]:
stacking2 = Stacking(42, 3, base_learner2, lg2)

stacking2.generateBaseLearner(X_train, y_train, X_test, y_test) 

Base Learner:SVM accuracy = 1.0
Base Learner:SVM accuracy = 0.885714285714
Base Learner:SVM accuracy = 1.0
Base Learner:Random Forest accuracy = 0.971428571429
Base Learner:Random Forest accuracy = 0.885714285714
Base Learner:Random Forest accuracy = 0.971428571429
Base Learner:KNN accuracy = 0.942857142857
Base Learner:KNN accuracy = 0.914285714286
Base Learner:KNN accuracy = 0.971428571429
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        19
          1       1.00      1.00      1.00        13
          2       1.00      1.00      1.00        13

avg / total       1.00      1.00      1.00        45

Final accuracy = 1.0


array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2, 0,
       2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 1, 1, 0, 0])

In [129]:
    ''' X_tr = np.array(X_tr)
        y_tr = np.array(y_tr)
        X_te = np.array(X_te)
        y_te = np.array(y_te)'''
stacking3 = Stacking(42, 3, base_learner3, rf)

stacking3.generateBaseLearner(X_train, y_train, X_test, y_test) 

Base Learner:SVM accuracy = 0.971428571429
Base Learner:SVM accuracy = 0.885714285714
Base Learner:SVM accuracy = 0.971428571429
Base Learner:Logistic regression accuracy = 0.942857142857
Base Learner:Logistic regression accuracy = 0.914285714286
Base Learner:Logistic regression accuracy = 0.971428571429
Base Learner:KNN accuracy = 0.914285714286
Base Learner:KNN accuracy = 0.914285714286
Base Learner:KNN accuracy = 0.971428571429
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        19
          1       1.00      0.92      0.96        13
          2       0.93      1.00      0.96        13

avg / total       0.98      0.98      0.98        45

Final accuracy = 0.977777777778


array([1, 0, 2, 1, 1, 0, 1, 2, 1, 1, 2, 0, 0, 0, 0, 1, 2, 1, 1, 2, 0, 2, 0,
       2, 2, 2, 2, 2, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 2, 2, 1, 0, 0])