In [109]:
import pandas as pd

# Location of the CSV, relative to the notebook's working directory.
fileURL = './breastcancer.csv'
breast_cancer_db = pd.read_csv(fileURL)

# Keep the columns at positions 1..11. `.ix` has been removed from pandas; on a
# frame with string column labels it treated an integer slice as positional,
# which is exactly what `.iloc` does. `.copy()` makes the result an independent
# frame, so later in-place edits neither raise SettingWithCopyWarning nor
# depend on whether the slice is a view of `breast_cancer_db`.
# NOTE(review): presumably these are the diagnosis label plus the "*_mean"
# feature columns -- confirm against the CSV header.
breast_cancer_mean_db = breast_cancer_db.iloc[:, 1:12].copy()

In [110]:
# Recode the 'diagnosis' labels in place: 'B' becomes 0, then 'M' becomes 1.
for _label, _code in (('B', 0), ('M', 1)):
    breast_cancer_mean_db.loc[(breast_cancer_mean_db['diagnosis'] == _label), 'diagnosis'] = _code

# Convert the recoded column to a numeric dtype.
breast_cancer_mean_db['diagnosis'] = pd.to_numeric(breast_cancer_mean_db['diagnosis'])

# Swap the space in this column label for an underscore.
breast_cancer_mean_db.rename(
    columns={'concave points_mean': 'concave_points_mean'},
    inplace=True
)

In [111]:
def get_X_y(feature_cols, target, data=None):
    """Select the feature columns and the target column from a DataFrame.

    Parameters
    ----------
    feature_cols : list of str
        Labels of the columns to return as features.
    target : str
        Label of the column to return as the target.
    data : pandas.DataFrame, optional
        Frame to select from. When omitted, the notebook-level
        ``breast_cancer_mean_db`` is used, so existing two-argument calls
        behave exactly as before.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` is ``data[feature_cols]`` and ``y`` is
        ``data[target]``.

    Raises
    ------
    KeyError
        If a requested column label is not present in the frame.
    """
    if data is None:
        # Looked up at call time, so the default follows whatever the
        # notebook global currently holds.
        data = breast_cancer_mean_db
    return data[feature_cols], data[target]

In [113]:
# Label column, and the predictor columns used to build X for the classifiers.
target = 'diagnosis'
feature_cols = [
    'texture_mean',
    'perimeter_mean',
    'smoothness_mean',
    'compactness_mean',
    'concave_points_mean',
]
X, y = get_X_y(feature_cols, target)

In [114]:
from sklearn.naive_bayes import GaussianNB
# Gaussian naive Bayes classifier, constructed with no arguments (library defaults).
gnb_clf = GaussianNB()

In [115]:
from sklearn.linear_model import LogisticRegression
# Logistic-regression classifier; hyperparameters are listed one per line.
lrg_clf = LogisticRegression(
    C=10,
    fit_intercept=True,
    tol=0.0001,
    class_weight='balanced'
)

In [116]:
from sklearn.neighbors import KNeighborsClassifier
# k-nearest-neighbours classifier using 10 neighbours with uniform weights.
knn_clf = KNeighborsClassifier(
    n_neighbors=10,
    weights='uniform'
)

In [117]:
from sklearn import tree
# Entropy-criterion decision tree. `random_state` is pinned so that repeated
# runs of the notebook build the same tree and report the same scores; without
# it the estimator draws from the global NumPy random state.
dt_clf = tree.DecisionTreeClassifier(criterion="entropy", min_samples_split=4,
                                     min_samples_leaf=2, random_state=0)

In [118]:
from sklearn.ensemble import RandomForestClassifier
# Gini-criterion random forest with 5 trees. `random_state` is pinned so the
# randomised tree construction, and therefore the reported scores, are
# reproducible across runs.
rf_clf = RandomForestClassifier(criterion="gini", n_estimators=5, random_state=0)

In [119]:
try:
    # Current location of the cross-validation helpers.
    from sklearn.model_selection import cross_val_score
except ImportError:
    # Legacy module path, kept as a fallback for old scikit-learn installs
    # that predate `sklearn.model_selection`.
    from sklearn.cross_validation import cross_val_score


def model_and_validate(clf, cv=10, scoring='accuracy'):
    """Cross-validate ``clf`` on the notebook-level ``X`` / ``y`` and return the mean score.

    Parameters
    ----------
    clf : estimator
        Classifier passed to ``cross_val_score``.
    cv : int, optional
        Passed through as ``cross_val_score``'s ``cv`` argument. Defaults to
        10, the value that was previously hard-coded.
    scoring : str, optional
        Passed through as ``cross_val_score``'s ``scoring`` argument. Defaults
        to ``'accuracy'``, the value that was previously hard-coded.

    Returns
    -------
    The mean of the per-split scores returned by ``cross_val_score``.
    """
    scores = cross_val_score(clf, X, y, cv=cv, scoring=scoring)
    return scores.mean()

In [120]:
# Mean cross-validated score for the Gaussian naive Bayes model. The
# parenthesised single-argument form prints identically on Python 2 and 3.
print(model_and_validate(gnb_clf))

0.921070996457


In [121]:
# Mean cross-validated score for the logistic-regression model. The
# parenthesised single-argument form prints identically on Python 2 and 3.
print(model_and_validate(lrg_clf))

0.912234249417


In [122]:
# Mean cross-validated score for the decision-tree model. The
# parenthesised single-argument form prints identically on Python 2 and 3.
print(model_and_validate(dt_clf))

0.903555224268


In [123]:
# Mean cross-validated score for the k-nearest-neighbours model. The
# parenthesised single-argument form prints identically on Python 2 and 3.
print(model_and_validate(knn_clf))

0.891178376977


In [125]:
# Mean cross-validated score for the random-forest model. The
# parenthesised single-argument form prints identically on Python 2 and 3.
print(model_and_validate(rf_clf))

0.93695553539


In [132]:
from mlxtend.classifier import StackingClassifier

# Stack the random forest and the logistic regression as base classifiers;
# the same logistic-regression instance is also passed as the meta-classifier.
stk_clf = StackingClassifier(
    classifiers=[rf_clf, lrg_clf],
    meta_classifier=lrg_clf
)

In [133]:
# Mean cross-validated score for the stacked model. The
# parenthesised single-argument form prints identically on Python 2 and 3.
print(model_and_validate(stk_clf))

0.931597312246
