In [1]:
import pandas as pd
import numpy as np 
from sklearn.model_selection import cross_validate
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import confusion_matrix

### Loading data
Data is shuffled and the nan rows are dropped. Since I use CV, 680 samples are selected from total of 683 samples.

In these models, I did not optimized the parameters of classifier. I simply used all the defaults.

In [12]:
Data=pd.read_csv('BreastCancer.csv').sample(frac=1).dropna()
X_df=Data.iloc[0:-3,1:-1]
Y_df=Data['Class'][0:-3]
x=X_df.values
y=Y_df.values
print(x.shape, y.shape)
k=10 #Number of folds

(680, 9) (680,)


### Decision tree classifier

In [13]:

N=x.shape[0]
ypredM1=[]
test_scoreM1=[]
for l in range(k):
    ind = np.arange(l * int(N / k), (l + 1) * int(N / k))
    x_train=np.delete(x,ind, axis=0)
    y_train=np.delete(y,ind)
    x_test=x[ind]
    y_test=y[ind]
    
    model=DecisionTreeClassifier()
    #cv_results=cross_validate(model,x_train, y_train, cv=5)
    #cv_results['test_score'].mean()
    model.fit(x_train, y_train)
    y_pred=model.predict(x_test)
    ypredM1.append(y_pred)
    test_scoreM1.append(model.score(x_test, y_test))
yM1=np.ndarray.flatten(np.array(ypredM1)).reshape(-1,1)

print(np.array(test_scoreM1).mean())
confusion_matrix(yM1,y)

0.9441176470588235


array([[428,  25],
       [ 13, 214]], dtype=int64)

### Support vector machine classifier

In [14]:

N=x.shape[0]
ypredM2=[]
test_scoreM2=[]
for l in range(k):
    ind = np.arange(l * int(N / k), (l + 1) * int(N / k))
    x_train=np.delete(x,ind, axis=0)
    y_train=np.delete(y,ind)
    x_test=x[ind]
    y_test=y[ind]
    
    model=SVC()
    #cv_results=cross_validate(model,x_train, y_train, cv=5)
    #cv_results['test_score'].mean()
    model.fit(x_train, y_train)
    y_pred=model.predict(x_test)
    ypredM2.append(y_pred)
    test_scoreM2.append(model.score(x_test, y_test))
yM2=np.ndarray.flatten(np.array(ypredM2)).reshape(-1,1)

print(np.array(test_scoreM2).mean())
confusion_matrix(yM2,y)

0.9573529411764706




array([[414,   2],
       [ 27, 237]], dtype=int64)

### Multi layer perceptron classifier

In [15]:

N=x.shape[0]
ypredM3=[]

test_scoreM3=[]
for l in range(k):
    ind = np.arange(l * int(N / k), (l + 1) * int(N / k))
    x_train=np.delete(x,ind, axis=0)
    y_train=np.delete(y,ind)
    x_test=x[ind]
    y_test=y[ind]
    
    model=MLPClassifier()
    #cv_results=cross_validate(model,x_train, y_train, cv=5)
    #cv_results['test_score'].mean()
    model.fit(x_train, y_train)
    y_pred=model.predict(x_test)
    ypredM3.append(y_pred)
    test_scoreM3.append(model.score(x_test, y_test))
yM3=np.ndarray.flatten(np.array(ypredM3)).reshape(-1,1)

print(np.array(test_scoreM3).mean())
confusion_matrix(yM3,y)



0.9691176470588235




array([[430,  10],
       [ 11, 229]], dtype=int64)

### Stacking all the three predictions and using a decision tree classifier on them

In [16]:
X_level_1=np.concatenate((np.concatenate((yM1, yM2), axis=1), yM3), axis=1)
X_level_1.shape

(680, 3)

In [17]:
N=X_level_1.shape[0]
ypred_level_1=[]

test_score_level_1=[]
for l in range(k):
    ind = np.arange(l * int(N / k), (l + 1) * int(N / k))
    
    x_train=np.delete(X_level_1,ind, axis=0)
    y_train=np.delete(y,ind, axis=0)
    
    x_test=X_level_1[ind]
    y_test=y[ind]
    
    model=DecisionTreeClassifier()
    #cv_results=cross_validate(model,x_train, y_train, cv=5)
    #cv_results['test_score'].mean()
    model.fit(x_train, y_train)
    y_pred=model.predict(x_test)
    ypred_level_1.append(y_pred)
    test_score_level_1.append(model.score(x_test, y_test))
print(np.array(test_score_level_1).mean())
confusion_matrix(yM3,y)

0.9735294117647058


array([[430,  10],
       [ 11, 229]], dtype=int64)

### Results: It doesn't seem that the ensemble stacking make the prediction better in this data set