In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier,VotingClassifier,AdaBoostClassifier,StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

In [2]:
data=load_breast_cancer()
X=data.data
Y=data.target

In [3]:
X

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

In [4]:
Y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

In [5]:
sc=StandardScaler()
X=sc.fit_transform(X)
X

array([[ 1.09706398, -2.07333501,  1.26993369, ...,  2.29607613,
         2.75062224,  1.93701461],
       [ 1.82982061, -0.35363241,  1.68595471, ...,  1.0870843 ,
        -0.24388967,  0.28118999],
       [ 1.57988811,  0.45618695,  1.56650313, ...,  1.95500035,
         1.152255  ,  0.20139121],
       ...,
       [ 0.70228425,  2.0455738 ,  0.67267578, ...,  0.41406869,
        -1.10454895, -0.31840916],
       [ 1.83834103,  2.33645719,  1.98252415, ...,  2.28998549,
         1.91908301,  2.21963528],
       [-1.80840125,  1.22179204, -1.81438851, ..., -1.74506282,
        -0.04813821, -0.75120669]])

In [6]:
x_train,x_test,y_train,y_test=train_test_split(X,Y,test_size=0.2,random_state=23)

In [7]:
param_grids={'n_neighbors':[1,3,5,7,9,11,13,15,17,21,25]}
grid=GridSearchCV(KNeighborsClassifier(),param_grids,cv=5,scoring='accuracy')
grid.fit(x_train,y_train)

GridSearchCV(cv=5, estimator=KNeighborsClassifier(),
             param_grid={'n_neighbors': [1, 3, 5, 7, 9, 11, 13, 15, 17, 21,
                                         25]},
             scoring='accuracy')

In [8]:
grid.best_params_

{'n_neighbors': 11}

In [9]:
grid.best_score_

0.9648351648351647

In [10]:
y_pred=grid.predict(x_test)

In [11]:
acc=accuracy_score(y_test,y_pred)*100
con_mat=confusion_matrix(y_test,y_pred)
print(acc)
print(con_mat)
print(classification_report(y_test,y_pred))

98.24561403508771
[[37  2]
 [ 0 75]]
              precision    recall  f1-score   support

           0       1.00      0.95      0.97        39
           1       0.97      1.00      0.99        75

    accuracy                           0.98       114
   macro avg       0.99      0.97      0.98       114
weighted avg       0.98      0.98      0.98       114



In [12]:
param_grids={'C':[0.1,1,10,50,100,200,500,1000],'gamma':[0.001,0.01,0.05,0.1,0.5,1,10],'kernel':['rbf','poly','linear']}
grid=GridSearchCV(SVC(),param_grids,cv=5,scoring='accuracy')
grid.fit(x_train,y_train)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid={'C': [0.1, 1, 10, 50, 100, 200, 500, 1000],
                         'gamma': [0.001, 0.01, 0.05, 0.1, 0.5, 1, 10],
                         'kernel': ['rbf', 'poly', 'linear']},
             scoring='accuracy')

In [13]:
grid.best_params_

{'C': 1, 'gamma': 0.05, 'kernel': 'rbf'}

In [14]:
grid.best_score_

0.9736263736263737

In [15]:
y_pred=grid.predict(x_test)
acc=accuracy_score(y_test,y_pred)*100
con_mat=confusion_matrix(y_test,y_pred)
print(acc)
print(con_mat)
print(classification_report(y_test,y_pred))

99.12280701754386
[[38  1]
 [ 0 75]]
              precision    recall  f1-score   support

           0       1.00      0.97      0.99        39
           1       0.99      1.00      0.99        75

    accuracy                           0.99       114
   macro avg       0.99      0.99      0.99       114
weighted avg       0.99      0.99      0.99       114



In [16]:
param_grids={'n_estimators':[1,10,50,100,200,400],'criterion':['gini','entropy'],'max_depth':[2,4,5,6,7,8,10]}
grid=GridSearchCV(RandomForestClassifier(),param_grids,cv=5,scoring='accuracy')
grid.fit(x_train,y_train)

GridSearchCV(cv=5, estimator=RandomForestClassifier(),
             param_grid={'criterion': ['gini', 'entropy'],
                         'max_depth': [2, 4, 5, 6, 7, 8, 10],
                         'n_estimators': [1, 10, 50, 100, 200, 400]},
             scoring='accuracy')

In [17]:
grid.best_params_

{'criterion': 'entropy', 'max_depth': 8, 'n_estimators': 200}

In [18]:
grid.best_score_

0.9670329670329669

In [19]:
y_pred=grid.predict(x_test)
acc=accuracy_score(y_test,y_pred)*100
con_mat=confusion_matrix(y_test,y_pred)
print(acc)
print(con_mat)
print(classification_report(y_test,y_pred))

96.49122807017544
[[36  3]
 [ 1 74]]
              precision    recall  f1-score   support

           0       0.97      0.92      0.95        39
           1       0.96      0.99      0.97        75

    accuracy                           0.96       114
   macro avg       0.97      0.95      0.96       114
weighted avg       0.97      0.96      0.96       114



In [20]:
param_grids={'solver':['sag','lbfgs','saga','liblinear']}
grid=GridSearchCV(LogisticRegression(),param_grids,cv=5,scoring='accuracy')
grid.fit(x_train,y_train)



GridSearchCV(cv=5, estimator=LogisticRegression(),
             param_grid={'solver': ['sag', 'lbfgs', 'saga', 'liblinear']},
             scoring='accuracy')

In [21]:
grid.best_params_

{'solver': 'saga'}

In [22]:
grid.best_score_

0.9758241758241759

In [23]:
y_pred=grid.predict(x_test)
acc=accuracy_score(y_test,y_pred)*100
con_mat=confusion_matrix(y_test,y_pred)
print(acc)
print(con_mat)
print(classification_report(y_test,y_pred))

98.24561403508771
[[38  1]
 [ 1 74]]
              precision    recall  f1-score   support

           0       0.97      0.97      0.97        39
           1       0.99      0.99      0.99        75

    accuracy                           0.98       114
   macro avg       0.98      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114



In [24]:
estimator = [] 
estimator.append(('LR',  
                  LogisticRegression(solver ='saga',  
                                     max_iter = 200))) 
estimator.append(('SVC', SVC(C=0.1,gamma =0.001, probability = True))) 
estimator.append(('RF', RandomForestClassifier(n_estimators=100,max_depth=7,criterion='gini'))) 
estimator.append(('KNN',KNeighborsClassifier(n_neighbors=13)))

In [25]:
estimator

[('LR', LogisticRegression(max_iter=200, solver='saga')),
 ('SVC', SVC(C=0.1, gamma=0.001, probability=True)),
 ('RF', RandomForestClassifier(max_depth=7)),
 ('KNN', KNeighborsClassifier(n_neighbors=13))]

In [26]:
vot_hard = VotingClassifier(estimators = estimator, voting ='hard') 
vot_hard.fit(x_train, y_train) 
y_pred = vot_hard.predict(x_test)
acc=accuracy_score(y_test,y_pred)*100
con_mat=confusion_matrix(y_test,y_pred)
print(acc)
print(con_mat)
print(classification_report(y_test,y_pred))



98.24561403508771
[[37  2]
 [ 0 75]]
              precision    recall  f1-score   support

           0       1.00      0.95      0.97        39
           1       0.97      1.00      0.99        75

    accuracy                           0.98       114
   macro avg       0.99      0.97      0.98       114
weighted avg       0.98      0.98      0.98       114



In [27]:
vot_hard = VotingClassifier(estimators = estimator, voting ='soft') 
vot_hard.fit(x_train, y_train) 
y_pred = vot_hard.predict(x_test)
acc=accuracy_score(y_test,y_pred)*100
con_mat=confusion_matrix(y_test,y_pred)
print(acc)
print(con_mat)
print(classification_report(y_test,y_pred))



98.24561403508771
[[38  1]
 [ 1 74]]
              precision    recall  f1-score   support

           0       0.97      0.97      0.97        39
           1       0.99      0.99      0.99        75

    accuracy                           0.98       114
   macro avg       0.98      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114



In [28]:
ada=AdaBoostClassifier(base_estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                         max_depth=7, max_features='auto', max_leaf_nodes=None,
                         min_impurity_decrease=0.0, min_impurity_split=None,
                         min_samples_leaf=1, min_samples_split=2,
                         min_weight_fraction_leaf=0.0, n_estimators=100,
                         n_jobs=None, oob_score=False, random_state=None,
                         verbose=0, warm_start=False) ,n_estimators=100,learning_rate=0.001,algorithm='SAMME')

In [29]:
ada

AdaBoostClassifier(algorithm='SAMME',
                   base_estimator=RandomForestClassifier(max_depth=7),
                   learning_rate=0.001, n_estimators=100)

In [30]:
ada.fit(x_train,y_train)

AdaBoostClassifier(algorithm='SAMME',
                   base_estimator=RandomForestClassifier(max_depth=7),
                   learning_rate=0.001, n_estimators=100)

In [31]:
y_pred=ada.predict(x_test)

In [32]:
acc=accuracy_score(y_test,y_pred)*100
con_mat=confusion_matrix(y_test,y_pred)
print(acc)
print(con_mat)
print(classification_report(y_test,y_pred))

96.49122807017544
[[36  3]
 [ 1 74]]
              precision    recall  f1-score   support

           0       0.97      0.92      0.95        39
           1       0.96      0.99      0.97        75

    accuracy                           0.96       114
   macro avg       0.97      0.95      0.96       114
weighted avg       0.97      0.96      0.96       114



In [33]:
gbm=GradientBoostingClassifier(n_estimators=100,subsample=0.5,learning_rate=0.2)

In [34]:
gbm.fit(x_train,y_train)

GradientBoostingClassifier(learning_rate=0.2, subsample=0.5)

In [35]:
y_pred=gbm.predict(x_test)
acc=accuracy_score(y_test,y_pred)*100
con_mat=confusion_matrix(y_test,y_pred)
print(acc)
print(con_mat)
print(classification_report(y_test,y_pred))

96.49122807017544
[[35  4]
 [ 0 75]]
              precision    recall  f1-score   support

           0       1.00      0.90      0.95        39
           1       0.95      1.00      0.97        75

    accuracy                           0.96       114
   macro avg       0.97      0.95      0.96       114
weighted avg       0.97      0.96      0.96       114



In [36]:
stc=StackingClassifier(estimators=estimator,final_estimator=SVC(C=10,gamma=0.01),cv=5)
stc.fit(x_train,y_train)



StackingClassifier(cv=5,
                   estimators=[('LR',
                                LogisticRegression(max_iter=200,
                                                   solver='saga')),
                               ('SVC',
                                SVC(C=0.1, gamma=0.001, probability=True)),
                               ('RF', RandomForestClassifier(max_depth=7)),
                               ('KNN', KNeighborsClassifier(n_neighbors=13))],
                   final_estimator=SVC(C=10, gamma=0.01))

In [37]:
y_pred=stc.predict(x_test)
acc=accuracy_score(y_test,y_pred)*100
con_mat=confusion_matrix(y_test,y_pred)
print(acc)
print(con_mat)
print(classification_report(y_test,y_pred))

99.12280701754386
[[38  1]
 [ 0 75]]
              precision    recall  f1-score   support

           0       1.00      0.97      0.99        39
           1       0.99      1.00      0.99        75

    accuracy                           0.99       114
   macro avg       0.99      0.99      0.99       114
weighted avg       0.99      0.99      0.99       114



In [38]:
from keras.models import Sequential
from keras.layers import Dense

Using TensorFlow backend.


In [39]:
x_train.shape

(455, 30)

In [45]:
model = Sequential()
model.add(Dense(16, input_dim=30, activation='relu'))
model.add(Dense(12, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(4, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

In [46]:
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

In [47]:
model.fit(x_train,y_train,epochs=15,batch_size=32)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.callbacks.History at 0x211a8640358>

In [48]:
loss, accuracy = model.evaluate(x_test ,y_test)
print('Accuracy: %.2f' % (accuracy*100))
print('Loss: ',loss)

Accuracy: 98.25
Loss:  0.07527698030727997


In [49]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 16)                496       
_________________________________________________________________
dense_7 (Dense)              (None, 12)                204       
_________________________________________________________________
dense_8 (Dense)              (None, 8)                 104       
_________________________________________________________________
dense_9 (Dense)              (None, 4)                 36        
_________________________________________________________________
dense_10 (Dense)             (None, 1)                 5         
Total params: 845
Trainable params: 845
Non-trainable params: 0
_________________________________________________________________
