In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report,confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

In [2]:
cancer = pd.read_csv('../Data/breast.csv')

In [3]:
cancer
cancer.drop(columns=['Unnamed: 32','id'],inplace=True)

In [4]:
# find out the attributes in the dataset
cancer.info(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 31 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   diagnosis                569 non-null    object 
 1   radius_mean              569 non-null    float64
 2   texture_mean             569 non-null    float64
 3   perimeter_mean           569 non-null    float64
 4   area_mean                569 non-null    float64
 5   smoothness_mean          569 non-null    float64
 6   compactness_mean         569 non-null    float64
 7   concavity_mean           569 non-null    float64
 8   concave points_mean      569 non-null    float64
 9   symmetry_mean            569 non-null    float64
 10  fractal_dimension_mean   569 non-null    float64
 11  radius_se                569 non-null    float64
 12  texture_se               569 non-null    float64
 13  perimeter_se             569 non-null    float64
 14  area_se                  5

In [5]:
# find out the total instances and number of features
cancer.head(10)
Y=cancer.pop('diagnosis')

In [6]:
Y

0      M
1      M
2      M
3      M
4      M
      ..
564    M
565    M
566    M
567    M
568    B
Name: diagnosis, Length: 569, dtype: object

In [7]:
# use describe to find out more about the data
cancer.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
radius_mean,569.0,14.127292,3.524049,6.981,11.7,13.37,15.78,28.11
texture_mean,569.0,19.289649,4.301036,9.71,16.17,18.84,21.8,39.28
perimeter_mean,569.0,91.969033,24.298981,43.79,75.17,86.24,104.1,188.5
area_mean,569.0,654.889104,351.914129,143.5,420.3,551.1,782.7,2501.0
smoothness_mean,569.0,0.09636,0.014064,0.05263,0.08637,0.09587,0.1053,0.1634
compactness_mean,569.0,0.104341,0.052813,0.01938,0.06492,0.09263,0.1304,0.3454
concavity_mean,569.0,0.088799,0.07972,0.0,0.02956,0.06154,0.1307,0.4268
concave points_mean,569.0,0.048919,0.038803,0.0,0.02031,0.0335,0.074,0.2012
symmetry_mean,569.0,0.181162,0.027414,0.106,0.1619,0.1792,0.1957,0.304
fractal_dimension_mean,569.0,0.062798,0.00706,0.04996,0.0577,0.06154,0.06612,0.09744


In [8]:
X=cancer
Y

0      M
1      M
2      M
3      M
4      M
      ..
564    M
565    M
566    M
567    M
568    B
Name: diagnosis, Length: 569, dtype: object

In [9]:
X_train, X_test, y_train, y_test = train_test_split (X,Y)

In [10]:
# Fit only to the training data
scaler = StandardScaler()
scaler.fit(X_train)

StandardScaler()

In [11]:
# Now apply the transformations to the data:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
# create a Multilayerperceptron classifier and call it mlp
mlp = MLPClassifier(hidden_layer_sizes=(30,30,30))

In [13]:
mlp.fit(X_train,y_train)



MLPClassifier(hidden_layer_sizes=(30, 30, 30))

In [14]:
predicition = mlp.predict(X_test)

In [15]:
print(confusion_matrix(y_test,predicition))

[[95  2]
 [ 0 46]]


In [16]:
# Print the coefficient values and interpret it
mlp.coefs_

[array([[-5.05994151e-02, -3.79612144e-02,  1.23186979e-02,
         -5.53405626e-04,  2.43682541e-02, -2.75264561e-01,
          1.90989056e-01, -9.16308349e-02,  5.65109712e-03,
          2.27348184e-01, -2.04730362e-01, -2.53848501e-01,
          1.09457243e-01, -1.75248855e-01, -2.19447834e-01,
         -7.42078009e-02,  6.36504320e-02, -1.75981415e-01,
         -1.54617174e-01, -1.47616827e-01,  7.19476573e-02,
         -4.71057205e-02,  1.74833308e-02, -8.15231384e-02,
         -6.61871338e-02,  2.70439268e-01, -6.74314941e-02,
          8.89739972e-02, -1.73674500e-01,  1.93051279e-01],
        [ 1.64414130e-01, -3.57305822e-01,  3.27350685e-01,
         -1.33845639e-02,  2.92787509e-01, -1.22685505e-01,
          2.25581846e-01,  6.46245489e-02, -1.07950119e-01,
          2.84258577e-01,  1.12671668e-01, -1.61207300e-01,
          5.99985923e-02, -1.56494721e-01, -1.12030169e-02,
         -9.06819750e-02,  1.84526549e-01, -3.60810189e-01,
         -1.14935027e-01,  1.30132167e-

In [17]:
# Print the intercepts values and interpret it
mlp.intercepts_[0]

array([ 0.29835156,  0.12846111, -0.26926648,  0.10011821,  0.3540561 ,
       -0.19381993, -0.05512857,  0.04577367,  0.21962024,  0.22444457,
       -0.01151595,  0.17491745,  0.20826073, -0.23616992,  0.34497275,
       -0.0049768 , -0.12169005,  0.2511009 , -0.2778379 ,  0.3189768 ,
        0.40471842, -0.10177461,  0.25288282,  0.25860722, -0.36451844,
        0.33267075, -0.18959092, -0.10911006, -0.20977304,  0.20145232])

In [18]:
dtc = DecisionTreeClassifier(max_depth=2)
dtc.fit(X_train, y_train)
cv_score = cross_val_score(dtc, X_train, y_train, cv=10)
print ("Decision Tree: ",cv_score)
pred = dtc.predict(X_test)

Decision Tree:  [0.90697674 0.90697674 0.93023256 0.97674419 0.90697674 0.95348837
 0.92857143 0.9047619  0.95238095 0.95238095]


In [19]:
rdc = RandomForestClassifier(max_depth=2)
rdc.fit(X_train, y_train)
cv_score = cross_val_score(rdc, X_train, y_train, cv=10)
print ("RandomForest: ",cv_score)
pred = rdc.predict(X_test)

RandomForest:  [0.90697674 0.93023256 0.93023256 0.97674419 0.95348837 0.97674419
 0.95238095 0.95238095 0.97619048 0.95238095]


In [None]:
y_pred = mlp.predict(X_test)
y_pred

In [None]:
print(confusion_matrix(y_test,y_pred))
print(accuracy_score(y_test,y_pred))