In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

In [2]:
cancer = pd.read_csv('../Data/breast.csv')

In [3]:
cancer
cancer.drop(columns=['Unnamed: 32','id'],inplace=True)

In [4]:
# find out the attributes in the dataset
cancer.info(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 31 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   diagnosis                569 non-null    object 
 1   radius_mean              569 non-null    float64
 2   texture_mean             569 non-null    float64
 3   perimeter_mean           569 non-null    float64
 4   area_mean                569 non-null    float64
 5   smoothness_mean          569 non-null    float64
 6   compactness_mean         569 non-null    float64
 7   concavity_mean           569 non-null    float64
 8   concave points_mean      569 non-null    float64
 9   symmetry_mean            569 non-null    float64
 10  fractal_dimension_mean   569 non-null    float64
 11  radius_se                569 non-null    float64
 12  texture_se               569 non-null    float64
 13  perimeter_se             569 non-null    float64
 14  area_se                  5

In [5]:
# find out the total instances and number of features
cancer.head(10)
Y=cancer.pop('diagnosis')

In [6]:
Y

0      M
1      M
2      M
3      M
4      M
      ..
564    M
565    M
566    M
567    M
568    B
Name: diagnosis, Length: 569, dtype: object

In [7]:
# use describe to find out more about the data
cancer.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
radius_mean,569.0,14.127292,3.524049,6.981,11.7,13.37,15.78,28.11
texture_mean,569.0,19.289649,4.301036,9.71,16.17,18.84,21.8,39.28
perimeter_mean,569.0,91.969033,24.298981,43.79,75.17,86.24,104.1,188.5
area_mean,569.0,654.889104,351.914129,143.5,420.3,551.1,782.7,2501.0
smoothness_mean,569.0,0.09636,0.014064,0.05263,0.08637,0.09587,0.1053,0.1634
compactness_mean,569.0,0.104341,0.052813,0.01938,0.06492,0.09263,0.1304,0.3454
concavity_mean,569.0,0.088799,0.07972,0.0,0.02956,0.06154,0.1307,0.4268
concave points_mean,569.0,0.048919,0.038803,0.0,0.02031,0.0335,0.074,0.2012
symmetry_mean,569.0,0.181162,0.027414,0.106,0.1619,0.1792,0.1957,0.304
fractal_dimension_mean,569.0,0.062798,0.00706,0.04996,0.0577,0.06154,0.06612,0.09744


In [8]:
X=cancer
Y

0      M
1      M
2      M
3      M
4      M
      ..
564    M
565    M
566    M
567    M
568    B
Name: diagnosis, Length: 569, dtype: object

In [9]:
X_train, X_test, y_train, y_test = train_test_split (X,Y)

In [10]:
# Fit only to the training data
scaler = StandardScaler()
scaler.fit(X_train)

StandardScaler()

In [11]:
# Now apply the transformations to the data:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
# create a Multilayerperceptron classifier and call it mlp
mlp = MLPClassifier(hidden_layer_sizes=(30,30,30))

In [13]:
mlp.fit(X_train,y_train)

MLPClassifier(hidden_layer_sizes=(30, 30, 30))

In [14]:
predicition = mlp.predict(X_test)

In [15]:
print(confusion_matrix(y_test,predicition))

[[99  1]
 [ 1 42]]


In [16]:
# Print the coefficient values and interpret it
mlp.coefs_

[array([[ 0.04343368,  0.28525653, -0.0315619 ,  0.15145614,  0.15496657,
         -0.37835869,  0.0621802 , -0.11712598,  0.16250574, -0.11415927,
         -0.31768081, -0.24382617, -0.19694991,  0.23053701, -0.1091789 ,
          0.37413761,  0.08268916,  0.05905808,  0.14889098,  0.10208922,
          0.15181699, -0.27505078, -0.31375686, -0.07196976, -0.21888315,
         -0.06667941,  0.25243753, -0.0081643 , -0.33352068, -0.27722728],
        [-0.18912979,  0.17102364, -0.40536848, -0.12475143, -0.42894405,
          0.15121697,  0.05807576,  0.23110932,  0.08635095,  0.13124301,
          0.29437846,  0.14281467,  0.13772467,  0.37849514, -0.04949129,
          0.23066455, -0.2493109 ,  0.16537875,  0.2397119 ,  0.27431626,
          0.05960105, -0.09391404,  0.04380318,  0.02614063,  0.02109852,
         -0.04194944, -0.26322135,  0.13383447,  0.19638838,  0.16667382],
        [-0.03520378,  0.2002173 ,  0.0501862 ,  0.0647167 , -0.20547929,
         -0.17701308, -0.10951607, -

In [17]:
# Print the intercepts values and interpret it
mlp.intercepts_[0]

array([ 0.33736601, -0.3011563 ,  0.2781632 ,  0.37782874, -0.03617522,
        0.2172292 ,  0.32831734,  0.29075699,  0.03319368,  0.04137845,
        0.32330727,  0.0671827 ,  0.21372975, -0.13822868,  0.18973448,
        0.02561194, -0.10143778,  0.32621174,  0.19463259,  0.01457573,
       -0.25531055,  0.23138068, -0.09365568, -0.15676701,  0.16585566,
        0.30925596, -0.04459073,  0.39850504, -0.09292637, -0.12373922])

In [18]:
dtc = DecisionTreeClassifier(max_depth=2)
dtc.fit(X_train, y_train)
cv_score = cross_val_score(dtc, X_train, y_train, cv=10)
print ("Decision Tree: ",cv_score)
pred = dtc.predict(X_test)

Decision Tree:  [0.95348837 0.90697674 0.97674419 0.88372093 0.90697674 0.93023256
 0.97619048 0.92857143 0.9047619  0.95238095]


In [19]:
rdc = RandomForestClassifier(max_depth=2)
rdc.fit(X_train, y_train)
cv_score = cross_val_score(rdc, X_train, y_train, cv=10)
print ("RandomForest: ",cv_score)
pred = rdc.predict(X_test)

RandomForest:  [0.90697674 0.88372093 1.         0.90697674 0.88372093 0.93023256
 0.97619048 0.95238095 1.         0.95238095]


In [20]:
y_pred = mlp.predict(X_test)
y_pred

array(['M', 'B', 'B', 'M', 'B', 'B', 'B', 'B', 'B', 'B', 'M', 'B', 'B',
       'B', 'M', 'B', 'M', 'B', 'M', 'B', 'B', 'B', 'B', 'M', 'B', 'B',
       'B', 'B', 'M', 'M', 'B', 'M', 'M', 'M', 'B', 'B', 'M', 'B', 'B',
       'B', 'B', 'B', 'B', 'B', 'B', 'M', 'B', 'M', 'B', 'B', 'B', 'B',
       'B', 'B', 'B', 'B', 'M', 'M', 'B', 'B', 'B', 'B', 'B', 'B', 'M',
       'M', 'B', 'B', 'M', 'M', 'B', 'B', 'B', 'B', 'M', 'B', 'M', 'B',
       'B', 'B', 'B', 'M', 'M', 'M', 'B', 'M', 'M', 'B', 'B', 'M', 'B',
       'B', 'M', 'B', 'M', 'B', 'B', 'B', 'B', 'B', 'M', 'M', 'B', 'B',
       'B', 'M', 'B', 'B', 'B', 'M', 'M', 'B', 'B', 'M', 'B', 'B', 'B',
       'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B', 'B',
       'B', 'B', 'B', 'M', 'M', 'B', 'B', 'M', 'B', 'M', 'B', 'M', 'M'],
      dtype='<U1')

In [21]:
print(confusion_matrix(y_test,y_pred))
print(accuracy_score(y_test,y_pred))

[[99  1]
 [ 1 42]]
0.986013986013986
