In [1]:
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer

In [2]:
dataset = load_breast_cancer(as_frame=True)

In [4]:
dataset['data'].head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [5]:
dataset['target'].head()

0    0
1    0
2    0
3    0
4    0
Name: target, dtype: int64

In [7]:
dataset['target'].value_counts()
# 357 ---> denoted by 1----malignant tumors
# 212 ---> denoted by 0----benign 

1    357
0    212
Name: target, dtype: int64

In [8]:
#Step:1 Dependent variable and independent variable
x = dataset['data']
y = dataset['target']

In [11]:
#Step:2 Split the dataset into training and testing set
from sklearn.model_selection import train_test_split

In [12]:
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.25, random_state=0)

In [13]:
# Step:3 Normalize the data for numerical stability
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.fit_transform(x_test)

In [15]:
# Step:4 fit logistic regression into the training set
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
model.fit(x_train,y_train)

LogisticRegression()

In [16]:
# Step 5: make prediction
y_pred = model.predict(x_test)

In [19]:
# Step 6: measure the accuracy
from sklearn.metrics import accuracy_score, confusion_matrix

In [18]:
accuracy_score(y_test,y_pred)

0.958041958041958

In [26]:
 confusion_matrix(y_test,y_pred).ravel()

array([51,  2,  4, 86])

In [27]:
# INITIALIZING EACH BINARY CLASSIFIER
models = {}

In [28]:
# Logistic model
from sklearn.linear_model import LogisticRegression
models['Logistic Regression'] = LogisticRegression()

In [29]:
# Support Vector Machines
from sklearn.svm import LinearSVC
models['Support Vector Machines'] = LinearSVC()

In [31]:
# Decision Trees
from sklearn.tree import DecisionTreeClassifier
models['Decision Trees'] = DecisionTreeClassifier()

In [32]:
# Random Forest
from sklearn.ensemble import RandomForestClassifier
models['Random Forest'] = RandomForestClassifier()

In [34]:
# Naive Bayes
from sklearn.naive_bayes import GaussianNB
models['Naive Bayes'] = GaussianNB()

In [35]:
# K-Nearest Neighbors
from sklearn.neighbors import KNeighborsClassifier
models['K-Nearest Neighbor'] = KNeighborsClassifier()

In [36]:
from sklearn.metrics import accuracy_score, precision_score, recall_score
accuracy, precision, recall ={},{},{}

In [42]:
for key in models.keys():
    # fit the classifier
    models[key].fit(x_train,y_train)
    # make predictions
    y_pred = models[key].predict(x_test)
    
    #calculate metrics
    accuracy[key] = accuracy_score(y_pred,y_test)
    precision[key] = precision_score(y_pred,y_test)
    recall[key] = recall_score(y_pred,y_test)

In [43]:
import pandas as pd
df_models = pd.DataFrame(index=models.keys(), columns=['Accuracy','Precision', 'Recall'])

In [45]:
df_models['Accuracy'] = accuracy.values()
df_models['Precision'] = precision.values()
df_models['Recall'] = recall.values()

In [46]:
df_models

Unnamed: 0,Accuracy,Precision,Recall
Logistic Regression,0.958042,0.955556,0.977273
Support Vector Machines,0.937063,0.933333,0.965517
Decision Trees,0.888112,0.855556,0.9625
Random Forest,0.965035,0.955556,0.988506
Naive Bayes,0.937063,0.955556,0.945055
K-Nearest Neighbor,0.951049,0.988889,0.936842
