In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df = pd.read_csv('data.csv')

In [3]:
df.drop(df.columns[[-1, 0]], axis=1, inplace=True)

In [4]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score

import time

In [5]:
diag_map = {'M':1, 'B':0}
df['diagnosis'] = df['diagnosis'].map(diag_map)

## **Using all mean values features**

Here we are constructing a ML Model that will predict malignant or benign tumors cells.

After training our ML Model we need to test it's accuracy.

In [6]:
X = df[['radius_mean', 'perimeter_mean', 'area_mean', 'concavity_mean', 'concave points_mean']]
y = df[['diagnosis']]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

accuracy_all = []
cvs_all = []

### **Nearest Neighbors**

In [7]:
from sklearn.neighbors import KNeighborsClassifier

In [8]:
import warnings
warnings.filterwarnings("ignore")

In [9]:
knn = KNeighborsClassifier()

In [10]:
knn.fit(X_train, y_train)

KNeighborsClassifier()

In [11]:
knn_y_pred = knn.predict(X_test)

In [12]:
scores = cross_val_score(knn, X, y, cv=5)



accuracy_all.append(accuracy_score(knn_y_pred, y_test))
cvs_all.append(np.mean(scores))

print("Accuracy: {0:.2%}".format(accuracy_score(knn_y_pred, y_test)))
print("Cross validation score: {0:.2%} (+/- {1:.2%})".format(np.mean(scores), np.std(scores)*2))

Accuracy: 92.11%
Cross validation score: 88.23% (+/- 7.06%)


###  **Naive Bayes**

In [13]:
from sklearn.naive_bayes import GaussianNB

In [14]:
gnb = GaussianNB()

In [15]:
gnb.fit(X_train, y_train)

GaussianNB()

In [16]:
gnb_y_pred = gnb.predict(X_test)

In [17]:
scores = cross_val_score(gnb, X, y, cv=5)



accuracy_all.append(accuracy_score(gnb_y_pred, y_test))
cvs_all.append(np.mean(scores))

print("Accuracy: {0:.2%}".format(accuracy_score(gnb_y_pred, y_test)))
print("Cross validation score: {0:.2%} (+/- {1:.2%})".format(np.mean(scores), np.std(scores)*2))

Accuracy: 94.74%
Cross validation score: 90.87% (+/- 5.91%)


### **Logistic Regression**

In [18]:
from sklearn.linear_model import LogisticRegression

In [19]:
logReg = LogisticRegression(random_state=0)

In [20]:
logReg.fit(X_train,y_train)

LogisticRegression(random_state=0)

In [21]:
y_pred=logReg.predict(X_test)

In [22]:
accuracy_all.append(accuracy_score(y_pred, y_test))
cvs_all.append(np.mean(scores))

print("Accuracy: {0:.2%}".format(accuracy_score(y_pred, y_test)))
print("Cross validation score: {0:.2%} (+/- {1:.2%})".format(np.mean(scores), np.std(scores)*2))

Accuracy: 92.98%
Cross validation score: 90.87% (+/- 5.91%)
