In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.datasets import load_breast_cancer
from sklearn.naive_bayes import GaussianNB,MultinomialNB,BernoulliNB

In [6]:
data = load_breast_cancer()
df = pd.DataFrame(data=np.c_[data.data,data.target],columns=[list(data.feature_names)+["target"]])
df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,radius error,texture error,perimeter error,area error,smoothness error,compactness error,concavity error,concave points error,symmetry error,fractal dimension error,worst radius,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,1.095,0.9053,8.589,153.4,0.006399,0.04904,0.05373,0.01587,0.03003,0.006193,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0.0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,0.5435,0.7339,3.398,74.08,0.005225,0.01308,0.0186,0.0134,0.01389,0.003532,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0.0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,0.7456,0.7869,4.585,94.03,0.00615,0.04006,0.03832,0.02058,0.0225,0.004571,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0.0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,0.4956,1.156,3.445,27.23,0.00911,0.07458,0.05661,0.01867,0.05963,0.009208,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0.0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,0.7572,0.7813,5.438,94.44,0.01149,0.02461,0.05688,0.01885,0.01756,0.005115,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0.0


In [7]:
df.shape

(569, 31)

In [8]:
features = df.iloc[:,0:-1]
level = df.iloc[:,-1]

In [9]:
xtrain,xtest,ytrain,ytest = train_test_split(features,level,test_size=0.2,random_state=8)

In [10]:
print('Shape of X_train = ', xtrain.shape)
print('Shape of y_train = ', ytrain.shape)
print('Shape of X_test = ', xtest.shape)
print('Shape of y_test = ', ytest.shape)

Shape of X_train =  (455, 30)
Shape of y_train =  (455,)
Shape of X_test =  (114, 30)
Shape of y_test =  (114,)


# apply GaussianNB

In [11]:
gaussianNb_model = GaussianNB()
gaussianNb_model.fit(xtrain,ytrain)
gaussianNb_model.score(X=xtest,y=ytest)

0.9473684210526315

# apply MultinomialNB

In [12]:
multinomialNb_model = MultinomialNB()
multinomialNb_model.fit(xtrain,ytrain)
multinomialNb_model.score(X=xtest,y=ytest)

0.8859649122807017

# apply BernoulliNB

In [13]:
bernoulliNb_model = BernoulliNB()
bernoulliNb_model.fit(xtrain,ytrain)
bernoulliNb_model.score(X=xtest,y=ytest)

0.5964912280701754

In [14]:
y_pred = gaussianNb_model.predict(X=xtest)

In [15]:
y_pred

array([1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 1., 0., 0., 1., 1.,
       0., 1., 0., 1., 0., 1., 1., 1., 0., 0., 0., 1., 1., 1., 1., 0., 0.,
       0., 1., 0., 0., 1., 0., 1., 0., 0., 1., 1., 1., 1., 0., 0., 0., 1.,
       0., 1., 0., 1., 0., 0., 1., 1., 1., 0., 0., 1., 1., 1., 1., 1., 0.,
       1., 1., 0., 1., 0., 1., 0., 1., 0., 1., 1., 1., 0., 0., 1., 0., 1.,
       1., 0., 0., 1., 0., 1., 1., 0., 1., 1., 0., 1., 1., 1., 0., 1., 0.,
       1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1.])

# Apply k-fold cross_validtion

In [16]:
cvs = cross_val_score(estimator=gaussianNb_model,X=features,y=level,cv=10)
cvs

array([0.94736842, 0.87719298, 0.89473684, 0.92982456, 0.94736842,
       0.96491228, 0.92982456, 0.96491228, 0.94736842, 0.96428571])

In [17]:
# max and min accuracy
cvs.max(),cvs.min()

(0.9649122807017544, 0.8771929824561403)

In [18]:
# average accuracy
cvs.mean()

0.9367794486215537

In [19]:
pred_one = xtest.iloc[77,:].values
pred_one

array([1.234e+01, 2.222e+01, 7.985e+01, 4.645e+02, 1.012e-01, 1.015e-01,
       5.370e-02, 2.822e-02, 1.551e-01, 6.761e-02, 2.949e-01, 1.656e+00,
       1.955e+00, 2.155e+01, 1.134e-02, 3.175e-02, 3.125e-02, 1.135e-02,
       1.879e-02, 5.348e-03, 1.358e+01, 2.868e+01, 8.736e+01, 5.530e+02,
       1.452e-01, 2.338e-01, 1.688e-01, 8.194e-02, 2.268e-01, 9.082e-02])

In [20]:
gaussianNb_model.predict([pred_one])

array([1.])

In [21]:
ytest.iloc[77]

1.0

In [23]:
!install python 3.8

install: cannot stat 'python': No such file or directory
