# [Machine Learning] Supervised Learning

## Naive Bayes Classifier Algorithm

$P(A|B)=\frac{P(B|A)P(A)}{P(B)}$

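Naive Bayes is a family of probabilistic supervised machine learning algorithms. They apply Bayes' theorem (above) under the "naive" assumption that the features are conditionally independent given the class.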

### Import Libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

### Load Dataset

In [2]:
from sklearn.datasets import load_breast_cancer

In [3]:
# Load the breast-cancer dataset bundled with scikit-learn; the returned object
# exposes the .data, .feature_names, .target and .target_names attributes used below.
dt = load_breast_cancer()

In [4]:
# Peek at the raw feature matrix (NumPy array, one row per sample).
dt.data

array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
        1.189e-01],
       [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
        8.902e-02],
       [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
        8.758e-02],
       ...,
       [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
        7.820e-02],
       [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
        1.240e-01],
       [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
        7.039e-02]])

In [5]:
# Names of the 30 feature columns; used later as the DataFrame column labels for dt.data.
dt.feature_names

array(['mean radius', 'mean texture', 'mean perimeter', 'mean area',
       'mean smoothness', 'mean compactness', 'mean concavity',
       'mean concave points', 'mean symmetry', 'mean fractal dimension',
       'radius error', 'texture error', 'perimeter error', 'area error',
       'smoothness error', 'compactness error', 'concavity error',
       'concave points error', 'symmetry error',
       'fractal dimension error', 'worst radius', 'worst texture',
       'worst perimeter', 'worst area', 'worst smoothness',
       'worst compactness', 'worst concavity', 'worst concave points',
       'worst symmetry', 'worst fractal dimension'], dtype='<U23')

In [6]:
# Integer class label per sample; the codes index into dt.target_names
# (0 -> 'malignant', 1 -> 'benign').
dt.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0,

In [7]:
# Human-readable class names; position i is the name for label code i.
dt.target_names

array(['malignant', 'benign'], dtype='<U9')

### Create Dataframe

In [8]:
# Build one tabular frame: the 30 feature columns followed by the label column.
# `columns` must be a flat list of names. Wrapping it in another list (`[[...]]`)
# makes pandas build a MultiIndex header, so `df['target']` would return a
# one-column DataFrame instead of a Series and every label would be a tuple.
# Note: np.c_ stacks everything into one float array, so `target` is stored as 0.0/1.0.
df = pd.DataFrame(
    np.c_[dt.data, dt.target],
    columns=list(dt.feature_names) + ['target'],
)

In [9]:
# Inspect the last five rows to confirm the feature columns and the appended 'target' column.
df.tail()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
564,21.56,22.39,142.0,1479.0,0.111,0.1159,0.2439,0.1389,0.1726,0.05623,...,26.4,166.1,2027.0,0.141,0.2113,0.4107,0.2216,0.206,0.07115,0.0
565,20.13,28.25,131.2,1261.0,0.0978,0.1034,0.144,0.09791,0.1752,0.05533,...,38.25,155.0,1731.0,0.1166,0.1922,0.3215,0.1628,0.2572,0.06637,0.0
566,16.6,28.08,108.3,858.1,0.08455,0.1023,0.09251,0.05302,0.159,0.05648,...,34.12,126.7,1124.0,0.1139,0.3094,0.3403,0.1418,0.2218,0.0782,0.0
567,20.6,29.33,140.1,1265.0,0.1178,0.277,0.3514,0.152,0.2397,0.07016,...,39.42,184.6,1821.0,0.165,0.8681,0.9387,0.265,0.4087,0.124,0.0
568,7.76,24.54,47.92,181.0,0.05263,0.04362,0.0,0.0,0.1587,0.05884,...,30.37,59.16,268.6,0.08996,0.06444,0.0,0.0,0.2871,0.07039,1.0


### Split Data

In [10]:
# Separate inputs from labels by position:
#   x -> every column except the last (the 30 features)
#   y -> the last column only ('target')
x, y = df.iloc[:, :-1], df.iloc[:, -1]

In [11]:
# Sanity check: expect 569 samples with 30 feature columns, and a 1-D label vector.
x.shape, y.shape

((569, 30), (569,))

In [12]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore every score below) reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=.2,
    random_state=22,
)

In [13]:
# Training split size (80% of the 569 rows -> 455).
x_train.shape, y_train.shape

((455, 30), (455,))

In [14]:
# Test split size (the remaining 20% -> 114 rows).
x_test.shape, y_test.shape

((114, 30), (114,))

## Naive Bayes Classifier Model

### Naive Bayes with Gaussian

In [15]:
from sklearn.naive_bayes import GaussianNB

In [16]:
# Gaussian Naive Bayes models each feature as normally distributed within each class,
# which suits the continuous measurements in this dataset.
model_gau = GaussianNB()
model_gau.fit(x_train, y_train)

GaussianNB()

In [17]:
# Mean accuracy of the Gaussian model on the held-out test set.
model_gau.score(x_test, y_test)

0.9649122807017544

### Naive Bayes with Multinomial

In [18]:
from sklearn.naive_bayes import MultinomialNB

In [19]:
# Multinomial Naive Bayes is designed for count-like, non-negative features (e.g. word counts).
# NOTE(review): the features here are continuous measurements, so this reads as a
# comparison baseline rather than a natural fit; confirm that is the intent.
model_mul = MultinomialNB()
model_mul.fit(x_train, y_train)

MultinomialNB()

In [20]:
# Mean accuracy of the Multinomial model on the held-out test set, for comparison with GaussianNB.
model_mul.score(x_test, y_test)

0.9035087719298246

### Naive Bayes with Bernoulli

In [21]:
from sklearn.naive_bayes import BernoulliNB

In [22]:
# Bernoulli Naive Bayes models binary (0/1) features.
# NOTE(review): with the default binarize=0.0 every strictly positive value is mapped to 1,
# so almost all of these continuous features collapse to a constant. That likely explains
# the low score of this model. Consider an explicit `binarize` threshold or pre-binarized
# features if this variant is meant to be competitive.
model_ber = BernoulliNB()
model_ber.fit(x_train, y_train)

BernoulliNB()

In [23]:
# Mean accuracy of the Bernoulli model on the held-out test set, for comparison with the other two.
model_ber.score(x_test, y_test)

0.6228070175438597

### Predict

In [24]:
# Build one synthetic sample with 30 feature values to demonstrate `predict`.
# A seeded Generator keeps the sample (and therefore the prediction) reproducible
# across "Restart Kernel -> Run All"; the seed value itself is arbitrary.
# NOTE(review): values drawn uniformly from [0, 1) are far outside the scale of
# several real features (e.g. 'mean area' is in the hundreds or thousands), so the
# predicted class is illustrative only.
rng = np.random.default_rng(22)
pre = rng.random((1, 30))
pre

array([[0.53603844, 0.82880207, 0.98962857, 0.77361416, 0.25347535,
        0.15459405, 0.7010119 , 0.98070027, 0.35998817, 0.94259342,
        0.50603347, 0.20213591, 0.86609247, 0.25784285, 0.40213223,
        0.36669447, 0.30746906, 0.35409394, 0.24303132, 0.56739611,
        0.52688456, 0.00649166, 0.68449359, 0.26023593, 0.18844452,
        0.87433157, 0.63636461, 0.3884035 , 0.17296008, 0.69153542]])

In [25]:
# Classify the synthetic sample with the Gaussian model; the result is a label code
# (see dt.target_names for the corresponding class name).
model_gau.predict(pre)

array([0.])

In [26]:
# Look up the class names to interpret the numeric prediction (0 -> 'malignant', 1 -> 'benign').
dt.target_names

array(['malignant', 'benign'], dtype='<U9')