# Naive Baise using Label Encoding
* __Integers are assigned to each category according to alphabetical order__<br>
    Example:<br>
    Adult -> 0<br>
    Child -> 1
* __Apply Label Encoding only on categorical columns__
* __Types of NaiveBaise Classifiers__
    => __Multinomial NaiveBaise (used for discrete/categorical data which is counted)__
      Example::
      Days of week

    => __Bernoulli NaiveBaise (used for binary data 0/1)__
      Example::
      0 = word occurs in the document
      1 = word does not occur in the document

    => __Guassian Naive Baise (used for continuous data which is measured)__
      Example::
      Temperature

In [22]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB
from sklearn import preprocessing

## Loading Dataset

In [23]:
dataset = pd.read_csv("../../../Datasets/balloons.txt", sep=",")
dataset.columns = ['color','size','action','age','inflated']
dataset.head()

Unnamed: 0,color,size,action,age,inflated
0,YELLOW,SMALL,STRETCH,ADULT,T
1,YELLOW,SMALL,STRETCH,CHILD,F
2,YELLOW,SMALL,DIP,ADULT,F
3,YELLOW,SMALL,DIP,CHILD,F
4,YELLOW,LARGE,STRETCH,ADULT,T


In [24]:
samples_count, features_count = dataset.shape
samples_count, features_count

(19, 5)

## Separating target column from dataset

In [25]:
target = dataset['inflated']
dataset = dataset.drop('inflated', axis=1)

## Label Encoding of Dataset & Target column

### Dataset

In [26]:
le_dataset = preprocessing.LabelEncoder()
le_color = preprocessing.LabelEncoder()
le_size = preprocessing.LabelEncoder()
le_action = preprocessing.LabelEncoder()
le_age = preprocessing.LabelEncoder()

In [27]:
dataset_encoded = dataset.apply(le_dataset.fit_transform)
dataset_encoded.head()

Unnamed: 0,color,size,action,age
0,1,1,1,0
1,1,1,1,1
2,1,1,0,0
3,1,1,0,1
4,1,0,1,0


In [28]:
le_color.fit_transform(dataset['color'])
le_size.fit_transform(dataset['size'])
le_action.fit_transform(dataset['action'])
le_age.fit_transform(dataset['age'])

array([0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1])

### Target Column

In [29]:
le_target = preprocessing.LabelEncoder()
target_encoded = le_target.fit_transform(target)
target_encoded

array([1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0])

## Train & Test split

In [30]:
x = dataset_encoded.values
y = target_encoded
train_x, test_x, train_y, test_y = train_test_split(x, y, random_state=0)

## Bernoulli NaiveBaise

In [31]:
mnb = BernoulliNB()
mnb.fit(train_x, train_y)

BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True)

## Check Accuracy

In [32]:
predictions = mnb.predict(test_x)
accuracy = np.sum(predictions==test_y) / float(len(test_y))
print("%.2f" % accuracy)

1.00


## Prediction by using input data from user

In [36]:
color = input("Enter Color(YELLOW,PURPLE)::")
size = input("Enter Size(LARGE,SMALL)::")
action = input("Enter Action(STRETCH,DIP)::")
age = input("Enter Age(ADULT,CHILD)::")

Enter Color(YELLOW,PURPLE)::PURPLE
Enter Size(LARGE,SMALL)::SMALL
Enter Action(STRETCH,DIP)::DIP
Enter Age(ADULT,CHILD)::CHILD


In [37]:
color_encoded=le_color.transform([color])
size_encoded=le_size.transform([size])
action_encoded=le_action.transform([action])
age_encoded=le_age.transform([age])

In [38]:
user_sample_encoded = [color_encoded[0], size_encoded[0], action_encoded[0], age_encoded[0]]

In [39]:
result_encoded = mnb.predict([user_sample_encoded])
result_decoded = le_target.inverse_transform(result_encoded)
result = result_decoded[0]

In [41]:
if result == 'T':
    print("Balloon is inflated")
else:
    print("Balloon is not inflated")

Balloon is not inflated
