In [None]:
'''
Naive Bayes Theorem:
Definition:
Naive Bayes is a probabilistic classifier based on applying Bayes' theorem with strong (naive) 
independence assumptions between the features. 
It assumes that the presence of a particular feature in a class is unrelated to the presence of any other feature.
It is particularly useful for large datasets and text classification tasks.
Its a simple yet effective algorithm for classification tasks.
It is often used in spam detection, sentiment analysis, and document classification.

Formula:
P(C|X) = (P(X|C) * P(C)) / P(X)

Where:
- P(C|X) is the posterior probability of class C given features X.
- P(X|C) is the likelihood of features X given class C.

'''

In [None]:
'''
Important topics:
1. Probability
2. Bayes' Theorem
3. Naive Bayes Classifier

'''

In [None]:
'''
1. Probability:
a. indepentent events = eg. coin tosses
b. dependent events = eg. drawing cards from a deck without replacement
c. conditional probability = eg. P(A|B) = P(A and B) / P(B) 
                                        = P(B|A) * P(A) / P(B) 
d. joint probability = eg. P(A and B) = P(A|B) * P(B) = P(B|A) * P(A)
'''

In [None]:
'''
Bayes' Theorem:
Bayes' theorem is a fundamental theorem in probability theory that describes how to 
update the probability of a hypothesis based on new evidence. 
It is expressed as:
P(H|E) = (P(E|H) * P(H)) / P(E)

Derivation:
P(A and B) = P(B and A)
P(A)*P(B|A) = P(B)*P(A|B)
P(A|B) = P(B|A) * P(A) / P(B) -- this is Bayes' theorem

where:
- P(A/B) = probability of A given B has occurred (posterior probability)
- P(B/A) = probability of B given A has occurred (likelihood)
- P(A) = prior probability of A
- P(B) = prior probability of B
'''

In [None]:
'''
How we use Bayes' Theorem in machine learning:
1. Classification: 
   - Naive Bayes classifiers use Bayes' theorem to predict the class of a given instance based on its features.
   - It calculates the posterior probability for each class and selects the class with the highest probability.
2. Spam Detection:
   - Naive Bayes is commonly used in spam detection systems to classify emails as spam or not spam based on the words in the email.
   
'''

In [None]:
'''
Variants of Naive Bayes:
1. Gaussian Naive Bayes:
   - Assumes that the features follow a Gaussian (normal) distribution.
   - Suitable for continuous data.
2. Multinomial Naive Bayes:
    - Used for discrete data, particularly for text classification tasks.
    - Assumes that the features are counts or frequencies of events.
3. Bernoulli Naive Bayes:
    - Similar to Multinomial Naive Bayes but assumes binary features (presence or absence of a feature).
    - Useful for binary/boolean features.
4. Complement Naive Bayes:
    - A variant of Multinomial Naive Bayes that is particularly effective for imbalanced datasets.
    - It uses the complement of the class to improve classification performance.
'''

## Naive Bayes Theorem Implementation 

In [1]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import pandas as pd

In [3]:
# we will be doing multiclass classification using the iris dataset
X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [None]:
# numerical features
X_train

array([[5. , 2. , 3.5, 1. ],
       [6.5, 3. , 5.5, 1.8],
       [6.7, 3.3, 5.7, 2.5],
       [6. , 2.2, 5. , 1.5],
       [6.7, 2.5, 5.8, 1.8],
       [5.6, 2.5, 3.9, 1.1],
       [7.7, 3. , 6.1, 2.3],
       [6.3, 3.3, 4.7, 1.6],
       [5.5, 2.4, 3.8, 1.1],
       [6.3, 2.7, 4.9, 1.8],
       [6.3, 2.8, 5.1, 1.5],
       [4.9, 2.5, 4.5, 1.7],
       [6.3, 2.5, 5. , 1.9],
       [7. , 3.2, 4.7, 1.4],
       [6.5, 3. , 5.2, 2. ],
       [6. , 3.4, 4.5, 1.6],
       [4.8, 3.1, 1.6, 0.2],
       [5.8, 2.7, 5.1, 1.9],
       [5.6, 2.7, 4.2, 1.3],
       [5.6, 2.9, 3.6, 1.3],
       [5.5, 2.5, 4. , 1.3],
       [6.1, 3. , 4.6, 1.4],
       [7.2, 3.2, 6. , 1.8],
       [5.3, 3.7, 1.5, 0.2],
       [4.3, 3. , 1.1, 0.1],
       [6.4, 2.7, 5.3, 1.9],
       [5.7, 3. , 4.2, 1.2],
       [5.4, 3.4, 1.7, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [6.9, 3.1, 4.9, 1.5],
       [4.6, 3.1, 1.5, 0.2],
       [5.9, 3. , 5.1, 1.8],
       [5.1, 2.5, 3. , 1.1],
       [4.6, 3.4, 1.4, 0.3],
       [6.2, 2

In [5]:
from sklearn.naive_bayes import GaussianNB

In [6]:
glb = GaussianNB()
# fit the model
glb.fit(X_train, y_train)

In [7]:
y_pred = glb.predict(X_test)

In [8]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        16
           1       1.00      1.00      1.00        18
           2       1.00      1.00      1.00        11

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

Confusion Matrix:
 [[16  0  0]
 [ 0 18  0]
 [ 0  0 11]]


In [50]:
## work on tips dataset with naive bayes to predict smoking status
import seaborn as sns
tips = sns.load_dataset('tips')
tips.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [51]:
# feature and target variable
X = tips.drop('smoker', axis=1)
Y = tips['smoker']


In [59]:
# itentify categorical features and numerical features
categorical_col = X.select_dtypes(include=['object', 'category']).columns.to_list()
print(categorical_col)
numerical_col = X.select_dtypes(include = ['int64', 'float64']).columns.to_list()
print(numerical_col)

['sex', 'day', 'time']
['total_bill', 'tip', 'size']


In [60]:
# one-hot encoding for categorical features
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder


preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(drop='first', handle_unknown='ignore'), categorical_col)],
remainder = 'passthrough'
)

In [61]:
from sklearn.naive_bayes import BernoulliNB, MultinomialNB
from sklearn.pipeline import Pipeline
model_pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', BernoulliNB())
]
)

In [55]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [56]:
model_pipeline.fit(X_train, y_train)
y_pred = model_pipeline.predict(X_test)

In [57]:


from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
print("Accuracy:", accuracy_score(y_test, y_pred))

print("Classification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

Accuracy: 0.5540540540540541
Classification Report:
               precision    recall  f1-score   support

          No       0.56      0.69      0.62        39
         Yes       0.54      0.40      0.46        35

    accuracy                           0.55        74
   macro avg       0.55      0.55      0.54        74
weighted avg       0.55      0.55      0.54        74

Confusion Matrix:
 [[27 12]
 [21 14]]
