# Naïve Bayes Classifier

## Import the necessary libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import classification_report

## Read the data

In [None]:
# Load the dataset from disk, decoding the file as ISO-8859-1.
data = pd.read_csv(
    'chirper.csv',
    encoding="ISO-8859-1",
)
# Show the first ten rows as a quick sanity check of the columns.
data.head(10)

In [None]:
# Count how many rows carry each distinct value of the 'Target' column,
# which shows how balanced the classes are before splitting.
data['Target'].value_counts()

## Define the inputs and the target

Identify the **relevant** columns and declare the variables below.

In [None]:
# The 'Text' column feeds the model; the 'Target' column holds the labels.
inputs, target = data['Text'], data['Target']

## Split the data into training and testing

* Perform a 70:30 split - 30% of the data should be dedicated to testing.
* Set the random state to 365.
* Make sure that there is a fair distribution of the representatives from each class.

In [None]:
# Hold out 30% of the samples for testing, with a fixed seed so the split
# is reproducible.
# `stratify` expects the array of class labels (not a boolean): passing
# `target` keeps the class proportions the same in both subsets.
x_train, x_test, y_train, y_test = train_test_split(
    inputs,
    target,
    test_size=0.3,
    random_state=365,
    stratify=target,
)

## Transform the training and testing inputs using CountVectorizer

In [None]:
# Build the bag-of-words encoder.
vec = CountVectorizer()
# Fit on the training texts only and encode them in the same step.
x_train_transform = vec.fit_transform(x_train)
# Encode the test texts with the already-fitted vectorizer (no refitting),
# so both matrices share the same feature columns.
x_test_transform = vec.transform(x_test)

## Create a Naïve Bayes model

* Create an instance of the Naïve Bayes class.
* Fit the data to the model.

In [None]:
# Multinomial Naive Bayes classifier with its default settings.
nbm = MultinomialNB()
# Train on the vectorized training texts and their labels.
nbm.fit(
    x_train_transform,
    y_train,
)

## Make predictions on the test data

In [None]:
# Predict a class label for every vectorized test sample.
y_predict = nbm.predict(x_test_transform)

## Create a confusion matrix

In [None]:
# Plot true vs. predicted labels; rows/columns follow the order of the
# classes stored on the fitted model.
ConfusionMatrixDisplay.from_predictions(
    y_true=y_test,
    y_pred=y_predict,
    labels=nbm.classes_,
    cmap='magma',
)

## Print a classification report

* Fill in the parameters necessary for the classification_report() method.
* A parameter **zero_division** has also been added and set to 0.

In [None]:
# Per-class precision/recall/F1 summary; zero_division=0 reports 0 for a
# metric whose denominator is zero.
# NOTE(review): target_names are applied positionally to the class labels
# in their sorted order, so 'positive' is attached to the first label.
# Confirm this matches how 'Target' is encoded in chirper.csv.
print(
    classification_report(
        y_true=y_test,
        y_pred=y_predict,
        target_names=['positive', 'negative'],
        zero_division=0,
    )
)