## Implementations

In [None]:
import pandas
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import sklearn

# Report the versions of the plotting and modelling libraries in use.
for library_name, library in (('Seaborn', sns), ('Sklearn', sklearn)):
    print(f'{library_name} version: {library.__version__}')

# Reading and preparing the dataset

In [None]:
from sklearn.datasets import load_iris

# return_X_y=True requests the (features, labels) pair directly.
iris_features, iris_labels = load_iris(return_X_y=True)
X, y = iris_features, iris_labels

In [None]:
# Inspect the container type of the feature matrix.
print(f'Type of features array: {type(X)}')

# Preview the first five rows.
# Column order: sepal_length, sepal_width, petal_length, petal_width
first_rows = X[:5]
print(f'{first_rows}')

## _Is it applicable to use an ordered array for labels?_

In [None]:
# Inspect the container type of the label vector.
print(f'Type of label array: {type(y)}')

# Preview the first five labels (species).
first_labels = y[:5]
print(f'{first_labels}')

## _Why do we need to divide the dataset into train, validation, and test sets?_

In [None]:
from sklearn.model_selection import train_test_split

# Target proportions: 80% train, 10% validation, 10% test.
# Step 1: hold out 20% of the samples.
X_train, X_holdout, y_train, y_holdout = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Step 2: split the held-out part in half into validation and test.
X_valid, X_test, y_valid, y_test = train_test_split(
    X_holdout, y_holdout, test_size=0.5, random_state=42
)

In [None]:
# Report the size of the full dataset and of each split.
for split_name, split_features in (
    ('whole', X),
    ('train', X_train),
    ('validation', X_valid),
    ('test', X_test),
):
    print(f'Total # of sample in {split_name} dataset: {len(split_features)}')

# Creating the model

https://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html#sklearn.naive_bayes.GaussianNB

In [None]:
from sklearn.naive_bayes import GaussianNB

# First experiment: start with var_smoothing=10.
initial_var_smoothing = 10
nb_model = GaussianNB(var_smoothing=initial_var_smoothing)

In [None]:
# Fit the classifier on the training split only.
nb_model.fit(X=X_train, y=y_train)

In [None]:
# A single hand-written sample; feature order follows the dataset columns:
# sepal_length, sepal_width, petal_length, petal_width.
# Alternative sample to try: [[1.0, 1.0, 1.0, 1.0]]
sample_row = [2.3, 8.7, 1.0, 2.3]
example = [sample_row]

prediction = nb_model.predict(example)
print(f'Prediction for given example: {prediction}')

In [None]:
# Score the fitted model on the validation split.
validation_score = nb_model.score(X=X_valid, y=y_valid)
print(f'Validation score of trained model: {validation_score}')

## Run more experiments with different hyperparameters

In [None]:
# Try several var_smoothing values in one loop instead of copy-pasting the
# fit/score cell for each value. The value 10 repeats the first experiment
# so that every candidate is compared in the same place.
candidate_var_smoothing = (10, 1, 1e-1)

best_validation_score = None
for var_smoothing in candidate_var_smoothing:
    candidate_model = GaussianNB(var_smoothing=var_smoothing)
    candidate_model.fit(X_train, y_train)

    validation_score = candidate_model.score(X_valid, y_valid)
    print(f'var_smoothing={var_smoothing}')
    print(f'Validation score of trained model: {validation_score}')

    # Keep the candidate with the highest validation score in nb_model so
    # that later cells evaluate the selected configuration rather than
    # whichever model happened to be trained last. On a tie the later
    # candidate wins.
    if best_validation_score is None or validation_score >= best_validation_score:
        best_validation_score = validation_score
        nb_model = candidate_model

# Leave validation_score describing the model that is kept in nb_model.
validation_score = best_validation_score
print(f'Selected var_smoothing: {nb_model.var_smoothing}')

#### After the hyperparameters are selected from the experiment with the highest validation score, the model is evaluated on the test dataset

In [None]:
# Score the model currently bound to nb_model on the held-out test split.
test_score = nb_model.score(X=X_test, y=y_test)
print(f'Test score of trained model: {test_score}')

## Investigating the results

In [None]:
from sklearn.metrics import confusion_matrix

y_predictions = nb_model.predict(X_test)

# confusion_matrix expects (y_true, y_pred). With that order, rows are the
# true classes and columns are the predicted classes; swapping the arguments
# transposes the matrix.
conf_matrix = confusion_matrix(y_test, y_predictions)
print(conf_matrix)

# Label the axes so the orientation of the matrix is explicit in the figure.
ax = sns.heatmap(conf_matrix, annot=True)
ax.set(xlabel='Predicted label', ylabel='True label');

In [None]:
from sklearn.metrics import classification_report

# classification_report takes (y_true, y_pred); passing them in that order
# keeps the per-class precision and recall from being exchanged.
print(classification_report(y_test, y_predictions))