# Model Training Script
### This script is used to create Naive Bayes Classifier model for classifying mango variety using feature vector of mango leaf.

In [57]:
import os
import numpy as np
import pandas
import joblib
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

### Prepare dataset directory and load dataset csv file into Pandas DataFrame

In [58]:
# Leaves Dataset Folder Name
dataset_folder = 'PreprocessedDatabase'
# Leaves Dataset File Name
dataset_filename = 'labeled_dataset.csv'
# Current working directory
cwd = os.getcwd()
# Load the dataset from CSV file into pandas DataFrame
data = pandas.read_csv(os.path.join(cwd, dataset_folder, dataset_filename))

### Create lists of feature vectors and corresponding labels from dataframe

In [59]:
# Mapping of string labels to numeric value
label_map = {
    'alphonso': 0,
    'amrapali': 1,
    'chausa': 2,
    'dusheri': 3,
    'langra': 4
}
# List of feature vectors
X = data.iloc[:, :len(data.columns)-1].to_numpy(copy=True)
# List of corresponding label to each feature vector
Y = data.iloc[:,-1].to_numpy(copy=True)
# Map string labels to numeric values
Y = np.array([label_map[label] for label in Y])

### Create and Split Training and Test Data

In [60]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.75, test_size=0.25, shuffle=True)

### Train and Test Naive Bayes Classifier

In [61]:
# Train Naive Bayes Model using training data
classifier = GaussianNB()
classifier.fit(X_train, Y_train)

# Model predictions using test data
nb_predictions = classifier.predict(X_test) 
  
# model accuracy for X_test
accuracy = classifier.score(X_test, Y_test)
  
# creating a confusion matrix
cm = confusion_matrix(Y_test, nb_predictions)

In [62]:
print(accuracy)

0.6053811659192825


In [63]:
print(cm)

[[28  0  2  3  4]
 [ 4 13  1 17  4]
 [ 6  4 32  0  8]
 [ 2  7  0 37  1]
 [11  5  6  3 25]]


### Serialize Naive Bayes classifier to a file

In [64]:
model_file_name = "mango_leaf_classifier.nb"
# Writes Naive bayes classifier to a file
joblib.dump(classifier, model_file_name)

['mango_leaf_classifier.nb']