# Model Training Script
### This script trains a Random Forest classifier that identifies the mango variety from the feature vector of a mango leaf.

In [401]:
import os
import numpy as np
import pandas
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

### Prepare the dataset directory and load the dataset CSV file into a pandas DataFrame

In [402]:
# Folder that holds the leaves dataset
dataset_folder = 'PreprocessedDatabase'
# Name of the dataset CSV file inside that folder
dataset_filename = 'labeled_dataset.csv'
# The dataset is located relative to the current working directory
cwd = os.getcwd()
dataset_path = os.path.join(cwd, dataset_folder, dataset_filename)
# Read the CSV file into a pandas DataFrame
data = pandas.read_csv(dataset_path)

### Create arrays of feature vectors and their corresponding labels from the DataFrame

In [403]:
# Numeric class id for each variety name (the id is the position in this tuple)
label_map = {
    variety: class_id
    for class_id, variety in enumerate(
        ('alphonso', 'amrapali', 'chausa', 'dusheri', 'langra')
    )
}
# Feature vectors: every column except the last, copied out of the DataFrame
X = data.iloc[:, :-1].to_numpy(copy=True)
# Labels: the last column holds the string label of each row; translate each
# one to its numeric id (a label missing from label_map raises KeyError)
Y = np.array(
    [label_map[name] for name in data.iloc[:, -1].to_numpy(copy=True)]
)

### Split the Data into Training and Test Sets

In [404]:
# Shuffle the samples and keep 75% for training, 25% for testing.
# random_state is fixed so the same split is produced on every run; without
# it the evaluation results change each time the notebook is re-executed.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, train_size=0.75, test_size=0.25, shuffle=True, random_state=0
)

### Train and Test Random Forest Classifier

In [405]:
# Build a forest of 30 trees (depth capped at 20, fixed seed) and fit it on
# the training split; fit() returns the fitted estimator itself
rf_classifier = RandomForestClassifier(
    max_depth=20,
    n_estimators=30,
    random_state=0,
).fit(X_train, Y_train)

# Predicted class for every test sample
predictions = rf_classifier.predict(X_test)

# Mean accuracy of the classifier on the test split
accuracy = rf_classifier.score(X_test, Y_test)

# Confusion matrix of true labels (rows) against predicted labels (columns)
cm = confusion_matrix(Y_test, predictions)

In [406]:
# Show the accuracy the classifier scored on the test split
print(accuracy)

0.6995515695067265


In [407]:
# Show the confusion matrix computed from Y_test and the test predictions
print(cm)

[[30  1  6  2  4]
 [ 6 21  1  9  7]
 [ 5  0 35  0  3]
 [ 2  5  0 30  1]
 [ 5  1  9  0 40]]


### Serialize the Random Forest model to a file

In [408]:
# File that will hold the serialized classifier
model_file_name = "mango_leaf_classifier.rf"
# Persist the trained Random Forest classifier to that file with joblib
joblib.dump(value=rf_classifier, filename=model_file_name)

['mango_leaf_classifier.rf']