Fruit classification on the Fruits-360 dataset using different algorithms (SVM, k-NN, decision tree, random forest, hierarchical softmax)

In [None]:
import numpy as np 
import cv2
import glob
import os
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score,precision_score,recall_score,f1_score
# Side length, in pixels, that every loaded image is resized to (dim x dim)
# by getYourFruits before it is flattened and fed to the classifiers.
dim = 100


def _load_fruit_images(directory, label):
    """Load every ``*.jpg`` file in ``directory`` as a resized RGB image.

    Args:
        directory: Folder holding the images of a single fruit class.
        label: Integer class label attached to every image that is loaded.

    Returns:
        A ``(images, labels)`` pair of equally long Python lists.
    """
    import warnings

    images = []
    labels = []
    # glob returns files in arbitrary order; sorting keeps the sample order
    # (and anything derived from it, e.g. fold splits) reproducible.
    for image_path in sorted(glob.glob(os.path.join(directory, "*.jpg"))):
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread reports an unreadable/corrupt file by returning None
            # instead of raising; skip it rather than crash in cv2.resize.
            warnings.warn("Skipping unreadable image: " + image_path)
            continue
        image = cv2.resize(image, (dim, dim))
        # imread yields BGR channel order, so the swap goes BGR -> RGB.
        # (Same channel swap as COLOR_RGB2BGR; only the name is corrected.)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        images.append(image)
        labels.append(label)
    return images, labels


def getYourFruits(fruits, data_type, print_n=False, k_fold=False):
    """Load the images of the selected fruits from the Fruits-360 folders.

    Args:
        fruits: Sequence of class folder names; the position of a name in
            this sequence is used as its integer label.
        data_type: Sub-folder to read ('Training' or 'Test'). Ignored when
            ``k_fold`` is true.
        print_n: When true (and ``k_fold`` is false), print how many images
            were loaded for each fruit.
        k_fold: When true, pool the 'Training' and 'Test' folders into one
            dataset and print each folder path as it is read.

    Returns:
        ``(images, labels)`` as NumPy arrays. Both are empty arrays of shape
        ``(0,)`` when no image is found.
    """
    root = "datasets/fruits-360_dataset/fruits-360/"
    splits = ['Training', 'Test'] if k_fold else [data_type]

    images = []
    labels = []
    for split in splits:
        for i, f in enumerate(fruits):
            p = root + split + "/" + f
            if k_fold:
                print(p)
            fruit_images, fruit_labels = _load_fruit_images(p, i)
            images.extend(fruit_images)
            labels.extend(fruit_labels)
            if print_n and not k_fold:
                print("There are " , len(fruit_images) , " " , data_type.upper(), " images of " , fruits[i].upper())
    return np.array(images), np.array(labels)
    
def getAllFruits():
    """Return the names of all fruit classes found in the Training folder.

    Each entry directly under ``datasets/fruits-360_dataset/fruits-360/Training``
    is treated as one class; its path is printed and its last path component
    is used as the class name.

    Returns:
        List of class names, in sorted path order so that label indices
        derived from the list are the same on every run.
    """
    fruits = []
    for fruit_path in sorted(glob.glob("datasets/fruits-360_dataset/fruits-360/Training/*")):
        print(fruit_path)
        # glob joins matches with the OS separator, so on Windows the last
        # component is preceded by a backslash and split("/") would not
        # isolate it. basename handles the platform's separators.
        fruit = os.path.basename(os.path.normpath(fruit_path))
        fruits.append(fruit)
    return fruits
    

In [None]:
#Choose your Fruits
fruits = ['Pineapple' , 'Cocos'] #Binary classification

#Get Images and Labels
X_t, y_train =  getYourFruits(fruits, 'Training', print_n=True, k_fold=False)
X_test, y_test = getYourFruits(fruits, 'Test', print_n=True, k_fold=False)

#Get data for k-fold (Training and Test folders pooled; data_type is ignored when k_fold=True)
X,y = getYourFruits(fruits, '', print_n=True, k_fold=True)

#Keep a handle on the unscaled pooled images before X is replaced by its scaled version
X_original=X

#Scale Data Images
#Fit the scaler on the training images only and re-use those statistics for
#the test images. Calling fit_transform on the test set would re-fit the
#scaler on test data, so train and test would be scaled differently.
scaler = StandardScaler()
X_train = scaler.fit_transform([i.flatten() for i in X_t])
X_test = scaler.transform([i.flatten() for i in X_test])

#The pooled k-fold data gets its own scaler so `scaler` stays the train-fitted one.
kfold_scaler = StandardScaler()
X = kfold_scaler.fit_transform([i.flatten() for i in X])

There are  490   TRAINING  images of  PINEAPPLE
There are  490   TRAINING  images of  COCOS
There are  166   TEST  images of  PINEAPPLE
There are  166   TEST  images of  COCOS
datasets/fruits-360_dataset/fruits-360/Training/Pineapple
datasets/fruits-360_dataset/fruits-360/Training/Cocos
datasets/fruits-360_dataset/fruits-360/Test/Pineapple
datasets/fruits-360_dataset/fruits-360/Test/Cocos


MULTI-CLASS CLASSIFICATION

In [None]:
fruits = ['Orange', 'Banana' , 'Strawberry', 'Apple Golden 1', 'Kiwi' , 'Lemon', 'Cocos' , 'Pineapple' , 'Peach', 'Cherry 1', 'Cherry 2', 'Mandarine']

#Get Images and Labels
X, y =  getYourFruits(fruits, 'Training')
X_test, y_test = getYourFruits(fruits, 'Test')

#Scale Data Images
#Fit on the training images only; the test images are transformed with the
#training statistics instead of re-fitting the scaler on test data.
scaler = StandardScaler()
X_train = scaler.fit_transform([i.flatten() for i in X])
X_test = scaler.transform([i.flatten() for i in X_test])


def report_test_metrics(name, y_true, y_pred):
    """Print accuracy, recall, precision and F1 (as percentages) for one model.

    Recall, precision and F1 are micro-averaged. For single-label multi-class
    predictions, micro-averaged precision, recall and F1 all equal accuracy,
    so the four printed values are expected to be identical.

    Args:
        name: Model name inserted into the printed line.
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
    """
    print('Performance Metrics of Test Data with %s : Accuracy : %f , Recall : %f , Precision : %f, F1 Score: %f' % (
        name,
        accuracy_score(y_true, y_pred)*100,
        recall_score(y_true, y_pred,average='micro')*100,
        precision_score(y_true, y_pred,average='micro')*100,
        f1_score(y_true, y_pred,average='micro')*100))
    print('----------------------------------------------------')


#Models to compare. The tree-based models are seeded so that a re-run of the
#notebook reproduces the same numbers.
classifiers = [
    ('SVM', SVC(gamma='auto', kernel='linear')),
    ('KNN', KNeighborsClassifier(n_neighbors=5)),
    ('Decision Tree', DecisionTreeClassifier(random_state=0)),
    ('Random Forest Classifier', RandomForestClassifier(n_estimators = 10,min_samples_split = 2, random_state=0)),
]

for name, model in classifiers:
    model.fit(X_train, y)
    y_pred = model.predict(X_test)
    report_test_metrics(name, y_test, y_pred)

#Keep the fitted random forest available under its original name.
forest = classifiers[-1][1]

Performance Metrics of Test Data with SVM : Accuracy : 99.363369 , Recall : 99.363369 , Precision : 99.363369, F1 Score: 99.363369
----------------------------------------------------
Performance Metrics of Test Data with KNN : Accuracy : 95.886386 , Recall : 95.886386 , Precision : 95.886386, F1 Score: 95.886386
----------------------------------------------------
Performance Metrics of Test Data with Decision Tree : Accuracy : 74.387855 , Recall : 74.387855 , Precision : 74.387855, F1 Score: 74.387855
----------------------------------------------------
Performance Metrics of Test Data with Random Forest Classifier : Accuracy : 93.878550 , Recall : 93.878550 , Precision : 93.878550, F1 Score: 93.878550
----------------------------------------------------


Hierarchical softmax

In [None]:
pip install napkinxc




You should consider upgrading via the 'C:\Users\nagat\AppData\Local\Programs\Python\Python39\python.exe -m pip install --upgrade pip' command.


In [None]:
from napkinxc.models import HSM
from napkinxc.measures import precision_at_k

# X_test was already standardised by the cell that builds X_train, so it is
# used as-is. Re-fitting the scaler on it here would be redundant, and it
# would rescale the data again every time this cell is re-run.
# NOTE(review): '/Code' is an absolute path passed as the first argument of
# HSM, presumably the directory where the model is stored. Confirm it is
# writable on the target machine, or switch to a relative path.
hsm = HSM('/Code')
hsm.fit(X_train, y)
# top_k=1 requests a single predicted label per test sample.
y_hsm = hsm.predict(X_test, top_k=1)
print('----------------------------------------------------')
print('Performance Metrics of Test Data with HSM Classifier : Accuracy : %f , Recall : %f , Precision : %f, F1 Score: %f' % (accuracy_score(y_test, y_hsm)*100, recall_score(y_test, y_hsm,average='micro')*100, precision_score(y_test, y_hsm,average='micro')*100, f1_score(y_test, y_hsm,average='micro')*100) )
print('----------------------------------------------------')


----------------------------------------------------
Performance Metrics of Test Data with HSM Classifier : Accuracy : 99.020568 , Recall : 99.020568 , Precision : 99.020568, F1 Score: 99.020568
----------------------------------------------------
