In [1]:
import tensorflow as tf

2023-08-05 09:08:32.117606: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
import os,timeit
import numpy as np

In [3]:
from numpy import random

In [4]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  1


In [5]:
from sys import platform
DATA_DIRECTORY = ""
SLASH = ""
if platform == "linux" or platform == "linux2":
    DATA_DIRECTORY = "/home/tyagi/Desktop/wheat/data/BULK/"
    SLASH = "/"
elif platform == "win32":
    DATA_DIRECTORY = "D:\mvl\wheat\data\BULK\\"
    SLASH="\\"

In [6]:
#Constants
BAND_NUMBER = 60
FILLED_AREA_RATIO = 0.90
TOTAL_IMAGE_COUNT = 2400
IMAGE_COUNT = int(TOTAL_IMAGE_COUNT/4)
NUM_VARIETIES = 4

REMOVE_NOISY_BANDS = False
FIRST_BAND = 15
LAST_BAND = 161

IMAGE_WIDTH = 30
IMAGE_HEIGHT = 30

In [7]:
from enum import Enum

class filter_method(Enum):
    none = 0
    snv = 1
    msc = 2
    savgol = 3
    
FILTER = filter_method(2).name

# to be set if filter chosen is savgol
WINDOW = 7
ORDER = 2
DERIVATIVE = "none"

In [8]:
from enum import Enum
 
class feature_extraction_method(Enum):
    none = 0
    pca_loading = 1
    lda = 2
    ipca = 3

FEATURE_EXTRACTION = feature_extraction_method(0).name

NUM_OF_BANDS = 3
if FEATURE_EXTRACTION == "pca_loading" or FEATURE_EXTRACTION == "ipca":
    NUM_OF_BANDS = 8
elif FEATURE_EXTRACTION == "lda":
    NUM_OF_BANDS = 3
    assert NUM_OF_BANDS <= min(NUM_VARIETIES-1,168),"NUM_OF_BANDS is greater."

In [9]:
# List for All varieties
VARIETIES = []
VARIETIES_CODE = {}

for name in os.listdir(DATA_DIRECTORY):
    if (name.endswith(".hdr") or name.endswith(".bil")):
        continue
    VARIETIES_CODE[name] = len(VARIETIES)
    VARIETIES.append(name)
    if len(VARIETIES)==NUM_VARIETIES:
        break

In [10]:
def start_timer():
    print("Testing started")
    return timeit.default_timer()

def end_timer():
    return timeit.default_timer()

def show_time(tic,toc): 
    test_time = toc - tic
    print('Testing time (s) = ' + str(test_time) + '\n')

In [11]:
def dataset_file_name(variety):
    name = "./dataset/V"+str(variety).zfill(3)+"_IC_"+str(TOTAL_IMAGE_COUNT).zfill(5)+"_FilledArea_"+str(FILLED_AREA_RATIO)+"_NumOfBands_"+str(NUM_OF_BANDS)+"_FB_"+str(FIRST_BAND)+"_LB_"+str(LAST_BAND)+"_BandNo_"+str(BAND_NUMBER)+"_ImageHeight_"+str(IMAGE_HEIGHT)+"_ImageWidth_"+str(IMAGE_WIDTH)+"_FILTER_"+str(FILTER)+"_FeatureExtraction_"+str(FEATURE_EXTRACTION)
    if REMOVE_NOISY_BANDS:
        name+="_REMOVE_NOISY_BANDS_"+str(REMOVE_NOISY_BANDS)
    if FILTER == "savgol":
        name+="_WINDOW_"+str(WINDOW)+"_ORDER_"+str(ORDER)
    return name

In [12]:
train_dataset = []
train_dataset_label = []
test_dataset=[]
test_dataset_label = []

for idx, v in enumerate(VARIETIES):
    print("idx: ",idx)
    if idx >= NUM_VARIETIES:
        break
    train_dataset= train_dataset + np.load(dataset_file_name(v)+"_train_dataset.npy").tolist()
    train_dataset_label = train_dataset_label + np.load(dataset_file_name(v)+"_train_dataset_label.npy").tolist()
    test_dataset = test_dataset + np.load(dataset_file_name(v)+"_test_dataset.npy").tolist()
    test_dataset_label = test_dataset_label + np.load(dataset_file_name(v)+"_test_dataset_label.npy").tolist()
    
train_dataset = np.array(train_dataset)
train_dataset_label = np.array(train_dataset_label)
test_dataset = np.array(test_dataset)
test_dataset_label = np.array(test_dataset_label)

idx:  0
idx:  1
idx:  2
idx:  3


In [13]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [14]:
nsamples, nx, ny,nb = train_dataset.shape
nsamplestest, nx, ny,nb = test_dataset.shape
d2_train_dataset = train_dataset.reshape((nsamples,nx*ny*nb))
d2_test_dataset = test_dataset.reshape((nsamplestest,nx*ny*nb))

In [15]:
for x in range(1,15):
    clf3 = RandomForestClassifier(n_estimators = x*100)
    tic = start_timer()
    clf3.fit(d2_train_dataset, train_dataset_label)
    toc = end_timer()
    show_time(tic,toc)
    
    y_pred = clf3.predict(d2_test_dataset)
    
    print("Estimators: ",x*100,"  ACCURACY OF THE MODEL: ", accuracy_score(test_dataset_label, y_pred))
    print(confusion_matrix(test_dataset_label, y_pred))

    print("Classification Report:")
    print(classification_report(test_dataset_label, y_pred))

Testing started
Testing time (s) = 440.661719583004

Estimators:  100   ACCURACY OF THE MODEL:  0.6604166666666667
[[298  83  38  61]
 [ 69 313  77  21]
 [ 28  58 381  13]
 [ 97  53  54 276]]
Classification Report:
              precision    recall  f1-score   support

           0       0.61      0.62      0.61       480
           1       0.62      0.65      0.63       480
           2       0.69      0.79      0.74       480
           3       0.74      0.57      0.65       480

    accuracy                           0.66      1920
   macro avg       0.66      0.66      0.66      1920
weighted avg       0.66      0.66      0.66      1920

Testing started
Testing time (s) = 873.6248341510072

Estimators:  200   ACCURACY OF THE MODEL:  0.6901041666666666
[[303  93  30  54]
 [ 64 320  79  17]
 [ 29  37 403  11]
 [ 92  44  45 299]]
Classification Report:
              precision    recall  f1-score   support

           0       0.62      0.63      0.63       480
           1       0.65  