In [1]:
import tensorflow as tf

2024-01-25 10:24:14.343744: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
import os,timeit
import numpy as np

In [3]:
from numpy import random

In [4]:
# Enumerate the GPUs once and reuse the list for both the report and the
# device selection.
physical_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(physical_devices))

# Index of the GPU this notebook prefers to run on.
PREFERRED_GPU_INDEX = 1

if physical_devices:
    # Clamp the index so a single-GPU machine uses its only GPU instead of
    # raising IndexError; with no GPU at all the visible-device list is left
    # untouched.
    selected_gpu = physical_devices[min(PREFERRED_GPU_INDEX, len(physical_devices) - 1)]
    tf.config.experimental.set_visible_devices(selected_gpu, 'GPU')

Num GPUs Available:  2


In [5]:
from sys import platform

# Root folder that is scanned for variety entries, and the path separator
# matching the current OS. Both remain empty strings on platforms that are
# not handled below.
DATA_DIRECTORY = ""
SLASH = ""
if platform.startswith("linux"):
    DATA_DIRECTORY = "/home/tyagi/Desktop/wheat/data/BULK/"
    SLASH = "/"
elif platform == "win32":
    # Backslashes are escaped explicitly: "\w", "\d" and "\B" are invalid
    # escape sequences that newer Python versions warn about.
    DATA_DIRECTORY = "D:\\wheat\\data\\BULK\\"
    SLASH = "\\"

In [6]:
# ---- Notebook-wide constants ----

# Number of varieties (classes) taken from the data directory.
NUM_VARIETIES = 4

# Image counts for the train / validation / test splits
# (presumably per variety -- TODO confirm against the dataset builder).
TRAIN_IMAGE_COUNT = 1200
VAL_IMAGE_COUNT = 400
TEST_IMAGE_COUNT = 400

# Image size; both values are embedded in the cached dataset file names.
IMAGE_HEIGHT = 30
IMAGE_WIDTH = 30

# Band-related settings; all three are embedded in the cached dataset file names.
BAND_NUMBER = 60
FIRST_BAND = 15
LAST_BAND = 161

# When True, a "_REMOVE_NOISY_BANDS_" suffix is added to the dataset file names.
REMOVE_NOISY_BANDS = False

# Also embedded in the cached dataset file names.
FILLED_AREA_RATIO = 0.90

In [7]:
from enum import Enum

class filter_method(Enum):
    """Selectable filter options; the member *name* is what gets stored in FILTER."""
    none = 0
    snv = 1
    msc = 2
    savgol = 3


# Name of the active filter (member with value 0).
FILTER = filter_method.none.name

# Settings that are to be set when the chosen filter is savgol.
WINDOW, ORDER = 7, 2
DERIVATIVE = "none"

In [8]:
from enum import Enum
 
class feature_extraction_method(Enum):
    """Selectable feature-extraction options; the member *name* is stored in FEATURE_EXTRACTION."""
    none = 0
    pca_loading = 1
    lda = 2
    ipca = 3


# Name of the active feature-extraction method (member with value 0).
FEATURE_EXTRACTION = feature_extraction_method.none.name

# Band count: 8 for the two PCA variants, 3 otherwise.
if FEATURE_EXTRACTION in ("pca_loading", "ipca"):
    NUM_OF_BANDS = 8
else:
    NUM_OF_BANDS = 3
    if FEATURE_EXTRACTION == "lda":
        # For lda the band count is checked against min(NUM_VARIETIES - 1, 168).
        assert NUM_OF_BANDS <= min(NUM_VARIETIES-1,168),"NUM_OF_BANDS is greater."

In [9]:
# Names of the varieties in use, plus a lookup from variety name to its
# integer class code (its position in VARIETIES).
VARIETIES = []
VARIETIES_CODE = {}

# os.listdir() returns entries in arbitrary order. Sorting makes both the
# selection of the first NUM_VARIETIES entries and the name -> code mapping
# reproducible across runs and machines.
for name in sorted(os.listdir(DATA_DIRECTORY)):
    # Entries ending in .hdr / .bil are not varieties; skip them.
    if name.endswith((".hdr", ".bil")):
        continue
    VARIETIES_CODE[name] = len(VARIETIES)
    VARIETIES.append(name)
    if len(VARIETIES) == NUM_VARIETIES:
        break

In [10]:
def start_timer(label="Testing"):
    """Announce the start of a timed phase and return the start timestamp.

    Args:
        label: Name of the phase being timed, used in the printed message.
            Defaults to "Testing", which keeps the original output; pass
            e.g. "Training" when timing a model fit.

    Returns:
        The current ``timeit.default_timer()`` reading.
    """
    print(f"{label} started")
    return timeit.default_timer()

def end_timer():
    """Return the current ``timeit.default_timer()`` reading, used as the stop mark."""
    stop_mark = timeit.default_timer()
    return stop_mark

def show_time(tic, toc, label="Testing"):
    """Print the elapsed time between two timer readings.

    Args:
        tic: Start reading (as returned by ``start_timer``).
        toc: Stop reading (as returned by ``end_timer``).
        label: Name of the phase that was timed, used in the printed
            message. Defaults to "Testing", which keeps the original output.
    """
    elapsed = toc - tic
    # The explicit "\n" plus print's own newline leaves a blank line after the report.
    print(f"{label} time (s) = {elapsed}\n")

In [11]:
def dataset_file_name(variety):
    """Build the path prefix of the cached dataset files for one variety.

    The prefix encodes the variety (zero-padded to three characters) and the
    notebook's configuration constants. Callers append a suffix such as
    "_train_dataset.npy" to obtain an actual file path.

    Args:
        variety: Variety identifier; converted with ``str`` before padding.

    Returns:
        The path prefix as a string, rooted at "./dataset/".
    """
    pieces = [
        f"./dataset/V{str(variety).zfill(3)}",
        f"_FilledArea_{FILLED_AREA_RATIO}",
        f"_NumOfBands_{NUM_OF_BANDS}",
        f"_FB_{FIRST_BAND}",
        f"_LB_{LAST_BAND}",
        f"_BandNo_{BAND_NUMBER}",
        f"_ImageHeight_{IMAGE_HEIGHT}",
        f"_ImageWidth_{IMAGE_WIDTH}",
        f"_FILTER_{FILTER}",
        f"_FeatureExtraction_{FEATURE_EXTRACTION}",
    ]
    # Optional suffixes, present only for the matching configuration.
    if REMOVE_NOISY_BANDS:
        pieces.append(f"_REMOVE_NOISY_BANDS_{REMOVE_NOISY_BANDS}")
    if FILTER == "savgol":
        pieces.append(f"_WINDOW_{WINDOW}_ORDER_{ORDER}")
    return "".join(pieces)

In [12]:
# Load the cached train/test arrays of every variety and stack them along the
# first (sample) axis.
#
# The per-variety arrays are collected in lists and joined once with
# np.concatenate. This avoids converting every array to nested Python lists
# and repeatedly copying ever-growing lists, and it keeps the dtype stored in
# the .npy files instead of widening it through a list round trip.
train_parts, train_label_parts = [], []
test_parts, test_label_parts = [], []

# Slicing caps the number of varieties, replacing the in-loop index check.
for idx, v in enumerate(VARIETIES[:NUM_VARIETIES]):
    print("idx: ",idx)
    prefix = dataset_file_name(v)  # computed once per variety
    train_parts.append(np.load(prefix+"_train_dataset.npy"))
    train_label_parts.append(np.load(prefix+"_train_dataset_label.npy"))
    test_parts.append(np.load(prefix+"_test_dataset.npy"))
    test_label_parts.append(np.load(prefix+"_test_dataset_label.npy"))

if not train_parts:
    # Fail here with a clear message rather than later on an empty array.
    raise ValueError("No varieties found; check DATA_DIRECTORY and NUM_VARIETIES.")

train_dataset = np.concatenate(train_parts)
train_dataset_label = np.concatenate(train_label_parts)
test_dataset = np.concatenate(test_parts)
test_dataset_label = np.concatenate(test_label_parts)

idx:  0
idx:  1
idx:  2
idx:  3


In [13]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [14]:
# Flatten each 4-D sample array to 2-D (one row per sample) for the classifier.
nsamples, nx, ny, nb = train_dataset.shape
nsamplestest = test_dataset.shape[0]

# nx, ny, nb used to be overwritten by the test array's shape and then used to
# reshape the training array; check explicitly that both sets agree.
assert test_dataset.shape[1:] == (nx, ny, nb), "train and test samples must have the same shape"

# -1 lets NumPy derive the flattened feature count (nx*ny*nb) per array.
d2_train_dataset = train_dataset.reshape((nsamples, -1))
d2_test_dataset = test_dataset.reshape((nsamplestest, -1))

In [15]:
# Sweep the forest size from 100 to 1500 trees in steps of 100; for each size
# fit on the flattened training set and report test accuracy, confusion
# matrix and per-class metrics.

# Fixed seed so the reported metrics can be reproduced on a re-run.
RANDOM_SEED = 42

for x in range(1,16):
    n_trees = x*100
    # NOTE(review): n_jobs is left at its default so the measured fit time
    # stays comparable with earlier runs; consider n_jobs=-1 to speed this up.
    clf3 = RandomForestClassifier(n_estimators=n_trees, random_state=RANDOM_SEED)

    # Only the fit is timed; the timer helpers label their messages "Testing" by default.
    tic = start_timer()
    clf3.fit(d2_train_dataset, train_dataset_label)
    toc = end_timer()
    show_time(tic,toc)

    y_pred = clf3.predict(d2_test_dataset)

    print("Estimators: ",n_trees,"  ACCURACY OF THE MODEL: ", accuracy_score(test_dataset_label, y_pred))
    print(confusion_matrix(test_dataset_label, y_pred))

    print("Classification Report:")
    print(classification_report(test_dataset_label, y_pred))

Testing started
Testing time (s) = 282.81096911698114

Estimators:  100   ACCURACY OF THE MODEL:  0.658125
[[197 145  56   2]
 [ 42 316  40   2]
 [ 57  46 220  77]
 [ 15   8  57 320]]
Classification Report:
              precision    recall  f1-score   support

           0       0.63      0.49      0.55       400
           1       0.61      0.79      0.69       400
           2       0.59      0.55      0.57       400
           3       0.80      0.80      0.80       400

    accuracy                           0.66      1600
   macro avg       0.66      0.66      0.65      1600
weighted avg       0.66      0.66      0.65      1600

Testing started
Testing time (s) = 577.9955651959754

Estimators:  200   ACCURACY OF THE MODEL:  0.67375
[[178 164  58   0]
 [ 32 322  46   0]
 [ 34  54 247  65]
 [ 15   8  46 331]]
Classification Report:
              precision    recall  f1-score   support

           0       0.69      0.45      0.54       400
           1       0.59      0.81      0.68 

Testing time (s) = 4179.519232675957

Estimators:  1400   ACCURACY OF THE MODEL:  0.698125
[[187 155  58   0]
 [ 28 327  45   0]
 [ 27  50 265  58]
 [ 12  12  38 338]]
Classification Report:
              precision    recall  f1-score   support

           0       0.74      0.47      0.57       400
           1       0.60      0.82      0.69       400
           2       0.65      0.66      0.66       400
           3       0.85      0.84      0.85       400

    accuracy                           0.70      1600
   macro avg       0.71      0.70      0.69      1600
weighted avg       0.71      0.70      0.69      1600

Testing started
Testing time (s) = 4337.809597346

Estimators:  1500   ACCURACY OF THE MODEL:  0.695625
[[191 154  55   0]
 [ 25 324  51   0]
 [ 34  42 264  60]
 [ 13  12  41 334]]
Classification Report:
              precision    recall  f1-score   support

           0       0.73      0.48      0.58       400
           1       0.61      0.81      0.70       400
       