In [1]:
import datetime, logging, os, sys, math, random
logging.disable(logging.WARNING)
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import tensorflow as tf
import tensorflow_addons as tfa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import cv2 as cv
from PIL import Image as Img
import albumentations as alb

from ImageDataAugmentor.image_data_augmentor import *
from sklearn.model_selection import train_test_split, StratifiedKFold
from albumentations.core.composition import Compose, OneOf
from tensorflow.keras import Sequential, layers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

devices = tf.config.list_physical_devices('GPU')
for device in devices:
   tf.config.experimental.set_memory_growth(device, True) 
print(devices)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]


In [2]:
TASK = "sorghum-id"
TASK_ID = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
PATH = os.path.abspath(os.path.join(os.getcwd() ,"../../../../datasets/sorghum-id-fgvc-9"))+"/"

train_dir = PATH+'train_images/'
test_dir = PATH+'test/'

save_dir    = '../results/sorghum/'
log_dir     = os.path.join("../logs/",''.join([TASK,"-",TASK_ID]))
model_dir   = os.path.join("../models/",''.join([TASK,"-",TASK_ID]))+'/'

WIDTH = 512
HEIGHT = 512



In [3]:
submission = pd.read_csv(PATH+'sample_submission.csv')
submission

Unnamed: 0,filename,cultivar
0,1000005362.png,PI_152923
1,1000099707.png,PI_152923
2,1000135300.png,PI_152923
3,1000136796.png,PI_152923
4,1000292439.png,PI_152923
...,...,...
23634,999578153.png,PI_152923
23635,999692877.png,PI_152923
23636,999756998.png,PI_152923
23637,999892248.png,PI_152923


In [4]:
def CLAHE(img):
    clahe = cv.createCLAHE(clipLimit=40, tileGridSize=(10,10))
    t = np.asarray(img)
    t = cv.cvtColor(t, cv.COLOR_BGR2HSV)
    t[:,:,-1] = clahe.apply(t[:,:,-1])
    t = cv.cvtColor(t, cv.COLOR_HSV2BGR)
    t = Img.fromarray(t)
    t = np.array(t)
    return t

In [5]:
test_gen= ImageDataAugmentor(preprocess_input=CLAHE)
test_generator = test_gen.flow_from_dataframe(dataframe=submission,
                                              directory=test_dir,
                                              x_col='filename',
                                              y_col=None,
                                              target_size=(WIDTH,HEIGHT),
                                              color_mode='rgb',
                                              class_mode=None,
                                              batch_size=1,
                                              shuffle=False,)

Found 23639 validated image filenames.


In [6]:
STEP_SIZE_TEST=test_generator.n//test_generator.batch_size
STEP_SIZE_TEST,test_generator.n,test_generator.batch_size

(23639, 23639, 1)

In [7]:
MODEL_NAME = "sorghum-id-20220423-000453-EfficientNetB7-imagenet"
MODEL_PATH = "../models/"+MODEL_NAME+"/EfficientNetB7-optimal.h5"

reconstructed_model = tf.keras.models.load_model(MODEL_PATH)

In [8]:
%time
test_generator.reset()
results = reconstructed_model.predict(test_generator,verbose=1,steps=STEP_SIZE_TEST)

CPU times: user 0 ns, sys: 40 µs, total: 40 µs
Wall time: 74.4 µs


In [9]:
import pickle 

predicted_class_indices=np.argmax(results,axis=1)

f = open("../labels.pkl","rb")
labels = pickle.load(f)
f.close()

labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]

In [29]:
filenames=test_generator.filenames
submission=pd.DataFrame({"Filename":[filename.replace('all_classes/','')for filename in filenames],
                      "cultivar":predictions})
submission

Unnamed: 0,Filename,cultivar
0,1000005362.png,PI_175919
1,1000099707.png,PI_255744
2,1000135300.png,PI_196598
3,1000136796.png,PI_329256
4,1000292439.png,PI_157035
...,...,...
23634,999578153.png,PI_152965
23635,999692877.png,PI_152591
23636,999756998.png,PI_144134
23637,999892248.png,PI_144134


In [30]:
submission_name = save_dir+'submission-EfficientNetB4-imagenet-20220422-174202.csv'
submission.to_csv(submission_name,index=False)
os.system('kaggle competitions submit -c sorghum-id-fgvc-9 -f '+submission_name+' -m "EfficientNetB7 with CLAHE PreProc"')

100%|██████████| 563k/563k [00:01<00:00, 296kB/s] 


Successfully submitted to Sorghum -100 Cultivar Identification - FGVC 9

0

In [31]:
MODEL_NAME = "sorghum-id-20220422-170329-EfficientNetB7-imagenet"
MODEL_PATH = "../models/"+MODEL_NAME+"/EfficientNetB7-optimal.h5"

reconstructed_model = tf.keras.models.load_model(MODEL_PATH)

%time
test_generator.reset()
results = reconstructed_model.predict(test_generator,verbose=1,steps=STEP_SIZE_TEST)

CPU times: user 23 µs, sys: 12 µs, total: 35 µs
Wall time: 73 µs


In [35]:
predicted_class_indices=np.argmax(results,axis=1)
f = open("../labels.pkl","rb")
labels = pickle.load(f)
f.close()

labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]

In [36]:
filenames=test_generator.filenames
submission=pd.DataFrame({"Filename":[filename.replace('all_classes/','')for filename in filenames],
                      "cultivar":predictions})
submission


Unnamed: 0,Filename,cultivar
0,1000005362.png,PI_180348
1,1000099707.png,PI_276837
2,1000135300.png,PI_92270
3,1000136796.png,PI_329256
4,1000292439.png,PI_145633
...,...,...
23634,999578153.png,PI_152816
23635,999692877.png,PI_197542
23636,999756998.png,PI_152733
23637,999892248.png,PI_152923


In [38]:
#!kaggle competitions submit -c sorghum-id-fgvc-9 -f $submission_name -m "With flow from dataframe generator"
submission_name = "../results/submission-EfficientNetB7-imagenet-20220422-170329.csv"
submission.to_csv(submission_name,index=False)
os.system('kaggle competitions submit -c sorghum-id-fgvc-9 -f '+submission_name+' -m "EfficientNetB7 with CLAHE PreProc"')

100%|██████████| 563k/563k [00:01<00:00, 303kB/s] 


Successfully submitted to Sorghum -100 Cultivar Identification - FGVC 9

0