### 1. Loading Dependencies and Dataset 

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf, tensorflow.keras.backend as K
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Model
from tensorflow.keras import optimizers
from tensorflow.keras.optimizers import Adam
import efficientnet.tfkeras as efn
from tensorflow.keras.layers import Dense,Dropout,Input,Reshape,Lambda, GlobalAveragePooling2D, Concatenate, Multiply

In [2]:
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"

In [3]:
#TPU Configurations
AUTO = tf.data.experimental.AUTOTUNE
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

if tpu:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
else:
    strategy = tf.distribute.get_strategy()

print("REPLICAS: ", strategy.num_replicas_in_sync)


REPLICAS:  1


In [4]:
IMG_SIZE_h = 128    
IMG_SIZE_w = 128
channel = 3
BATCH_SIZE = 32*strategy.num_replicas_in_sync
num_classes = 2

In [84]:
#Train Test Split
from sklearn.model_selection import train_test_split
import os
from pathlib import Path
import cv2
test_path="./data/test"
train_path="./data/train"

test_set = [str(file) for file in Path(test_path).rglob(f'*.png')]
train_set = [str(file) for file in Path(train_path).rglob(f'*.png')]

images =[]
labels = []

image_path = []

for file in test_set:
    image = cv2.imread(file)
    image = cv2.resize(image, (IMG_SIZE_h,IMG_SIZE_w))
    images.append(image)

    image_path.append(file)
    labels.append(0)
    
for file in train_set:
    image = cv2.imread(file)
    image = cv2.resize(image, (IMG_SIZE_h,IMG_SIZE_w))

    images.append(image)
    image_path.append(file)
    labels.append(1)

images = np.array(images, dtype=np.float32) / 255.0
labels = np.array(labels, dtype=np.float32)


In [7]:
def decode_image(filename, label=None, image_size=(IMG_SIZE_h, IMG_SIZE_w)):
    bits = tf.io.read_file(filename)
    image = tf.image.decode_png(bits, channels=3)
    image = tf.cast(image, tf.float32) / 255.0
    image = tf.image.resize(image, image_size)
    
    #convert to numpy and do some cv2 staff mb?
    
    if label is None:
        return image
    else:
        return image, label

def data_augment(image, label=None, seed=5050):
    image = tf.image.random_flip_left_right(image, seed=seed)
    image = tf.image.random_flip_up_down(image, seed=seed)
    image = tf.image.random_crop(image,size=[IMG_SIZE_h,IMG_SIZE_w,3],seed=seed )
    image = tf.image.random_brightness(image,max_delta=0.5, seed=seed )
           
    if label is None:
        return image
    else:
        return image, label

In [85]:
x_train,x_val,y_train,y_val = train_test_split(image_path,np.array(labels),test_size=0.198,shuffle=True) 

In [9]:
train_dataset = (
    tf.data.Dataset
    .from_tensor_slices((x_train, y_train))
    .map(decode_image, num_parallel_calls=AUTO)
    .map(data_augment, num_parallel_calls=AUTO)
    .repeat()
    .shuffle(512)
    .batch(BATCH_SIZE)
    .prefetch(AUTO)
    )

In [10]:
val_dataset = (tf.data.Dataset
               .from_tensor_slices((x_val,y_val))
               .map(decode_image,num_parallel_calls=AUTO)
               .batch(BATCH_SIZE)
               .cache()
               .prefetch(AUTO)
              )

### 2. Functions

#### i)Focal Loss + Label Smoothing

In [86]:
import keras.backend as K
import tensorflow as tf

def categorical_focal_loss_with_label_smoothing(gamma=2.0, alpha=0.25,ls=0.1,classes=2):
    """
    Implementation of Focal Loss from the paper in multiclass classification
    Formula:
        loss = -alpha*((1-p)^gamma)*log(p)
        y_ls = (1 - α) * y_hot + α / classes
    Parameters:
        alpha -- the same as wighting factor in balanced cross entropy
        gamma -- focusing parameter for modulating factor (1-p)
        ls    -- label smoothing parameter(alpha)
        classes     -- No. of classes
    Default value:
        gamma -- 2.0 as mentioned in the paper
        alpha -- 0.25 as mentioned in the paper
        ls    -- 0.1
        classes     -- 4
    """
    def focal_loss(y_true, y_pred):
        # Define epsilon so that the backpropagation will not result in NaN
        # for 0 divisor case
        epsilon = K.epsilon()
        # Add the epsilon to prediction value
        #y_pred = y_pred + epsilon
        #label smoothing
        y_pred_ls = (1 - ls) * y_pred + ls / classes
        # Clip the prediction value
        y_pred_ls = tf.clip_by_value(y_pred_ls, epsilon, 1.0-epsilon)
        # Calculate cross entropy
        cross_entropy = -y_true*tf.math.log(y_pred_ls)
        # Calculate weight that consists of  modulating factor and weighting factor
        weight = alpha * y_true * tf.math.pow((1-y_pred_ls), gamma)
        # Calculate focal loss
        loss = weight * cross_entropy
        # Sum the losses in mini_batch
        loss = tf.math.reduce_sum(loss, axis=1)
        return loss
    
    return focal_loss
# def categorical_focal_loss_with_label_smoothing(gamma=2., alpha=0.75, ls=0.125, classes=10):
#     def focal_loss(y_true, y_pred):
#         y_true = K.one_hot(K.cast(y_true, 'int32'), num_classes=classes)
#         y_true = (1 - ls) * y_true + ls / classes
#         cross_entropy = -y_true * K.log(y_pred)
#         loss = alpha * K.pow(1 - y_pred, gamma) * cross_entropy
#         return K.sum(loss, axis=1)
#     return focal_loss

#### ii) BiLinear Layer (outer_product())

In [87]:
import tensorflow as tf


def outer_product(x):
    #Einstein Notation  [batch,1,1,depth] x [batch,1,1,depth] -> [batch,depth,depth]
    phi_I = tf.einsum('ijkm,ijkn->imn',x[0],x[1])
    
    # Reshape from [batch_size,depth,depth] to [batch_size, depth*depth]
    phi_I = tf.reshape(phi_I,[-1,x[0].shape[3]*x[1].shape[3]])
    
    # Divide by feature map size [sizexsize]
    size1 = int(x[1].shape[1])
    size2 = int(x[1].shape[2])
    phi_I = tf.divide(phi_I, size1*size2)
    
    # Take signed square root of phi_I
    y_ssqrt = tf.multiply(tf.sign(phi_I),tf.sqrt(tf.abs(phi_I)+1e-12))
    
    # Apply l2 normalization
    z_l2 = tf.nn.l2_normalize(y_ssqrt, axis=1)
    return z_l2

def get_output_shape(input_shape):
    # Calculate the output shape based on the outer product operation
    # Assuming input_shape is (batch_size, 3584)
    return (input_shape[0], input_shape[0][1] * input_shape[0][2])

#### iii)F1 Score

In [None]:
from keras import backend as K

def f1(y_true, y_pred):
    def recall(y_true, y_pred):
        """Recall metric.

        Only computes a batch-wise average of recall.

        Computes the recall, a metric for multi-label classification of
        how many relevant items are selected.
        """

        true_positives = tf.math.reduce_sum(tf.math.round(tf.clip_by_value(y_true * y_pred, 0, 1)))
        possible_positives = tf.math.reduce_sum(tf.math.round(tf.clip_by_value(y_true, 0, 1)))
        recall = true_positives / (possible_positives + K.epsilon())
        return recall

    def precision(y_true, y_pred):
        """Precision metric.

        Only computes a batch-wise average of precision.

        Computes the precision, a metric for multi-label classification of
        how many selected items are relevant.
        """

        true_positives = tf.math.reduce_sum(tf.math.round(tf.clip_by_value(y_true * y_pred, 0, 1)))
        predicted_positives = tf.math.reduce_sum(tf.math.round(tf.clip_by_value(y_pred, 0, 1)))
        precision = true_positives / (predicted_positives + K.epsilon())
        return precision
    precision = precision(y_true, y_pred)
    recall = recall(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

### 3. Model

In [89]:
def get_model():
    # Define input
    input_shape = (IMG_SIZE_h, IMG_SIZE_w, channel)
    input_tensor = Input(shape=(IMG_SIZE_h, IMG_SIZE_w, channel))

    # Create EfficientNet backbones
    base_model1 = efn.EfficientNetB4(weights='imagenet', include_top=False, input_shape=input_shape, )
    base_model2 = efn.EfficientNetB4(weights='noisy-student', include_top=False, input_shape=input_shape, )

    base_model1.name = "EfficientNetB4_imagenetWeight"
    base_model1.name = "EfficientNetB4_noisy-studentWeight"
    for layer in base_model1.layers:
        layer.name = 'model1_' + layer.name

    for layer in base_model2.layers:
        layer.name = 'model2_' + layer.name

    x1 = base_model1(input_tensor)
    x2 = base_model2(input_tensor)

    # Get the output features from both models and apply global pooling
    d1 = GlobalAveragePooling2D()(x1)  # This will be shape (batch_size, 1792) for EfficientNetB4
    d2 = GlobalAveragePooling2D()(x2)  # This will be shape (batch_size, 1792) for EfficientNetB4

    # Instead of outer product, use a simpler approach to combine features
    # Option 1: Concatenate features
    combined_features = Concatenate()([d1, d2])  # Shape will be (batch_size, 3584)

    # Option 2: Element-wise multiplication (if you want interaction between features)
    # combined_features = Multiply()([d1, d2])  # Shape will be (batch_size, 1792)

    # Add an intermediate dense layer to reduce dimensionality if needed
    intermediate = Dense(512, activation='relu')(combined_features)

    # Final prediction layer
    predictions = Dense(1, activation='sigmoid', name='predictions')(intermediate)

    # Create the full model
    model = Model(inputs=input_tensor, outputs=predictions)

    # Now compile the combined model
    model.compile(
        optimizer=Adam(learning_rate=0.0003, decay=1e-3),
        loss=categorical_focal_loss_with_label_smoothing(gamma=2.0, alpha=0.75, ls=0.125, classes=num_classes)
    )

    return model

In [90]:

opt = Adam(learning_rate=0.0003, decay=1e-3)

model = get_model()

model.compile(
    optimizer=Adam(learning_rate=0.0003, decay=1e-3), 
    loss=categorical_focal_loss_with_label_smoothing(gamma=2.0, alpha=0.75, ls=0.125, classes=num_classes),
    metrics=[f1,'categorical_accuracy']
    )

### 4. Training

In [91]:
history = model.fit(x = images,
                    y = labels,
                    epochs=30,
                    verbose=1,
                    validation_split=0.1
                    )
#it will take some time to start training

Epoch 1/30
Tensor("data_1:0", shape=(None,), dtype=float32)
Tensor("functional_15_1/predictions_1/Sigmoid:0", shape=(None, 1), dtype=float32)
Tensor("data_1:0", shape=(None,), dtype=float32)
Tensor("functional_15_1/predictions_1/Sigmoid:0", shape=(None, 1), dtype=float32)
Tensor("data_1:0", shape=(None,), dtype=float32)
Tensor("functional_15_1/predictions_1/Sigmoid:0", shape=(None, 1), dtype=float32)
Tensor("data_1:0", shape=(None,), dtype=float32)
Tensor("functional_15_1/predictions_1/Sigmoid:0", shape=(None, 1), dtype=float32)
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4s/step - categorical_accuracy: 0.8107 - f1: 25.0202 - loss: 0.0390Tensor("data_1:0", shape=(None,), dtype=float32)
Tensor("functional_15_1/predictions_1/Sigmoid:0", shape=(None, 1), dtype=float32)
Tensor("data_1:0", shape=(None,), dtype=float32)
Tensor("functional_15_1/predictions_1/Sigmoid:0", shape=(None, 1), dtype=float32)
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m180s[0m 5s/step - 

KeyboardInterrupt: 

In [None]:
%matplotlib inline 

import matplotlib as mpl
import matplotlib.pyplot as plt


print ('Matplotlib version: ', mpl.__version__) # >= 2.0.0

val_f1 = history.history['val_f1']
f1 = history.history['f1']
epochs = range(len(f1))

df_categorical_accuracy = pd.DataFrame(val_f1, columns = ['val_f1']) 
df_f1 = pd.DataFrame(f1, columns = ['f1'])

df_categorical_accuracy.to_csv('val_f1.csv')
df_f1.to_csv('f1.csv')

In [92]:
model.summary()

In [None]:
f, ax = plt.subplots(figsize=(12,4)) # set the size that you'd like (width, height)
plt.title('F1 Score')
plt.ylabel('f1 score')
plt.xlabel('Epochs')
plt.plot(epochs,val_f1,label='Validation F1 Score')
plt.plot(epochs, f1,label='Training F1 Score')
plt.legend()
plt.figure()
plt.savefig('F1.png')
plt.show()

### 5. Testing and Saving Model 

In [None]:
path='../input/plant-pathology-2020-fgvc7/'

test = pd.read_csv(path+'test.csv')
test_id = test['image_id']

root = 'images'
x_test = [(os.path.join(GCS_DS_PATH,root,idee+'.jpg')) for idee in test_id]

In [None]:
test_dataset = (
    tf.data.Dataset
    .from_tensor_slices(x_test)
    .map(decode_image, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
)

In [None]:
y_pred = model.predict(test_dataset,verbose=1)

In [None]:
def save_results(y_pred):
    
    path='../input/plant-pathology-2020-fgvc7/'
    test = pd.read_csv(path + 'test.csv')
    test_id = test['image_id']

    res = pd.read_csv(path+'train.csv')
    res['image_id'] = test_id
  
    labels = res.keys()

    for i in range(1,5):
        res[labels[i]] = y_pred[:,i-1]

    res.to_csv('submission.csv',index=False)
  
    print(res.head)

In [None]:
save_results(y_pred)

In [None]:
model_json = model.to_json()
with open("Model.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("Model.h5")

In [24]:
from tensorflow.python.keras.models import model_from_json

# load json and create model
json_file = open('Model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("Model.h5")
# loaded_model.summary()

FileNotFoundError: [Errno 2] No such file or directory: 'Model.json'