In [172]:
!pip install -q imblearn

In [204]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from imblearn.over_sampling import SMOTE
import tensorflow as tf
from tensorflow import keras
from keras import backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import ResNet50, InceptionV3
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (Dense, Flatten,Dropout,Conv2D, ReLU, MaxPooling2D,
                                     Activation, BatchNormalization, GlobalAveragePooling2D)


In [174]:
IMAGE_PATH = "images/"
IMAGE_SIZE = 150

In [175]:
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")

In [176]:
train_df["image_id"] = train_df.image_id.apply(lambda x: x+".jpg")
test_df["image_id"] = test_df.image_id.apply(lambda x: x+".jpg")

In [177]:
def train_val_split(df, val_ratio=0.2):
    val_rows = (np.random.rand(int(val_ratio*df.shape[0]))*df.shape[0]).astype(int)
    val_df = df.iloc[val_rows]
    train_df.drop(val_rows, axis=0, inplace=True)
    val_df = val_df.reset_index().drop(["index"], axis=1)
    df = df.reset_index().drop(["index"], axis=1)
    int_dict = {
        "healthy": np.float32,
        "multiple_diseases": np.float32,
        "rust": np.float32,
        "scab": np.float32
    }
    df = df.astype(int_dict)
    val_df = val_df.astype(int_dict)
    return df, val_df

In [178]:
train_df, val_df = train_val_split(train_df)

In [179]:
train_df.head()

Unnamed: 0,image_id,healthy,multiple_diseases,rust,scab
0,Train_0.jpg,0.0,0.0,0.0,1.0
1,Train_1.jpg,0.0,1.0,0.0,0.0
2,Train_2.jpg,1.0,0.0,0.0,0.0
3,Train_3.jpg,0.0,0.0,1.0,0.0
4,Train_4.jpg,1.0,0.0,0.0,0.0


In [180]:
val_df.head()

Unnamed: 0,image_id,healthy,multiple_diseases,rust,scab
0,Train_560.jpg,0.0,0.0,1.0,0.0
1,Train_1303.jpg,0.0,0.0,0.0,1.0
2,Train_1583.jpg,1.0,0.0,0.0,0.0
3,Train_481.jpg,0.0,0.0,1.0,0.0
4,Train_497.jpg,0.0,0.0,1.0,0.0


In [181]:
labels = list(train_df.keys().drop("image_id"))

In [182]:
labels

['healthy', 'multiple_diseases', 'rust', 'scab']

In [183]:
def print_class_freq(df, labels=labels):
    for col in labels:
        print(f'{col}: {sum(df[col])}')

In [184]:
print_class_freq(train_df)

healthy: 424.0
multiple_diseases: 78.0
rust: 506.0
scab: 483.0


In [185]:
print_class_freq(val_df)

healthy: 99.0
multiple_diseases: 14.0
rust: 132.0
scab: 119.0


In [186]:
def C_SMOTE(train_df=train_df):
    X_train = []
    for image in train_df["image_id"]:
        load_images = load_img(IMAGE_PATH+image, target_size=(IMAGE_SIZE, IMAGE_SIZE))
        img_arr = img_to_array(load_images)
        X_train.append(img_arr)
    y_train = train_df.loc[:, "healthy":].values
    X_train = np.asarray(X_train)
    sm = SMOTE(random_state =45)
    X_train, y_train = sm.fit_resample(X_train.reshape((-1, IMAGE_SIZE*IMAGE_SIZE*3)), y_train)
    X_train.reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 3)
    return X_train, y_train

In [187]:
X_train, y_train = C_SMOTE()

In [188]:
print(X_train.shape)
print(y_train.shape)

(2024, 67500)
(2024, 4)


In [189]:
X_train = X_train.reshape(-1, IMAGE_SIZE, IMAGE_SIZE, 3)
print(X_train.shape)

(2024, 150, 150, 3)


In [190]:
def get_valid_generator(test_df=test_df, valid_df=val_df, train_df=train_df, image_dir=IMAGE_PATH,
                       x_col="image_id", y_cols=labels, sample_size=100, batch_size=32, seed=45):
    raw_train_generator = ImageDataGenerator().flow_from_dataframe(
        dataframe=train_df,
        directory=image_dir,
        x_col=x_col,
        y_col=y_cols,
        class_mode="raw",
        batch_size=sample_size,
        shuffle=True,
        target_size=(IMAGE_SIZE, IMAGE_SIZE)
    )
    batch = raw_train_generator.next()
    data_sample = batch[0]
    
    image_generator = ImageDataGenerator(
        featurewise_center=True,
        featurewise_std_normalization=True
    )
    image_generator.fit(data_sample)
    valid_generator = image_generator.flow_from_dataframe(
        dataframe=valid_df,
        directory=image_dir,
        x_col=x_col,
        y_col=y_cols,
        class_mode="raw",
        batch_size=batch_size,
        shuffle=False,
        seed=seed,
        target_size=(IMAGE_SIZE, IMAGE_SIZE)
    )
    test_generator = image_generator.flow_from_dataframe(
        dataframe=test_df,
        directory=image_dir,
        x_col=x_col,
        class_mode=None,
        batch_size=1,
        shuffle=False,
        target_size=(IMAGE_SIZE, IMAGE_SIZE)
    )
    return valid_generator, test_generator

In [191]:
def get_train_generator():
    image_generator = ImageDataGenerator(
        samplewise_center=True,
        samplewise_std_normalization=True
    )
    return image_generator.flow(X_train, y_train, batch_size=32, shuffle=False, seed=45)

In [192]:
train_generator = get_train_generator()
valid_generator, test_generator = get_valid_generator()

Found 1491 validated image filenames.
Found 364 validated image filenames.
Found 1821 validated image filenames.


In [193]:
def compute_class_freqs(labels):

    N = labels.shape[0]
    
    positive_frequencies = np.mean(labels, axis=0)
    negative_frequencies = 1 - positive_frequencies

    return positive_frequencies, negative_frequencies

In [194]:
freq_pos, freq_neg = compute_class_freqs(train_generator.y)

pos_weights = freq_neg
neg_weights = freq_pos

In [195]:
pos_weights

array([0.75, 0.75, 0.75, 0.75])

In [196]:
neg_weights

array([0.25, 0.25, 0.25, 0.25])

In [197]:
def get_weighted_loss(pos_weights, neg_weights, epsilon=1e-7):
    
    def weighted_loss(y_true, y_pred):
        
        # initialize loss to zero
        loss = 0.0
        

        for i in range(len(pos_weights)):
            # for each class, add average weighted loss for that class 
            loss += K.mean(-(pos_weights[i]*y_true[:, i]*K.log(y_pred[:, i]+epsilon)
                             + neg_weights[i]*(1-y_true[:, i])*K.log((1-y_pred[:, i])+epsilon)))
        return loss
    
    return weighted_loss

In [206]:
FILTERS=64
model = Sequential()
model.add(InceptionV3(input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3), include_top=False, weights="imagenet")),
for i in range(5):
    model.add(Conv2D(filters=FILTERS, kernel_size=3, padding='SAME', input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)))
    model.add(ReLU())
    model.add(Conv2D(filters=FILTERS, kernel_size=3, padding='SAME'))
    model.add(ReLU())
    if i != 4:
        model.add(Conv2D(filters=FILTERS, kernel_size=5, padding='SAME', input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)))
        model.add(ReLU())
    FILTERS*=2
model.add(MaxPooling2D())
model.add(Dropout(0.5))
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(16, activation="relu"))
model.add(BatchNormalization())
model.add(Dense(4, activation="softmax"))
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

In [207]:
model.summary()

Model: "sequential_19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inception_v3 (Model)         (None, 3, 3, 2048)        21802784  
_________________________________________________________________
conv2d_1059 (Conv2D)         (None, 3, 3, 64)          1179712   
_________________________________________________________________
re_lu_14 (ReLU)              (None, 3, 3, 64)          0         
_________________________________________________________________
conv2d_1060 (Conv2D)         (None, 3, 3, 64)          36928     
_________________________________________________________________
re_lu_15 (ReLU)              (None, 3, 3, 64)          0         
_________________________________________________________________
conv2d_1061 (Conv2D)         (None, 3, 3, 64)          102464    
_________________________________________________________________
re_lu_16 (ReLU)              (None, 3, 3, 64)        

In [208]:
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import EarlyStopping
lr = ReduceLROnPlateau(
    monitor="val_accuracy",
    factor = 0.5,
    patience=10,
    min_lr=0.00001,
    verbose=1
)
es = EarlyStopping(
    monitor='val_loss',
    patience=20
)
# model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
#     filepath=checkpoint_filepath,
#     save_weights_only=True,
#     monitor='val_acc',
#     mode='max',
#     verbose=1,
#     save_best_only=True)

In [None]:
STEP_SIZE_TRAIN = train_generator.n//train_generator.batch_size
STEP_SIZE_VALID = valid_generator.n//valid_generator.batch_size
history = model.fit(train_generator, 
                   steps_per_epoch=STEP_SIZE_TRAIN, 
                   validation_data = valid_generator,
                   validation_steps=STEP_SIZE_VALID,
                   callbacks=[lr], epochs=200
                  )

  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 63 steps, validate for 11 steps
Epoch 1/200


In [None]:
history.history

In [None]:
test_preds = model.predict_generator(test_generator, steps=len(test_generator))

In [None]:
test_preds.shape

In [None]:
test = pd.read_csv("test.csv")
test['healthy'] = test_preds[:, 0]
test['multiple_diseases'] = test_preds[:, 1]
test['rust'] = test_preds[:, 2]
test['scab'] = test_preds[:, 3]
test.head()
test.to_csv("res50.csv")