In [1]:
import tensorflow as tf
from tensorflow import keras
import pandas as pd
from tensorflow.keras.applications import ResNet50V2
tf.__version__

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


'2.6.0-dev20210402'

In [2]:
PATH = 'data'

os.listdir(PATH)

['image',
 'sample_submission.csv',
 'subset.csv',
 'test.csv',
 'train.csv',
 'train_split.csv',
 'val_split.csv']

In [3]:
train = pd.read_csv(f'{PATH}/subset.csv')
val = pd.read_csv(f'{PATH}/val_split.csv')
train.shape, val.shape

((2220, 8), (10932, 8))

In [4]:
BATCH_SIZE = 64

def decode(name, label):
    img = tf.io.read_file(name)
    img = tf.image.decode_jpeg(img,channels=3)
    img = tf.cast(img, tf.float32)
    return img,label


def load_ds(df):
    imgs , labels = df["image_name"].values, df["target"].values
    imgs = [f'{PATH}/image/train/{name}.jpg' for name in imgs]
    ds = tf.data.Dataset.from_tensor_slices((imgs,labels))
    ds = ds.map(decode)
    ds = ds.shuffle(2048)
    ds = ds.batch(BATCH_SIZE)
    return ds

In [5]:
train_ds = load_ds(train)
val_ds = load_ds(train)

In [6]:
IMAGE_SIZE = (224,224,3)

encoder = ResNet50V2(
    include_top=False,
    input_shape=IMAGE_SIZE,
    weights='imagenet'
)
encoder.trainable=False

inputs = keras.Input(shape=IMAGE_SIZE)
x = keras.layers.experimental.preprocessing.Rescaling(1./255)(inputs)
x = encoder(x, training = False)
x = keras.layers.GlobalAveragePooling2D()(x)
ouputs = keras.layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs, ouputs)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
rescaling (Rescaling)        (None, 224, 224, 3)       0         
_________________________________________________________________
resnet50v2 (Functional)      (None, 7, 7, 2048)        23564800  
_________________________________________________________________
global_average_pooling2d (Gl (None, 2048)              0         
_________________________________________________________________
dense (Dense)                (None, 1)                 2049      
Total params: 23,566,849
Trainable params: 2,049
Non-trainable params: 23,564,800
_________________________________________________________________


In [7]:
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=[keras.metrics.AUC(name="auc")]
)

In [8]:
model.fit(train_ds, epochs=10, validation_data=val_ds, validation_steps=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x2121b7d4460>

In [12]:
test = pd.read_csv(f'{PATH}/test.csv')
BATCH_SIZE = 64

def decode_test(name):
    img = tf.io.read_file(name)
    img = tf.image.decode_jpeg(img,channels=3)
    img = tf.cast(img, tf.float32)
    return img


def load_test_ds(df):
    imgs  = df["image_name"].values
    imgs = [f'{PATH}/image/test/{name}.jpg' for name in imgs]
    ds = tf.data.Dataset.from_tensor_slices(imgs)
    ds = ds.map(decode_test)
    ds = ds.batch(BATCH_SIZE)
    return ds

In [13]:
test_ds = load_test_ds(test)

In [14]:
preds = model.predict(test_ds)

In [15]:
preds

array([[0.00542271],
       [0.00323793],
       [0.00853342],
       ...,
       [0.01302326],
       [0.00194523],
       [0.01850507]], dtype=float32)

In [16]:
submission = pd.DataFrame({
    'image_name':test['image_name'].values,
    'target':preds.ravel()
})

In [17]:
submission.to_csv("submission.csv", index=False)