In [1]:
import pathlib
import tensorflow as tf

In [2]:
data_folder = "../data/"

In [3]:
import pandas as pd
import os
import re

In [4]:
df_train = pd.read_csv(os.path.join(data_folder, "train.csv"))

In [5]:
df_val = pd.read_csv(os.path.join(data_folder, "val.csv"))

In [6]:
df_train['ImageID'] = df_train['ImageID'].astype(str)+".jpg"
df_val['ImageID'] = df_val['ImageID'].astype(str)+".jpg"

In [7]:
df_train.head()

Unnamed: 0,ImageID,label
0,0.jpg,curiosity
1,1.jpg,curiosity
2,2.jpg,curiosity
3,3.jpg,perseverance
4,4.jpg,curiosity


In [8]:
df_val.head()

Unnamed: 0,ImageID,label
0,0.jpg,curiosity
1,1.jpg,curiosity
2,2.jpg,curiosity
3,3.jpg,perseverance
4,4.jpg,curiosity


In [9]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [10]:
datagen=ImageDataGenerator(rescale=1./255.)

train_generator=datagen.flow_from_dataframe(
dataframe=df_train,
directory="../data/train/",
x_col="ImageID",
y_col="label",
batch_size=32,
seed=42,
shuffle=True,
class_mode="categorical",
target_size=(128,128))

Found 40000 validated image filenames belonging to 2 classes.


In [11]:
val_generator=datagen.flow_from_dataframe(
dataframe=df_val,
directory="../data/val/",
x_col="ImageID",
y_col="label",
batch_size=32,
seed=42,
shuffle=True,
class_mode="categorical",
target_size=(128,128))

Found 4000 validated image filenames belonging to 2 classes.


In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Conv2D, Flatten, Dropout, MaxPooling2D, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import regularizers, optimizers
import os
import numpy as np
import pandas as pd

In [13]:
class CustomAugment(object):
    def __call__(self, image):        
        # Random flips and grayscale with some stochasticity
        img = self._random_apply(tf.image.flip_left_right, image, p=0.5)
        img = self._random_apply(self._color_drop, img, p=0.8)
        return img

    def _color_drop(self, x):
        image = tf.image.rgb_to_grayscale(x)
        image = tf.tile(x, [1, 1, 1, 3])
        return x
    
    def _random_apply(self, func, x, p):
        return tf.cond(
          tf.less(tf.random.uniform([], minval=0, maxval=1, dtype=tf.float32),
                  tf.cast(p, tf.float32)),
          lambda: func(x),
          lambda: x)

In [14]:
data_augmentation = tf.keras.Sequential(
  [
     tf.keras.layers.Lambda(CustomAugment()),
    tf.keras.layers.experimental.preprocessing.RandomFlip("horizontal", 
                                                 input_shape=(128, 
                                                              128,
                                                              3)),
    tf.keras.layers.experimental.preprocessing.RandomRotation(0.1),
    tf.keras.layers.experimental.preprocessing.RandomZoom(0.1),
  ]
)

In [15]:
model = Sequential()
model.add(data_augmentation)
model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=(128,128,3)))
model.add(Activation('relu'))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))

model.compile(optimizers.RMSprop(lr=0.0001), loss="categorical_crossentropy", metrics=["Recall", "Precision"])

In [16]:
next(train_generator)[0].shape

(32, 128, 128, 3)

In [17]:
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
STEP_SIZE_VAL=val_generator.n//train_generator.batch_size

In [41]:
model.fit(train_generator, steps_per_epoch=STEP_SIZE_TRAIN, validation_data=val_generator, validation_steps=STEP_SIZE_VAL, epochs=1)



KeyboardInterrupt: 

In [19]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
sequential (Sequential)      (None, None, None, None)  0         
_________________________________________________________________
conv2d (Conv2D)              (None, None, None, 32)    896       
_________________________________________________________________
activation (Activation)      (None, None, None, 32)    0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, None, None, 32)    9248      
_________________________________________________________________
activation_1 (Activation)    (None, None, None, 32)    0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, None, None, 32)    0         
_________________________________________________________________
dropout (Dropout)            (None, None, None, 32)   

In [20]:
df_test = pd.read_csv("../data/sample_submission.csv",dtype=str)
df_test["ImageID"] = df_test["ImageID"].astype(str)+".jpg"

In [26]:
len(df_test)

10000

In [27]:
test_generator=datagen.flow_from_dataframe(
dataframe=df_test,
directory="../data/test/",
x_col="ImageID",
y_col="label",
batch_size=1,
seed=42,
shuffle=False,
class_mode="categorical",
target_size=(128,128))

Found 10000 validated image filenames belonging to 2 classes.


In [28]:
model

<tensorflow.python.keras.engine.sequential.Sequential at 0x7fee1b231ac0>

In [29]:
STEP_SIZE_TEST = test_generator.n//test_generator.batch_size

In [30]:
STEP_SIZE_TEST

10000

In [31]:
test_generator.reset()
pred = model.predict(test_generator,
steps=STEP_SIZE_TEST,
verbose=1)



In [32]:
pred

array([[9.32831109e-01, 6.71688318e-02],
       [9.99896049e-01, 1.03994345e-04],
       [9.51828897e-01, 4.81711291e-02],
       ...,
       [9.99866724e-01, 1.33238966e-04],
       [9.99783814e-01, 2.16226326e-04],
       [9.99733865e-01, 2.66095012e-04]], dtype=float32)

In [33]:
predicted_class_indices = np.argmax(pred,axis=1)

In [34]:
labels = (train_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]

In [35]:
df_test["pred"] = predictions

In [36]:
df_test.head()

Unnamed: 0,ImageID,label,pred
0,0.jpg,curiosity,curiosity
1,1.jpg,perseverance,curiosity
2,2.jpg,curiosity,curiosity
3,3.jpg,perseverance,perseverance
4,4.jpg,perseverance,curiosity


In [37]:
df_test.drop("label", axis=1, inplace=True)
df_test.rename(columns={"pred": "label"}, inplace=True)

In [38]:
df_test.head()

Unnamed: 0,ImageID,label
0,0.jpg,curiosity
1,1.jpg,curiosity
2,2.jpg,curiosity
3,3.jpg,perseverance
4,4.jpg,curiosity


In [None]:
df_test["ImageID"] = df_test["ImageID"].map(lambda x: re.sub(r"\D", "", str(x)))

In [39]:
df_test.to_csv("../data/01_sub.csv", index=False)

In [40]:
!aicrowd submission create -c rover-classification -f ../data/01_sub.csv

[2K[1;34m01_sub.csv[0m [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━[0m [35m100.0%[0m • [32m200.5/198.9 KB[0m • [31m2.5 MB/s[0m • [36m0:00:00[0m[0m • [36m0:00:01[0m[36m0:00:01[0m
[?25h                                    ╭─────────────────────────╮                                     
                                    │ [1mSuccessfully submitted![0m │                                     
                                    ╰─────────────────────────╯                                     
[3m                                          Important links                                           [0m
┌──────────────────┬───────────────────────────────────────────────────────────────────────────────┐
│  This submission │[1;94m [0m[1;94mhttps://www.aicrowd.com/challenges/ai-blitz-7/submissions/126494             [0m[1;94m [0m│
│                  │                                                                               │
│  All submissions │[1;94m [0m[1;94mhttps://