In [20]:
import tensorflow as tf
import tensorflow.keras as tfk
import tensorflow.keras.layers as tfkl
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
def process_file(file):
    """Read a two-column, comma-separated text file that starts with a header row.

    Parameters
    ----------
    file : str or path-like
        Path of the file to read. The first line is treated as a header and
        skipped; every following non-blank line must have the form
        ``name,class``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The ``name`` column and the ``class`` column, in file order. Both
        arrays hold strings; surrounding whitespace (including the line
        terminator) is stripped.

    Raises
    ------
    ValueError
        If a non-blank data line does not contain exactly one comma.
    """
    names = []
    classes = []
    with open(file, 'r') as f:
        # Skip the header row; the default keeps an empty file from raising.
        next(f, None)
        for line in f:
            # Strip the line terminator so the class field is not "1\n".
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. a trailing empty line at EOF).
                continue
            n, c = line.split(',')
            names.append(n)
            classes.append(c)
    return np.array(names), np.array(classes)

In [3]:
# Training split: image names and their class labels (read as strings).
names, labels = process_file('train_labels.txt')
names = np.asarray(names)
# Convert the string labels to integers.
labels = np.array(list(map(int, labels)))

# The sample submission supplies the test image names; its class column is ignored.
eval_names, _ = process_file('sample_submission.txt')

print(f"{len(names)} training examples.")
print(f"{len(eval_names)} testing examples.")

17000 training examples.
5149 testing examples.


In [4]:
def read_image(name):
    """Load the image stored at ``data/{name}.png`` with OpenCV.

    Parameters
    ----------
    name : str
        Image name without directory or extension.

    Returns
    -------
    numpy.ndarray
        The decoded image as returned by ``cv.imread``.

    Raises
    ------
    FileNotFoundError
        If OpenCV could not read the file (missing, unreadable or not
        decodable).
    """
    path = f"data/{name}.png"
    img = cv.imread(path)
    if img is None:
        # cv.imread reports failure by returning None instead of raising,
        # which would otherwise surface much later as an object-dtype array.
        raise FileNotFoundError(f"Could not read image: {path}")
    # NOTE(review): cv.imread yields channels in BGR order; the MobileNet
    # backbone used later presumably expects RGB -- confirm whether a
    # cv.cvtColor(img, cv.COLOR_BGR2RGB) conversion is wanted.
    return img

In [5]:
# Decode every training image and pack the results into one array.
images = np.array(list(map(read_image, names)))
print(images.shape)

(17000, 224, 224, 3)


In [15]:
# MobileNet backbone (width multiplier 0.5) without its classification head;
# global average pooling turns its output into one feature vector per image.
mobilenet_model = tfk.applications.mobilenet.MobileNet(
    alpha=0.5, include_top=False, pooling='avg',
    input_shape=(224, 224, 3))
# Freeze the backbone so that only the dense head below is trained.
for layer in mobilenet_model.layers:
    layer.trainable = False

# Binary classifier head: one hidden layer followed by a sigmoid output unit.
model = tfk.Sequential([
    mobilenet_model,
    tfkl.Dense(128, activation='relu'),
    tfkl.Dense(1, activation='sigmoid'),
])

# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenet_0.50_224 (Model)   (None, 512)               829536    
_________________________________________________________________
dense_4 (Dense)              (None, 128)               65664     
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 129       
Total params: 895,329
Trainable params: 65,793
Non-trainable params: 829,536
_________________________________________________________________
None


In [None]:
def preprocess_input(X):
    """Apply the Keras MobileNet input preprocessing to ``X`` and return the result."""
    mobilenet_preprocess = tfk.applications.mobilenet.preprocess_input
    return mobilenet_preprocess(X)

def get_class_weight(p=0.23112):
    """Build the weight mapping for the two classes.

    Class ``0`` is weighted by ``p`` and class ``1`` by ``1 - p``.
    """
    weight_for_zero = p
    weight_for_one = 1 - p
    return {0: weight_for_zero, 1: weight_for_one}

# The default class-weight parameter was apparently picked in an earlier SVC
# experiment that swept p over np.linspace(0.23112, 0.23113, 20).
class_weight = get_class_weight()

# NOTE(review): images_train, labels_train, images_val and labels_val are not
# defined in any cell visible here; presumably they come from a
# train_test_split of (images, labels) -- confirm that cell exists and runs
# before this one on a fresh kernel.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[tfk.metrics.Precision(), tfk.metrics.Recall()]
)
# class_weight is an argument of fit(), not of compile().
model.fit(
    preprocess_input(images_train), labels_train,
    validation_split=0.1,
    class_weight=class_weight,
)

# The sigmoid output is a probability per image; f1_score needs hard class
# labels, so threshold at 0.5 and flatten the (n, 1) predictions to (n,).
val_probabilities = model.predict(preprocess_input(images_val))
val_predictions = (val_probabilities.ravel() > 0.5).astype(int)
print(class_weight[0], f1_score(labels_val, val_predictions))

Train on 13770 samples, validate on 1530 samples