# Baseline model

## Import and load data

In [1]:
import cv2
import numpy as np
import os
import pandas as pd

from pathlib import Path

In [2]:
# All competition files are read from the `data` folder one level above the notebook.
data_path = Path('..', 'data')
# Print the folder contents as a quick check that the expected files are in place.
dir_entries = os.listdir(data_path)
print(dir_entries)

['traininglabels.csv', '.gitkeep', 'train_images.zip', 'leaderboard_test_data.zip', 'widsdatathon2019.zip', 'SampleSubmission.csv', 'leaderboard_holdout_data.zip', 'train_images']


In [3]:
# Read the training label table (image_id, has_oilpalm, score) into a DataFrame.
labels = data_path.joinpath('traininglabels.csv')
df = pd.read_csv(filepath_or_buffer=labels)

In [4]:
# Preview the first rows of the label table.
df.head()

Unnamed: 0,image_id,has_oilpalm,score
0,img_000002017.jpg,0,0.7895
1,img_000012017.jpg,0,1.0
2,img_000022017.jpg,0,1.0
3,img_000072017.jpg,0,1.0
4,img_000082017.jpg,0,1.0


In [5]:
def img_as_array(image_id, size=(64, 64)):
    """Load one training image from disk and resize it.

    Parameters
    ----------
    image_id : str
        File name of the image inside ``data_path / 'train_images'``.
    size : tuple of int, optional
        Target size passed to ``cv2.resize`` as its ``dsize`` argument.

    Returns
    -------
    numpy.ndarray
        The resized image as returned by ``cv2.resize``.

    Raises
    ------
    FileNotFoundError
        If OpenCV could not read the file (missing, unreadable or not an image).
    """
    image_path = data_path / 'train_images' / image_id
    img = cv2.imread(str(image_path))
    # cv2.imread reports failure by returning None instead of raising. Without
    # this check the problem would only surface as an opaque cv2.resize error
    # that does not say which file was at fault.
    if img is None:
        raise FileNotFoundError('Could not read image: {}'.format(image_path))
    return cv2.resize(img, size)

In [15]:
# Load every labelled image together with its has_oilpalm target.
images = []
responses = []
# Columns are looked up by name: positional `row[0]` / `row[1]` on a
# label-indexed Series is deprecated in recent pandas and would pick the wrong
# field if the CSV column order ever changed. Iterating the two columns
# directly also avoids building a Series per row, as `iterrows` does.
for image_id, has_oilpalm in zip(df['image_id'], df['has_oilpalm']):
    img = img_as_array(image_id)
    # Add a leading batch axis so the arrays can later be concatenated on axis 0.
    images.append(img[np.newaxis, ...])
    responses.append(has_oilpalm)

In [16]:
# Join the per-image arrays along the leading axis into a single batch, then
# turn the labels into a column vector with one row per image.
# Note: this rebinds `images` from a list to an array, so re-run the loading
# cell before executing this cell a second time.
images = np.concatenate(images)
n_samples = images.shape[0]
responses = np.array(responses).reshape((n_samples, 1))

In [17]:
# Sanity check: labels should form a column vector, (number of images, 1).
responses.shape

(15244, 1)

In [18]:
# Sanity check: one 64x64x3 array per image, stacked along the first axis.
images.shape

(15244, 64, 64, 3)

## Define model architecture

In [19]:
from keras.applications import vgg16
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential, Model
from keras.optimizers import SGD

Using TensorFlow backend.


In [20]:
def baseline_vgg(input_shape=(64, 64, 3)):
    """Build the baseline classifier: a VGG16 base with a small dense head.

    Parameters
    ----------
    input_shape : tuple of int, optional
        Shape of one input image, forwarded to ``vgg16.VGG16``. The default
        matches the 64x64 three-channel arrays produced by the loading cells.

    Returns
    -------
    keras.models.Model
        Uncompiled model mapping an image batch to one sigmoid output per image.
    """
    # ImageNet-pretrained convolutional base without the original classifier.
    # The base layers are not frozen here, so training updates them as well.
    vgg = vgg16.VGG16(
        include_top=False,
        weights='imagenet',
        input_shape=input_shape,
        pooling='max',
    )
    # pooling='max' already collapses the spatial dimensions, so the base
    # output can feed the dense head directly without a Flatten layer.
    x = Dense(256, activation='relu')(vgg.output)
    x = Dropout(0.5)(x)
    x = Dense(1, activation='sigmoid')(x)
    return Model(inputs=[vgg.input], outputs=[x])

# Instantiate the baseline network and configure it for binary classification.
model = baseline_vgg()
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer structure and parameter counts.
model.summary()

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 64, 64, 3)         0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 64, 64, 64)        1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 64, 64, 64)        36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 32, 32, 64)        0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 32, 32, 128)       73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 32, 32, 128)       147584   

In [None]:
# Train on the full labelled set. The arrays are passed exactly as loaded:
# no pixel scaling is applied and no validation data is held out here.
model.fit(
    x=images,
    y=responses,
    epochs=10,
    batch_size=64,
)

In [None]:
# Persist the trained model to disk.
# NOTE(review): the file name says "100epoch" while the training cell uses
# epochs=10 — confirm which one is intended before relying on the name.
model_file = 'vgg_baseline_100epoch.h5'
model.save(model_file)