In [106]:
import os
import gc

import numpy as np
import pandas as pd
import cv2
import tqdm

from keras import losses
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D

In [88]:
X_train = []
X_test = []
y_train = []

In [72]:
pre_filepath = "../../../../../../Volumes/Seagate Backup Plus Drive/Documents/Kaggle Datasets/"

In [73]:
df_train = pd.read_csv(pre_filepath + "Planet/train_v2.csv")

In [7]:
flatten = lambda l: [item for sublist in l for item in sublist]
labels = list(set(flatten([l.split(' ') for l in df_train['tags'].values])))

label_map = {l: i for i, l in enumerate(labels)}
inv_label_map = {i: l for l, i in label_map.items()}

In [57]:
labels

['blow_down',
 'bare_ground',
 'artisinal_mine',
 'haze',
 'conventional_mine',
 'water',
 'partly_cloudy',
 'clear',
 'habitation',
 'primary',
 'cultivation',
 'road',
 'slash_burn',
 'selective_logging',
 'agriculture',
 'blooming',
 'cloudy']

In [59]:
index_map = {i : j for i, j in enumerate(labels)}

In [60]:
index_map

{0: 'blow_down',
 1: 'bare_ground',
 2: 'artisinal_mine',
 3: 'haze',
 4: 'conventional_mine',
 5: 'water',
 6: 'partly_cloudy',
 7: 'clear',
 8: 'habitation',
 9: 'primary',
 10: 'cultivation',
 11: 'road',
 12: 'slash_burn',
 13: 'selective_logging',
 14: 'agriculture',
 15: 'blooming',
 16: 'cloudy'}

In [61]:
label_map = {j : i for i, j in enumerate(labels)}

In [62]:
label_map

{'agriculture': 14,
 'artisinal_mine': 2,
 'bare_ground': 1,
 'blooming': 15,
 'blow_down': 0,
 'clear': 7,
 'cloudy': 16,
 'conventional_mine': 4,
 'cultivation': 10,
 'habitation': 8,
 'haze': 3,
 'partly_cloudy': 6,
 'primary': 9,
 'road': 11,
 'selective_logging': 13,
 'slash_burn': 12,
 'water': 5}

In [96]:
for file_name, tags in tqdm.tqdm(df_train.values, miniters=100):
    
    image = cv2.imread(pre_filepath + "Planet/train-tif-v2/" + file_name + ".tif")
    targets = np.zeros(17)
    for tag in tags.split(' '):
        targets[label_map[tag]] = 1
    X_train.append(cv2.resize(image, (64, 64)))
    y_train.append(targets)

100%|██████████| 40479/40479 [32:41<00:00, 20.64it/s]


In [97]:
X_train = np.array(X_train, np.float16) / 255
y_train = np.array(y_train, np.uint8)

In [99]:
X_train.shape

(40481, 64, 64, 3)

In [100]:
y_train.shape

(40481, 17)

In [101]:
validation_split = 35000

In [102]:
X_train, X_valid = X_train[:validation_split], X_train[validation_split:]

In [103]:
y_train, y_valid = y_train[:validation_split], y_train[validation_split:]

In [110]:
# Making the Keras model
model = Sequential()

# First conv layer
model.add(Conv2D(32, kernel_size=(3,3), activation='relu', input_shape=(64, 64, 3)))
model.add(MaxPooling2D())
model.add(Dropout(0.8))

# Second conv layer
model.add(Conv2D(64, kernel_size=(3,3), activation='relu'))
model.add(MaxPooling2D())
model.add(Dropout(0.8))

# Flatten before FC layers
model.add(Flatten())

# Third FC layer
model.add(Dense(1024, activation='relu'))
model.add(Dropout(0.8))

# Final FC layer to output
model.add(Dense(17, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, batch_size = 128, epochs=4, verbose=1, validation_data=(X_valid, y_valid))

Train on 35000 samples, validate on 5481 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x120ede940>

In [112]:
# Evaluate the model 
from sklearn.metrics import fbeta_score

p_valid = model.predict(X_valid, batch_size=128)
print(y_valid)
print(p_valid)
print(fbeta_score(y_valid, np.array(p_valid) > 0.2, beta=2, average='samples'))

[[0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 1 0 0]
 [0 0 0 ..., 1 0 0]
 [0 0 0 ..., 1 0 0]]
[[ 0.01141668  0.0324084   0.01230208 ...,  0.37550899  0.02821972
   0.0115757 ]
 [ 0.00633154  0.04981474  0.01279089 ...,  0.53140497  0.01163766
   0.09105454]
 [ 0.00636821  0.01809446  0.00608225 ...,  0.30736524  0.01875447
   0.00509066]
 ..., 
 [ 0.0041061   0.12024331  0.04095884 ...,  0.67764372  0.00425965
   0.13108748]
 [ 0.00361769  0.10869204  0.03326196 ...,  0.68432719  0.00375857
   0.12586874]
 [ 0.01118554  0.04454163  0.0158408  ...,  0.48249     0.02261144
   0.02048526]]
0.742743734914
