In [1]:
import pandas as pd
import numpy as np
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from tqdm import tqdm 
import cv2
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from tensorflow import keras
from keras import backend as K
from sklearn.metrics import fbeta_score
from keras.layers import Conv2D, Dense, LSTM, Flatten, MaxPooling2D, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam, RMSprop, SGD

In [2]:
# Load the sample submission file; its rows are reused later as the
# template for the final predictions.
sample_submission_path = '../input/planets-dataset/planet/planet/sample_submission.csv'
submission_csv = pd.read_csv(sample_submission_path)
submission_csv.sample(2)

In [3]:
# Load the training labels file and preview two random rows.
train_classes_path = '../input/planets-dataset/planet/planet/train_classes.csv'
train_classes = pd.read_csv(train_classes_path)
train_classes.sample(2)

In [4]:
# Count how many rows share each distinct value of the 'tags' column
# (each value is the full tag string of a row, not an individual tag).
train_classes['tags'].value_counts()

In [5]:
# Read a single training image with matplotlib and render it as a sanity check.
sample_image_path = '../input/planets-dataset/planet/planet/train-jpg/train_1.jpg'
img = mpimg.imread(sample_image_path)
plt.imshow(img)

In [6]:
# Dimensions of the sample image array loaded above.
img.shape

In [7]:
# (rows, columns) of the training labels table.
train_classes.shape

In [8]:
# Encode the tag names as integer ids with a LabelEncoder and show the
# resulting {'map': {tag: id}} dictionary.
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
label_maps = pd.DataFrame(['agriculture', 'artisinal_mine', 'bare_ground',
                           'blooming', 'blow_down', 'clear', 'cloudy',
                           'conventional_mine', 'cultivation', 'habitation', 'haze',
                           'partly_cloudy', 'primary', 'road', 'selective_logging',
                           'slash_burn', 'water'], columns=['tags'])
# Fit on the 1-D 'tags' column: handing the encoder the whole single-column
# DataFrame passes a 2-D column vector and raises a DataConversionWarning.
label_maps['map'] = encoder.fit_transform(label_maps['tags'])
# Chain instead of inplace=True so the cell reads as one transformation.
label_maps = label_maps.set_index('tags').to_dict()
label_maps

In [9]:
# Tag name -> integer id lookup. Ids follow the position of each tag in the
# list below, and the dict keeps that insertion order.
label_map = {
    tag: idx
    for idx, tag in enumerate([
        'agriculture',
        'artisinal_mine',
        'bare_ground',
        'blooming',
        'blow_down',
        'clear',
        'cloudy',
        'conventional_mine',
        'cultivation',
        'habitation',
        'haze',
        'partly_cloudy',
        'primary',
        'road',
        'selective_logging',
        'slash_burn',
        'water',
    ])
}

In [10]:
# Load every training image, resize it to 64x64, and build the matching
# multi-hot target vector (one slot per entry of label_map).
X = []
Y = []
train_classes = shuffle(train_classes, random_state=0)
for image_name, tags in tqdm(train_classes.values, miniters=400):
    image_path = '../input/planets-dataset/planet/planet/train-jpg/{}.jpg'.format(image_name)
    # Use the default read flag (always 3-channel BGR) so training images are
    # decoded exactly like the test images and match the model's 3-channel input.
    arr = cv2.imread(image_path)
    if arr is None:
        # cv2.imread signals a missing/unreadable file by returning None;
        # fail here with the path instead of with an opaque cv2.resize error.
        raise FileNotFoundError('could not read image: {}'.format(image_path))
    targets = np.zeros(len(label_map))
    for t in tags.split(' '):
        targets[label_map[t]] = 1
    X.append(cv2.resize(arr, (64, 64)))
    Y.append(targets)

# Scale pixel values to [0, 1]; float16 keeps the array's memory footprint down.
X = np.array(X, np.float16)/255.0

In [11]:
#splitting into training and validation sets
# np.asarray returns X unchanged when it is already an ndarray, avoiding a
# second full in-memory copy of the image array; Y (a list) is still converted.
X = np.asarray(X)
Y = np.asarray(Y)
x_train, x_val, y_train, y_val = train_test_split(
    X, Y, test_size=0.2, shuffle=True, random_state=1
)

print(x_train.shape, y_train.shape, x_val.shape, y_val.shape)

In [12]:
# Force a garbage-collection pass; the displayed value is the number of
# unreachable objects the collector found.
import gc
gc.collect()

In [13]:
def fbeta(y_true, y_pred, threshold_shift=0):
    """Batch-level F-beta score (beta = 2) usable as a Keras metric.

    Args:
        y_true: tensor of ground-truth 0/1 labels.
        y_pred: tensor of predicted scores, same shape as ``y_true``.
        threshold_shift: value added to the scores before rounding; 0 keeps
            the decision threshold at 0.5, a positive value lowers it.

    Returns:
        Scalar tensor with the F2 score computed over all elements of the batch.
    """
    beta = 2

    # just in case of hipster activation at the final layer
    y_pred = K.clip(y_pred, 0, 1)

    # shifting the prediction threshold from .5 if needed; clip again so a
    # large shift cannot round a score up to 2 and inflate the counts
    y_pred_bin = K.clip(K.round(y_pred + threshold_shift), 0, 1)

    # epsilon keeps precision/recall defined when there are no true positives
    tp = K.sum(K.round(y_true * y_pred_bin)) + K.epsilon()
    fp = K.sum(K.round(K.clip(y_pred_bin - y_true, 0, 1)))
    # Count false negatives from the thresholded predictions (not the raw
    # scores) so precision and recall use the same decision threshold.
    fn = K.sum(K.round(K.clip(y_true - y_pred_bin, 0, 1)))

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)

    beta_squared = beta ** 2
    return (beta_squared + 1) * (precision * recall) / (beta_squared * precision + recall + K.epsilon())

In [14]:
# Two strided conv + max-pool stages feeding a dense head with 17 sigmoid
# outputs, compiled with binary cross-entropy and the fbeta metric, then
# trained with the validation split monitored each epoch.
model = keras.Sequential([
    Conv2D(filters=64, kernel_size=5, strides=2, activation="relu", input_shape=(64, 64, 3)),
    MaxPooling2D(),
    Conv2D(filters=128, kernel_size=5, strides=2, activation="relu"),
    MaxPooling2D(),
    Flatten(),
    Dense(units=512, activation="relu"),
    Dense(units=17, activation="sigmoid"),
])
model.compile(optimizer=Adam(), loss="binary_crossentropy", metrics=[fbeta])
model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=45,
    validation_data=(x_val, y_val),
)

In [15]:
# Evaluate on the validation split, then report the weighted F2 score of the
# predictions rounded at 0.5 on both splits.
model.evaluate(x_val, y_val)
# beta must be passed by keyword: it is a keyword-only parameter of
# sklearn.metrics.fbeta_score, so the positional form raises a TypeError.
print("train fscore: ", fbeta_score(y_train, np.round(model.predict(x_train)), beta=2, average='weighted'))
print("val fscore: ", fbeta_score(y_val, np.round(model.predict(x_val)), beta=2, average='weighted'))

In [16]:
# Split the submission rows by source folder: the first block is read from
# test-jpg, the remainder from test-jpg-additional.
n_test_jpg_rows = 40669
test = submission_csv.iloc[:n_test_jpg_rows]
files = submission_csv.iloc[n_test_jpg_rows:]

In [17]:
# Run another garbage-collection pass before the test images are loaded.
import gc
gc.collect()

In [18]:
def load_test_image(directory, image_name, size=(64, 64)):
    """Read ``<directory>/<image_name>.jpg`` with OpenCV and resize it.

    Args:
        directory: folder containing the image.
        image_name: file name without the ``.jpg`` extension.
        size: target (width, height) passed to ``cv2.resize``.

    Returns:
        The resized image array.

    Raises:
        FileNotFoundError: if OpenCV could not read the file (``cv2.imread``
            returns None instead of raising).
    """
    image_path = '{}/{}.jpg'.format(directory, image_name)
    arr = cv2.imread(image_path)
    if arr is None:
        raise FileNotFoundError('could not read image: {}'.format(image_path))
    return cv2.resize(arr, size)


# Load both test folders in submission order with one loop instead of two
# copy-pasted ones.
test_img = []
test_sources = (
    (test, '../input/planets-dataset/planet/planet/test-jpg'),
    (files, '../input/planets-dataset/test-jpg-additional/test-jpg-additional'),
)
for rows, directory in test_sources:
    for image_name, tags in tqdm(rows.values, miniters=1000):
        test_img.append(load_test_image(directory, image_name))

# Same scaling as the training images: float16 values in [0, 1].
test_img = np.array(test_img, np.float16)/255.0

In [19]:
# Score every test image and keep the prediction matrix in a list so that
# further prediction sets could be appended and summed later.
predictions = model.predict(test_img, batch_size=64, verbose=2)
yres = [predictions]

In [20]:
# Add up all prediction matrices collected in yres and label the columns
# with the tag names (label_map keys, in insertion order).
summed_scores = np.array(yres[0])
for extra_scores in yres[1:]:
    summed_scores += np.array(extra_scores)
sub = pd.DataFrame(summed_scores, columns=list(label_map))

In [21]:
#saving my final result into a csv file
# A tag is emitted for a row when its score is strictly above this threshold.
tag_threshold = 0.2

# Threshold the whole score matrix at once instead of slicing, applying and
# transposing a one-row DataFrame per image.
tag_names = sub.columns.to_numpy()
above_threshold = sub.to_numpy() > tag_threshold
# Boolean-mask the tag names row by row; column order is preserved, and a row
# with no tag above the threshold yields an empty string.
preds = [' '.join(tag_names[row_mask]) for row_mask in above_threshold]

# Work on a copy so the loaded submission template is not mutated in place.
new_submission = submission_csv.copy()
new_submission['tags'] = preds
new_submission.to_csv('submission.csv', index=False)

In [22]:
# Display the final submission table.
new_submission