In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively list the Kaggle input directory, printing one full path per file.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
# importing required libraries
import matplotlib.pyplot as plt
%matplotlib inline
import pathlib
import cv2
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import fbeta_score
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
import gc

In [3]:
# Read the two CSV tables used by this notebook: the training tags and the
# sample submission (whose rows are later used as the list of test images).
train_csv_path = "/kaggle/input/planets-dataset/planet/planet/train_classes.csv"
test_csv_path = "/kaggle/input/planets-dataset/planet/planet/sample_submission.csv"
train_images = pd.read_csv(train_csv_path)
test_images = pd.read_csv(test_csv_path)
# Display the first rows of the training table as the cell output.
train_images.head()

In [4]:
# Display the first five rows of the sample-submission table.
test_images.head(n=5)

In [5]:
# Folders that hold the JPEG files for training, testing and additional testing.
train_img_dir = pathlib.Path('/kaggle/input/planets-dataset/planet/planet/train-jpg')
test_img_dir = pathlib.Path('/kaggle/input/planets-dataset/planet/planet/test-jpg')
test_add_img_dir = pathlib.Path('/kaggle/input/planets-dataset/test-jpg-additional/test-jpg-additional')

# Gather the training JPEG paths in sorted order and report how many there are.
# (sorted() consumes the glob iterator directly, so no intermediate list is needed.)
train_img_path = sorted(train_img_dir.glob('*.jpg'))
train_img_count = len(train_img_path)
print("Number of images for training", train_img_count)

In [6]:
# Gather the test JPEG paths in sorted order and report how many there are.
test_img_path = sorted(test_img_dir.glob('*.jpg'))
test_img_count = len(test_img_path)
print("Number of images for testing", test_img_count)

In [7]:
# Gather the additional-test JPEG paths in sorted order and report how many there are.
test_add_img_path = sorted(test_add_img_dir.glob('*.jpg'))
test_add_img_count = len(test_add_img_path)
print("Number of images for additional testing", test_add_img_count)

In [8]:
# Read one sample test image as stored on disk (IMREAD_UNCHANGED) and print its shape.
sample_img_file = "/kaggle/input/planets-dataset/planet/planet/test-jpg/test_18979.jpg"
img = cv2.imread(sample_img_file, cv2.IMREAD_UNCHANGED)

print('Original Dimensions : ', img.shape)
 


In [9]:
# Shrink the sample image to 25% of its size along each axis.
scale_percent = 25
# img.shape starts with (rows, columns), i.e. (height, width).
height, width = (int(side * scale_percent / 100) for side in img.shape[:2])
# cv2.resize is given the target size in (width, height) order.
dim = (width, height)
resized = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)

print('Resized Dimensions : ', resized.shape)

In [10]:
# Load every training image, downscale it to `dim`, and build the multi-hot
# label matrix; both are stored as numpy arrays for easier manipulation.
x_train = []
y_train = []
flatten = lambda l: [item for sublist in l for item in sublist]
# Sort the tag vocabulary: iterating a bare set of strings yields an arbitrary
# order, which would make the label <-> column mapping differ between sessions.
labels = sorted(set(flatten([l.split(' ') for l in train_images['tags'].values])))
label_map = {l: i for i, l in enumerate(labels)}
for f, tags in tqdm(train_images.values, miniters=1000):
    # Use the directory defined earlier instead of a working-directory-relative path.
    img_file = str(train_img_dir / '{}.jpg'.format(f))
    img = cv2.imread(img_file)
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        raise FileNotFoundError('Could not read training image: {}'.format(img_file))
    # Resize THIS image. Previously the loaded image was overwritten with the
    # global `resized` sample, so every training example was the same picture.
    img = cv2.resize(img, dim, interpolation=cv2.INTER_AREA)
    # One column per distinct tag (was a hard-coded 17).
    targets = np.zeros(len(labels))
    for t in tags.split(' '):
        targets[label_map[t]] = 1
    x_train.append(img)
    y_train.append(targets)

x_train = np.array(x_train, np.float32)
y_train = np.array(y_train, np.uint8)

print(x_train.shape)
print(y_train.shape)

In [11]:
# Merge the regular and the additional test images into one array, images_test,
# in the row order of the sample-submission table.
gc.collect()
images_test = []

test_image_names = os.listdir(test_img_dir)

# The first n_test rows of the submission table are read from test_img_dir,
# the remaining rows from test_add_img_dir.
n_test = len(test_image_names)
test_classes = test_images.iloc[:n_test, :]
add_classes = test_images.iloc[n_test:, :]

test_add_image_names = os.listdir(test_add_img_dir)


def _load_resized(img_file, size):
    """Read one image from disk and downscale it.

    Args:
        img_file: Path of the image file, as a string.
        size: Target size as (width, height), the order cv2.resize expects.

    Returns:
        The resized image array.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file (imread returned None).
    """
    loaded = cv2.imread(img_file)
    if loaded is None:
        raise FileNotFoundError('Could not read test image: {}'.format(img_file))
    return cv2.resize(loaded, size, interpolation=cv2.INTER_AREA)


# Previously both loops appended the global `resized` sample instead of the image
# they had just read, so the test array held N copies of one picture.
for split_rows, split_dir in ((test_classes, test_img_dir),
                              (add_classes, test_add_img_dir)):
    for img_name, _ in tqdm(split_rows.values, miniters=1000):
        img = _load_resized(str(split_dir) + '/{}.jpg'.format(img_name), dim)
        images_test.append(img)

images_test = np.array(images_test, np.float32)
print(images_test.shape)

In [12]:
# Hold out 20% of the training data as a validation set.
# A fixed random_state makes the split (and therefore the validation metrics)
# reproducible across kernel restarts.
# NOTE: this cell rebinds x_train / y_train, so running it a second time without
# re-running the loading cell would split the already-reduced training set again.
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.2, random_state=42)

print("x_train: {}".format(x_train.shape))
print("x_val: {}".format(x_val.shape))
print("y_train: {}".format(y_train.shape))
print("y_val: {}".format(y_val.shape))

In [14]:
# Suppress all Python warnings from this point on in the session.
import warnings
warnings.filterwarnings('ignore')
# Standalone-Keras building blocks.
# NOTE(review): none of the names imported below are referenced in the cells
# visible in this notebook, which build the model through `tf.keras` instead —
# confirm whether these imports are still needed.
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import optimizers

In [15]:
import tensorflow as tf
# Model specification
gc.collect()

def net(num_classes=17):
    """Build the CNN used for multi-label tag prediction.

    The network is two convolution + average-pooling stages followed by three
    fully connected ReLU layers and a sigmoid output layer.

    Args:
        num_classes: Number of output units, one per tag. Defaults to 17, the
            width of the multi-hot target vectors built in this notebook.
            (The output layer previously had 2176 units and no activation,
            which matches neither the targets nor a probability-based loss.)

    Returns:
        An uncompiled `tf.keras.models.Sequential` model whose outputs are
        independent per-tag probabilities in [0, 1].
    """
    return tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(filters=6, kernel_size=(3,3), activation='relu',
                               padding='same'),
        tf.keras.layers.AvgPool2D(pool_size=2, strides=2),
        tf.keras.layers.Conv2D(filters=16, kernel_size=(3,3),
                               activation='relu'),
        tf.keras.layers.AvgPool2D(pool_size=2, strides=2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(128, activation='relu'),
        # Sigmoid (not softmax): several tags can be active for one image.
        tf.keras.layers.Dense(num_classes, activation='sigmoid')])

In [16]:
# Feed a random batch of three 64x64 three-channel inputs through a freshly
# built network one layer at a time, printing each layer's output shape.
X = tf.random.uniform(shape=(3, 64, 64, 3))
probe_layers = net().layers
for layer in probe_layers:
    X = layer(X)
    print(type(layer).__name__, 'output shape: \t', X.shape)

In [17]:
# Build the model that is actually trained: two convolution + average-pooling
# stages, three dense ReLU layers, and one sigmoid unit per tag.
# The output width is taken from the label matrix so predictions and targets
# always have the same shape (it was a hard-coded 2176 with no activation,
# which mismatches the multi-hot targets and a from_logits=False loss).
num_classes = y_train.shape[1]
model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(filters=6, kernel_size=(3,3), activation='relu',
                               padding='same'),
        tf.keras.layers.AvgPool2D(pool_size=2, strides=2),
        tf.keras.layers.Conv2D(filters=16, kernel_size=(3,3),
                               activation='relu'),
        tf.keras.layers.AvgPool2D(pool_size=2, strides=2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(128, activation='relu'),
        # Sigmoid (not softmax): several tags can be active for one image.
        tf.keras.layers.Dense(num_classes, activation='sigmoid')])

In [18]:
# Adam with a small learning rate.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
# The targets are multi-hot vectors (an image can carry several tags at once), so
# each output is scored as an independent binary problem. Categorical
# cross-entropy assumes exactly one active class per sample and is not
# appropriate here.
# from_logits=False means the loss expects probabilities in [0, 1], i.e. the
# model's last layer must apply a sigmoid.
loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)
model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

In [19]:

# Train for 20 epochs in shuffled mini-batches of 128, evaluating on the
# held-out validation split after every epoch (verbose=2: one log line per epoch).
fit_options = dict(
    validation_data=(x_val, y_val),
    batch_size=128,
    epochs=20,
    shuffle=True,
    verbose=2,
)
model.fit(x_train, y_train, **fit_options)
        



In [None]:
# Run the trained model over the merged test array in batches of 128.
pred_test = model.predict(
    images_test,
    batch_size=128,
    verbose=2,
)