In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# Sanity check that the dataset is mounted: the DataLoader class defined later in
# this notebook hard-codes its paths under ../input.
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.contrib.slim as slim

from tqdm import tqdm
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, BatchNormalization, Dropout
from keras.models import Sequential
from keras.optimizers import Adam

In [None]:
class DataLoader:
    """Read the competition images into NumPy arrays, with an on-disk ``.npy`` cache.

    Attributes set by ``__init__``:
        npy_file: directory that holds the cached ``.npy`` arrays (created if missing).
        csv_name: path of the training CSV.
        df: the training CSV as a DataFrame; its first column is used as the image
            file name and its second column as the integer label.
        n_classes: number of label classes (2).
    """

    def __init__(self, npy_file: str = "npy_data"):
        self.npy_file = npy_file
        self.csv_name = "../input/train.csv"
        self.df = self.read_csv()
        self.n_classes = 2

        os.makedirs(self.npy_file, exist_ok=True)

    def read_csv(self):
        """Return the training CSV (``self.csv_name``) as a DataFrame."""
        return pd.read_csv(self.csv_name)

    @staticmethod
    def _cache_matches(x_data, size2resize: tuple, make_gray: bool) -> bool:
        """Tell whether a cached image array fits the requested size and colour mode."""
        if x_data.ndim not in (3, 4):
            return False
        # cv2.resize takes (width, height) but produces arrays shaped (height, width, ...).
        if tuple(x_data.shape[1:3]) != (size2resize[1], size2resize[0]):
            return False
        # A grayscale cache has no channel axis and cannot be turned back into colour.
        return make_gray or x_data.ndim == 4

    @staticmethod
    def _read_images(frame, image_dir: str, size2resize: tuple, make_gray: bool):
        """Read, optionally gray-convert and resize every image listed in ``frame``.

        Returns ``(images, labels)`` as NumPy arrays.

        Raises:
            OSError: if OpenCV cannot read one of the listed image files.
        """
        images = []
        labels = []

        for row in tqdm(frame.values):
            path = os.path.join("../input", image_dir, row[0])
            img = cv2.imread(path)

            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than deep inside OpenCV.
            if img is None:
                raise OSError(f"Could not read image: {path}")

            if make_gray:
                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

            images.append(cv2.resize(img, size2resize))
            labels.append(int(row[1]))

        return np.array(images), np.array(labels)

    def _load_split(self, frame_loader, image_dir: str, x_name: str, y_name: str,
                    load_from_npy: bool, size2resize: tuple, make_gray: bool,
                    save: bool, categorical: bool, n_classes: int):
        """Shared implementation behind ``read_data`` and ``read_test_data``.

        ``frame_loader`` is a zero-argument callable returning the DataFrame that
        lists the images; it is only invoked when the images must be read from disk.
        """
        x_path = f"{self.npy_file}/{x_name}"
        y_path = f"{self.npy_file}/{y_name}"

        x_data = None
        y_data = None

        if load_from_npy:
            try:
                x_data = np.load(x_path)
                y_data = np.load(y_path)
            except FileNotFoundError:
                print("NPY files not found!")
                x_data = y_data = None
            else:
                # A cache written with another size / colour mode must not be
                # returned as if it satisfied this request.
                if not self._cache_matches(x_data, size2resize, make_gray):
                    print("Cached NPY files do not match the requested size/colour mode; rebuilding.")
                    x_data = y_data = None

        if x_data is None:
            x_data, y_data = self._read_images(frame_loader(), image_dir, size2resize, make_gray)

            if save:
                np.save(x_path, x_data)
                np.save(y_path, y_data)
        elif make_gray and x_data.ndim == 4:
            # The cache holds colour images but grayscale was requested.
            x_data = np.array([cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) for img in x_data])

        if categorical:
            y_data = tf.keras.utils.to_categorical(y_data, num_classes=n_classes)
        else:
            y_data = y_data.reshape(-1, 1)

        if make_gray:
            # Grayscale images have no channel axis; add a trailing one.
            x_data = np.expand_dims(x_data, axis=-1)

        return x_data, y_data

    def read_data(self, load_from_npy: bool = True, size2resize: tuple = (75, 75), make_gray: bool = True,
                  save: bool = True, categorical: bool = False, n_classes: int = 2):
        """Load the training images and labels.

        Args:
            load_from_npy: try the ``x_data.npy`` / ``y_data.npy`` cache first; fall
                back to reading the images when the cache is missing or was built
                with a different size / colour mode.
            size2resize: target size handed to ``cv2.resize``.
            make_gray: convert images to grayscale and add a trailing channel axis.
            save: write freshly read arrays to the cache (overwriting a stale one).
            categorical: one-hot encode the labels with ``n_classes`` columns;
                otherwise labels are reshaped to a single column.
            n_classes: number of classes used for one-hot encoding.

        Returns:
            ``(x_data, y_data)``.

        Raises:
            OSError: if an image listed in the CSV cannot be read.
        """
        return self._load_split(
            frame_loader=lambda: self.df,
            image_dir="train/train",
            x_name="x_data.npy",
            y_name="y_data.npy",
            load_from_npy=load_from_npy,
            size2resize=size2resize,
            make_gray=make_gray,
            save=save,
            categorical=categorical,
            n_classes=n_classes,
        )

    def read_test_data(self, load_from_npy: bool = True, size2resize: tuple = (75, 75), make_gray: bool = True,
                  save: bool = True, categorical: bool = False, n_classes: int = 2):
        """Load the test images listed in ``../input/sample_submission.csv``.

        Takes the same arguments and returns the same ``(x_data, y_data)`` pair as
        ``read_data``, using the ``x_data_test.npy`` / ``y_data_test.npy`` cache.
        ``y_data`` is the second CSV column cast to ``int``. The CSV is only read
        when the images actually have to be loaded from disk.

        Raises:
            OSError: if an image listed in the CSV cannot be read.
        """
        return self._load_split(
            frame_loader=lambda: pd.read_csv("../input/sample_submission.csv"),
            image_dir="test/test",
            x_name="x_data_test.npy",
            y_name="y_data_test.npy",
            load_from_npy=load_from_npy,
            size2resize=size2resize,
            make_gray=make_gray,
            save=save,
            categorical=categorical,
            n_classes=n_classes,
        )

In [None]:
class TrainWithKeras:
    """Builds, compiles and fits a small convolutional classifier with Keras.

    The constructor only stores the data and hyper-parameters and creates the Adam
    optimizer; the model itself is created by ``make_model``.
    """

    def __init__(self, x_data, y_data, lr: float = 0.001, epochs: int = 10, batch_size: int = 32,
                 loss: str = "categorical_crossentropy", model_path: str = "model.h5"):
        self.x_data, self.y_data = x_data, y_data
        self.model_path = model_path

        self.epochs, self.batch_size = epochs, batch_size

        self.optimizer = Adam(lr=lr)
        self.loss = loss

    def make_model(self, summarize: bool = True):
        """Create the (uncompiled) network; print its summary when ``summarize`` is set.

        The input shape is taken from axes 1-3 of ``self.x_data``; the output layer
        is a two-unit softmax.
        """
        dims = self.x_data.shape
        # Every convolution uses the same kernel, stride and activation.
        conv_options = dict(kernel_size=(3, 3), strides=1, activation="relu")

        network = Sequential([
            Conv2D(64, input_shape=(dims[1], dims[2], dims[3]), **conv_options),
            MaxPooling2D(),
            Conv2D(128, **conv_options),
            Dropout(0.3),
            BatchNormalization(),

            Conv2D(256, **conv_options),
            MaxPooling2D(),
            Conv2D(512, **conv_options),
            Dropout(0.3),

            Conv2D(1024, **conv_options),

            Flatten(),

            Dense(1024, activation="relu"),
            Dropout(0.3),
            Dense(2, activation="softmax"),
        ])

        if summarize:
            network.summary()

        return network

    def compile(self, kmodel: Sequential):
        """Compile ``kmodel`` with the stored loss and optimizer, tracking accuracy."""
        kmodel.compile(optimizer=self.optimizer, loss=self.loss, metrics=["acc"])

        return kmodel

    def train(self, kmodel: Sequential, save: bool = True):
        """Fit ``kmodel`` on the stored data and optionally save it to ``self.model_path``.

        Returns:
            ``(history, kmodel)`` - the object returned by ``fit`` and the model.
        """
        fit_options = {
            "batch_size": self.batch_size,
            "epochs": self.epochs,
            "validation_split": 0.0,  # no data is held out for validation
        }
        history = kmodel.fit(self.x_data, self.y_data, **fit_options)

        if save:
            kmodel.save(self.model_path)

        return history, kmodel

In [None]:
class MakeSubmission:
    """Predict on the test images with a saved model and write the submission CSV.

    All the work happens in the constructor: it loads the model from
    ``model_path``, reads the CSV at ``csv_path`` for its ``id`` column, predicts
    on ``x_test`` and writes an ``id`` / ``has_cactus`` table to ``output_path``.

    Args:
        x_test: array of test images passed to ``model.predict``.
        model_path: path of the saved Keras model.
        csv_path: CSV whose ``id`` column provides the submission ids.
        output_path: where the submission is written; defaults to
            ``"sample_submission.csv"`` in the current directory.

    Raises:
        ValueError: if the number of predictions differs from the number of ids.
    """

    def __init__(self, x_test: np.ndarray, model_path: str, csv_path: str,
                 output_path: str = "sample_submission.csv"):
        self.x_test = x_test
        self.model_path = model_path
        self.csv_path = csv_path
        self.output_path = output_path

        self.model = tf.keras.models.load_model(self.model_path)
        self.df = pd.read_csv(self.csv_path)

        preds = self.make_predictions()

        # Fail with a readable message instead of pandas' generic length error.
        if len(preds) != len(self.df):
            raise ValueError(
                f"Got {len(preds)} predictions for {len(self.df)} ids in {self.csv_path}"
            )

        submission = pd.DataFrame({'id': self.df['id'], 'has_cactus': preds})
        submission.to_csv(self.output_path, index=False)

    def make_predictions(self, make_it_ready: bool = True):
        """Run the model on ``self.x_test``.

        Args:
            make_it_ready: when true, reduce each row of scores to the index of
                its largest value (the predicted class); when false, return the
                raw output of ``model.predict``.

        Returns:
            A list of class indices, or the raw prediction array.
        """
        preds = self.model.predict(self.x_test)

        if make_it_ready:
            # One vectorised argmax over the class axis instead of a Python loop;
            # assumes predict returns (n_samples, n_classes), as the two-unit
            # softmax model built in this notebook does.
            preds = np.argmax(preds, axis=-1).tolist()

        return preds

In [None]:
# Directory that will receive the trained model file.
os.makedirs("models", exist_ok=True)

dl = DataLoader()

# Colour images resized to 32x32, one-hot labels; the .npy cache is used when present.
X_data, Y_data = dl.read_data(
    load_from_npy=True,
    size2resize=(32, 32),
    make_gray=False,
    save=True,
    categorical=True,
    n_classes=2,
)

In [None]:
trainer = TrainWithKeras(
    X_data,
    Y_data,
    lr=0.0002,
    epochs=50,
    batch_size=1024,
    model_path="models/model.h5",
)
model = trainer.compile(trainer.make_model())

# NOTE: train() returns a (history, model) tuple, so `histroy` holds that pair,
# not the history object alone. The (misspelled) name is kept unchanged here.
histroy = trainer.train(model)

In [None]:
# Same preprocessing as the training split: colour, 32x32. Labels are left
# as a single column (not one-hot).
X_data_test, Y_data_test = dl.read_test_data(
    load_from_npy=True,
    size2resize=(32, 32),
    make_gray=False,
    save=True,
    categorical=False,
)

# Constructing MakeSubmission runs the prediction and writes the submission file.
ms = MakeSubmission(
    x_test=X_data_test,
    model_path="models/model.h5",
    csv_path="../input/sample_submission.csv",
)