<a href="https://colab.research.google.com/github/Young1906/shopee_product_detection/blob/main/baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from matplotlib import pyplot as plt
import numpy as np
from PIL import Image
from tqdm import tqdm 
import random, time, os
from sklearn.model_selection import train_test_split
import pandas as pd

# Colab only: uncomment the two lines below to mount Google Drive at /gdrive.
# The unzip fallback further down reads the archive from /gdrive, so the mount
# is needed whenever the local `data` directory does not exist yet.
# from google.colab import drive
# drive.mount("/gdrive")

# If `data/` already exists just list its contents; otherwise extract the
# competition archive from Drive into `data/` (unzip's stdout is discarded).
!test -d data && ls data || unzip /gdrive/MyDrive/dataset/shopee-code-league-2020-product-detection.zip -d data 1>/dev/null

# Tutorial
# https://www.kaggle.com/fadheladlansyah/product-detection-effnetb5-aug-tta
# Echo the TensorFlow version as the cell output so the run is traceable.
tf.__version__

resized  test.csv  train.csv


'2.4.1'

## 0. Utility functions


## 1. Input pipeline
- Dataset + preprocessing
- Augmentation

### 1.1 Dataset

In [None]:
# Config
# Notebook-wide settings read by the dataset helpers defined in later cells.

# Lets tf.data choose the parallelism level (used as `num_parallel_calls`).
AUTO = tf.data.experimental.AUTOTUNE
# Number of examples per batch produced by the input pipeline.
BATCH_SIZE = 32
# Root directory of the extracted dataset (train.csv, test.csv, resized/).
# NOTE: callers interpolate this as f"{PTH}/...", so the trailing slash
# yields paths with a double slash such as "data//train.csv".
PTH = "data/"
# File-name prefix handed to `Dataset.cache()` for the on-disk cache.
CACHE_PTH = "."

In [None]:
# Constructing dataset
def f(val_size = .1, seed = None):
    """Build the train / validation / test datasets of ``(image path, label)`` pairs.

    Reads ``train.csv`` and ``test.csv`` from ``PTH``, turns every file name
    into a path below ``PTH/resized/...`` and randomly holds out a fraction of
    the training rows for validation. No image is opened here; the returned
    datasets only carry path strings and integer labels.

    Args:
        val_size: Fraction of the training rows moved to the validation
            split. Must satisfy ``0 < val_size < 1``.
        seed: Optional integer forwarded to ``train_test_split`` as
            ``random_state`` so the split can be reproduced. ``None``
            (default) keeps the split random on every call.

    Returns:
        Tuple ``(train, validation, test)`` of ``tf.data.Dataset`` objects,
        each yielding ``(path, category)`` elements.

    Raises:
        ValueError: If ``val_size`` is outside the open interval (0, 1).
    """
    # A real exception instead of `assert`: assertions vanish under `python -O`
    # and would surface as AssertionError rather than the intended ValueError.
    if not 0 < val_size < 1:
        raise ValueError(
            f"val_size must be in the open interval (0, 1), got {val_size!r}"
        )

    train_df = pd.read_csv(os.path.join(PTH, "train.csv"))
    test_df = pd.read_csv(os.path.join(PTH, "test.csv"))

    train_dir = os.path.join(PTH, "resized", "train")
    test_dir = os.path.join(PTH, "resized", "test")

    # Training images live in one sub-folder per category, named with the
    # zero-padded two-digit category id (e.g. category 5 -> "05").
    X_train = np.array(
        [
            os.path.join(train_dir, f"{int(category):02d}", filename)
            for category, filename in zip(train_df["category"], train_df["filename"])
        ],
        dtype=object,
    )
    y_train = train_df["category"].values

    # Test images sit directly in the test folder.
    # NOTE(review): test.csv is expected to provide a `category` column as
    # well (presumably a placeholder label) - confirm against the data.
    X_test = np.array(
        [os.path.join(test_dir, filename) for filename in test_df["filename"]],
        dtype=object,
    )
    y_test = test_df["category"].values

    # train_test_split shuffles before splitting, so no separate manual
    # permutation of the training rows is required.
    X_train, X_validate, y_train, y_validate = train_test_split(
        X_train, y_train, test_size = val_size, random_state = seed
    )

    # TF's Dataset API
    train = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    validation = tf.data.Dataset.from_tensor_slices((X_validate, y_validate))
    test = tf.data.Dataset.from_tensor_slices((X_test, y_test))

    return train, validation, test

def make_dataset(dataset, cache_pth = None):
    """Turn a dataset of ``(image path, label)`` pairs into shuffled image batches.

    Each path is opened with PIL, converted to 3-channel RGB and returned as a
    ``uint8`` array together with its label cast to ``uint8``. Decoded examples
    are cached on disk, shuffled, grouped into batches of ``BATCH_SIZE`` and
    prefetched.

    Args:
        dataset: ``tf.data.Dataset`` yielding ``(path, label)`` elements, as
            produced by ``f``.
        cache_pth: Optional file-name prefix for the on-disk cache. Defaults
            to the module-level ``CACHE_PTH``. Pass a distinct value per split
            when building more than one dataset, otherwise they would share
            one cache file.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, labels)`` batches, both
        ``uint8``.
    """
    if cache_pth is None:
        cache_pth = CACHE_PTH

    def _read_example(pth, label):
        # Executed eagerly by tf.py_function, so `.numpy()` is available.
        # The context manager closes the file handle once pixels are copied.
        with Image.open(pth.numpy().decode("utf-8")) as raw:
            # Force 3 channels: grayscale / RGBA / CMYK files would otherwise
            # produce arrays that cannot be stacked with RGB ones in a batch.
            img = np.asarray(raw.convert("RGB"), dtype = np.uint8)
        return img, np.uint8(label.numpy())

    def load_img(pth, label):
        img, label = tf.py_function(_read_example, (pth, label), (tf.uint8, tf.uint8))
        # py_function drops static shape information; restore what is known.
        img.set_shape([None, None, 3])
        label.set_shape([])
        return img, label

    # Order matters:
    #   * cache single decoded examples, so batch membership is not frozen
    #     into the cache;
    #   * shuffle examples, not batches - shuffling after `.batch()` keeps
    #     2048 whole batches (2048 * BATCH_SIZE images) in memory;
    #   * batch last, then prefetch to overlap loading with training.
    # NOTE(review): batching assumes every file under `resized/` has the same
    # height and width - confirm against the data.
    return (
        dataset
        .map(load_img, num_parallel_calls=AUTO)
        .cache(cache_pth)
        .shuffle(2048)
        .batch(BATCH_SIZE)
        .prefetch(AUTO)
    )

In [None]:
# Build the (path, label) datasets with the default validation split, then
# turn the training split into a batched image pipeline.
# NOTE: only `train` goes through make_dataset here; `valid` and `test`
# remain raw (path, label) datasets after this cell.
train, valid, test = f()
train = make_dataset(train)

In [None]:
# Define network
class Preprocess(tf.keras.layers.Layer):
    """Keras layer applying the EfficientNet input preprocessing inside the model.

    Wrapping ``efficientnet.preprocess_input`` in a layer keeps preprocessing
    part of the network, so raw image batches can be fed to the model directly.

    NOTE(review): the Keras documentation describes ``preprocess_input`` for
    EfficientNet as a pass-through because scaling happens inside the
    EfficientNet model itself - confirm for the installed TensorFlow version.

    Args:
        **kwargs: Standard ``tf.keras.layers.Layer`` keyword arguments
            (``name``, ``dtype``, ``trainable``, ...). Forwarding them is
            what allows Keras to rebuild the layer from its config.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, X):
        """Return ``X`` run through ``efficientnet.preprocess_input``."""
        return tf.keras.applications.efficientnet.preprocess_input(X)

def make_model(n_classes = 42, img_size = 299):
    """Build and compile the transfer-learning classifier.

    The network is a frozen, ImageNet-pretrained EfficientNetB7 feature
    extractor (global-average pooled) followed by dropout and a softmax
    classification layer. A summary of the model is printed.

    Args:
        n_classes: Number of output classes of the softmax layer.
        img_size: Height and width, in pixels, of the square RGB input images.

    Returns:
        The compiled ``tf.keras.Sequential`` model.
    """
    # No explicit `tf.device` scope: TensorFlow chooses the device itself, so
    # the function does not depend on a hard-coded GPU device string.
    basemodel = tf.keras.applications.EfficientNetB7(
        # Must be the boolean False: the string "False" is truthy and would
        # keep the 1000-class ImageNet classification head.
        include_top=False,
        weights="imagenet",
        input_shape=(img_size, img_size, 3),
        # Collapse the spatial feature map to one vector per image so the
        # Dense layer emits a single class distribution per example.
        pooling="avg",
    )
    # Freeze the pretrained backbone; only the new head is trained.
    basemodel.trainable = False

    net = tf.keras.Sequential([
        Preprocess(),
        basemodel,
        tf.keras.layers.Dropout(.2),
        tf.keras.layers.Dense(n_classes, activation = "softmax")
    ])

    net.compile(optimizer = "sgd",
                loss="sparse_categorical_crossentropy",
                metrics=["accuracy"]
                )
    # `build` expects the batch dimension to be part of the shape.
    net.build(input_shape=(None, img_size, img_size, 3))
    net.summary()
    return net

In [None]:
# Sanity check: pull the first batch out of the training pipeline, print it,
# and stop without iterating over the rest of the dataset.
for i in train:
    print(i)
    break