In [7]:
from tensorflow.python.ops.gen_nn_ops import depthwise_conv2d_native
#!/usr/bin/env python3
#
# Team members' IDs:
# 182a3da8-8a9e-11ec-986f-f39926f24a9c  (Jan Zubáč)
# 7797f596-9326-11ec-986f-f39926f24a9c
# 449dba85-9adb-11ec-986f-f39926f24a9c
#
import argparse
import datetime
import os
import re
import sys
sys.path.append('/kaggle/input/cags-competition')

os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "2")  # Report only TF errors by default

import numpy as np
import tensorflow as tf

from cags_dataset import CAGS
import efficient_net

def main(args: argparse.Namespace) -> None:
    """Fine-tune a pretrained EfficientNet-B0 classifier on CAGS and write test predictions.

    The EfficientNet-B0 backbone (without its classification top) is followed by
    `args.hidden_layers` dense ReLU layers, each with dropout, and a softmax
    output over `CAGS.LABELS`. After training, the predicted class index of every
    test example is written, one per line, to `cags_classification.txt` inside
    the run's log directory.

    Args:
        args: Parsed options. Reads `seed`, `threads`, `hidden_layers`,
            `dropout`, `learning_rate`, `decay_steps`, `learning_rate_final`,
            `optimizer` ("SGD" or "Adam"), `momentum`, `batch_size` and `epochs`.
            `args.logdir` is set as a side effect.

    Raises:
        ValueError: If `args.optimizer` is neither "SGD" nor "Adam".
    """
    # Fail fast on an unsupported optimizer, before any data or weights are loaded
    # (previously this surfaced only later as an unbound `optimizer` variable).
    if args.optimizer not in ("SGD", "Adam"):
        raise ValueError("Unsupported optimizer {!r}, expected 'SGD' or 'Adam'.".format(args.optimizer))

    # Fix threads
    # NOTE(review): global seeding is left disabled as in the original; confirm whether
    # `tf.keras.utils.set_random_seed` is available in the target TF version.
    # tf.keras.utils.set_random_seed(args.seed)
    tf.config.threading.set_inter_op_parallelism_threads(args.threads)
    tf.config.threading.set_intra_op_parallelism_threads(args.threads)

    # Create logdir name
    args.logdir = os.path.join("logs", "{}-{}-{}".format(
        os.path.basename(globals().get("__file__", "notebook")),
        datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"),
        ",".join(("{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v) for k, v in sorted(vars(args).items())))
    ))

    # Load the data
    cags = CAGS()

    generator = tf.random.Generator.from_seed(args.seed)
    def train_augment(image: tf.Tensor, label: tf.Tensor): # -> Tuple[tf.Tensor, tf.Tensor]:
        """Randomly flip, pad, rescale and crop `image` back to CAGS.H x CAGS.W; `label` is unchanged."""
        if generator.uniform([]) >= 0.5:
            image = tf.image.flip_left_right(image)
        image = tf.image.resize_with_crop_or_pad(image, CAGS.H + 6, CAGS.W + 6)
        image = tf.image.resize(image, [generator.uniform([], CAGS.H, CAGS.H + 12 + 1, dtype=tf.int32),
                                        generator.uniform([], CAGS.W, CAGS.W + 12 + 1, dtype=tf.int32)])
        image = tf.image.crop_to_bounding_box(
            image, target_height=CAGS.H, target_width=CAGS.W,
            offset_height=generator.uniform([], maxval=tf.shape(image)[0] - CAGS.H + 1, dtype=tf.int32),
            offset_width=generator.uniform([], maxval=tf.shape(image)[1] - CAGS.W + 1, dtype=tf.int32),
        )
        return image, label

    # Load the EfficientNet-B0 model
    efficientnet_b0 = efficient_net.pretrained_efficientnet_b0(include_top=False)

    # Build the classifier on top of the backbone.
    inputs = tf.keras.layers.Input(shape=[CAGS.H, CAGS.W, CAGS.C])
    # Do not pin `training=True` here: that would keep batch normalization and dropout
    # of the backbone in training mode during validation and prediction as well.
    # Without the argument, Keras propagates the phase of the enclosing call
    # (training inside `fit`, inference inside `evaluate`/`predict`).
    # The backbone returns several outputs; only the first one is used.
    features = efficientnet_b0(inputs)[0]

    hidden = tf.keras.layers.Flatten()(features)
    for hidden_layer_size in args.hidden_layers:
        hidden = tf.keras.layers.Dense(hidden_layer_size, activation=tf.nn.relu)(hidden)
        hidden = tf.keras.layers.Dropout(args.dropout)(hidden)
    outputs = tf.keras.layers.Dense(len(CAGS.LABELS), activation=tf.nn.softmax)(hidden)

    # Linear decay (power=1.0) from `learning_rate` to `learning_rate_final` over
    # `decay_steps` optimizer steps; with cycle=False the final rate is kept afterwards.
    learning_rate = tf.keras.optimizers.schedules.PolynomialDecay(
        args.learning_rate, args.decay_steps,
        end_learning_rate=args.learning_rate_final, power=1.0,
        cycle=False, name='linear_lr_decay')

    # Choose the optimizer; the name was validated at the top of the function.
    if args.optimizer == 'SGD':
        momentum = args.momentum if args.momentum is not None else 0.0
        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=momentum, nesterov=False, name='SGD')
    else:
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False, name='Adam')

    # Compile the model
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer=optimizer,
        loss=tf.losses.SparseCategoricalCrossentropy(),
        metrics=[tf.metrics.SparseCategoricalAccuracy("accuracy")],
    )

    tb_callback = tf.keras.callbacks.TensorBoard(args.logdir, update_freq=100, profile_batch=0)

    # Input pipelines: augmentation and shuffling are applied to the training data only.
    train = cags.train.map(lambda example: (example["image"], example["label"]))
    train = train.map(train_augment)
    train = train.shuffle(10000, seed=args.seed)
    train = train.batch(args.batch_size)

    dev = cags.dev.map(lambda example: (example["image"], example["label"]))
    dev = dev.batch(args.batch_size)

    # The datasets are already batched, so `batch_size` must not be passed to `fit`.
    model.fit(
        train,
        epochs=args.epochs,
        validation_data=dev,
        callbacks=[tb_callback],
    )

    # Predict before touching the output file, so a failed prediction leaves no empty file behind.
    # The test set is deliberately not shuffled: predictions must keep the dataset order.
    test = cags.test.map(lambda example: example["image"]).batch(args.batch_size)
    test_probabilities = model.predict(test)

    # Generate test set annotations, but in `args.logdir` to allow parallel execution.
    os.makedirs(args.logdir, exist_ok=True)
    with open(os.path.join(args.logdir, "cags_classification.txt"), "w", encoding="utf-8") as predictions_file:
        for predicted_class in np.argmax(test_probabilities, axis=1):
            print(predicted_class, file=predictions_file)
    

In [11]:
# Experiment: hidden layers [20, 20], dropout 0.2, batch size 64, 100 epochs.
parser = argparse.ArgumentParser()

# Declare the available flags; their values for this run are set in one place below.
parser.add_argument("--batch_size", type=int, help="Batch size.")
parser.add_argument("--epochs", type=int, help="Number of epochs.")
parser.add_argument("--seed", type=int, help="Random seed.")
parser.add_argument("--threads", type=int, help="Maximum number of threads to use.")
parser.add_argument("--window", type=int, help="Window size to use.")
parser.add_argument("--l2", type=float, help="L2 regularization.")
parser.add_argument("--hidden_layers", nargs="*", type=int, help="Hidden layer sizes.")
parser.add_argument("--dropout", type=float, help="Dropout regularization.")
parser.add_argument("--decay_steps", type=int, help="decay_steps.")
parser.add_argument("--learning_rate", type=float, help="Initial learning rate.")
parser.add_argument("--learning_rate_final", type=float, help="Final learning rate.")
parser.add_argument("--momentum", type=float, help="Momentum.")
parser.add_argument("--optimizer", type=str, help="Optimizer to use.")

# Hyperparameters of this run (used as the defaults of the flags above).
parser.set_defaults(
    batch_size=64,
    epochs=100,
    seed=42,
    threads=4,
    window=8,
    l2=0.0,
    hidden_layers=[20, 20],
    dropout=0.2,
    decay_steps=100,
    learning_rate=0.01,
    learning_rate_final=0.001,
    momentum=0.85,
    optimizer="SGD",
)

# Inside a notebook there is no `__file__`, so parse an empty argument list
# instead of the kernel's own command line.
args = parser.parse_args([] if "__file__" not in globals() else None)
print(args)
main(args)

In [13]:
# Experiment: hidden layers [100, 100], dropout 0.4, batch size 64, 100 epochs.
parser = argparse.ArgumentParser()

# Declare the available flags; their values for this run are set in one place below.
parser.add_argument("--batch_size", type=int, help="Batch size.")
parser.add_argument("--epochs", type=int, help="Number of epochs.")
parser.add_argument("--seed", type=int, help="Random seed.")
parser.add_argument("--threads", type=int, help="Maximum number of threads to use.")
parser.add_argument("--window", type=int, help="Window size to use.")
parser.add_argument("--l2", type=float, help="L2 regularization.")
parser.add_argument("--hidden_layers", nargs="*", type=int, help="Hidden layer sizes.")
parser.add_argument("--dropout", type=float, help="Dropout regularization.")
parser.add_argument("--decay_steps", type=int, help="decay_steps.")
parser.add_argument("--learning_rate", type=float, help="Initial learning rate.")
parser.add_argument("--learning_rate_final", type=float, help="Final learning rate.")
parser.add_argument("--momentum", type=float, help="Momentum.")
parser.add_argument("--optimizer", type=str, help="Optimizer to use.")

# Hyperparameters of this run (used as the defaults of the flags above).
parser.set_defaults(
    batch_size=64,
    epochs=100,
    seed=42,
    threads=4,
    window=8,
    l2=0.0,
    hidden_layers=[100, 100],
    dropout=0.4,
    decay_steps=100,
    learning_rate=0.01,
    learning_rate_final=0.001,
    momentum=0.85,
    optimizer="SGD",
)

# Inside a notebook there is no `__file__`, so parse an empty argument list
# instead of the kernel's own command line.
args = parser.parse_args([] if "__file__" not in globals() else None)
print(args)
main(args)

In [18]:
# Experiment: a single hidden layer [100], dropout 0.2, batch size 128, 250 epochs.
parser = argparse.ArgumentParser()

# Declare the available flags; their values for this run are set in one place below.
parser.add_argument("--batch_size", type=int, help="Batch size.")
parser.add_argument("--epochs", type=int, help="Number of epochs.")
parser.add_argument("--seed", type=int, help="Random seed.")
parser.add_argument("--threads", type=int, help="Maximum number of threads to use.")
parser.add_argument("--window", type=int, help="Window size to use.")
parser.add_argument("--l2", type=float, help="L2 regularization.")
parser.add_argument("--hidden_layers", nargs="*", type=int, help="Hidden layer sizes.")
parser.add_argument("--dropout", type=float, help="Dropout regularization.")
parser.add_argument("--decay_steps", type=int, help="decay_steps.")
parser.add_argument("--learning_rate", type=float, help="Initial learning rate.")
parser.add_argument("--learning_rate_final", type=float, help="Final learning rate.")
parser.add_argument("--momentum", type=float, help="Momentum.")
parser.add_argument("--optimizer", type=str, help="Optimizer to use.")

# Hyperparameters of this run (used as the defaults of the flags above).
parser.set_defaults(
    batch_size=128,
    epochs=250,
    seed=42,
    threads=4,
    window=8,
    l2=0.0,
    hidden_layers=[100],
    dropout=0.2,
    decay_steps=100,
    learning_rate=0.01,
    learning_rate_final=0.001,
    momentum=0.85,
    optimizer="SGD",
)

# Inside a notebook there is no `__file__`, so parse an empty argument list
# instead of the kernel's own command line.
args = parser.parse_args([] if "__file__" not in globals() else None)
print(args)
main(args)

In [8]:
# Experiment: hidden layers [100, 100], dropout 0.4, batch size 64, 250 epochs, 12 threads.
parser = argparse.ArgumentParser()

# Declare the available flags; their values for this run are set in one place below.
parser.add_argument("--batch_size", type=int, help="Batch size.")
parser.add_argument("--epochs", type=int, help="Number of epochs.")
parser.add_argument("--seed", type=int, help="Random seed.")
parser.add_argument("--threads", type=int, help="Maximum number of threads to use.")
parser.add_argument("--window", type=int, help="Window size to use.")
parser.add_argument("--l2", type=float, help="L2 regularization.")
parser.add_argument("--hidden_layers", nargs="*", type=int, help="Hidden layer sizes.")
parser.add_argument("--dropout", type=float, help="Dropout regularization.")
parser.add_argument("--decay_steps", type=int, help="decay_steps.")
parser.add_argument("--learning_rate", type=float, help="Initial learning rate.")
parser.add_argument("--learning_rate_final", type=float, help="Final learning rate.")
parser.add_argument("--momentum", type=float, help="Momentum.")
parser.add_argument("--optimizer", type=str, help="Optimizer to use.")

# Hyperparameters of this run (used as the defaults of the flags above).
parser.set_defaults(
    batch_size=64,
    epochs=250,
    seed=42,
    threads=12,
    window=8,
    l2=0.0,
    hidden_layers=[100, 100],
    dropout=0.4,
    decay_steps=100,
    learning_rate=0.01,
    learning_rate_final=0.001,
    momentum=0.85,
    optimizer="SGD",
)

# Inside a notebook there is no `__file__`, so parse an empty argument list
# instead of the kernel's own command line.
args = parser.parse_args([] if "__file__" not in globals() else None)
print(args)
main(args)

In [15]:
# List the files produced by one specific training run (the path is tied to that run's timestamp).
os.listdir(
    '/kaggle/working/logs/'
    'notebook-2022-03-28_035146-'
    'bs=64,ds=100,d=0.4,e=250,hl=[100, 100],l=0.0,lr=0.01,lrf=0.001,m=0.85,o=SGD,s=42,t=12,w=8'
)


In [20]:
# Print the predictions written by one specific training run.
path_file = '/kaggle/working/logs/notebook-2022-03-28_035146-bs=64,ds=100,d=0.4,e=250,hl=[100, 100],l=0.0,lr=0.01,lrf=0.001,m=0.85,o=SGD,s=42,t=12,w=8/cags_classification.txt'
# Open `path_file` itself: the cell previously opened the unrelated
# 'the-zen-of-python.txt' and never used the path defined above.
# The file is written as UTF-8 by `main`, so read it back the same way.
with open(path_file, encoding="utf-8") as f:
    contents = f.read()
    print(contents)

In [22]:
# Load the predictions of one specific training run into a NumPy array.
path_file = (
    '/kaggle/working/logs/'
    'notebook-2022-03-28_035146-'
    'bs=64,ds=100,d=0.4,e=250,hl=[100, 100],l=0.0,lr=0.01,lrf=0.001,m=0.85,o=SGD,s=42,t=12,w=8'
    '/cags_classification.txt'
)
a = np.loadtxt(fname=path_file)
