## Setup

In [1]:
import re
import socket

# The notebook assumes it runs on Kaggle unless the machine's hostname contains
# "computerfon" (presumably the author's local workstation — confirm).
KAGGLE = re.search(r"computerfon", socket.gethostname().lower()) is None
print(f"KAGGLE: {KAGGLE}")

KAGGLE: False


### Imports

In [2]:
if KAGGLE:
    !pip install -q efficientnet
    from kaggle_datasets import KaggleDatasets
    
# general
from glob import glob
import numpy as np
import pandas as pd
from time import perf_counter
from tqdm.notebook import tqdm


#ML
#DL
import tensorflow as tf
from tensorflow.keras import backend as K
import efficientnet.tfkeras as efn

In [3]:
if not KAGGLE:
    # glob() returns paths in filesystem-dependent order; sorting keeps the
    # record order (and therefore the prediction order) reproducible across runs.
    # NOTE(review): nothing is assigned when KAGGLE is True, so later cells that
    # use these lists would raise NameError on Kaggle — confirm intended paths.
    files_test_g = sorted(glob("../input/cqt-g2net-test*/*.tfrec"))
    files_train_g = sorted(glob("../input/g2net-make-tfrecords*/*.tfrec"))

### PARAMS

In [4]:
def auto_select_accelerator():
    """Pick a distribution strategy, preferring a TPU when one can be resolved.

    Tries to resolve, connect to and initialise a TPU cluster. If any of those
    steps raises ``ValueError`` the current default strategy is used instead.

    Returns:
        tuple: ``(strategy, tpu_detected, replicas)`` where ``strategy`` is the
        selected ``tf.distribute`` strategy, ``tpu_detected`` is True only when
        the TPU path completed, and ``replicas`` is
        ``strategy.num_replicas_in_sync``.
    """
    tpu_found = False
    try:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        strategy = tf.distribute.experimental.TPUStrategy(resolver)
        print(f"Running on TPU: {resolver.master()}")
        tpu_found = True
    except ValueError:
        # No usable TPU: fall back to whatever strategy is currently active.
        strategy = tf.distribute.get_strategy()

    replica_count = strategy.num_replicas_in_sync
    plural_suffix = "s" if replica_count > 1 else ""
    print(f"Running on {replica_count} replica{plural_suffix}")
    return strategy, tpu_found, replica_count

In [5]:
# NOTE(review): leftover scratch call — an empty pattern matches nothing, so this
# cell only displays []. No later cell uses its result; consider deleting it.
glob("")

[]

In [6]:
class PARAMS:
    """Notebook-wide configuration, evaluated once when this cell runs.

    Note that defining the class calls ``auto_select_accelerator()``, so the
    accelerator is probed (and its status printed) as a side effect.
    """

    # Directory holding the competition files (sample_submission.csv is read from here).
    # The Kaggle slug is hyphenated throughout; the previous value contained a space.
    COMPETITION_DATASET_PATH = "../input/g2net-gravitational-wave-detection" if KAGGLE else "../input/g2net-input"
    # Lets tf.data choose parallelism / prefetch buffer sizes.
    AUTO = tf.data.experimental.AUTOTUNE
    # Distribution strategy, whether a TPU was found, and the replica count.
    STRATEGY, TPU_DETECTED, REPLICAS = auto_select_accelerator()
    # Square edge length images are resized to before entering the network.
    # NOTE(review): 265 may be a typo for 256 — confirm against the size the fold weights were trained with.
    IMG_SIZES = 265
    # Per-replica batch size; predict() doubles it for inference.
    BATCH_SIZES = 64
    # Fold indices whose weight files are loaded during prediction.
    FOLDS = range(4)
    # Index into EFNS selecting the EfficientNet variant (7 -> B7).
    EFFNET = 7
    # Directory containing the per-fold weight files.
    PRETRAINED_MODEL_PATH = f"../input/g2net-effnetb{EFFNET}"
    

Running on 1 replica


## Data pipeline, model, and prediction functions

In [7]:
def read_labeled_tfrecord(example):
    """Parse one serialized labeled record into an ``(image, target)`` pair.

    Args:
        example: a serialized ``tf.train.Example`` carrying the features
            ``image`` (string), ``image_id`` (string) and ``target`` (int64).

    Returns:
        tuple: the image as produced by ``prepare_image`` and the target cast
        to float32 and reshaped to shape ``[1]``. ``image_id`` is parsed but
        not returned.
    """
    feature_spec = {
        name: tf.io.FixedLenFeature([], dtype)
        for name, dtype in (
            ("image", tf.string),
            ("image_id", tf.string),
            ("target", tf.int64),
        )
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    image = prepare_image(parsed["image"])
    target = tf.cast(parsed["target"], tf.float32)
    return image, tf.reshape(target, [1])

def read_unlabeled_tfrecord(example, return_image_id):
    """Parse one serialized unlabeled record into an ``(image, id_or_zero)`` pair.

    Args:
        example: a serialized ``tf.train.Example`` carrying the string
            features ``image`` and ``image_id``.
        return_image_id: when truthy the second element is the record's
            ``image_id``; otherwise it is the constant ``0``.

    Returns:
        tuple: the image as produced by ``prepare_image`` and either the image
        id or ``0``.
    """
    feature_spec = {
        name: tf.io.FixedLenFeature([], tf.string)
        for name in ("image", "image_id")
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    image = prepare_image(parsed["image"])
    if return_image_id:
        return image, parsed["image_id"]
    return image, 0

def prepare_image(img, dim=PARAMS.IMG_SIZES):
    """Decode a PNG byte string into a ``[dim, dim, 3]`` float32 tensor.

    Args:
        img: encoded PNG bytes.
        dim: target height and width; the default is read from
            ``PARAMS.IMG_SIZES`` once, when this function is defined.

    Returns:
        The decoded 3-channel image, resized to ``dim`` x ``dim``, cast to
        float32 and divided by 255.
    """
    decoded = tf.image.decode_png(img, channels=3)
    resized = tf.image.resize(decoded, size=(dim, dim))
    scaled = tf.cast(resized, tf.float32) / 255.0
    # The reshape pins the static shape so downstream layers see fixed dimensions.
    return tf.reshape(scaled, [dim, dim, 3])

In [8]:
def get_dataset(files,shuffle=False,repeat=False,
               labeled=True,return_image_ids=True,batch_size=16,dim=PARAMS.IMG_SIZES,
               cache=False):
    """Build a batched, prefetched ``tf.data`` pipeline over TFRecord files.

    Args:
        files: TFRecord file path(s) to read.
        shuffle: shuffle serialized records with a 2048-element buffer and
            allow non-deterministic element order.
        repeat: repeat the dataset indefinitely.
        labeled: parse records with ``read_labeled_tfrecord`` (image, target);
            otherwise with ``read_unlabeled_tfrecord``.
        return_image_ids: only used when ``labeled`` is False; forwarded to
            ``read_unlabeled_tfrecord``.
        batch_size: per-replica batch size; the effective batch size is
            ``batch_size * PARAMS.REPLICAS``.
        dim: accepted for interface compatibility but not used here — the
            record parsers call ``prepare_image`` with its own default size.
        cache: cache the raw serialized records after the first pass. Off by
            default because the previous ``ds.cache()`` call discarded its
            result and therefore never cached anything; enabling it keeps the
            cached records around for reuse, so check memory before turning it on.

    Returns:
        tf.data.Dataset: batches of parsed elements, prefetched with AUTOTUNE.
    """
    ds = tf.data.TFRecordDataset(files, num_parallel_reads=PARAMS.AUTO)
    if cache:
        # Dataset transformations return a new dataset; the result must be kept.
        ds = ds.cache()

    if repeat:
        ds = ds.repeat()

    if shuffle:
        ds = ds.shuffle(1024 * 2)
        opt = tf.data.Options()
        # Order does not matter once shuffled, so let tf.data trade determinism for speed.
        opt.experimental_deterministic = False
        ds = ds.with_options(opt)

    if labeled:
        ds = ds.map(read_labeled_tfrecord, num_parallel_calls=PARAMS.AUTO)
    else:
        ds = ds.map(lambda example: read_unlabeled_tfrecord(example, return_image_ids),
                    num_parallel_calls=PARAMS.AUTO)
    ds = ds.batch(batch_size * PARAMS.REPLICAS)
    ds = ds.prefetch(PARAMS.AUTO)
    return ds

In [9]:
# EfficientNet constructors B0..B7, ordered so that EFNS[i] builds variant B{i}.
EFNS = [getattr(efn, f"EfficientNetB{variant}") for variant in range(8)]

def build_model(size, path):
    """Assemble and compile the binary classifier on an EfficientNet backbone.

    Args:
        size: height and width of the square RGB input.
        path: passed to the backbone as ``weights`` (``None`` builds it
            without loading any weights).

    Returns:
        tf.keras.Model: backbone -> global average pooling -> dropout (rate 0)
        -> single sigmoid unit, compiled with Adam, binary cross-entropy and
        an AUC metric.
    """
    input_shape = (size, size, 3)
    image_in = tf.keras.layers.Input(shape=input_shape)
    backbone = EFNS[PARAMS.EFFNET](input_shape=input_shape, weights=path, include_top=False)

    features = backbone(image_in)
    head_layers = (
        tf.keras.layers.GlobalAvgPool2D(),
        tf.keras.layers.Dropout(0.),
        tf.keras.layers.Dense(1, activation="sigmoid"),
    )
    for layer in head_layers:
        features = layer(features)

    model = tf.keras.Model(inputs=image_in, outputs=features)
    model.compile(
        optimizer="adam",
        loss=tf.keras.losses.BinaryCrossentropy(),
        metrics=["AUC"],
    )
    return model

In [10]:
def predict(paths, is_label=False):
    """Run every fold model over ``paths`` and optionally collect the labels.

    Args:
        paths: TFRecord file paths to score.
        is_label: when True, the records are additionally read with the
            labeled parser and their targets are returned.

    Returns:
        tuple: ``(pred, ids)``. ``pred`` is a list with one flattened
        prediction array per fold in ``PARAMS.FOLDS``. ``ids`` is a 1-D array
        of targets when ``is_label`` is True, otherwise an empty list.
    """
    pred = []
    ids = []
    eval_batch_size = PARAMS.BATCH_SIZES * 2

    ds = get_dataset(paths,labeled=False,return_image_ids=False,
                    repeat=False,shuffle=False,dim=PARAMS.IMG_SIZES,batch_size=eval_batch_size)
    for fold in PARAMS.FOLDS:
        print(f"{'#'*50} --> FOLD {fold + 1}")
        start_time = perf_counter()

        # Drop the previous fold's graph/state before building the next model.
        K.clear_session()

        with PARAMS.STRATEGY.scope():
            model = build_model(PARAMS.IMG_SIZES, None)
            print("\t --> Loading Model")
            model.load_weights(f"{PARAMS.PRETRAINED_MODEL_PATH}/fold-{fold}.h5")
            print("\t <-- Model loaded")
        print("\t-->Start Predict...")
        pred.append(model.predict(ds, verbose=1).flatten())
        print("\t<--Predict finished.")
        print(f"<-- FOLD {fold + 1} finished, duration = {perf_counter() - start_time}s")

    if is_label:
        # Labels do not depend on the fold, so read them once after the loop.
        # The pipeline is built exactly like the prediction one (no shuffle) so
        # targets should line up with the predictions — assumes tf.data yields
        # the same order for identical pipelines; TODO confirm.
        # NOTE: this pass also decodes the images, because the labeled parser does.
        labeled_ds = get_dataset(paths,labeled=True,return_image_ids=False,
                      repeat=False,shuffle=False,dim=PARAMS.IMG_SIZES,batch_size=eval_batch_size)
        target_batches = [np.ravel(batch_target.numpy()) for _, batch_target in labeled_ds]
        # np.concatenate rejects an empty list, so handle "no records" explicitly.
        ids = np.concatenate(target_batches) if target_batches else np.array([], dtype=np.float32)

    return pred, ids

## Inference

In [11]:
# Score the training TFRecords with every fold model; is_label=True asks
# predict() to also return the targets as its second value.
# NOTE(review): files_train_g is only assigned when KAGGLE is False.
pred, target = predict(np.array(files_train_g), True)

################################################## --> FOLD 1
	 --> Loading Model
	 <-- Model loaded
	-->Start Predict...
    173/Unknown - 100s 534ms/step

KeyboardInterrupt: 

In [None]:
# Read sample_submission.csv from the competition directory.
sub = pd.read_csv(f"{PARAMS.COMPETITION_DATASET_PATH}/sample_submission.csv")
# NOTE(review): `ids` is not defined at notebook scope in the visible cells
# (predict() was unpacked into `pred` and `target`), so this line would raise
# NameError on a fresh run — TODO confirm which values belong in which column.
sub["id"] = ids

In [None]:
# Write the frame to submission.csv without the pandas index column.
sub.to_csv("submission.csv", index=False)