感谢大佬[h053473666](https://www.kaggle.com/h053473666)的数据集[https://www.kaggle.com/h053473666/siimcovid19-512-img-png-600-study-png](https://www.kaggle.com/h053473666/siimcovid19-512-img-png-600-study-png)

In [1]:
!pip install efficientnet -q



In [2]:
import os

import numpy as np
import pandas as pd
from kaggle_datasets import KaggleDatasets
from sklearn.model_selection import train_test_split
import tensorflow as tf
from sklearn.model_selection import KFold

In [3]:
# ---- Experiment configuration ------------------------------------------------
SEED = 0          # random seed used when shuffling rows into the K folds
FOLDS = 5         # number of cross-validation folds
# Multiplied by the replica count further down to size each batch.
# NOTE(review): the run recorded in this notebook ended with a
# ResourceExhaustedError; a smaller value may be needed - confirm.
BATCH_SIZES = 18
EPOCHS = 40
ls = 0.015        # label smoothing (helps against overfitting); set to 0 to disable
# Candidate input resolutions; index 8 (512) is the one selected below.
IMAGE_SIZE = (224, 240, 260, 300, 380, 456, 528, 600, 512)

# Summary of the settings above, printed once for the run log.
SIIM_para = {
    'SEED': SEED,
    'FOLDS': FOLDS,
    'BATCH_SIZES': BATCH_SIZES,
    'EPOCHS': EPOCHS,
    'IMAGE_SIZE': IMAGE_SIZE[8],
}
print('SIIM_parameters: {}'.format(SIIM_para))

SIIM_parameters: {'SEED': 0, 'FOLDS': 5, 'BATCH_SIZES': 18, 'EPOCHS': 40, 'IMAGE_SIZE': 512}


In [4]:
def get_lr_callback():
    """Build the learning-rate schedule callback used for training.

    The schedule is a function of the epoch index:

    * epochs 0-2: linear warm-up starting at 1e-4 and rising towards 2e-4;
    * epochs 3-5: held at the peak rate of 2e-4;
    * afterwards: exponential decay (factor 0.4 per epoch) from the peak
      towards a floor of 1e-7.

    Returns:
        A ``tf.keras.callbacks.LearningRateScheduler`` wrapping the schedule.
    """
    base_lr = 1e-4        # learning rate at epoch 0
    peak_lr = 2e-4        # maximum learning rate
    floor_lr = 1e-7       # asymptotic minimum learning rate
    warmup_epochs = 3     # epochs spent ramping up to the peak
    hold_epochs = 3       # epochs spent at the peak
    decay_rate = .4       # per-epoch multiplicative decay after the hold phase

    def lrfn(epoch):
        # Warm-up phase.
        if epoch < warmup_epochs:
            return (peak_lr - base_lr) / warmup_epochs * epoch + base_lr
        # Hold phase.
        if epoch < warmup_epochs + hold_epochs:
            return peak_lr
        # Decay phase.
        return (peak_lr - floor_lr) * decay_rate**(epoch - warmup_epochs - hold_epochs) + floor_lr

    return tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=False)

In [5]:
def auto_select_strategy():
    """Return a TPU distribution strategy if possible, else the default one.

    Tries to resolve, connect to and initialise a TPU cluster.  If any of
    those steps raises ``ValueError``, falls back to
    ``tf.distribute.get_strategy()``.  The number of replicas of the chosen
    strategy is printed in both cases.

    Returns:
        The selected ``tf.distribute`` strategy object.
    """
    try:
        resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
        tf.config.experimental_connect_to_cluster(resolver)
        tf.tpu.experimental.initialize_tpu_system(resolver)
        strategy = tf.distribute.experimental.TPUStrategy(resolver)
        print("Running on TPU:", resolver.master())
    except ValueError:
        # TPU setup failed with ValueError: use whatever strategy is current.
        strategy = tf.distribute.get_strategy()

    print(f"Running on {strategy.num_replicas_in_sync} replicas")
    return strategy

In [6]:
def build_decoder(with_labels=True, target_size=(256, 256), ext='jpg'):
    """Create a function that loads an image file into a float tensor.

    Args:
        with_labels: if truthy, the returned function takes ``(path, label)``
            and returns ``(image, label)``; otherwise it takes only ``path``.
        target_size: size passed to ``tf.image.resize``.
        ext: image format, one of ``'png'``, ``'jpg'`` or ``'jpeg'``.

    Returns:
        The decoding function described above.  Calling it with an
        unsupported ``ext`` raises ``ValueError``.
    """
    def decode(path):
        raw = tf.io.read_file(path)

        # Pick the decoder matching the configured extension.
        if ext == 'png':
            decode_image = tf.image.decode_png
        elif ext in ('jpg', 'jpeg'):
            decode_image = tf.image.decode_jpeg
        else:
            raise ValueError("Image extension not supported")

        pixels = decode_image(raw, channels=3)
        # Scale to [0, 1] floats before resizing.
        scaled = tf.cast(pixels, tf.float32) / 255.0
        return tf.image.resize(scaled, target_size)

    def decode_with_labels(path, label):
        return decode(path), label

    if with_labels:
        return decode_with_labels
    return decode


def build_augmenter(with_labels=True):
    """Create a function that applies random geometric augmentations.

    Each transform is gated by its own uniform random draw:
    horizontal flip (draw > 0.5), vertical flip (draw > 0.4) and a single
    90-degree rotation (draw > 0.5).

    Args:
        with_labels: if truthy, the returned function takes ``(img, label)``
            and returns ``(augmented_img, label)``; otherwise it takes and
            returns only the image.

    Returns:
        The augmentation function described above.
    """
    def augment(img):
        flip_lr_draw = tf.random.uniform(())
        if flip_lr_draw > 0.5:
            img = tf.image.flip_left_right(img)

        flip_ud_draw = tf.random.uniform(())
        if flip_ud_draw > 0.4:
            img = tf.image.flip_up_down(img)

        rotate_draw = tf.random.uniform(())
        if rotate_draw > 0.5:
            img = tf.image.rot90(img, k=1)

        # Colour jitter (saturation / contrast / brightness) is currently
        # disabled.
        return img

    def augment_with_labels(img, label):
        return augment(img), label

    if with_labels:
        return augment_with_labels
    return augment


def build_dataset(paths, labels=None, bsize=128, cache=True,
                  decode_fn=None, augment_fn=None,
                  augment=True, repeat=True, shuffle=1024,
                  cache_dir=""):
    """Assemble a batched ``tf.data`` input pipeline from image paths.

    Stages are applied in this order: decode -> cache -> augment -> repeat
    -> shuffle -> batch -> prefetch.

    Args:
        paths: image file paths.
        labels: optional labels aligned with ``paths``; when given, the
            dataset yields ``(image, label)`` pairs.
        bsize: batch size.
        cache: if truthy, cache the decoded elements.
        decode_fn: decoding function; defaults to ``build_decoder`` with its
            default size and extension.
        augment_fn: augmentation function; defaults to ``build_augmenter``.
        augment: if truthy, apply ``augment_fn``.
        repeat: if truthy, repeat the dataset indefinitely.
        shuffle: shuffle buffer size; a falsy value disables shuffling.
        cache_dir: passed unchanged to ``Dataset.cache``; when non-empty and
            ``cache`` is ``True`` the directory is created first.

    Returns:
        The resulting ``tf.data.Dataset``.
    """
    has_labels = labels is not None

    if cache is True and cache_dir != "":
        os.makedirs(cache_dir, exist_ok=True)

    # Fall back to the default decoder / augmenter matching the label mode.
    if decode_fn is None:
        decode_fn = build_decoder(has_labels)
    if augment_fn is None:
        augment_fn = build_augmenter(has_labels)

    AUTO = tf.data.experimental.AUTOTUNE
    source = (paths, labels) if has_labels else paths

    dset = tf.data.Dataset.from_tensor_slices(source)
    dset = dset.map(decode_fn, num_parallel_calls=AUTO)
    if cache:
        # Cache after decoding so augmentation stays random on every pass.
        dset = dset.cache(cache_dir)
    if augment:
        dset = dset.map(augment_fn, num_parallel_calls=AUTO)
    if repeat:
        dset = dset.repeat()
    if shuffle:
        dset = dset.shuffle(shuffle)

    return dset.batch(bsize).prefetch(AUTO)

In [7]:
# Name of the Kaggle dataset the training images are read from.
COMPETITION_NAME = "siimcovid19-512-img-png-600-study-png"
# TPU strategy when a TPU can be set up, otherwise TensorFlow's current strategy.
strategy = auto_select_strategy()
# NOTE: despite its name this is replica count x BATCH_SIZES; it is passed as
# the batch size (`bsize`) when the datasets are built.
REPLICAS = strategy.num_replicas_in_sync * BATCH_SIZES
# GCS path of the dataset; image paths for training are built from it.
GCS_DS_PATH = KaggleDatasets().get_gcs_path(COMPETITION_NAME)

Running on TPU: grpc://10.0.0.2:8470
Running on 8 replicas


In [8]:
# Local /kaggle/input directory of the image dataset.
# NOTE(review): not referenced in the visible cells - confirm it is still needed.
load_dir = f"/kaggle/input/{COMPETITION_NAME}/"
# Training metadata table; later cells read its `id` and `StudyInstanceUID` columns.
df = pd.read_csv('../input/siimcov19csv/train.csv')
# Label of the fifth column, used as the training target.
# NOTE(review): this is a single column label, not a list, despite the plural
# name - confirm which column it is.
label_cols = df.columns[4]

In [9]:
def build_model(dim=512):
    """Build and compile the binary image classifier.

    Architecture: ImageNet-initialised InceptionResNetV2 backbone (no top)
    -> global average pooling -> Dense(1024, relu) -> Dropout(0.2)
    -> Dense(1, sigmoid).  Compiled with Adam (learning rate 0.001), binary
    cross-entropy using the module-level label-smoothing value ``ls``, and
    the AUC metric.  The model summary is printed as a side effect.

    Args:
        dim: height and width of the square 3-channel input.

    Returns:
        The compiled ``tf.keras.Model``.
    """
    input_shape = (dim, dim, 3)

    inputs = tf.keras.layers.Input(shape=input_shape)
    backbone = tf.keras.applications.InceptionResNetV2(
        input_shape=input_shape, weights='imagenet', include_top=False)

    features = backbone(inputs)
    pooled = tf.keras.layers.GlobalAveragePooling2D()(features)

    # Classification head.
    hidden = tf.keras.layers.Dense(1024, activation='relu')(pooled)
    hidden = tf.keras.layers.Dropout(0.2)(hidden)
    probability = tf.keras.layers.Dense(1, activation='sigmoid')(hidden)

    model = tf.keras.Model(inputs=inputs, outputs=probability)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=ls),
        metrics=['AUC'],
    )
    model.summary()

    return model

In [10]:
# Assign every row of `df` to one validation fold (column `fold`, 0..FOLDS-1).
#
# KFold.split() ignores its `groups` argument, so splitting the rows directly
# would scatter the images of one study across training and validation folds.
# Instead, split the *unique* study IDs and give every row the fold of its
# study, so a study is never on both sides of a split.
study_ids = df['StudyInstanceUID'].unique()
skf = KFold(n_splits=FOLDS, shuffle=True, random_state=SEED)
df['fold'] = -1
for fold, (_, val_study_pos) in enumerate(skf.split(study_ids)):
    # `val_study_pos` are positions into `study_ids`, not row indices of `df`.
    in_val_fold = df['StudyInstanceUID'].isin(study_ids[val_study_pos])
    df.loc[in_val_fold, 'fold'] = fold

In [11]:
# Cross-validated training: for every fold, train a fresh model on the other
# folds, keep the checkpoint with the lowest validation loss in `model{i}.h5`
# and write the per-epoch metrics to `history{i}.csv`.

# The input resolution and both decoders do not depend on the fold, so they
# are built once instead of on every iteration.
img_size = IMAGE_SIZE[8]
decoder = build_decoder(with_labels=True, target_size=(img_size, img_size), ext='png')
# Label-free decoder; not used by the training loop itself.
test_decoder = build_decoder(with_labels=False, target_size=(img_size, img_size), ext='png')

for i in range(FOLDS):  # follow the configured fold count rather than a literal 5
    # A new model is created on every iteration; reset Keras' global state so
    # what the previous fold left behind does not keep accumulating.
    tf.keras.backend.clear_session()

    # Rows of the current fold validate; all remaining rows train.
    in_valid = df['fold'] == i
    valid_paths = GCS_DS_PATH + '/image/' + df.loc[in_valid, 'id'] + '.png'
    train_paths = GCS_DS_PATH + '/image/' + df.loc[~in_valid, 'id'] + '.png'
    valid_labels = df.loc[in_valid, label_cols].values
    train_labels = df.loc[~in_valid, label_cols].values

    # Training data uses build_dataset's defaults (cache, augment, repeat, shuffle).
    train_dataset = build_dataset(
        train_paths, train_labels, bsize=REPLICAS, decode_fn=decoder
    )

    # Validation data is read once per epoch, in order and without augmentation.
    valid_dataset = build_dataset(
        valid_paths, valid_labels, bsize=REPLICAS, decode_fn=decoder,
        repeat=False, shuffle=False, augment=False
    )

    # A 1-D label array means a single target column.
    n_labels = train_labels.shape[1] if train_labels.ndim > 1 else 1

    with strategy.scope():
        model = build_model(dim=img_size)

    # The training dataset repeats forever, so the epoch length must be given
    # explicitly; REPLICAS is the batch size used to build the datasets.
    steps_per_epoch = train_paths.shape[0] // REPLICAS
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        f'model{i}.h5', save_best_only=True, monitor='val_loss', mode='min')

    history = model.fit(
        train_dataset,
        epochs=EPOCHS,
        verbose=1,
        callbacks=[checkpoint, get_lr_callback()],
        steps_per_epoch=steps_per_epoch,
        validation_data=valid_dataset)

    hist_df = pd.DataFrame(history.history)
    hist_df.to_csv(f'history{i}.csv')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_resnet_v2/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 512, 512, 3)]     0         
_________________________________________________________________
inception_resnet_v2 (Functio (None, 14, 14, 1536)      54336736  
_________________________________________________________________
global_average_pooling2d (Gl (None, 1536)              0         
_________________________________________________________________
dense (Dense)                (None, 1024)              1573888   
_________________________________________________________________
dropout (Dropout)            (None, 1024)              0         
_________________________________________________________________
dense_1 (Dense)           

ResourceExhaustedError: 9 root error(s) found.
  (0) Resource exhausted: {{function_node __inference_train_function_1123352}} Attempting to reserve 13.16G at the bottom of memory. That was not possible. There are 14.00G free, 0B reserved, and 13.16G reservable.
	 [[{{node cluster_train_function/_execute_3_0}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

  (1) Resource exhausted: {{function_node __inference_train_function_1123352}} Attempting to reserve 13.16G at the bottom of memory. That was not possible. There are 14.00G free, 0B reserved, and 13.16G reservable.
	 [[{{node cluster_train_function/_execute_5_0}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

  (2) Resource exhausted: {{function_node __inference_train_function_1123352}} Attempting to reserve 13.16G at the bottom of memory. That was not possible. There are 14.00G free, 0B reserved, and 13.16G reservable.
	 [[{{node cluster_train_function/_execute_4_0}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

  (3) Resource exhausted: {{function_node __inference_train_function_1123352}} Attempting to reserve 13.16G at the bottom of memory. That was not possible. There are 14.00G free, 0B reserved, and 13.16G reservable.
	 [[{{node cluster_train_function/_execute_1_0}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

  (4) Resource exhausted: {{function_node __inference_train_function_1123352}} Attempting to reserve 13.16G at the bottom of memory. That was not possible. There are 14.00G free, 0B reserved, and 13.16G reservable.
	 [[{{node cluster_train_function/_execute_7_0}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

  (5) Resource exhausted: {{function_node __inference_train_function_1123352}} Attempting to reserve 13.16G at the bottom of memory. That was not possible. There are 14.00G free, 0B reserved, and 13.16G reservable.
	 [[{{node cluster_train_function/_execute_6_0}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

  (6) Resource exhausted: {{function_node __inference_train_function_1123352}} Attempting to reserve 13.16G at the bottom of memory. That was not possible. There are 14.00G free, 0B reserved, and 13.16G reservable.
	 [[{{node cluster_train_function/_execute_2_0}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[cluster_train_function/_execute_6_0/_3451]]
Hint: If you want to see a list of allocated tensors when OOM ... [truncated]