In [1]:
import pickle

with open('denoise_dataset.pkl', 'rb') as f:
    originals, noisy = pickle.load(f)

In [2]:
BATCH_SIZE = 32
LEARNING_RATE = 1e-4
MODEL_NAME = "denoiser_unet"
TRAIN_DATA_SIZE = 0.8

In [4]:
import tensorflow as tf

physical_devices = tf.config.list_physical_devices('GPU')
try:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
except:
  # Invalid device or cannot modify virtual devices once initialized.
    pass

tf.keras.mixed_precision.set_global_policy('mixed_float16')


dataset = tf.data.Dataset.from_tensor_slices((noisy, originals)).shuffle(1028).prefetch(tf.data.AUTOTUNE).batch(BATCH_SIZE)

train_size = int(TRAIN_DATA_SIZE * dataset.cardinality().numpy())

train_dataset = dataset.take(train_size)
test_dataset = dataset.skip(train_size)

assert dataset.cardinality() == train_dataset.cardinality() + test_dataset.cardinality()

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: Tesla V100-SXM2-16GB, compute capability 7.0


2022-05-08 14:05:31.623502: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-08 14:05:31.631541: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-05-08 14:05:31.632472: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-08 14:05:31.634204: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

In [11]:
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model, Sequential

class EncoderLayer(Layer):
    def __init__(self, n_layers, kernel_size, dropout_rate=0.2):
        super(EncoderLayer, self).__init__()
        self.conv = Conv1D(n_layers, kernel_size, activation='relu', padding='same')
        self.conv2 = Conv1D(n_layers*2, kernel_size, activation='relu', padding='same')
        self.conv3 = Conv1D(n_layers*2, kernel_size, activation='relu', padding='same')
        self.conv4 = Conv1D(n_layers, kernel_size, activation='relu', padding='same')

        self.pooling = MaxPool1D(2)

        self.bn = BatchNormalization()
        self.dropout = SpatialDropout1D(dropout_rate)

    def call(self, x):
        x = self.conv(x)
        x = self.conv2(x)
        x = self.conv3(x)
        skip = self.conv4(x)

        x = self.pooling(skip)

        x = self.bn(x)
        x = self.dropout(x)

        return x, skip

class DecoderLayer(Layer):
    def __init__(self, n_layers, kernel_size, dropout_rate=0.2):
        super(DecoderLayer, self).__init__()
        self.upconv = Conv1DTranspose(n_layers, kernel_size=kernel_size, strides=2, activation='relu', padding='same')
        self.conv = Conv1D(n_layers*2, kernel_size=kernel_size, activation='relu', padding='same')
        self.conv2 = Conv1D(n_layers, kernel_size, activation='relu', padding='same')
        self.bn = BatchNormalization()
        self.dropout = SpatialDropout1D(dropout_rate)

    def call(self, x):
        x = self.upconv(x)
        x = self.conv(x)
        x = self.conv2(x)
        x = self.bn(x)
        x = self.dropout(x)

        return x
    
class Autoencoder(Model):
    PAD = 250
    GAUSSIAN_STDDEV = 2/5500
    SPATIAL_DROPOUT_PROB = 0.2
    
    def __init__(self):
        super(Autoencoder, self).__init__()
        self.reshape = Reshape([5500, 1])
        self.noise = GaussianNoise(stddev=self.GAUSSIAN_STDDEV)
        self.normalize = Normalization()
        self.pad = ZeroPadding1D(self.PAD)

        self.encoder1 = EncoderLayer(512, 3, dropout_rate=self.SPATIAL_DROPOUT_PROB)
        self.encoder2 = EncoderLayer(256, 3, dropout_rate=self.SPATIAL_DROPOUT_PROB)
        self.encoder3 = EncoderLayer(128, 3, dropout_rate=self.SPATIAL_DROPOUT_PROB)
        self.encoder4 = EncoderLayer(64, 3, dropout_rate=self.SPATIAL_DROPOUT_PROB)

        self.latent_proj = Sequential([
            Conv1D(32, 1, activation='relu', padding='same'),
            Conv1D(64, 1, activation='relu', padding='same'),
            Conv1D(32, 1, activation='relu', padding='same')
        ])

        self.decoder1 = DecoderLayer(64, 4, dropout_rate=self.SPATIAL_DROPOUT_PROB)
        self.decoder2 = DecoderLayer(128, 2, dropout_rate=self.SPATIAL_DROPOUT_PROB)
        self.decoder3 = DecoderLayer(256, 2, dropout_rate=self.SPATIAL_DROPOUT_PROB)
        self.decoder4 = DecoderLayer(512, 2, dropout_rate=self.SPATIAL_DROPOUT_PROB)


        self.concat1 = Concatenate()
        self.concat2 = Concatenate()
        self.concat3 = Concatenate()
        self.concat4 = Concatenate()

        self.upsample = Conv1DTranspose(32, kernel_size=1, strides=2, activation='relu', padding='same')
        
        self.out = Sequential([
            Conv1D(64, kernel_size=3, activation='relu', padding='same'),
            Conv1D(64, kernel_size=3, activation='relu', padding='same'),
            Conv1D(32, kernel_size=3, activation='relu', padding='same'),
            Conv1D(1, kernel_size=1, activation='tanh', padding='same')
        ])

        self.crop = Cropping1D(self.PAD)
        

    def call(self, x):
        
        x = self.reshape(x) # [5500, 1]
        x = self.noise(x)
        x = self.normalize(x)

        x = self.pad(x) # [6000, 1]

        x, skip1 = self.encoder1(x) # [3000, 512], [6000, 512]
        x, skip2 = self.encoder2(x) # [1500, 256], [3000, 256]
        x, skip3 = self.encoder3(x) # [750, 128], [1500, 128]
        x, skip4 = self.encoder4(x) # [375, 64], [750, 64]

        latent = self.latent_proj(x) # [375, 32]

        x = self.decoder1(latent) # [750, 64]
        x = self.concat1([x, skip4]) # [750, 64+64]

        x = self.decoder2(x) # [1500, 128]
        x = self.concat2([x, skip3]) # [1500, 128+128]

        x = self.decoder3(x) # [3000, 256]
        x = self.concat3([x, skip2]) # [3000, 256+256]

        x = self.decoder4(x) # [6000, 512]
        x = self.concat4([x, skip1]) # [6000, 512+512]

        x = self.crop(x) # [5500, 1024]
        x = self.upsample(x) # [11000, 32]
        out = self.out(x) # [11000, 1]

        return out
# Add data aug
# ADD spatial dropout
    
model = Autoencoder()

In [None]:
import tensorflow as tf
import os
from datetime import datetime

optimizer = tf.keras.optimizers.Adam(LEARNING_RATE)

model.compile(optimizer=optimizer, loss='mse', 
            metrics=[tf.keras.metrics.MeanAbsoluteError()])


early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, min_delta=0.0001)
reduce_lr_callback = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=5, factor=0.3, min_lr=1e-6, min_delta=0.005)

logdir = os.path.join(f"{MODEL_NAME}_logs", datetime.now().strftime("%Y%m%d-%H%M%S"))
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=f'{MODEL_NAME}.tf', 
                                                         save_format="tf",
                                                         monitor='val_loss', save_best_only=True)

model.fit(train_dataset,
     validation_data=test_dataset,
     epochs=25,
     callbacks=[early_stopping_callback, checkpoint_callback, reduce_lr_callback]
)

In [13]:
model.evaluate(test_dataset)



[0.005875778384506702, 0.046353068202733994]

In [14]:
import pickle

with open('denoise_testset_noisy.pkl', 'rb') as f:
    eval_data = pickle.load(f)

In [21]:
eval_data_pred = model.predict(eval_data).squeeze(-1)

In [39]:
def format_predictions(preds):
    return "\n".join([ ";".join([ str(number) for number in p ]) for p in preds.tolist()])

In [43]:
with open("answer.txt", "w+") as f:
    f.write(format_predictions(eval_data_pred))