### Imports

In [29]:
# Libraries
import os
from importlib import reload

import tensorflow as tf
import keras
import sklearn.model_selection

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Scripts
from helpers import data, runners, utils, generators

In [2]:
# Should be v2.10
!pip show tensorflow 

Name: tensorflow
Version: 2.13.0
Summary: TensorFlow is an open source machine learning framework for everyone.
Home-page: https://www.tensorflow.org/
Author: Google Inc.
Author-email: packages@tensorflow.org
License: Apache 2.0
Location: /Users/antrikshdhand/Documents/github/thesis-ml/ml/virt/lib/python3.9/site-packages
Requires: tensorflow-macos
Required-by: 


In [3]:
# Should be v2.10
!pip show keras 

Name: keras
Version: 2.13.1
Summary: Deep learning for humans.
Home-page: https://keras.io/
Author: Keras team
Author-email: keras-users@googlegroups.com
License: Apache 2.0
Location: /Users/antrikshdhand/Documents/github/thesis-ml/ml/virt/lib/python3.9/site-packages
Requires: 
Required-by: tensorflow-macos


In [4]:
# Re-import the local helper modules so that edits made to them on disk are
# picked up without restarting the kernel.
for helper_module in (runners, data, utils):
    reload(helper_module)

# Kept as the final expression so the cell still echoes the reloaded module.
reload(generators)

<module 'helpers.generators' from '/Users/antrikshdhand/Documents/github/thesis-ml/ml/main/helpers/generators.py'>

In [5]:
# Set TensorFlow's C++ minimum log level for this process.
# NOTE(review): TensorFlow is imported before this cell runs — confirm the
# setting still takes effect when applied this late.
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '1'})

# Report whether TensorFlow can see a GPU
utils.check_gpu_use()

GPU DETECTED ✓
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [6]:
# Seed every RNG the notebook relies on, not just NumPy's.
# `keras.utils.set_random_seed` seeds Python's `random`, NumPy and TensorFlow in
# one call, so weight initialisation is reproducible as well as NumPy-based
# shuffling.
SEED = 42
keras.utils.set_random_seed(SEED)

## `main`

In [7]:
# Number of epochs passed to `model.fit`; also the divisor used when computing
# `steps_per_epoch` in the training cell.
NUM_EPOCHS = 5
# Batch size handed to the N2NDeepShipGenerator instance(s).
DATA_BATCH_SIZE = 16
# NOTE(review): the generators already batch at DATA_BATCH_SIZE, and Keras
# advises against a separate `batch_size` for generator inputs — confirm whether
# this value is still needed.
GPU_BATCH_SIZE = 4

### Load data

In [8]:
# Index every .mat segment under the dataset root. `ships` is a mapping keyed by
# ship name whose values support `len()`.
all_files_df, ships = data.get_dataset_info(
    path_to_root="../data/deepship_baseline_unnorm_mat",
    ext='mat',
)

# Keep only the ships whose entry in `ships` has more than one element
ships_multiple_recordings = {
    ship_name: recordings
    for ship_name, recordings in ships.items()
    if len(recordings) > 1
}

# Row mask: segments belonging to one of the retained ships
belongs_to_kept_ship = all_files_df["ship_name"].isin(ships_multiple_recordings)
multiple_recordings_df = all_files_df[belongs_to_kept_ship]

multiple_recordings_df

Unnamed: 0,ship_name,class,file_path,date,seg
0,SEA_IMP,Tug,../data/deepship_baseline_unnorm_mat/Tug/SEA_I...,20171118,158
1,SEASPAN_EAGLE,Tug,../data/deepship_baseline_unnorm_mat/Tug/SEASP...,20171215,11
3,SEASPAN_RAVEN,Tug,../data/deepship_baseline_unnorm_mat/Tug/SEASP...,20171118,198
4,SEA_IMP,Tug,../data/deepship_baseline_unnorm_mat/Tug/SEA_I...,20171201,154
5,SEASPAN_EAGLE,Tug,../data/deepship_baseline_unnorm_mat/Tug/SEASP...,20171202,178
...,...,...,...,...,...
53493,KIRKEHOLMEN,Tanker,../data/deepship_baseline_unnorm_mat/Tanker/KI...,20170822,107
53495,CHAMPION_ISTRA,Tanker,../data/deepship_baseline_unnorm_mat/Tanker/CH...,20171126,70
53498,CHAMPION_CORNELIA,Tanker,../data/deepship_baseline_unnorm_mat/Tanker/CH...,20160829,12
53500,CHEMBULK_NEW,Tanker,../data/deepship_baseline_unnorm_mat/Tanker/CH...,20160531,128


In [9]:
# # Get train-validate-test splits

# train_df, test_df = sklearn.model_selection.train_test_split(
#     multiple_recordings_df,
#     test_size=0.2,
#     random_state=100,
#     shuffle=True
# )

# val_df, test_df = sklearn.model_selection.train_test_split(
#     test_df,
#     test_size=0.5,
#     random_state=100,
#     shuffle=True
# )

In [None]:
# Initialise generators
train_gen = generators.N2NDeepShipGenerator(
    multiple_recordings_df,
    'mat',
    'Ptrans',
    batch_size=DATA_BATCH_SIZE,
    shuffle=True,
    conv_channel=True
)

# val_gen = generators.N2NDeepShipGenerator(
#     val_df,
#     'mat',
#     'Ptrans',
#     batch_size=DATA_BATCH_SIZE,
#     shuffle=True,
#     conv_channel=True
# )

# test_gen = generators.N2NDeepShipGenerator(
#     test_df,
#     'mat',
#     'Ptrans',
#     batch_size=DATA_BATCH_SIZE,
#     shuffle=True,
#     conv_channel=True
# )

### Initialise model

In [None]:
from models import irfan_2020, unet_n2n

# Build the denoising network for single-channel 192x192 inputs.
# model = irfan_2020.get_irfan_model(input_shape=(192, 192, 1))
model = unet_n2n.get_unet_model(input_shape=(192, 192, 1))

# The metric functions live in `helpers.utils`; a bare `ssim` is not defined in
# this notebook and raised a NameError.
# PSNR is tracked as well because the training-curve cell plots PSNR from
# `history` — NOTE(review): confirm the history key that
# `utils.get_psnr_and_loss_curves` expects.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.00001, beta_1=0.9, beta_2=0.99, epsilon=1e-8),
    loss=keras.losses.MeanSquaredError(),
    metrics=[utils.psnr, utils.ssim]
)

model.summary()

### Train model

In [None]:
# Directory that receives the training log and the training backups.
RUN_DIR = "models/saved/diff_spec_denoiser_05122024/unet"

# Create the directory up front so the CSV log can be opened on a fresh checkout.
os.makedirs(RUN_DIR, exist_ok=True)

# Per-epoch metrics are written to a CSV file.
# NOTE(review): with append=False the log is overwritten on every `fit` call,
# including a run resumed through BackupAndRestore — confirm this is intended.
csv_logger = keras.callbacks.CSVLogger(
    filename=f"{RUN_DIR}/training.log",
    separator=",",
    append=False
)

# Keeps a backup of the training state so an interrupted `fit` can resume.
backup_callback = keras.callbacks.BackupAndRestore(
    backup_dir=f"{RUN_DIR}/tmp"
)

In [None]:
# Train the model.
# `batch_size` is deliberately not passed: the generators are constructed with
# their own batch size, and the Keras documentation states that `batch_size`
# must not be specified for generator / keras.utils.Sequence inputs.
# NOTE(review): `steps_per_epoch` is the generator's epoch length divided by
# NUM_EPOCHS, so each epoch runs only 1/NUM_EPOCHS of that length — confirm that
# spreading one pass over all epochs is intended.
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=NUM_EPOCHS,
    steps_per_epoch=train_gen.get_epoch_length() // NUM_EPOCHS,
    callbacks=[csv_logger, backup_callback],
    verbose=2
)

In [None]:
# Evaluate the trained model on the test generator.
# `batch_size` is deliberately not passed: the generator already yields batches,
# and the Keras documentation states that `batch_size` must not be specified for
# generator / keras.utils.Sequence inputs.
evals = model.evaluate(
    test_gen,
    verbose=2
)

In [None]:
# Persist the trained model to disk.
saved_model_path = 'models/saved/diff_spec_denoiser_05122024/unet/unet_denoiser.keras'
model.save(saved_model_path)

### Evaluation, metrics, and visualising denoised output

In [None]:
# Plot the training curves from the fit history and save them as a PDF.
reload(utils)
fig = utils.get_psnr_and_loss_curves(history, together=True)

# `savefig` does not create missing directories, so make sure `img/` exists.
curves_dir = "models/saved/diff_spec_denoiser_05122024/unet/img"
os.makedirs(curves_dir, exist_ok=True)
fig.savefig(f"{curves_dir}/psnr_loss_curves.pdf")

In [None]:
# Pull one batch from the test generator and keep a handful of examples for the
# visualisation and metric cells.
num_samples = 5

first_test_batch = next(iter(test_gen))
sample_X, sample_y = first_test_batch

# Leading `num_samples` examples of the batch: network inputs and their targets
sample_inputs, sample_outputs = sample_X[:num_samples], sample_y[:num_samples]

# Run the model on the selected inputs to obtain the denoised outputs
denoised_outputs = model.predict(sample_inputs)

In [None]:
# Print the shapes of the model inputs and of the corresponding predictions.
for array_name, array in (
    ("sample_inputs", sample_inputs),
    ("denoised_outputs", denoised_outputs),
):
    print(f"Shape of {array_name}:", array.shape)

In [None]:
# Get evaluation metrics for the sample batch.
# Predictions are scored against the targets (`sample_outputs`), i.e. the same
# (y_true, y_pred) pairing that the compiled loss sees during fit/evaluate.
# Scoring against `sample_inputs` would instead measure how much the network
# changed its input.
mse_score = tf.keras.losses.MeanSquaredError()(sample_outputs, denoised_outputs).numpy()
psnr_score = utils.psnr(sample_outputs, denoised_outputs).numpy()
ssim_score = utils.ssim(sample_outputs, denoised_outputs).numpy()

print(f"MSE on sample batch: {mse_score}")
print(f"PSNR on sample batch: {psnr_score}")
print(f"SSIM on sample batch: {ssim_score}")

In [None]:
# Print the value range of every prediction to check that outputs are normalised
for sample_no, prediction in enumerate(denoised_outputs, start=1):
    lowest, highest = np.min(prediction), np.max(prediction)
    print(f"Sample {sample_no} - Min: {lowest}; Max: {highest}")

In [None]:
# Create a single figure: one row per sample, one column per spectrogram kind.
# squeeze=False keeps `axes` two-dimensional even when num_samples == 1.
fig, axes = plt.subplots(
    num_samples, 3,
    figsize=(14, num_samples * 3.5),
    sharex=True, sharey=True,
    squeeze=False
)

# The third column holds the training target; it is titled "Target" to match the
# per-sample figures and to avoid confusion with the "Denoised Output" column.
columns = (
    ("Input", sample_inputs),
    ("Denoised Output", denoised_outputs),
    ("Target", sample_outputs),
)

for col, (title, batch) in enumerate(columns):
    # Only the top row carries the column title
    axes[0, col].set_title(f"{title}\n", fontsize=18)
    for row in range(num_samples):
        axes[row, col].imshow(batch[row].squeeze(), cmap="viridis", aspect="auto")

fig.tight_layout(pad=1.5)

# `savefig` does not create missing directories, so make sure `img/` exists.
figure_dir = "models/saved/diff_spec_denoiser_05122024/unet/img"
os.makedirs(figure_dir, exist_ok=True)
fig.savefig(f"{figure_dir}/combined_spectrograms.pdf", bbox_inches="tight")

plt.show()

In [None]:
# Draw one three-panel figure per sample: network input, model prediction, and
# target spectrogram, left to right.
panels = (
    ("Input", sample_inputs),
    ("Denoised Output", denoised_outputs),
    ("Target", sample_outputs),
)

for sample_idx in range(num_samples):
    sample_fig, sample_axes = plt.subplots(1, 3, figsize=(12, 4))

    for panel_ax, (panel_title, batch) in zip(sample_axes, panels):
        panel_ax.imshow(batch[sample_idx].squeeze(), cmap="viridis", aspect="auto")
        panel_ax.set_title(panel_title)

    sample_fig.tight_layout()
    plt.show()