# loss_functions.py
# Loss functions for video frame prediction models (TensorFlow).
import tensorflow as tf
import numpy as np
def gradient_difference_loss(true, pred, alpha=2.0):
    """
    Computes the gradient difference loss (GDL) between two image batches.

    Per-pixel gradients of ground truth and prediction are compared with an
    l-alpha norm, separately for the vertical and horizontal direction, and
    the two directional means are averaged.

    :param true: ground truth images, Tensor of shape
                 (batch_size, frame_height, frame_width, num_channels)
    :param pred: predicted images, Tensor of the same shape as `true`
    :param alpha: exponent of the l-norm applied to the gradient differences
    :return: scalar Tensor holding the gradient difference loss
    """
    # NOTE(review): shapes of `true` and `pred` are assumed equal — the
    # original assert was commented out; confirm at the call site.
    # vertical gradient difference, raised to the alpha-th power
    grad_diff_vert = tf.pow(
        tf.abs(difference_gradient(true, vertical=True)
               - difference_gradient(pred, vertical=True)), alpha)
    # horizontal gradient difference, raised to the alpha-th power
    grad_diff_hor = tf.pow(
        tf.abs(difference_gradient(true, vertical=False)
               - difference_gradient(pred, vertical=False)), alpha)
    # average the two directional means; a plain float constant replaces the
    # deprecated tf.to_float(2) cast of a Python int
    return (tf.reduce_mean(grad_diff_vert) + tf.reduce_mean(grad_diff_hor)) / 2.0
def difference_gradient(image, vertical=True):
    """
    Absolute difference between neighbouring pixels along one spatial axis.

    :param image: Tensor of shape (batch_size, frame_height, frame_width, num_channels)
    :param vertical: if True compute the vertical (row-wise) gradient,
                     otherwise the horizontal (column-wise) one
    :return: Tensor of shape (:, frame_height-1, frame_width, :) if vertical,
             (:, frame_height, frame_width-1, :) otherwise
    """
    shape = tf.shape(image)
    if vertical:
        upper = image[:, 0:shape[1] - 1, :, :]
        lower = image[:, 1:shape[1], :, :]
        return tf.abs(upper - lower)
    else:
        left = image[:, :, 0:shape[2] - 1, :]
        right = image[:, :, 1:shape[2], :]
        return tf.abs(left - right)
def mean_squared_error(true, pred):
    """L2 distance between tensors true and pred.

    Args:
        true: the ground truth image.
        pred: the predicted image.

    Returns:
        mean squared error between ground truth and predicted image
        (sum of squared differences divided by the element count).
    """
    # tf.cast replaces the deprecated tf.to_float (removed in TF 2.x);
    # the computed value is identical.
    return tf.reduce_sum(tf.square(true - pred)) / tf.cast(tf.size(pred), tf.float32)
def peak_signal_to_noise_ratio(true, pred):
    """Image quality metric based on maximal signal power vs. power of the noise.

    Assumes pixel values in [0, 1], i.e. a peak signal value of 1.0.

    Args:
        true: the ground truth image.
        pred: the predicted image.

    Returns:
        peak signal to noise ratio (PSNR) in decibels.
    """
    mse = mean_squared_error(true, pred)
    # tf.log is the natural logarithm, so divide by log(10) to get log10
    return 10.0 * tf.log(1.0 / mse) / tf.log(10.0)
def kl_penalty(mu, sigma):
    """KL divergence of N(mu, sigma^2) from the standard normal prior N(0, 1).

    A small epsilon (1e-8) inside the log guards against log(0) when
    sigma collapses towards zero.
    """
    sigma_sq = tf.square(sigma)
    per_element = tf.square(mu) + sigma_sq - tf.log(1e-8 + sigma_sq) - 1
    return 0.5 * tf.reduce_sum(per_element)
def decoder_loss(frames_gen, frames_original, loss_fun):
    """Sum of pairwise losses between frames of frames_gen and frames_original.

    Args:
        frames_gen: list of length sequence_length of Tensors, each of shape
            (batch_size, frame_height, frame_width, num_channels)
        frames_original: Tensor of shape
            (batch_size, sequence_length, frame_height, frame_width, num_channels)
        loss_fun: loss function type, one of 'mse', 'vae', 'gdl', 'mse_gdl'

    Returns:
        loss: sum of the specified loss between ground truth and predicted
            frames over the whole sequence.

    Raises:
        ValueError: if loss_fun is not one of the supported loss types.
    """
    # Validate up front so an unknown loss type always raises, even for an
    # empty sequence. ValueError subclasses Exception, so existing callers
    # catching Exception keep working; the "funcion" typo is also fixed.
    if loss_fun not in ('mse', 'vae', 'gdl', 'mse_gdl'):
        raise ValueError('Unknown loss function type: %s' % loss_fun)
    loss = 0.0
    for i in range(len(frames_gen)):
        true_frame = frames_original[:, i, :, :, :]
        if loss_fun == 'mse' or loss_fun == 'vae':
            loss += mean_squared_error(true_frame, frames_gen[i])
        elif loss_fun == 'gdl':
            loss += gradient_difference_loss(true_frame, frames_gen[i])
        else:  # 'mse_gdl': fixed 0.4 / 0.6 weighting of GDL and MSE
            loss += 0.4 * gradient_difference_loss(true_frame, frames_gen[i]) \
                    + 0.6 * mean_squared_error(true_frame, frames_gen[i])
    return loss
def decoder_psnr(frames_gen, frames_original):
    """Sum of peak_signal_to_noise_ratio values over a frame sequence.

    Args:
        frames_gen: list of length sequence_length of Tensors, each of shape
            (batch_size, frame_height, frame_width, num_channels)
        frames_original: Tensor of shape
            (batch_size, sequence_length, frame_height, frame_width, num_channels)

    Returns:
        psnr: sum of PSNR between ground truth and predicted frames of the
            provided sequence.
    """
    total_psnr = 0.0
    for i, generated_frame in enumerate(frames_gen):
        total_psnr += peak_signal_to_noise_ratio(
            frames_original[:, i, :, :, :], generated_frame)
    return total_psnr
def composite_loss(original_frames, frames_pred, frames_reconst, loss_fun='mse',
                   encoder_length=5, decoder_future_length=5,
                   decoder_reconst_length=5, mu_latent=None, sigm_latent=None):
    """Combined future-prediction and reconstruction loss, plus an optional KL term.

    Args:
        original_frames: Tensor of shape
            (batch_size, sequence_length, frame_height, frame_width, num_channels)
        frames_pred: list of predicted future frames (decoder_future_length entries)
        frames_reconst: list of reconstructed input frames (decoder_reconst_length entries)
        loss_fun: loss function type passed to decoder_loss; 'vae' additionally
            adds a KL penalty on the latent distribution.
        encoder_length: number of frames fed to the encoder.
        decoder_future_length: number of future frames to predict.
        decoder_reconst_length: number of input frames to reconstruct.
        mu_latent: latent mean, required when loss_fun == 'vae'.
        sigm_latent: latent std deviation, required when loss_fun == 'vae'.

    Returns:
        loss: scalar Tensor, prediction loss + reconstruction loss
            (+ KL penalty for 'vae').
    """
    assert encoder_length <= decoder_reconst_length
    # future frames start right after the encoded prefix
    frames_original_future = original_frames[:, encoder_length:(encoder_length + decoder_future_length), :, :, :]
    # reconstruction targets are the last decoder_reconst_length encoded frames
    frames_original_reconst = original_frames[:, (encoder_length - decoder_reconst_length):encoder_length, :, :, :]
    pred_loss = decoder_loss(frames_pred, frames_original_future, loss_fun)
    reconst_loss = decoder_loss(frames_reconst, frames_original_reconst, loss_fun)
    if loss_fun == 'vae':
        # BUG FIX: the original asserted mu_latent twice and never
        # validated sigm_latent.
        assert mu_latent is not None and sigm_latent is not None
        return pred_loss + reconst_loss + kl_penalty(tf.squeeze(mu_latent), tf.squeeze(sigm_latent))
    return pred_loss + reconst_loss