In [None]:
from tensorflow import keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Conv1D, MaxPooling1D, concatenate, Input
from tensorflow.keras.optimizers.legacy import Adam
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize, LinearSegmentedColormap
from matplotlib.cm import ScalarMappable

In [None]:
# Load the network inputs and the regression targets from the Kaggle dataset
input_path = "/kaggle/input/infrasound5/input.json"
labels_path = "/kaggle/input/infrasound5/labels.json"

data = pd.read_json(input_path).values

# Every record holds two arrays: element 0 feeds atmo_input, element 1 feeds fft_input
atmo_input = np.array([np.array(record[0]) for record in data])
fft_input = np.array([np.array(record[1]) for record in data])

print(atmo_input.shape)
print(fft_input.shape)

labels = pd.read_json(labels_path).values
labels_output = np.array(labels)
# Column 0 is used as the altitude target and column 1 as the distance target
altitude_output = np.array([row[0] for row in labels_output])
distance_output = np.array([row[1] for row in labels_output])

print(labels_output.shape)

# Hold out 20% of the samples for validation (only a train/validation split is
# made here); the fixed random_state keeps the partition reproducible
split_arrays = train_test_split(
    atmo_input, fft_input, altitude_output, distance_output,
    test_size=0.2, random_state=42,
)
(atmo_train, atmo_val,
 fft_train, fft_val,
 altitude_train, altitude_val,
 distance_train, distance_val) = split_arrays

In [None]:
# Show where the validation sources sit in altitude/distance space.
# The targets are plotted as loaded; the optional 0-1 rescaling stays disabled:
#scaler = MinMaxScaler()
#scaled_labels = scaler.fit_transform(labels_output)
x, y = altitude_val, distance_val
plt.scatter(x, y, s=0.10)
plt.title("Distribution of Source Locations (Validation)")
plt.xlabel("Source Altitude (km)")
plt.ylabel("Source Distance (km)")
plt.savefig("Data Distribution Validation.svg")
plt.show()

**Distance**

In [None]:
# Two-branch regression network (model1), later fitted to the source distance

# Branch 1: the atmospheric input is only flattened, no learned layers
atmoInput = Input(shape=(51, 1))
x = atmoInput
atmostatsOutput = Flatten()(x)

# Branch 2: the FFT input goes through repeated Conv1D + MaxPooling1D stages
fftInput = Input(shape=(4096, 1))
numLayersFFT = 3
y = fftInput
for _ in range(numLayersFFT):
    y = Conv1D(64, kernel_size=4, strides=1, activation='relu')(y)
    y = MaxPooling1D(pool_size=8)(y)
fftOutput = Flatten()(y)

# Merge both branches and run the joint features through a stack of Dense layers
concatenatedInput = concatenate([atmostatsOutput, fftOutput])
numLayersFinal = 4
z = concatenatedInput
for _ in range(numLayersFinal):
    z = Dense(64, activation='relu')(z)

# Single linear unit: one regression value per sample
output = Dense(1)(z)

# Assemble and compile: MSE is the training loss, MAE is tracked as a metric
model1 = Model(inputs=[atmoInput, fftInput], outputs=output)
opt = Adam(learning_rate=0.001)
model1.compile(
    loss='mean_squared_error',
    optimizer=opt,
    metrics=['mean_absolute_error'],
)

model1.summary()

In [None]:
# Training schedule for the distance model; numEpochs and size stay as plain
# notebook variables because a later training cell reads them too
numEpochs = 200
size = 64  # mini-batch size

train_inputs = [atmo_train, fft_train]
val_inputs = [atmo_val, fft_val]

history1 = model1.fit(
    x=train_inputs,
    y=distance_train,
    batch_size=size,
    epochs=numEpochs,
    verbose=1,
    validation_data=(val_inputs, distance_val),
)

# Persist the fitted network in HDF5 format
model1.save("src_distance.h5")

In [None]:
# Per-epoch mean absolute error recorded during the distance-model fit
train_loss = history1.history['mean_absolute_error']
val_loss = history1.history['val_mean_absolute_error']

# Epochs are numbered from 1 on the x-axis
epochs = range(1, len(train_loss) + 1)

# Training curve in blue, validation curve in red
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epochs, train_loss, 'b-', label='Training Loss')
ax.plot(epochs, val_loss, 'r-', label='Validation Loss')
ax.set_title('Infrasonic Source Localization Results - Distance')
ax.set_xlabel('Epochs')
ax.set_ylabel('Mean Absolute Loss (km)')
ax.legend()

fig.savefig("Normal_Model_Distances.png")

plt.show()

In [None]:
# Write both MAE curves to disk as CSV text (the file names carry no extension)
for curve, filename in ((train_loss, "distance_train_loss"), (val_loss, "distance_val_loss")):
    pd.DataFrame(curve).to_csv(filename)

In [None]:
# Rebind model1 to the saved distance model stored under the Kaggle input
# directory; from here on model1 no longer refers to the network fitted above
distance_model_path = '/kaggle/input/distance/tensorflow2/distance/1/Distance_Model_Normal.h5'
model1 = keras.models.load_model(distance_model_path)

In [None]:
# Predict on the validation inputs and collapse the result to a 1-D array
raw_distance_predictions = model1.predict([atmo_val, fft_val])
valDistPredictions = raw_distance_predictions.flatten()
print(valDistPredictions)

In [None]:
# Absolute error of every validation distance prediction
mae_per_sample = np.abs(valDistPredictions - distance_val)

# Summary statistics, printed in the order mean, median, max
for summarise in (np.mean, np.median, np.max):
    print(summarise(mae_per_sample))

# Histogram with 10-wide bins whose edges run from 0 to 690
distance_bins = np.arange(0, 700, 10)
plt.hist(mae_per_sample, bins=distance_bins, edgecolor='black')
plt.title("Distribution of Validation Source Distance Errors")
plt.xlabel("Mean Absolute Loss (km)")
plt.ylabel("Count")
plt.savefig("Distance_Error_Distrib.svg")

In [None]:
# Linear colour normalisation spanning the observed range of per-sample errors
error_min, error_max = mae_per_sample.min(), mae_per_sample.max()
normalized_mae_per_sample = Normalize(vmin=error_min, vmax=error_max)

In [None]:
# Custom colour ramp for the per-sample distance error.
# Each anchor is a position in the normalised [0, 1] error range. They are written
# as k/600, so the km values in the comments are exact only when the normalised
# range is 0-600 km; with the min/max normalisation used here they scale with the
# observed error range.
colors = [
    (0.0, "green"),       # smallest error
    (20/600, "green"),    # ~20 km
    (50/600, "yellow"),   # ~50 km
    (100/600, "orange"),  # ~100 km
    (1.0, "red")          # largest error
]

custom_cmap = LinearSegmentedColormap.from_list("custom_cmap", colors, N=256)

# Scatter the validation sources, coloured by their distance error. The norm is
# passed explicitly so the points and the colour bar share one mapping.
plt.scatter(
    altitude_val, distance_val,
    c=mae_per_sample, cmap=custom_cmap, norm=normalized_mae_per_sample, s=0.10,
)

# Colour bar built from a standalone ScalarMappable
sm = ScalarMappable(cmap=custom_cmap, norm=normalized_mae_per_sample)
sm.set_array([])  # Only needed for older versions of Matplotlib
# A standalone ScalarMappable belongs to no Axes, so the target Axes is named
# explicitly; recent Matplotlib versions refuse to guess it.
cbar = plt.colorbar(sm, ax=plt.gca())
cbar.set_label('Localization Error (km)')

# Add labels and title
plt.xlabel("Source Altitude (km)")
plt.ylabel("Source Distance (km)")
plt.title("Distribution of Source Locations and Distance Localization Errors")
plt.savefig("Location_Error_Distributions_Distance_Model_Edited_ColorBar.svg")
plt.show()

**Altitude**

In [None]:
# Build a fresh network for the altitude regression.
# Wrapping the tensors of the distance cell (atmoInput / fftInput / output) in a
# second Model would reuse the very same layer objects, so the altitude fit would
# start from whatever weights those layers already hold and would keep updating
# them. New layers give the altitude model its own independently initialised
# weights. The architecture mirrors the distance network.

# Atmospheric branch: the (51, 1) input is only flattened
altAtmoInput = Input(shape=(51, 1))
altAtmoFeatures = Flatten()(altAtmoInput)

# FFT branch: three Conv1D + MaxPooling1D stages, then flatten
altFftInput = Input(shape=(4096, 1))
altFftFeatures = altFftInput
for _ in range(3):
    altFftFeatures = Conv1D(64, kernel_size=4, strides=1, activation='relu')(altFftFeatures)
    altFftFeatures = MaxPooling1D(pool_size=8)(altFftFeatures)
altFftFeatures = Flatten()(altFftFeatures)

# Joint head: four Dense(64) layers followed by a single linear output unit
altHidden = concatenate([altAtmoFeatures, altFftFeatures])
for _ in range(4):
    altHidden = Dense(64, activation='relu')(altHidden)
altOutput = Dense(1)(altHidden)

#model compilation
model2 = Model(inputs=[altAtmoInput, altFftInput], outputs=altOutput)

opt = Adam(learning_rate=0.001)
model2.compile(optimizer=opt, loss='mean_squared_error', metrics=['mean_absolute_error'])

model2.summary()

In [None]:
# Fit model2 to the altitude target; numEpochs and size are not set in this cell
# and are taken from whichever training cell defined them last
train_inputs = [atmo_train, fft_train]
val_inputs = [atmo_val, fft_val]

history2 = model2.fit(
    x=train_inputs,
    y=altitude_train,
    batch_size=size,
    epochs=numEpochs,
    verbose=1,
    validation_data=(val_inputs, altitude_val),
)

# Persist the fitted network in HDF5 format
model2.save("src_altitude.h5")

In [None]:
# Per-epoch mean absolute error recorded during the altitude-model fit
train_loss2 = history2.history['mean_absolute_error']
val_loss2 = history2.history['val_mean_absolute_error']

# Epochs are numbered from 1 on the x-axis
epochs = range(1, len(train_loss2) + 1)

# Training curve in blue, validation curve in red
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epochs, train_loss2, 'b-', label='Training Loss')
ax.plot(epochs, val_loss2, 'r-', label='Validation Loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Mean Absolute Loss (km)')
# Fixed y ticks at 2, 3, ..., 12; adjust the step or range here if needed
y_ticks = np.arange(2, 12.1, 1)
ax.set_yticks(y_ticks)
ax.set_title('Infrasonic Source Localization Results - Altitude')
ax.legend()

fig.savefig("Normal_Model_Altitude.png")

plt.show()

In [None]:
# Write both MAE curves to disk as CSV text (the file names carry no extension)
for curve, filename in ((train_loss2, "altitude_train_loss"), (val_loss2, "altitude_val_loss")):
    pd.DataFrame(curve).to_csv(filename)

In [None]:
# Rebind model2 to the saved altitude model stored under the Kaggle input
# directory; from here on model2 no longer refers to the network fitted above
altitude_model_path = '/kaggle/input/altitude/tensorflow2/altitude/1/Altitude_Model_Normal.h5'
model2 = keras.models.load_model(altitude_model_path)

In [None]:
# Predict on the validation inputs and collapse the result to a 1-D array
raw_altitude_predictions = model2.predict([atmo_val, fft_val])
valAltPredictions = raw_altitude_predictions.flatten()
print(valAltPredictions)

In [None]:
# Absolute error of every validation altitude prediction
# (this rebinds mae_per_sample, which previously held the distance errors)
mae_per_sample = np.abs(valAltPredictions - altitude_val)

# Summary statistics, printed in the order mean, median, max
for summarise in (np.mean, np.median, np.max):
    print(summarise(mae_per_sample))

# Histogram with 0.2-wide bins starting at 0
altitude_bins = np.arange(0, 20.2, 0.2)
plt.hist(mae_per_sample, bins=altitude_bins, edgecolor='black')
plt.title("Distribution of Validation Source Altitude Errors")
plt.xlabel("Mean Absolute Loss (km)")
plt.ylabel("Count")
plt.savefig("Altitude_Error_Distrib.svg")

In [None]:
# Linear colour normalisation spanning the observed range of per-sample errors
error_min, error_max = mae_per_sample.min(), mae_per_sample.max()
normalized_mae_per_sample = Normalize(vmin=error_min, vmax=error_max)

In [None]:
# Choose a colormap
cmap = plt.get_cmap('RdYlGn_r')  # Red for high error, green for low error

# Scatter the validation sources, coloured by their altitude error. The norm is
# passed explicitly so the points and the colour bar share one mapping.
plt.scatter(
    altitude_val, distance_val,
    c=mae_per_sample, cmap=cmap, norm=normalized_mae_per_sample, s=0.10,
)

# Colour bar built from a standalone ScalarMappable
sm = ScalarMappable(cmap=cmap, norm=normalized_mae_per_sample)
sm.set_array([])  # Only needed for older versions of Matplotlib
# A standalone ScalarMappable belongs to no Axes, so the target Axes is named
# explicitly; recent Matplotlib versions refuse to guess it.
cbar = plt.colorbar(sm, ax=plt.gca())
cbar.set_label('Localization Error (km)')

# Add labels and title
plt.xlabel("Source Altitude (km)")
plt.ylabel("Source Distance (km)")
plt.title("Distribution of Source Locations and Altitude Localization Errors")
plt.savefig("Location_Error_Distributions_Altitude_Model.svg")
plt.show()

**Ablations for Distance**

In [None]:
# Ablation: distance regression from the FFT input alone (no atmospheric branch)

#fft input model
fftInput = Input(shape = (4096, 1))
y = fftInput

# Three Conv1D + MaxPooling1D stages
numLayersFFT = 3
for i in range(0, numLayersFFT):
    y = Conv1D(64, kernel_size=4, padding='same', strides = 1, activation='relu')(y)
    y = MaxPooling1D(pool_size=(8))(y)

fftOutput = Flatten()(y)

# The Dense head has to consume the flattened features. Dense only transforms the
# last axis, so feeding it the unflattened conv output would yield one value per
# remaining position instead of a single regression value per sample.
z = fftOutput

numLayersFinal = 4
for i in range(0, numLayersFinal):
    z = Dense(64, activation='relu')(z)

#final output layer
output = Dense(1)(z)

#model compilation
model3 = Model(inputs=fftInput, outputs=output)

opt = Adam(learning_rate=0.001)
model3.compile(optimizer=opt, loss='mean_squared_error', metrics=['mean_absolute_error'])

model3.summary()

In [None]:
# Training schedule for the FFT-only distance ablation
numEpochs = 100
size = 64  # mini-batch size

history3 = model3.fit(
    x=fft_train,
    y=distance_train,
    batch_size=size,
    epochs=numEpochs,
    verbose=1,
    validation_data=(fft_val, distance_val),
)

# Persist the fitted network in HDF5 format
model3.save("distance_model_fft_only.h5")

In [None]:
# Per-epoch mean absolute error recorded during the FFT-only distance fit
train_loss3 = history3.history['mean_absolute_error']
val_loss3 = history3.history['val_mean_absolute_error']

# Epochs are numbered from 1 on the x-axis
epochs = range(1, len(train_loss3) + 1)

# Training curve in blue, validation curve in red
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epochs, train_loss3, 'b-', label='Training Loss')
ax.plot(epochs, val_loss3, 'r-', label='Validation Loss')
ax.set_title('Infrasonic Source Localization Results - Distance (Acoustical Input Only)')
ax.set_xlabel('Epochs')
ax.set_ylabel('Mean Absolute Loss (km)')
ax.legend()

fig.savefig("Ablated_Model_Distance_FFT_Only.png")

plt.show()

In [None]:
# Write both MAE curves to disk as CSV text (the file names carry no extension)
for curve, filename in (
    (train_loss3, "ablated_distance_fftonly_train_loss"),
    (val_loss3, "ablated_distance_fftonly_val_loss"),
):
    pd.DataFrame(curve).to_csv(filename)

**Ablation for Altitude**

In [None]:
#atmo only model
# Ablation: altitude regression from the atmospheric input alone

atmoInput = Input(shape = (51, 1))
# Flatten the (51, 1) input before the Dense stack. Dense only transforms the last
# axis, so without this step the network would emit one value per input position
# rather than a single regression value per sample.
x = Flatten()(atmoInput)

numLayersFinal = 8
for i in range(0, numLayersFinal):
    x = Dense(64, activation='relu')(x)

#final output layer
output = Dense(1)(x)

#model compilation
model4 = Model(inputs=atmoInput, outputs=output)

opt = Adam(learning_rate=0.001)
model4.compile(optimizer=opt, loss='mean_squared_error', metrics=['mean_absolute_error'])

model4.summary()

In [None]:
# Training schedule for the atmospheric-only altitude ablation
numEpochs = 100
size = 64  # mini-batch size

history4 = model4.fit(
    x=atmo_train,
    y=altitude_train,
    batch_size=size,
    epochs=numEpochs,
    verbose=1,
    validation_data=(atmo_val, altitude_val),
)

# Persist the fitted network in HDF5 format
model4.save("altitude_model_atmo_only.h5")

In [None]:
# Per-epoch mean absolute error recorded during the atmospheric-only altitude fit
train_loss4 = history4.history['mean_absolute_error']
val_loss4 = history4.history['val_mean_absolute_error']

# Epochs are numbered from 1 on the x-axis
epochs = range(1, len(train_loss4) + 1)

# Training curve in blue, validation curve in red
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epochs, train_loss4, 'b-', label='Training Loss')
ax.plot(epochs, val_loss4, 'r-', label='Validation Loss')
ax.set_title('Infrasonic Source Localization Results - Altitude (Atmospheric Data Input Only)')
ax.set_xlabel('Epochs')
ax.set_ylabel('Mean Absolute Loss (km)')
ax.legend()

fig.savefig("Ablated_Model_Altitude_Atmo_Only.png")

plt.show()

**Max Pool Utility Analysis**

In [None]:
# Variant of the two-branch network without any max-pooling layer
# (in response to concerns raised by Ferguson et al 2016, 2017 (ICASSP))

# Branch 1: the atmospheric input is only flattened, no learned layers
atmoInput = Input(shape=(51, 1))
x = atmoInput
atmostatsOutput = Flatten()(x)

# Branch 2: the FFT input goes through stacked 'same'-padded Conv1D layers, no pooling
fftInput = Input(shape=(4096, 1))
numLayersFFT = 3
y = fftInput
for _ in range(numLayersFFT):
    y = Conv1D(64, kernel_size=4, padding='same', strides=1, activation='relu')(y)
fftOutput = Flatten()(y)

# Merge both branches and run the joint features through a stack of Dense layers
concatenatedInput = concatenate([atmostatsOutput, fftOutput])
numLayersFinal = 4
z = concatenatedInput
for _ in range(numLayersFinal):
    z = Dense(64, activation='relu')(z)

# Single linear unit: one regression value per sample
output = Dense(1)(z)

# Assemble and compile: MSE is the training loss, MAE is tracked as a metric
model5 = Model(inputs=[atmoInput, fftInput], outputs=output)
opt = Adam(learning_rate=0.001)
model5.compile(
    loss='mean_squared_error',
    optimizer=opt,
    metrics=['mean_absolute_error'],
)

model5.summary()

In [None]:
# Training schedule for the no-max-pool altitude model
numEpochs = 50
size = 64  # mini-batch size

train_inputs = [atmo_train, fft_train]
val_inputs = [atmo_val, fft_val]

history5 = model5.fit(
    x=train_inputs,
    y=altitude_train,
    batch_size=size,
    epochs=numEpochs,
    verbose=1,
    validation_data=(val_inputs, altitude_val),
)

# Persist the fitted network in HDF5 format
model5.save("altitude_model_nomaxpool.h5")

In [None]:
# Per-epoch mean absolute error recorded during the no-max-pool altitude fit
train_loss5 = history5.history['mean_absolute_error']
val_loss5 = history5.history['val_mean_absolute_error']

# Epochs are numbered from 1 on the x-axis
epochs = range(1, len(train_loss5) + 1)

# Training curve in blue, validation curve in red
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epochs, train_loss5, 'b-', label='Training Loss')
ax.plot(epochs, val_loss5, 'r-', label='Validation Loss')
ax.set_title('Infrasonic Source Localization Results - Altitude (No Max Pool)')
ax.set_xlabel('Epochs')
ax.set_ylabel('Mean Absolute Loss (km)')
ax.legend()

fig.savefig("Model_Altitude_NoMaxPool.png")

plt.show()

In [None]:
# Write both MAE curves to disk as CSV text (the file names carry no extension)
for curve, filename in (
    (train_loss5, "altitude_nomaxpool_train_loss"),
    (val_loss5, "altitude_nomaxpool_val_loss"),
):
    pd.DataFrame(curve).to_csv(filename)

In [None]:
# Build a fresh no-max-pool network for the distance regression.
# Wrapping the existing atmoInput / fftInput / output tensors in another Model
# would reuse the same layer objects as the no-max-pool altitude model, so the
# distance fit would start from whatever weights those layers already hold and
# would keep updating them. New layers give this model its own independently
# initialised weights. The architecture mirrors the no-max-pool altitude network.

# Atmospheric branch: the (51, 1) input is only flattened
distAtmoInput = Input(shape=(51, 1))
distAtmoFeatures = Flatten()(distAtmoInput)

# FFT branch: three 'same'-padded Conv1D layers without pooling, then flatten
distFftInput = Input(shape=(4096, 1))
distFftFeatures = distFftInput
for _ in range(3):
    distFftFeatures = Conv1D(64, kernel_size=4, padding='same', strides=1, activation='relu')(distFftFeatures)
distFftFeatures = Flatten()(distFftFeatures)

# Joint head: four Dense(64) layers followed by a single linear output unit
distHidden = concatenate([distAtmoFeatures, distFftFeatures])
for _ in range(4):
    distHidden = Dense(64, activation='relu')(distHidden)
distOutput = Dense(1)(distHidden)

model6 = Model(inputs=[distAtmoInput, distFftInput], outputs=distOutput)

opt = Adam(learning_rate=0.001)
model6.compile(optimizer=opt, loss='mean_squared_error', metrics=['mean_absolute_error'])

model6.summary()

In [None]:
# Training schedule for the no-max-pool distance model
numEpochs = 50
size = 64  # mini-batch size

train_inputs = [atmo_train, fft_train]
val_inputs = [atmo_val, fft_val]

history6 = model6.fit(
    x=train_inputs,
    y=distance_train,
    batch_size=size,
    epochs=numEpochs,
    verbose=1,
    validation_data=(val_inputs, distance_val),
)

# Persist the fitted network in HDF5 format
model6.save("distance_model_nomaxpool.h5")

In [None]:
# Per-epoch mean absolute error recorded during the no-max-pool distance fit
train_loss6 = history6.history['mean_absolute_error']
val_loss6 = history6.history['val_mean_absolute_error']

# Epochs are numbered from 1 on the x-axis
epochs = range(1, len(train_loss6) + 1)

# Training curve in blue, validation curve in red
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epochs, train_loss6, 'b-', label='Training Loss')
ax.plot(epochs, val_loss6, 'r-', label='Validation Loss')
ax.set_title('Infrasonic Source Localization Results - Distance (No Max Pool)')
ax.set_xlabel('Epochs')
ax.set_ylabel('Mean Absolute Loss (km)')
ax.legend()

fig.savefig("Model_Distance_NoMaxPool.png")

plt.show()

In [None]:
# Write both MAE curves to disk as CSV text (the file names carry no extension)
for curve, filename in (
    (train_loss6, "distance_nomaxpool_train_loss"),
    (val_loss6, "distance_nomaxpool_val_loss"),
):
    pd.DataFrame(curve).to_csv(filename)