In [1]:
import glob
import xarray as xr
import netCDF4
import h5netcdf
import scipy
import numpy as np
import matplotlib.pyplot as plt
import scipy.signal as sig
import cartopy.crs as ccrs
import cartopy.feature as cfeature

In [2]:
file_path='../Data/'
data_list_in=glob.glob(file_path+'ds_in*')
data_list_out=glob.glob(file_path+'ds_out*')
data_list_in.sort()
data_list_out.sort()
print(data_list_in)
print(data_list_out)

['../Data/ds_in.csv', '../Data/ds_inBering.csv', '../Data/ds_inSH.csv', '../Data/ds_inWeddell.csv']
['../Data/ds_out.csv', '../Data/ds_outBering.csv', '../Data/ds_outSH.csv', '../Data/ds_outWeddell.csv']


In [3]:
data_arrays_in = [np.loadtxt(file, delimiter=',') for file in data_list_in]
data_arrays_out = [np.loadtxt(file, delimiter=',') for file in data_list_out]

# Print the shapes of the arrays to verify
for i, array in enumerate(data_arrays_in):
    print(f"Shape of data_arrays_in[{i}]: {array.shape}")

for i, array in enumerate(data_arrays_out):
    print(f"Shape of data_arrays_out[{i}]: {array.shape}")

Shape of data_arrays_in[0]: (45693, 13)
Shape of data_arrays_in[1]: (41227, 13)
Shape of data_arrays_in[2]: (44050, 13)
Shape of data_arrays_in[3]: (15356, 13)
Shape of data_arrays_out[0]: (45693, 12)
Shape of data_arrays_out[1]: (41227, 12)
Shape of data_arrays_out[2]: (44050, 12)
Shape of data_arrays_out[3]: (15356, 12)


In [4]:
# Reshape each array in data_arrays_in to the specified shape and concatenate them
reshaped_data_arrays_in = np.concatenate([array.reshape(-1, 13) for array in data_arrays_in])

# Print the shape of the reshaped array to verify
print(f"Shape of reshaped_data_arrays_in: {reshaped_data_arrays_in.shape}")

Shape of reshaped_data_arrays_in: (146326, 13)


In [5]:
# Reshape each array in data_arrays_out to the specified shape and concatenate them
reshaped_data_arrays_out = np.concatenate([array.reshape(-1, 12) for array in data_arrays_out])

# Print the shape of the reshaped array to verify
print(f"Shape of reshaped_data_arrays_out: {reshaped_data_arrays_out.shape}")

Shape of reshaped_data_arrays_out: (146326, 12)


In [6]:
# Clean data_arrays_in and data_arrays_out
cleaned_data_arrays_in = []
cleaned_data_arrays_out = []

for i in range(reshaped_data_arrays_in.shape[0]):
        if not np.sum(reshaped_data_arrays_out[i])==0:
            cleaned_data_arrays_in.append(reshaped_data_arrays_in[i])
            cleaned_data_arrays_out.append(reshaped_data_arrays_out[i])

# Convert lists back to numpy arrays if needed
cleaned_data_arrays_in = np.array(cleaned_data_arrays_in)
cleaned_data_arrays_out = np.array(cleaned_data_arrays_out)

# Print the shapes of the cleaned arrays to verify
print(f"Shape of cleaned_data_arrays_in: {cleaned_data_arrays_in.shape}")
print(f"Shape of cleaned_data_arrays_out: {cleaned_data_arrays_out.shape}")

Shape of cleaned_data_arrays_in: (131674, 13)
Shape of cleaned_data_arrays_out: (131674, 12)


In [7]:
# Calculate the orders of magnitude difference
def calculate_orders_of_magnitude_difference(predictions, actuals):
    differences = torch.abs(predictions - actuals)
    orders_of_magnitude_diff = torch.log10(differences + 1e-10) - torch.log10(torch.abs(actuals) + 1e-10)
    return torch.mean(torch.abs(orders_of_magnitude_diff))

In [9]:
import tensorflow as tf
from sklearn.model_selection import train_test_split

# Convert data to TensorFlow tensors
X_train, X_test, y_train, y_test = train_test_split(cleaned_data_arrays_in, cleaned_data_arrays_out, test_size=0.2, random_state=42)
X_train_tensor = tf.convert_to_tensor(X_train, dtype=tf.float32)
X_test_tensor = tf.convert_to_tensor(X_test, dtype=tf.float32)
y_train_tensor = tf.convert_to_tensor(y_train, dtype=tf.float32)
y_test_tensor = tf.convert_to_tensor(y_test, dtype=tf.float32)

# Define the neural network architecture
class PhysicsInformedNN(tf.keras.Model):
    def __init__(self):
        super(PhysicsInformedNN, self).__init__()
        self.hidden1 = tf.keras.layers.Dense(100, activation='relu')
        self.hidden2 = tf.keras.layers.Dense(100, activation='relu')
        self.output_layer = tf.keras.layers.Dense(y_train_tensor.shape[1])
    
    def call(self, x):
        x = self.hidden1(x)
        x = self.hidden2(x)
        x = self.output_layer(x)
        return x

# Initialize the model, loss function, and optimizer
model = PhysicsInformedNN()
criterion = tf.keras.losses.MSE()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

# Custom loss function to enforce the sum of outputs to be zero
def custom_loss(y_true, y_pred):
    mse_loss = criterion(y_true, y_pred)

    sign_penalty = 0
    for i in range(y_pred.shape[1]):
        sign_penalty += tf.reduce_mean(tf.nn.relu(-y_pred[:, i] * tf.sign(y_true[:, i])))
    sign_penalty = sign_penalty / y_pred.shape[1]
    
    sign_loss = sign_penalty ** 2
    sum_constraint = tf.reduce_sum(y_pred, axis=1)
    sum_loss = tf.reduce_mean(sum_constraint ** 2)
    return mse_loss + sum_loss + 100000 * sign_loss

# Compile the model with the custom loss function
model.compile(optimizer=optimizer, loss=custom_loss)

# Training loop
num_epochs = 1000
history = model.fit(X_train_tensor, y_train_tensor, epochs=num_epochs, batch_size=32, verbose=2)

# Evaluate the model
mse = model.evaluate(X_test_tensor, y_test_tensor, verbose=0)
print(f'Test Mean Squared Error: {mse:.20f}')

2024-10-26 19:11:59.234682: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-10-26 19:11:59.384010: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


TypeError: mean_squared_error() missing 2 required positional arguments: 'y_true' and 'y_pred'

In [None]:
k=25000
print(model(torch.tensor(ds_in[k], dtype=torch.float32)))
print(sum(model(torch.tensor(ds_in[k], dtype=torch.float32))))
print(ds_in[k])
print(ds_out[k])

In [None]:
negative_sign_differences = 0
#ds_in_combine, ds_out_combine
for i in range(len(ds_in_combine)):
    predicted_output = model(torch.tensor(ds_in_combine[i], dtype=torch.float32))
    actual_output = torch.tensor(ds_out_combine[i])
    
    # Compare the signs of the predicted and actual outputs
    predicted_signs = torch.sign(predicted_output)
    actual_signs = torch.sign(actual_output)
    
    # Count the number of different signs
    negative_sign_differences += torch.sum(predicted_signs != actual_signs).item()

print(f'Total number of different negative signs: {negative_sign_differences} As a %: {100*negative_sign_differences/(len(ds_in_combine)*len(ds_in_combine[0]))}')

In [None]:
# Evaluate the model on the test set
model.eval()
with torch.no_grad():
    test_predictions = model(X_test_tensor)
    avg_orders_of_magnitude_diff = calculate_orders_of_magnitude_difference(test_predictions, y_test_tensor)
    print(f'Average Orders of Magnitude Difference: {avg_orders_of_magnitude_diff.item():.6f}')