In [103]:
import xarray as xr
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from copy import deepcopy

In [105]:
# Reach number; interpolated into the zarr input path and the saved model filename.
reach = 6

In [106]:
# Open the zarr store for the selected reach and keep every 7th entry along the
# leading axis of both 'depth' and 'time'.
zarr_array = xr.open_zarr(f'SW_0{reach}_SWP_01_2025-06-13.zarr')

depth_data = zarr_array['depth'][::7].values
date_data = zarr_array['time'][::7].values
zarr_array.close()

# The template is the frame with the largest number of non-NaN cells
# (argmax returns the first such frame on ties).
valid_cell_counts = np.array([np.count_nonzero(~np.isnan(depth)) for depth in depth_data])
template_map = valid_cell_counts.argmax()
original_shape = depth_data[template_map].shape

# True where the template frame has no data.
mask = np.isnan(depth_data[template_map])

# Per-cell mean over the leading axis, ignoring NaNs.
averaged_depth = np.nanmean(depth_data, axis=0)


  averaged_depth = np.nanmean(depth_data, axis=(0))


In [107]:

# Keep only the frames dated strictly after 2021-01-01; the same boolean
# selector is applied to the depths and to their dates so they stay paired.
post_cutoff = date_data > np.datetime64('2021-01-01')
depth_data_post = depth_data[post_cutoff]
date_data_post = date_data[post_cutoff]


In [108]:
def random_masking(depths,mask,scale=1):
    """Return a copy of ``depths`` with masked cells overwritten and NaNs zeroed.

    Args:
        depths: Sequence (list or array) of float arrays, one per frame.
        mask: Boolean array with the same shape as each frame; True marks the
            cells that are overwritten.
        scale: Multiplier applied to the uniform [0, 1) random fill values.
            ``scale=0`` fills the masked cells with zeros.

    Returns:
        A deep copy of ``depths`` in which every frame has its masked cells
        replaced by ``random * scale`` and any remaining NaN replaced by 0.
        The input ``depths`` is not modified.
    """
    random_mask = deepcopy(depths)
    n_masked = int(np.sum(mask))
    # Work on the copy: writing into ``depths`` would alter the caller's data
    # and leave the returned copy unmasked.
    for array in random_mask:
        array[mask] = np.random.random(size=n_masked) * scale
        # NaN never compares equal to anything (including itself), so it has
        # to be located with np.isnan rather than ``==``.
        array[np.isnan(array)] = 0
    return random_mask
        
        

In [109]:
# Window sizes in weeks; both are multiplied by 7 and passed to create_sequences
# as input_length and output_length respectively.
weeks_in=8
weeks_out=4

In [110]:
# Half of axes 1 and 2 of depth_data, rounded up; used as the resize target
# (output_shape) and as the leading two dimensions of the model's input_shape.
dim_1= int(np.ceil(depth_data.shape[1]/2))
dim_2= int(np.ceil(depth_data.shape[2]/2))

In [111]:
def downscale_array_avg(array, factor):
    """Average a 2-D array over non-overlapping ``factor`` x ``factor`` tiles.

    Both dimensions of ``array`` must be exact multiples of ``factor``;
    otherwise the reshape raises ``ValueError``. NaN cells propagate into
    the tile mean because a plain ``mean`` is used.
    """
    n_rows, n_cols = array.shape[0], array.shape[1]
    # Axes 1 and 3 of the 4-D view index the position inside each tile.
    tiled = array.reshape((n_rows // factor, factor, n_cols // factor, factor))
    return tiled.mean(axis=(1, 3))

In [112]:
# Target (rows, cols) passed to skimage's resize for every frame.
output_shape = (dim_1, dim_2)

In [None]:
# Pick one post-2021 frame to preview.
depth_array = depth_data_post[10]
# nanmax instead of max: frames can contain NaN cells (see the NaN mask built
# from the template frame), and a single NaN would turn every normalized value into NaN.
normalized_depths = depth_array / np.nanmax(depth_array)
# Build an RGB image: depths < 50 are red, depths > 50 are blue.
# NaN cells and cells exactly equal to 50 fail both comparisons and stay black.
colored_depths = np.zeros((*depth_array.shape, 3))
colored_depths[..., 0] = np.where(depth_array < 50, 1, 0)  # Red channel
colored_depths[..., 1] = 0  # Green channel
colored_depths[..., 2] = np.where(depth_array > 50, 1, 0)  # Blue channel

plt.imshow(colored_depths)
plt.show()

In [None]:
from skimage.transform import resize
# Keep only the frames whose number of non-NaN cells equals the template
# frame's count. The template count is loop-invariant, so compute it once.
# NOTE(review): this can drop frames, so reduced_depths may be shorter than
# depth_data / date_data; anything indexed by frame position downstream
# (per-frame labels, dates) must account for that.
template_valid_count = np.count_nonzero(~np.isnan(depth_data[template_map]))
reduced_depths = [
    depth for depth in depth_data
    if np.count_nonzero(~np.isnan(depth)) == template_valid_count
]
# Resize every kept frame to output_shape.
reduced_depths = [resize(depth, output_shape, anti_aliasing=True) for depth in reduced_depths]
plt.imshow(reduced_depths[10])
plt.colorbar()
plt.show()

In [115]:
def dredge_percent_collection(depth,date):
    """Compute, per frame, the fraction of non-NaN cells below a depth threshold.

    Frames dated before 2020-01-01 are compared against 45; all other frames
    are compared against 50.

    Args:
        depth: Iterable of depth frames (arrays, NaN where there is no data).
        date: Iterable of ``np.datetime64`` values, paired with ``depth``.

    Returns:
        A list with one ``[fraction, date]`` entry per frame, where
        ``fraction`` is (cells below threshold) / (non-NaN cells).
    """
    threshold_deepening = 50
    threshold_pre_deepening = 45
    cutoff = np.datetime64('2020-01-01')

    dredge_percent = []
    for frame, timestamp in zip(depth, date):
        if timestamp < cutoff:
            threshold = threshold_pre_deepening
        else:
            threshold = threshold_deepening
        # NaN cells compare False, so they never count as "below".
        below = (frame < threshold).sum()
        valid = (~np.isnan(frame)).sum()
        dredge_percent.append([below / valid, timestamp])
    return dredge_percent

In [116]:
# One [fraction_below_threshold, date] entry per frame of depth_data.
dredge_percent = dredge_percent_collection(depth_data,date_data)

In [117]:
# Object dtype because each row pairs a float fraction with a datetime64 value.
dredge_percent = np.array(dredge_percent,dtype='object')

In [None]:
# Pick one post-2021 frame to preview.
depth_array = depth_data_post[10]
# nanmax instead of max: frames can contain NaN cells (see the NaN mask built
# from the template frame), and a single NaN would turn every normalized value into NaN.
normalized_depths = depth_array / np.nanmax(depth_array)
# Build an RGB image: depths < 50 are red, depths > 50 are blue.
# NaN cells and cells exactly equal to 50 fail both comparisons and stay black.
colored_depths = np.zeros((*depth_array.shape, 3))
colored_depths[..., 0] = np.where(depth_array < 50, 1, 0)  # Red channel
colored_depths[..., 1] = 0  # Green channel
colored_depths[..., 2] = np.where(depth_array > 50, 1, 0)  # Blue channel

plt.imshow(colored_depths)
# NOTE(review): the image is depth_data_post[10] but the label is read from
# dredge_percent[10], which is indexed over all of depth_data - confirm both
# refer to the same frame.
plt.text(200, 200, f"Dredge: {dredge_percent[10][0]:.2f}", color='white')
plt.show()

In [None]:
# dredge_percent is an object-dtype array; cast the fraction column to float
# and convert to percent once so the bars and the annotations share units.
percent_below_pct = dredge_percent[:, 0].astype(float) * 100
plt.hist(percent_below_pct, bins=10)
plt.xlabel('Percent of channel area with depth below threshold')
plt.title('Percent below threshold histogram')
# Text positions are in data coordinates (x in percent, y in frame counts).
plt.text(10,125,f"mean:{np.mean(percent_below_pct):.2f}")
plt.text(10,150,f"median:{np.median(percent_below_pct):.2f}")
plt.show()

In [121]:
# Fixed depth range used to rescale the resized frames.
max_depth =80
min_depth = 20
# First column of dredge_percent: the per-frame fraction of non-NaN cells below the threshold.
percent_below = dredge_percent[:,0]
# Linear rescale (arr - min_depth) / (max_depth - min_depth); nothing is clipped,
# so depths outside [min_depth, max_depth] map outside [0, 1].
scaled_depths = [(arr - min_depth) / (max_depth - min_depth) for arr in reduced_depths]

In [None]:
# Visual check of one rescaled frame (index 10).
plt.imshow(scaled_depths[10])
plt.colorbar()
plt.show()

In [123]:

# True where the first resized frame is NaN; passed to random_masking as the fill mask.
reduced_mask = np.isnan(reduced_depths[0])

In [124]:
# Fill the masked cells with random * 0, i.e. zeros.
# NOTE(review): the call is issued twice and only the second result is kept.
# Confirm whether random_masking fills the copy it returns or the list it is
# given; if it fills the returned copy, a single call is sufficient.
masked_depths = random_masking(scaled_depths,reduced_mask,0)
masked_depths = random_masking(scaled_depths,reduced_mask,0)

In [None]:
# Visual check of one masked frame (index 1).
plt.imshow(masked_depths[1])
plt.colorbar()
plt.show()

In [126]:
from sklearn.preprocessing import MinMaxScaler
# Min-max scaler for the per-frame fractions; reshape(-1,1) turns the 1-D
# column into the single-feature 2-D input the scaler is fitted on.
scaler = MinMaxScaler()
percent_below_scaler = scaler.fit(percent_below.reshape(-1,1))

In [127]:
# Scaled labels, one row per entry of percent_below (single column).
percent_below_scaled = percent_below_scaler.transform(percent_below.reshape(-1,1))

In [128]:
def create_sequences(data, input_length, output_length,y_predefined= None,date=None,step=7):
    """Build sliding-window (X, y) pairs from an ordered sequence of frames.

    Window ``i`` takes ``data[i:i + input_length:step]`` as input and is
    labelled with the value at index ``i + input_length + output_length - 1``.

    Args:
        data: Ordered sequence (list or array) of frames.
        input_length: Number of consecutive positions spanned by each input window.
        output_length: Number of positions between the end of the input
            window and the label position (inclusive of the label).
        y_predefined: Optional per-position labels indexed like ``data``.
            When omitted, the label is ``np.nanmin`` of the target frame.
        date: Unused; kept for backward compatibility.
        step: Stride used when sampling frames inside the input window.

    Returns:
        ``(X, y)`` as NumPy arrays with one entry per window. Both are empty
        when ``data`` is shorter than ``input_length + output_length``.
    """
    X, y = [], []
    n_windows = len(data) - input_length - output_length + 1
    for i in range(n_windows):
        target_idx = i + input_length + output_length - 1
        X.append(data[i:i + input_length:step])
        # ``is None`` is required here: calling .any() on None raises, and an
        # elementwise result never equals None.
        if y_predefined is None:
            y.append(np.nanmin(data[target_idx]))
        else:
            y.append(y_predefined[target_idx])

    return np.array(X), np.array(y)


# create_sequences looks labels up by frame position, so the frames and the
# labels must describe the same frames in the same order. The frames were
# filtered by valid-cell count before resizing, while the labels were computed
# for every frame of depth_data; fail loudly if that filter dropped anything
# rather than silently training on shifted labels.
assert len(masked_depths) == len(percent_below_scaled), (
    f"frames ({len(masked_depths)}) and labels ({len(percent_below_scaled)}) "
    "are misaligned: apply the same frame filter to the labels"
)
sequence_depths_X,sequence_depths_y = create_sequences(masked_depths, weeks_in*7, weeks_out*7,percent_below_scaled)

In [None]:
#train_X = np.moveaxis(train_X, 1, -1)
#test_X = np.moveaxis(test_X, 1, -1)
# Split windows, labels and window indices together so each sample can be
# traced back to its position in the sequence arrays.
num_samples = len(sequence_depths_X)
indices = np.arange(num_samples)

# NOTE(review): consecutive windows start one frame apart and overlap, so a
# shuffled 80/20 split puts near-identical windows in both train and test -
# confirm this leakage is acceptable or switch to a chronological split.
train_X, test_X, train_y, test_y,  train_idx, test_idx = train_test_split(sequence_depths_X, sequence_depths_y, indices, test_size=0.2, random_state=42)
print(train_X.shape)

# Print shapes to verify

# Move the window axis (axis 1) to the end so it becomes the channel axis,
# matching the model's input_shape of (dim_1, dim_2, weeks_in).
train_X = np.transpose(train_X,(0,2,3,1))
print(train_X.shape)
test_X = np.transpose(test_X,(0,2,3,1))

# Labels are taken from column 0 and converted to a plain list of scalars.
train_dataset = tf.data.Dataset.from_tensor_slices((train_X, train_y[:,0].tolist()))
test_dataset = tf.data.Dataset.from_tensor_slices((test_X, test_y[:,0].tolist()))

train_dataset = train_dataset.batch(64)
test_dataset = test_dataset.batch(64)


In [None]:
# Start of the validation slice, counted from the end of the window array.

validation_start_index =  -6

# Create validation dataset (only input data).
# The slice [-6:-5] selects exactly one window: the sixth from the end.
# NOTE(review): if the intent was the last six windows, the stop index should be omitted - confirm.
validation_X = sequence_depths_X[validation_start_index:-5]

# Move the window axis to the end (channels-last) and print the shape to verify
validation_X = np.transpose(validation_X,(0,2,3,1))
print("Validation X shape:", validation_X.shape)

In [131]:
# train_idx / test_idx are window start positions. The label of window i is
# taken from frame i + input_length + output_length - 1 (see create_sequences,
# called with weeks_in*7 and weeks_out*7), so that frame's date is the one
# that belongs to each sample's target.
target_offset = weeks_in*7 + weeks_out*7 - 1
train_dates = date_data[train_idx + target_offset]
test_dates = date_data[test_idx + target_offset]

In [132]:

import visualkeras

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import ConvLSTM2D, Dense, MaxPool3D, Flatten, Input,Dropout,Conv2D,Conv3D, MaxPool2D,GlobalAveragePooling2D,BatchNormalization

# Optimizer: Adam with a small fixed learning rate.
optimizer= tf.keras.optimizers.Adam(learning_rate=0.00005)

# CNN regressor. Input is one window in channels-last layout:
# (dim_1, dim_2, weeks_in), i.e. the frames of a window stacked as channels.
model = Sequential()
model.add(Conv2D(filters=16, kernel_size=(4, 4), activation='relu', input_shape=( dim_1, dim_2,weeks_in)))
model.add(BatchNormalization())
model.add(MaxPool2D(pool_size=(4, 4)))

model.add(Conv2D(filters=32, kernel_size=(4, 4), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPool2D(pool_size=(4, 4)))

model.add(Conv2D(filters=64, kernel_size=(4, 4), activation='relu'))
model.add(BatchNormalization())
model.add(MaxPool2D(pool_size=(4, 4)))

# Head: collapse the remaining spatial grid, then a small dense regressor.
# NOTE(review): GlobalAveragePooling2D already yields a flat vector per
# sample, so the Flatten layer that follows looks redundant; it is kept so
# the layer list of saved models does not change.
model.add(Conv2D(filters=16, kernel_size=(2, 2), activation='relu'))
model.add(GlobalAveragePooling2D())
model.add(Flatten())
model.add(Dropout(.2))
model.add(Dense(64, activation='sigmoid'))
model.add(Dense(1))

# Compile: mean-squared-error loss, mean-absolute-error reported as a metric.
model.compile(optimizer=optimizer, loss='mse',metrics = ['mae'])


visualkeras.layered_view(model).show() # display using your system viewer

# Last expression of the cell: renders the diagram inline.
visualkeras.layered_view(model)

In [None]:
# Stop when val_loss has not improved for 15 epochs and roll back to the best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=15, start_from_epoch=0, restore_best_weights=True)

# NOTE(review): test_dataset is used here for early stopping and later for the
# reported RMSE/MAE, so those metrics are not from untouched data - confirm.
# `callbacks` is documented as a list of callback instances, so pass a list.
history = model.fit(train_dataset, epochs=300, validation_data=test_dataset,
                    callbacks=[early_stopping])

In [None]:
# Predictions for the held-out test windows (in the scaled label space).
pred_y = model.predict(test_X,verbose=True)

In [None]:
# First channel (first frame of the window) of the first test sample.
plt.imshow(test_X[0,:,:,0])

In [None]:
import tensorflow as tf
import numpy as np

# Define the Monte Carlo Dropout function to run stochastic forward passes
def monte_carlo_dropout(model, input_data, num_samples=100):
    """
    Run multiple stochastic forward passes with dropout on during inference.
    Args:
    - model: Trained model.
    - input_data: Input data for the model (e.g., a batch of samples). It is
      fed to the model in a single call per pass.
    - num_samples: Number of forward passes to make for uncertainty estimation.
      Must be at least 1.

    Returns:
    - mean_pred: Mean prediction from multiple passes.
    - uncertainty: Standard deviation (uncertainty) from the predictions.

    Raises:
    - ValueError: If num_samples is smaller than 1.
    """
    if num_samples < 1:
        raise ValueError("num_samples must be at least 1")

    # training=True is what keeps Dropout active, but it would also switch
    # BatchNormalization to batch statistics and update its moving averages,
    # i.e. modify the trained model. A BatchNormalization layer marked
    # non-trainable runs in inference mode even when training=True, so freeze
    # those layers for the duration of the passes and restore them afterwards.
    # Only top-level layers are inspected; nested sub-models are not traversed.
    bn_layers = [
        layer for layer in getattr(model, "layers", [])
        if isinstance(layer, tf.keras.layers.BatchNormalization)
    ]
    previous_flags = [layer.trainable for layer in bn_layers]
    for layer in bn_layers:
        layer.trainable = False

    # Use tf.function for speed-up (graph execution). It is defined after the
    # layers are frozen so the traced graph sees the frozen state.
    @tf.function
    def predict_with_dropout(inputs):
        return model(inputs, training=True)

    try:
        # Perform multiple stochastic forward passes
        predictions = np.array(
            [predict_with_dropout(input_data).numpy() for _ in range(num_samples)]
        )
    finally:
        for layer, flag in zip(bn_layers, previous_flags):
            layer.trainable = flag

    # Calculate the mean and standard deviation (uncertainty) across passes
    mean_pred = predictions.mean(axis=0)
    uncertainty = predictions.std(axis=0)

    return mean_pred, uncertainty

In [None]:
# Save the model to a reach-specific .keras file in the working directory.
model.save(f'reach_{reach}_CNN.keras')

In [None]:
# Perform Monte Carlo Dropout (25 stochastic passes over the test windows)
# and get both the mean prediction and the per-sample standard deviation.
# NOTE(review): the plots in the visible cells centre their error bars on
# pred_y rather than on mean_pred - confirm that is intended.
mean_pred, uncertainty = monte_carlo_dropout(model, test_X, num_samples=25)

In [None]:
import matplotlib.dates as mdates
from datetime import datetime
import pandas as pd

In [None]:
x = np.arange(len(pred_y[50:65]))  # Create an array of indices for the bars

# test_idx holds window start positions. Each window's label comes from frame
# i + input_length + output_length - 1 (see create_sequences, called with
# weeks_in*7 and weeks_out*7), so that frame's date is the one the plotted
# actual/predicted values refer to.
target_offset = weeks_in*7 + weeks_out*7 - 1
dates_np = date_data[test_idx + target_offset][50:65]
dates = [pd.Timestamp(d).to_pydatetime() for d in dates_np]

# Convert datetime objects to string in desired format
dates_str = [d.strftime('%Y-%m-%d') for d in dates]

# NOTE(review): test_y and pred_y are in the min-max scaled label space, so
# "* 100" yields scaled values rather than true percentages unless the scaler
# is inverted first - confirm.
width = 0.4  # Width of the bars
fig, ax = plt.subplots()
rects1 = ax.bar(x - width/2, test_y[50:65,0]*100
                ,width, label='Actual (test_y)')
rects2 = ax.bar(x + width/2, pred_y[50:65].flatten()
                *100
                ,width, label='Predicted (pred_y)')

# Error bars: Monte Carlo dropout standard deviation, drawn on the predicted bars.
errorbar= ax.errorbar(x + width/2, pred_y[50:65].flatten()*100, yerr=uncertainty[50:65].flatten()*100, fmt='o', color='black', label='Uncertainty')

# Add labels, title, and legend
ax.set_xlabel('Date')
ax.set_ylabel('Percent Below Threshold')
ax.set_title('Comparison of Actual vs Predicted')
ax.set_xticks(x)
ax.set_xticklabels(dates_str, rotation=45, ha='right')
ax.legend()

# Optionally, add value labels on top of the bars
def autolabel(rects):
    """Attach a text label above each bar in *rects*, displaying its height."""
    for rect in rects:
        height = rect.get_height()
        ax.annotate(f'{height:.1f}',  # Format to 1 decimal place
                    xy=(rect.get_x() + rect.get_width() / 2, height),
                    xytext=(0, 3),  # 3 points vertical offset
                    textcoords="offset points",
                    ha='center', va='bottom')

autolabel(rects1)
autolabel(rects2)

# Rotate date labels for better readability
plt.xticks(rotation=45)
plt.ylim(0,80)
fig.tight_layout()



plt.show()


In [None]:
# Error metrics on the test windows, in the scaled label space.
residuals = pred_y[:,0] - test_y.flatten()
rmse = np.sqrt(np.mean(residuals ** 2))
mae = np.mean(np.abs(residuals))
print(f"RMSE:{rmse.round(3)}")
print(f"Mae:{mae.round(3)}")

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Scatter of true vs. predicted test values with Monte Carlo dropout
# standard deviations as vertical error bars.

plt.figure(figsize=(8, 6))

# Scatter plot with error bars. test_y is a single-column 2-D array, so
# flatten it like pred_y and uncertainty to give errorbar three 1-D arrays
# of equal length.
plt.errorbar(test_y.flatten(), pred_y.flatten(), yerr=uncertainty.flatten(), fmt='o', ecolor='gray', alpha=0.6)

# Reference line: perfect predictions
plt.plot([0, 1], [0, 1], 'r--')

# Labels and title
plt.xlabel("True Values (test_y)")
plt.ylabel("Predicted Values (pred_y)")
plt.title(f"Reach {reach} Scatter Plot")
plt.grid(True)
plt.show()

In [None]:
# Root-mean-square and mean-absolute error between test labels and predictions.
# NOTE(review): this is the same computation as the earlier RMSE/MAE cell, only
# with different print labels. If "u" was meant as "unbiased" (mean-removed) or
# "unscaled" (scaler inverted), neither adjustment is applied here - confirm.
rmse = np.sqrt(np.mean((test_y[:,0] - pred_y.flatten()) ** 2))
mae = np.mean(np.abs((test_y[:,0] - pred_y.flatten())))
print(f"uRMSE: {rmse:.3f}")
print(f"uMAE: {mae:.3f}")