In [19]:
import torch
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [None]:
# Load the Mooloolaba waves CSV (path is relative to the working directory)
# and preview the first rows.
df = pd.read_csv("data/Coastal Data System - Waves (Mooloolaba) 01-2017 to 06 - 2019.csv")
df.head()


In [None]:
# Clean the raw frame:
#   * -99.90 is treated as a missing-value marker and turned into NaN,
#   * the 'Date/Time' column is removed,
#   * every row that still contains a NaN is dropped,
#   * the index is rebuilt as 0..n-1 so positional and label indexing agree.
# The chain builds a new frame instead of mutating in place, and
# errors='ignore' keeps the cell re-runnable: dropping 'Date/Time' a second
# time would otherwise raise KeyError.
df = (
    df.replace(-99.90, np.nan)
      .drop(columns='Date/Time', errors='ignore')
      .dropna()
      .reset_index(drop=True)
)
df.head()

In [None]:
# Preview every measured variable over the rows labelled 0 through 100
# (.loc slicing is inclusive on both ends), one panel per column.
df_graph = df.loc[0:100]

# (column name, line colour, panel title), listed in subplot order.
preview_panels = [
    ('Hs', 'blue', 'Significant Wave Height'),
    ('Hmax', 'red', 'Maximum Wave Height'),
    ('Tz', 'orange', "Zero Upcrossing Wave Period"),
    ('Tp', 'brown', "The Peak Energy Wave Period"),
    ('Peak Direction', 'purple', "Direction related to True North"),
    ('SST', 'green', "Sea Surface Temperature"),
]

plt.figure(figsize=(15,22))
for panel_no, (column, colour, heading) in enumerate(preview_panels, start=1):
    # Panels fill a 6x2 grid row by row; only the first six slots are used.
    plt.subplot(6, 2, panel_no)
    plt.plot(df_graph[column], color=colour)
    plt.title(heading)
plt.show()

In [None]:
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
df.info()

In [None]:
# Summary statistics of the cleaned frame, shown as the cell's rich output.
df.describe()

In [None]:
# Pairwise correlation between the columns, drawn as an annotated heatmap.
corr_fig, corr_ax = plt.subplots(figsize=(7, 7))
sns.heatmap(df.corr(), linewidth=.1, annot=True, cmap="YlGnBu", ax=corr_ax)
corr_ax.set_title("Correlation Matrix")
plt.show()

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale every column using statistics of the TRAINING rows only.
# Fitting the scaler on the whole frame would let the min/max of the held-out
# rows leak into the features the model is trained on. The fitted scaler is
# then applied to all rows, so held-out values may fall slightly outside
# [0, 1]; that is expected.
# Keep n_train_rows in sync with the train/test split index used further down.
n_train_rows = 42000
scaler = MinMaxScaler(feature_range=(0,1))
scaler.fit(df.iloc[:n_train_rows])
data = scaler.transform(df)
print('Shape of the scaled data matrix: ',data.shape)

In [None]:
# First row of the scaled matrix returned by the scaler.
data[0]

In [None]:
# Train / test split.
# The split is positional (no shuffling): rows before `split_row` form the
# training partition and all remaining rows form the test partition.
split_row = 42000
train, test = data[:split_row], data[split_row:]

# Report the size of each partition
print('Shape of train data: ', train.shape)
print('Shape of test data: ', test.shape)

In [31]:

def create_dataset(dataset,lookback):
    """
    Build supervised (window, next-step target) pairs from a 2-D series.

    For every start index ``i`` the input window is ``dataset[i:i+lookback]``
    and the target is the row immediately after it, ``dataset[i+lookback]``.
    (The target used to be ``dataset[i+lookback+1]``, which silently skipped
    one row between the window and its target.)

    Parameters
    ----------
    dataset : array-like of shape (n_steps, n_features)
        Rows are consecutive observations.
    lookback : int
        Number of consecutive rows in each window; must be >= 1.

    Returns
    -------
    X : torch.Tensor, float32, shape (n_steps - lookback, n_features, lookback)
    y : torch.Tensor, float32, shape (n_steps - lookback, n_features)

    Raises
    ------
    ValueError
        If ``dataset`` is not 2-D, ``lookback`` is < 1, or there are not
        enough rows to form a single window plus its target.
    """
    series = np.asarray(dataset)
    if series.ndim != 2:
        raise ValueError(
            f"dataset must be 2-D (n_steps, n_features), got {series.ndim}-D input"
        )
    if lookback < 1:
        raise ValueError(f"lookback must be >= 1, got {lookback}")

    n_windows = len(series) - lookback
    if n_windows < 1:
        raise ValueError(
            f"need more than {lookback} rows to build one window and its "
            f"target, got {len(series)}"
        )

    # One (lookback, n_features) slice per start index.
    X = np.stack([series[start:start + lookback] for start in range(n_windows)])
    # NOTE(review): np.reshape re-reads the C-ordered buffer with the last two
    # axis lengths exchanged; it does NOT swap the axes, so a row of length
    # `lookback` is not one feature's history. Kept unchanged because the
    # downstream model is sized for this (n_features, lookback) layout --
    # confirm whether np.transpose or the plain (lookback, n_features) layout
    # was intended.
    X = np.reshape(X,(X.shape[0],X.shape[2],X.shape[1]))
    # Target for window i is row i + lookback, i.e. everything after the first
    # `lookback` rows.
    y = series[lookback:]
    return torch.tensor(X,dtype=torch.float32),torch.tensor(y,dtype=torch.float32)

In [None]:
# Number of consecutive rows that make up one input window.
lookback = 30

# Window each partition separately, so no window mixes train and test rows.
X_train, y_train = create_dataset(train,lookback)
X_test,y_test = create_dataset(test,lookback)

# create_dataset returns X with shape (samples, n_features, lookback) and
# y with shape (samples, n_features).
print(f'X_train shape: {X_train.shape} , y_train shape :{y_train.shape}')
print('X_test shape :',X_test.shape, ' y_test shape: ',y_test.shape)

In [None]:
# Per-sample shape of the model input: with batch_first=True the LSTM reads
# axis 1 as the sequence axis and axis 2 as the per-step input size.
(X_train.shape[1],X_train.shape[2])

In [None]:
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader,TensorDataset

class WaveLSTM(nn.Module):
    """Three stacked unidirectional LSTM layers followed by a linear read-out.

    Inputs are batch-first, ``(batch, seq_len, input_dim)``. Dropout is applied
    once, between the second and third LSTM. Only the last time step of the
    third LSTM feeds the linear layer, so the output is ``(batch, output_dim)``.
    """

    def __init__(self, input_dim,hidden_dim1=32,hidden_dim2=16,hidden_dim3=10,output_dim=6,dropout=0.2):
        super().__init__()
        # Sub-modules are registered in the order they are applied.
        self.lstm1 = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim1, batch_first=True)
        self.lstm2 = nn.LSTM(input_size=hidden_dim1, hidden_size=hidden_dim2, batch_first=True)
        self.dropout = nn.Dropout(p=dropout)
        self.lstm3 = nn.LSTM(input_size=hidden_dim2, hidden_size=hidden_dim3, batch_first=True)
        self.fc = nn.Linear(in_features=hidden_dim3, out_features=output_dim)

    def forward(self,x):
        """Map ``(batch, seq_len, input_dim)`` to ``(batch, output_dim)``."""
        seq, _ = self.lstm1(x)
        seq, _ = self.lstm2(seq)
        seq, _ = self.lstm3(self.dropout(seq))
        # Keep only the final time step's hidden output for the dense layer.
        last_step = seq[:, -1, :]
        return self.fc(last_step)
    
# Seed torch's global RNG so weight initialisation and DataLoader shuffling
# are repeatable across "Restart & Run All".
torch.manual_seed(42)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Per-step input size seen by the first LSTM = last axis of X (batch_first).
# Derived from the tensors instead of hard-coding 30, so changing `lookback`
# (or the feature set) cannot silently desynchronise the model from the data.
input_dim = X_train.shape[2]

# output_dim follows the number of target columns for the same reason.
model = WaveLSTM(input_dim, output_dim=y_train.shape[1]).to(device)
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(),lr=1e-3)

batch_size = 200
train_dataset = TensorDataset(X_train,y_train)
train_loader = DataLoader(train_dataset,batch_size=batch_size,shuffle=True)
test_dataset = TensorDataset(X_test,y_test)
# Evaluation order does not matter for the loss, so the test set is not shuffled.
test_loader = DataLoader(test_dataset,batch_size=batch_size,shuffle=False)

num_epochs = 15
train_hist = []  # mean training loss per epoch
test_hist = []   # mean test loss per epoch

for epoch in range(num_epochs):
    # ---- training pass -------------------------------------------------
    total_loss = 0.0
    model.train()  # enables dropout
    for batch_X, batch_y in train_loader:
        batch_X,batch_y = batch_X.to(device),batch_y.to(device)
        # NOTE: the name `predictions` is read by a later cell; keep it.
        predictions = model(batch_X)
        loss = loss_fn(predictions,batch_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Mean of the per-batch losses (every batch weighted equally).
    average_loss = total_loss / len(train_loader)
    train_hist.append(average_loss)

    # ---- evaluation pass -----------------------------------------------
    model.eval()  # disables dropout
    with torch.inference_mode():
        total_test_loss = 0.0

        for batch_X_test,batch_y_test in test_loader:
            batch_X_test,batch_y_test = batch_X_test.to(device),batch_y_test.to(device)
            test_pred = model(batch_X_test)
            test_loss = loss_fn(test_pred,batch_y_test)

            total_test_loss += test_loss.item()

        average_test_loss = total_test_loss / len(test_loader)
        test_hist.append(average_test_loss)

    print(f"Epoch[{epoch + 1}/{num_epochs}] - Training Loss: {average_loss:.4f}, Test Loss: {average_test_loss}")


In [None]:
# Loss recorded after every epoch, plotted against the 1-based epoch number.
x = np.linspace(1, num_epochs, num=num_epochs)
for curve, curve_label in ((train_hist, "Training loss"), (test_hist, "Validation Loss")):
    plt.plot(x, curve, label=curve_label)
plt.legend()
plt.show()

In [44]:
#MAKING PREDICTIONS


# Define the function for predictions
def predicting(model, data, y_real, scaler):
    """
    Run the model on ``data`` and return predictions and targets in original units.

    Parameters
    ----------
    model : torch.nn.Module
        Trained network. It is switched to evaluation mode as a side effect.
    data : torch.Tensor
        Model inputs. They are moved to the device the model's parameters
        live on, so a CUDA model can be fed CPU tensors.
    y_real : torch.Tensor
        Targets in the scaled space.
    scaler : object with ``inverse_transform(ndarray) -> ndarray``
        The fitted scaler used to produce the scaled data.

    Returns
    -------
    (predicted_data, y_real) : tuple of numpy.ndarray
        Both passed through ``scaler.inverse_transform``.
    """
    # Set the model to evaluation mode (turns dropout off)
    model.eval()

    # Feed the inputs from the same device as the weights; a model with no
    # parameters has no device preference, so the data is left where it is.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        data = data.to(first_param.device)

    # Ensure no gradients are calculated during inference
    with torch.no_grad():
        predicted_data = model(data)

    # Tensor.numpy() only works on CPU tensors that do not track gradients,
    # hence detach().cpu() before converting.
    # Invert scaling process to get the original value ranges
    predicted_data = scaler.inverse_transform(predicted_data.detach().cpu().numpy())
    y_real = scaler.inverse_transform(y_real.detach().cpu().numpy())

    return predicted_data, y_real

# Execute predictions
# NOTE: despite the "_scaled" suffix, the second value returned by predicting()
# has gone through scaler.inverse_transform, so y_train_scaled / y_test_scaled
# hold the targets in original units, like the predictions next to them.
train_prediction, y_train_scaled = predicting(model, X_train, y_train, scaler)
test_prediction, y_test_scaled = predicting(model, X_test, y_test, scaler)


In [None]:
# Sanity check: predictions and targets must line up row for row.
# Both arrays compared here are in original units. `y_train` is still in the
# scaled space and `predictions` is only the last training batch left over
# from the training loop, so neither is a meaningful counterpart.
print(train_prediction.shape, y_train_scaled.shape)
# Floating-point model outputs practically never equal their targets exactly,
# so compare with a tolerance instead of ==.
np.isclose(train_prediction, y_train_scaled)

In [None]:
import math 
from sklearn.metrics import mean_squared_error

def examine_rmse(y_true,y_pred,labels=('Hs','Hmax','Tz','Tp','Direction','SST')):
    """
    Print and return the root-mean-square error of each column.

    Parameters
    ----------
    y_true, y_pred : array-like of shape (n_samples, n_columns)
        Reference and predicted values. Column ``k`` of both is compared
        under the name ``labels[k]``.
    labels : sequence of str, optional
        One name per column to evaluate, in column order. Defaults to the six
        names this notebook reports.

    Returns
    -------
    dict
        ``{label: rmse}`` as Python floats, in ``labels`` order. (The scores
        were previously only printed and then discarded.)
    """
    # float64 keeps the squared-error accumulation accurate for float32 input.
    y_true = np.asarray(y_true, dtype=np.float64)
    y_pred = np.asarray(y_pred, dtype=np.float64)

    scores = {}
    for col, label in enumerate(labels):
        residual = y_true[:, col] - y_pred[:, col]
        scores[label] = float(np.sqrt(np.mean(residual ** 2)))

    for label, score in scores.items():
        # Pad the tag to 14 characters so the colons line up.
        tag = 'RMSE_' + label
        print(f'{tag:<14}: ', score)

    return scores

In [None]:
# Executing the RMSE comparison
# examine_rmse() prints its own report, so it is called directly; wrapping the
# call in print() would also print its return value after every report.
# Arguments follow the (y_true, y_pred) order of the signature. The trailing
# ';' keeps the notebook from echoing the last call's return value.
print('Trainin Data Errors')
examine_rmse(y_train_scaled, train_prediction)
print()
print('Test Data Errors')
examine_rmse(y_test_scaled, test_prediction);

In [None]:
# Predicted vs. real test series for every output column, from row 1300 of
# the test predictions onwards. Titles are listed in column order.
comparison_titles = [
    'Significant Wave Height',
    'Maximum Wave Height',
    'Zero Upcrossing Wave Period',
    'Peak Energy Wave Period',
    'Direction Related to True North',
    'Sea Surface Temperature',
]

plt.figure(figsize=(17,25))
for feature_idx, panel_title in enumerate(comparison_titles):
    # Subplot slots are 1-based; column k goes into slot k + 1 of the 6x2 grid.
    plt.subplot(6, 2, feature_idx + 1)
    plt.plot(test_prediction[1300:, feature_idx], color='red', alpha=0.7, label='prediction')
    plt.plot(y_test_scaled[1300:, feature_idx], color='blue', alpha=0.5, label='real')
    plt.title(panel_title)
    plt.legend()
    plt.grid(axis='y')
plt.show();
