### Imports for the ipynb

In [1]:
import numpy as np
import pandas as pd
import transformations
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam

2024-11-03 23:45:02.223477: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Data pre-processing

In [2]:

buoy_data = pd.read_csv('buoyData_11-3-24.csv')

# Convert `datetime` column to datetime format
buoy_data['datetime'] = pd.to_datetime(buoy_data['datetime'])

# Interpolate missing numeric values
numeric_columns = buoy_data.select_dtypes(include=['float64', 'int64']).columns
buoy_data[numeric_columns] = buoy_data[numeric_columns].interpolate()

# Drop column with all missing values
buoy_data = buoy_data.drop(['DEWP'], axis=1)

# Encode categorical columns using LabelEncoder
categorical_columns = ['SwD', 'WWD', 'Steepness']
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    buoy_data[col] = le.fit_transform(buoy_data[col].astype(str))
    label_encoders[col] = le

# Extract time-of-year features and apply sine/cosine transformations
buoy_data['month'] = buoy_data['datetime'].dt.month
buoy_data['day_of_year'] = buoy_data['datetime'].dt.dayofyear

# Sine/Cosine transformations for cyclical encoding
buoy_data['month_sin'] = np.sin(2 * np.pi * buoy_data['month'] / 12)
buoy_data['month_cos'] = np.cos(2 * np.pi * buoy_data['month'] / 12)
buoy_data['day_sin'] = np.sin(2 * np.pi * buoy_data['day_of_year'] / 365)
buoy_data['day_cos'] = np.cos(2 * np.pi * buoy_data['day_of_year'] / 365)

# Drop columns that are not needed anymore
buoy_data = buoy_data.drop(['datetime', 'month', 'day_of_year'], axis=1)

# Normalize all numeric features using MinMaxScaler
scaler = MinMaxScaler()
numeric_features = buoy_data.select_dtypes(include=['float64', 'int64']).columns
buoy_data[numeric_features] = scaler.fit_transform(buoy_data[numeric_features])


#Uncomment below to see the data sent to the model in a csv
#buoy_data.to_csv('processed_buoy_data.csv', index=False)

print(buoy_data.head())
print(buoy_data.shape)

       WVHT       SwH       SwP       WWH       WWP     SwD   WWD  Steepness  \
0  0.461538  0.621622  0.679389  0.142857  0.177419  0.4375  0.00   0.666667   
1  0.615385  0.729730  0.679389  0.178571  0.209677  0.8750  0.00   0.666667   
2  0.538462  0.675676  0.603053  0.178571  0.338710  0.8750  0.25   0.666667   
3  0.538462  0.675676  0.679389  0.142857  0.209677  0.8750  0.00   0.666667   
4  0.769231  0.837838  0.679389  0.142857  0.225806  0.4375  0.00   0.666667   

        APD       MWD  ...      WSPD       GST       DPD      PRES      ATMP  \
0  0.517241  0.871866  ...  0.416667  0.428571  0.692308  0.420168  0.804878   
1  0.655172  0.832869  ...  0.500000  0.428571  0.666667  0.411765  0.780488   
2  0.586207  0.832869  ...  0.500000  0.500000  0.641026  0.436975  0.780488   
3  0.603448  0.832869  ...  0.416667  0.428571  0.692308  0.453782  0.756098   
4  0.844828  0.860724  ...  0.500000  0.428571  0.692308  0.478992  0.780488   

       WTMP  month_sin  month_cos   da

### Model building

In [None]:
# 1. Define feature columns 
feature_columns = buoy_data.columns.drop('WVHT')

# 2. Scale data
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(buoy_data[feature_columns])

# 3. Prepare Sequences for Training

sequence_length = 48  #Need to adjust this

# Function to create sequences
def create_sequences(data, target, sequence_length):
    X, y = [], []
    for i in range(len(data) - sequence_length):
        X.append(data[i:i + sequence_length])
        y.append(target[i + sequence_length])
    return np.array(X), np.array(y)

# Create sequences
X, y = create_sequences(scaled_data, buoy_data['WVHT'].values, sequence_length)

# Split data into train and test sets
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

# 4. Build the LSTM Model
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
    LSTM(50),
    Dense(1)
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

# 5. Train the Model
history = model.fit(X_train, y_train, epochs=10, batch_size=16, validation_split=0.2)

# 6. Evaluate the Model
loss = model.evaluate(X_test, y_test)
print("Test Loss:", loss)

### Post-processing


In [None]:
#Transit time and transformations
# Calculate transit time and adjusted wave height at Pipeline
#buoy_data['transit_time'] = buoy_data['SwP'].apply(transformations.calculate_transit_time)
#buoy_data['pipeline_wave_height'] = transformations.predict_pipeline_wave_height_vectorized(buoy_data['WVHT'], buoy_data['SwP'], buoy_data['SwD'])