In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras

In [3]:
raw_csv = pd.read_csv('Shifting_Seas_preprocessed.csv')
df = raw_csv.copy()
df.head()

Unnamed: 0,Year,Month,Tropical_group,Islands_group,Red_Sea,Latitude,Longitude,SST (°C),pH Level,None,Low_to_Medium_Bleaching,High,Species Observed,Marine Heatwave
0,2015,1,0,0,1,20.0248,38.4931,29.47,8.107,1,0,0,106,0
1,2015,1,1,0,0,-18.2988,147.7782,29.65,8.004,0,0,1,116,0
2,2015,1,1,0,0,14.9768,-75.0233,28.86,7.947,0,0,1,90,0
3,2015,1,1,0,0,-18.3152,147.6486,28.97,7.995,0,1,0,94,0
4,2015,1,0,1,0,-0.8805,-90.9769,28.6,7.977,1,0,0,110,0


# Scaling the inputs

In [4]:
from sklearn.preprocessing import MinMaxScaler
#scaling only selected columns to boost model accuracy
columns_to_scale = ['SST (°C)', 'pH Level', 'Species Observed', 'Latitude', 'Longitude', 'Year', 'Month']
scaler = MinMaxScaler()

In [5]:
df[columns_to_scale] = scaler.fit_transform(df[columns_to_scale])
#updating the dataset with the scaled data

In [17]:
df

Unnamed: 0,Year,Month,Tropical_group,Islands_group,Red_Sea,Latitude,Longitude,SST (°C),pH Level,None,Low_to_Medium_Bleaching,High,Species Observed,Marine Heatwave
0,0.0,0.0,0,0,1,0.997476,0.639809,0.609195,0.727554,1,0,0,0.444444,0
1,0.0,0.0,1,0,0,0.002187,0.999839,0.628004,0.408669,0,0,1,0.529915,0
2,0.0,0.0,1,0,0,0.866376,0.265840,0.545455,0.232198,0,0,1,0.307692,0
3,0.0,0.0,1,0,0,0.001761,0.999412,0.556949,0.380805,0,1,0,0.341880,0
4,0.0,0.0,0,1,0,0.454551,0.213282,0.518286,0.325077,1,0,0,0.478632,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,1.0,1.0,0,1,0,0.453585,0.213282,0.735632,0.207430,0,1,0,0.461538,1
496,1.0,1.0,1,0,0,0.559810,0.754499,0.820272,0.247678,0,1,0,0.230769,1
497,1.0,1.0,1,0,0,0.868095,0.265853,0.500522,0.560372,0,1,0,0.316239,0
498,1.0,1.0,0,0,1,0.996813,0.639718,0.430512,0.820433,0,1,0,0.461538,0


# Balancing the dataset

In [18]:
df['Marine Heatwave'].value_counts()

Unnamed: 0_level_0,count
Marine Heatwave,Unnamed: 1_level_1
0,427
1,73


In [19]:
#splitting the data into inputs and targets variables
scaled_inputs = df.drop(['Marine Heatwave'], axis=1)
target = df['Marine Heatwave']


In [20]:
from imblearn.over_sampling import SMOTE
from collections import Counter

# Initialize SMOTE
smote = SMOTE(random_state=42)

# Resample inputs and targets
X_resampled, y_resampled = smote.fit_resample(scaled_inputs, target)

In [24]:
inputs = X_resampled
targets = y_resampled

# Shuffling the dataset

In [25]:
shuffled_indices = np.arange(inputs.shape[0])
np.random.shuffle(shuffled_indices)

shuffled_inputs = inputs.iloc[shuffled_indices]
shuffled_targets = targets.iloc[shuffled_indices]

# Train, Test and Validation

In [26]:
# Get total number of samples after shuffling
Samples_count = shuffled_inputs.shape[0]

# Calculate how many samples will go into training set (80% of total), validation set (10% of total) and test set (10% of total)
train_samples_count = int(0.8 * Samples_count)
validation_samples_count = int(0.1 * Samples_count)
test_samples_count = Samples_count - train_samples_count - validation_samples_count

# Slice the shuffled data to get the training inputs and shuffled targets to get the training targets.
train_inputs = shuffled_inputs[:train_samples_count]
train_targets = shuffled_targets[:train_samples_count]

# Slice the next part of shuffled data for validation inputs and the corresponding targets for validation
validation_inputs = shuffled_inputs[train_samples_count:train_samples_count+validation_samples_count]
validation_targets = shuffled_targets[train_samples_count:train_samples_count+validation_samples_count]

# The remaining samples are used for test inputs and corresponding targets for the test inputs
test_inputs = shuffled_inputs[train_samples_count+validation_samples_count:]
test_targets = shuffled_targets[train_samples_count+validation_samples_count:]

# Print the number of positive samples (sum of targets), total samples, and the proportion for training set, validation set and test set
print(np.sum(train_targets), train_samples_count, np.sum(train_targets) / train_samples_count)
print(np.sum(validation_targets), validation_samples_count, np.sum(validation_targets) / validation_samples_count)
print(np.sum(test_targets), test_samples_count, np.sum(test_targets) / test_samples_count)

350 683 0.5124450951683748
43 85 0.5058823529411764
34 86 0.3953488372093023


In [27]:
 np.savez('Shifting_Seas_data_train', inputs=train_inputs, targets=train_targets)
 np.savez('Shifting_Seas_data_validation', inputs=validation_inputs, targets=validation_targets)
 np.savez('Shifting_Seas_data_test', inputs=test_inputs, targets=test_targets)

# Building the NN model

In [29]:
# Load train data
npz = np.load('Shifting_Seas_data_train.npz')
train_inputs = npz['inputs'].astype(float)       # OR use np.float64
train_targets = npz['targets'].astype(int)        # OR use np.int64

# Load validation data
npz = np.load('Shifting_Seas_data_validation.npz')
validation_inputs = npz['inputs'].astype(float)
validation_targets = npz['targets'].astype(int)

# Load test data
npz = np.load('Shifting_Seas_data_test.npz')
test_inputs = npz['inputs'].astype(float)
test_targets = npz['targets'].astype(int)

In [31]:
input_size = 13
output_size = 2
hidden_layer_size = 50
model = tf.keras.Sequential([
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(output_size, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
batch_size = 100
max_epochs = 100
early_stopping = tf.keras.callbacks.EarlyStopping(patience=2)
model.fit(train_inputs, train_targets, batch_size=batch_size, epochs=max_epochs,callbacks=[early_stopping], validation_data=(validation_inputs, validation_targets), verbose=2)


Epoch 1/100
7/7 - 2s - 217ms/step - accuracy: 0.5315 - loss: 0.6890 - val_accuracy: 0.6588 - val_loss: 0.6409
Epoch 2/100
7/7 - 0s - 20ms/step - accuracy: 0.6413 - loss: 0.6471 - val_accuracy: 0.7294 - val_loss: 0.6104
Epoch 3/100
7/7 - 0s - 20ms/step - accuracy: 0.7350 - loss: 0.6227 - val_accuracy: 0.7647 - val_loss: 0.5844
Epoch 4/100
7/7 - 0s - 14ms/step - accuracy: 0.7643 - loss: 0.5951 - val_accuracy: 0.8000 - val_loss: 0.5551
Epoch 5/100
7/7 - 0s - 20ms/step - accuracy: 0.7818 - loss: 0.5665 - val_accuracy: 0.7882 - val_loss: 0.5264
Epoch 6/100
7/7 - 0s - 15ms/step - accuracy: 0.7994 - loss: 0.5382 - val_accuracy: 0.8235 - val_loss: 0.4941
Epoch 7/100
7/7 - 0s - 13ms/step - accuracy: 0.8272 - loss: 0.5061 - val_accuracy: 0.8706 - val_loss: 0.4602
Epoch 8/100
7/7 - 0s - 20ms/step - accuracy: 0.8419 - loss: 0.4718 - val_accuracy: 0.8706 - val_loss: 0.4255
Epoch 9/100
7/7 - 0s - 20ms/step - accuracy: 0.8668 - loss: 0.4352 - val_accuracy: 0.8941 - val_loss: 0.3879
Epoch 10/100
7/7 -

<keras.src.callbacks.history.History at 0x7ab3001f4b10>

# Testing the Model

In [32]:
test_loss, test_accuracy = model.evaluate(test_inputs, test_targets)

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.9572 - loss: 0.1543


In [34]:
print('\nTest loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100.))


Test loss: 0.19. Test accuracy: 95.35%
