In [14]:
from pathlib import Path
import xarray as xr
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

dir0 = Path('el_nino/')
file_sst = 'sst.mnmean.nc'
file_2 = 'mslp_coarse.nc'

# load the data set with xarray
ds_nino = xr.open_dataset(Path(dir0, file_sst))
ds_mslp = xr.open_dataset(Path(dir0, file_2))

# define 3.4 region
lat_min, lat_max = -5.5, 5.5
lon_min, lon_max = 190, 240

# Interpolating to get rid of the nan-values
ds_nino = ds_nino.interpolate_na(dim='lon')
ds_mslp = ds_mslp.interpolate_na(dim='lon')

# Select the region
ds_region_nino = ds_nino.where((ds_nino.lat >= lat_min) & (ds_nino.lat <= lat_max) & 
                               (ds_nino.lon >= lon_min) & (ds_nino.lon <= lon_max), drop=True)
ds_region_mslp = ds_mslp.where((ds_mslp.latitude >= lat_min) & (ds_mslp.latitude <= lat_max) & 
                               (ds_mslp.longitude >= lon_min) & (ds_mslp.longitude <= lon_max), drop=True)

In [15]:
# Extracting the labels from 01/1982 to 05/2021
# -2 = Strong La Nina
# -1 = La Nina
# 0 = Nothing
# 1 = El Nino
# 2 = Strong El Nino

pred_2023 = {}

# Initialisation
start_date_y = pd.Timestamp(year = 1982, month = 1, day = 1)
end_date_y = pd.Timestamp(year = 2021, month = 5, day = 1)
current_date = start_date_y

# Mean temperature in the region over all the years
big_mean = float(ds_region_nino.mean()['sst'])

ys = []

while current_date <= end_date_y:
    # print(current_date)

    # Create Timestamps for previous, current, and next months
    current_month = current_date
    prev_month = current_month - pd.DateOffset(months = 1)
    next_month = current_month + pd.DateOffset(months = 1)

    # Get data for each month
    ds_prev_month = ds_region_nino.sel(time = slice(prev_month, prev_month))
    ds_curr_month = ds_region_nino.sel(time = slice(current_month, current_month))
    ds_next_month = ds_region_nino.sel(time = slice(next_month, next_month))

    # Merge the three datasets
    merged_dataset = xr.concat([ds_prev_month, ds_curr_month, ds_next_month], dim='time')

    # Calculate the average sea surface temperature along the time dimension
    sst_anom = float(merged_dataset['sst'].mean()) - big_mean
    # print(current_date, ': ', sst_anom)
    cases = [
        (sst_anom >= 0.5),
        (sst_anom < 0.5) & (sst_anom > -0.5),
        (sst_anom <= -0.5),
    ]
    conditions = [1, 0, -1]
    res = np.select(cases, conditions, 0)

    ys.append(res)

    # Increment to the first day of the next month
    current_date += pd.DateOffset(months = 1)

# Convert the list to a numpy array
ys_np = np.array(ys)

In [16]:
n_month = 1

# Dataset to predict n_month in advance using 1 year of data
start_date_X = start_date_y - pd.DateOffset(years = 1) - pd.DateOffset(months = n_month - 1)
end_date_X = end_date_y - pd.DateOffset(years = 1) - pd.DateOffset(months = n_month - 1)
current_date = start_date_X

xs_np = {}
xs_np_pred ={}

while current_date <= end_date_X:
    
    start_variable = current_date
    end_variable = current_date + pd.DateOffset(years = 1) - pd.DateOffset(months = 1)
    # print(start_variable, ' => ', end_variable)

    # Selecting the data for the one-year interval
    interval_data = ds_mslp.sel(time=slice(start_variable, end_variable))

    # Formatting the interval data
    numpy_array = interval_data['msl'].to_numpy()
    flattened_data = numpy_array.flatten()
    xs_np[str(end_variable.year) + "/" + str(end_variable.month + n_month)] = flattened_data
    # print(str(end_variable.year) + "/" + str(end_variable.month + n_month))

    # Increment to the first day of the next month
    current_date += pd.DateOffset(months = 1)

xs_np = np.array(list(xs_np.values()))

In [5]:
print(xs_np.shape)

(473, 781920)


In [19]:
# Neural Network

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import regularizers
from tensorflow.keras.utils import to_categorical

# Scale the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(xs_np)

# Apply PCA
pca = PCA(n_components =  0.95)  # Keep % of variance
X_pca = pca.fit_transform(X_scaled)

# Splitting the code in Test and Training set
X_train, X_test, y_train, y_test = train_test_split(X_pca, ys_np, test_size = 0.2, random_state = 42)

# One-hot encode the labels
y_train = to_categorical(y_train, num_classes = 3)
y_test = to_categorical(y_test, num_classes = 3)

# To be used in the neural network
rows, cols = X_train.shape
print(cols)

# L1 Regularization factor
l1_lambda = 0.05

# Create a Sequential model for regression
model = Sequential()

# Shape of the neural network
model.add(Dense(24, activation='relu', input_shape=(cols,), kernel_regularizer = regularizers.l1(l1_lambda)))
model.add(Dense(12, activation='relu', kernel_regularizer = regularizers.l1(l1_lambda)))
model.add(Dense(3, activation='linear', kernel_regularizer = regularizers.l1(l1_lambda)))

model.compile(optimizer = 'adam', 
              loss = 'categorical_crossentropy',
              metrics = ['accuracy'])

history = model.fit(X_train, y_train, epochs = 15, validation_data=(X_test, y_test))

# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
train_loss, train_accuracy = model.evaluate(X_train, y_train)

print(f"Train loss: {train_loss}")
print(f"Test loss: {test_loss}")
print(f"Train Accuracy: {train_accuracy}")
print(f"Test Accuracy: {test_accuracy}")

254
Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "C:\Users\adete\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3550, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\adete\AppData\Local\Temp\ipykernel_22940\557627029.py", line 39, in <module>
    model.compile(LinearDiscriminantAnalysis(),
  File "c:\Users\adete\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "c:\Users\adete\AppData\Local\Programs\Python\Python311\Lib\site-packages\keras\src\utils\traceback_utils.py", line 67, in error_handler
    filtered_tb = _process_traceback_frames(e.__traceback__)
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: Model.compile() got multiple values for argument 'optimizer'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  Fi