In [8]:
from pathlib import Path
import matplotlib.pyplot as plt
import xarray as xr
import numpy as np
import pandas as pd

# Input location and file names
dir0 = Path('el_nino/')
file_sst = 'sst.mnmean.nc'
file_2 = 'mslp_coarse.nc'  # defined here but not read in this cell

# Load the SST data set with xarray
ds_nino = xr.open_dataset(Path(dir0, file_sst))

# Bounding box used for the Niño 3.4 region
lat_min, lat_max = -5.5, 5.5
lon_min, lon_max = 190, 240

# Keep only the grid cells inside the box; drop=True trims the coordinates
# that fall completely outside of it.
in_region = (
    (ds_nino.lat >= lat_min) & (ds_nino.lat <= lat_max)
    & (ds_nino.lon >= lon_min) & (ds_nino.lon <= lon_max)
)
ds_region = ds_nino.where(in_region, drop=True)

# Years classified as El Niño / La Niña; every other year is labelled neutral (0)
elnino_years = [1982, 1986, 1987, 1991, 1994, 1997, 2002, 2004, 2006, 2009, 2014, 2015, 2018]
lanina_years = [1983, 1984, 1988, 1995, 1998, 1999, 2000, 2005, 2007, 2008, 2010, 2016, 2017, 2020, 2021]
years2 = range(1983, 2021)

# Containers filled by the loop below
years_col = []
el_nino_col = []
sst_data = {}

# The calendar year of each time step does not change inside the loop,
# so compute it once instead of once per iteration.
time_years = ds_region['time'].dt.year

for year in years2:
    years_col.append(year)

    # Label: 1 = El Niño year, -1 = La Niña year, 0 = neither
    if year in elnino_years:
        el_nino_col.append(1)
    elif year in lanina_years:
        el_nino_col.append(-1)
    else:
        el_nino_col.append(0)

    # Features for `year` are the regional SST fields of the PREVIOUS calendar
    # year (year - 1), flattened into a single 1-D vector.
    filtered_data = ds_region.where(time_years == (year - 1), drop=True)
    numpy_array = filtered_data['sst'].to_numpy()
    flattened_data = numpy_array.flatten()
    sst_data[str(year)] = flattened_data

# Every year must contribute a non-empty vector of the same length; otherwise
# the stacked array would be ragged (object dtype or an opaque NumPy error).
feature_lengths = {key: values.size for key, values in sst_data.items()}
distinct_lengths = set(feature_lengths.values())
if len(distinct_lengths) > 1 or 0 in distinct_lengths:
    raise ValueError(
        "Inconsistent or empty SST feature vectors per label year "
        f"(label year -> number of values): {feature_lengths}"
    )

# Convert the dictionary to a 2D NumPy array: one row per year in years2
sst_array = np.array(list(sst_data.values()))

# Convert el_nino_col to a NumPy array aligned row-by-row with sst_array
labels = np.array(el_nino_col)

print(sst_array.shape)
print(labels.shape)



(38, 7200)
(38,)


In [9]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.linear_model import Lasso
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis


# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    sst_array,
    labels,
    test_size=0.2,
    random_state=42,
)

# Standardise the features using statistics learned from the training split only
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Reduce dimensionality with PCA, keeping enough components for 95% of the variance.
# From here on X_train / X_test are rebound to the PCA-transformed data.
pca = PCA(n_components=0.95)
X_train = pca.fit_transform(X_train_scaled)
X_test = pca.transform(X_test_scaled)

# Fit LDA on the PCA-transformed training data and project both splits.
# NOTE(review): X_train_lda / X_test_lda are not consumed by the SVM in this
# cell (it is trained on the PCA output) — confirm whether that is intended.
lda = LinearDiscriminantAnalysis().fit(X_train, y_train)
X_train_lda = lda.transform(X_train)
X_test_lda = lda.transform(X_test)

# Build an RBF-kernel SVM and train it on the PCA-transformed training data
svm_model = SVC(kernel='rbf').fit(X_train, y_train)

# Predict the labels of the held-out samples
y_pred = svm_model.predict(X_test)

# Side-by-side table of true and predicted labels
comparison_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(comparison_df)

# Fraction of held-out samples classified correctly
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy}")

   Actual  Predicted
0      -1         -1
1       0          1
2       1          1
3       0          1
4       0          1
5       1          0
6       0         -1
7      -1         -1
Model Accuracy: 0.375
