In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Function to calculate Mean Bias Error (MBE)
def mean_bias_error(y_true, y_pred):
    """Return the mean signed error ``mean(y_true - y_pred)``.

    Both inputs are converted to flat float NumPy arrays before subtracting,
    so the comparison is positional.  Without this, pandas operands are
    aligned by label: two Series with different indexes (or a DataFrame and
    a Series) would silently produce NaN.

    Args:
        y_true: Observed values (array-like, Series, or one-column DataFrame).
        y_pred: Predicted values with the same number of elements.

    Returns:
        The mean of ``y_true - y_pred``; positive when the predictions are,
        on average, below the observations.
    """
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()
    return np.mean(actual - predicted)

# Function to calculate Mean Absolute Percentage Error (MAPE)
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Both inputs are converted to flat float NumPy arrays first so the
    arithmetic is positional rather than aligned on pandas labels (label
    alignment of differently indexed operands yields NaN).

    Args:
        y_true: Observed values (array-like, Series, or one-column DataFrame).
        y_pred: Predicted values with the same number of elements.

    Returns:
        ``mean(|y_true - y_pred| / |y_true|) * 100``.  Entries where
        ``y_true`` is zero divide by zero and make the result inf or nan.
    """
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()
    return np.mean(np.abs((actual - predicted) / actual)) * 100


def rmse(y_true, y_pred):
    """Return the root mean squared error between observations and predictions.

    The original line was an unfinished stub (no colon, no body) that made
    the whole cell a syntax error.

    Args:
        y_true: Observed values (array-like, Series, or one-column DataFrame).
        y_pred: Predicted values with the same number of elements.

    Returns:
        ``sqrt(mean((y_true - y_pred) ** 2))`` computed positionally.
    """
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()
    return np.sqrt(np.mean((actual - predicted) ** 2))

# Read the CSV file.  A raw string keeps the Windows-style backslash literal
# instead of relying on '\C' being an unrecognised escape sequence.
file_name = r'SeaData\Cold_Tongue_Equatorial Current_s_dbias.csv'
df = pd.read_csv(file_name, parse_dates=['date'], index_col='date')

# Fill in missing data using forward filling
# (DataFrame.ffill replaces the deprecated fillna(method='ffill')).
df.ffill(inplace=True)

# Hold out the last `forecast_steps` observations; everything before them is
# the training window.  The model must never see the held-out rows.
forecast_steps = 10
train_data = df.iloc[:-forecast_steps]
test_data = df.iloc[-forecast_steps:]
# Inspect the data
print(df.head())

# Plot the time series data
plt.figure(figsize=(10, 5))
plt.plot(df)
plt.xlabel('Date')
plt.ylabel('dbiastg')
plt.title('Time Series Data')
plt.show()

# Define the parameter grid for the ARIMA order (p, d, q) search
p_values = range(0, 3)
d_values = range(0, 3)
q_values = range(0, 3)

# Perform the grid search on the training window only, keeping the order
# with the lowest AIC.
best_aic = np.inf
best_params = None

for p in p_values:
    for d in d_values:
        for q in q_values:
            try:
                candidate = ARIMA(train_data['dbiastg'], order=(p, d, q)).fit()
            except Exception as exc:
                # Some orders fail to estimate; report and move on instead of
                # silently swallowing every error (including Ctrl-C).
                print(f"Skipping ARIMA order {(p, d, q)}: {exc}")
                continue
            if candidate.aic < best_aic:
                best_aic = candidate.aic
                best_params = (p, d, q)

if best_params is None:
    raise RuntimeError("No ARIMA order in the search grid could be fitted.")

print(f"Best ARIMA parameters: {best_params}")

# Fit the ARIMA model on the training window
model = ARIMA(train_data['dbiastg'], order=best_params)
results = model.fit()

# Summarize the model
print(results.summary())

# Forecast the held-out window (the steps immediately after the training data)
forecast = results.forecast(steps=forecast_steps)
print(forecast)

# Positional view of the forecast so plotting and metrics do not depend on
# whatever index statsmodels attached to it.
forecast_values = np.asarray(forecast)

# Plot the original data and the forecast over the held-out dates
plt.figure(figsize=(10, 5))
plt.plot(df.index[-100:], df['dbiastg'].iloc[-100:], label='Original Data')
plt.plot(test_data.index, forecast_values, label='Forecast', linestyle='--', color='red')
plt.xlabel('Date')
plt.ylabel('dbiastg')
plt.title('Time Series Data with ARIMA Forecast')
plt.legend()
plt.show()

# Extract 'dbiastg' values from the test_data DataFrame
test_data_values = test_data['dbiastg'].values

# Ensure the lengths of the arrays are equal (explicit raise: asserts are
# stripped under `python -O`).
if len(test_data_values) != len(forecast_values):
    raise ValueError("The lengths of test_data_values and forecast arrays do not match.")

# Calculate performance metrics
rmse = np.sqrt(mean_squared_error(test_data_values, forecast_values))
mae = mean_absolute_error(test_data_values, forecast_values)
mbe = mean_bias_error(test_data_values, forecast_values)
mape = mean_absolute_percentage_error(test_data_values, forecast_values)

print(f"RMSE: {rmse}")
print(f"MAE: {mae}")
print(f"MBE: {mbe}")
print(f"MAPE: {mape}")

In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error
from pmdarima.arima import auto_arima

# Function to calculate Mean Bias Error (MBE)
def mean_bias_error(y_true, y_pred):
    """Return the mean signed error ``mean(y_true - y_pred)``.

    Inputs are flattened to float NumPy arrays so the subtraction is done
    element by element in order.  Subtracting pandas objects directly aligns
    them by label, which yields NaN when the indexes differ or when one
    operand is a DataFrame and the other a Series.

    Args:
        y_true: Observed values (array-like, Series, or one-column DataFrame).
        y_pred: Predicted values with the same number of elements.

    Returns:
        The mean of ``y_true - y_pred``; positive when the predictions are,
        on average, below the observations.
    """
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()
    return np.mean(actual - predicted)

# Function to calculate Mean Absolute Percentage Error (MAPE)
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Inputs are flattened to float NumPy arrays so the arithmetic is
    positional; pandas label alignment of differently indexed operands
    would otherwise produce NaN.

    Args:
        y_true: Observed values (array-like, Series, or one-column DataFrame).
        y_pred: Predicted values with the same number of elements.

    Returns:
        ``mean(|y_true - y_pred| / |y_true|) * 100``.  Entries where
        ``y_true`` is zero divide by zero and make the result inf or nan.
    """
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()
    return np.mean(np.abs((actual - predicted) / actual)) * 100

# Read the CSV file.  Raw strings keep every backslash literal; the previous
# mix of '\\' and bare '\' relied on unrecognised escape sequences.
# Alternative datasets (uncomment exactly one):
# file_name = r'SeaData\Cold_Tongue_Equatorial Current_s_dbias.csv'
# file_name = r'SeaData\El Nino Indicator Area_s dbias.csv'
# file_name = r'SeaData\Kuroshio Current_s dbias.csv'
# file_name = r'SeaData\North Atlantic Current_s dbias.csv'
# file_name = r'SeaData\Pacific Ocean Warm Pool_s dbias.csv'
# file_name = r'SeaData\bias\冷舌區域赤道冷流(110W,1S).csv'
# file_name = r'SeaData\bias\聖嬰指標區域(150W,0N).csv'
# file_name = r'SeaData\bias\黑潮洋流(135E,21.8N).csv'
# file_name = r'SeaData\bias\太平洋暖池區域(145E,6.6N).csv'
file_name = r'SeaData\bias\大西洋暖流(50W,31.3N).csv'

print(file_name)
df = pd.read_csv(file_name, parse_dates=['date'], index_col='date')

# Fill in missing data using forward filling
# (DataFrame.ffill replaces the deprecated fillna(method='ffill')).
df.ffill(inplace=True)

# Hold out the last `forecast_steps` observations for testing; a single
# constant keeps the split size and the forecast horizon in sync.
forecast_steps = 100
train_data = df.iloc[:-forecast_steps]
test_data = df.iloc[-forecast_steps:]

# Plot the time series data
plt.figure(figsize=(10, 5))
plt.plot(df)
plt.xlabel('Date')
plt.ylabel('biastg')
plt.title('Time Series Data')
plt.show()

# Perform auto_arima search on the training window
model = auto_arima(train_data['biastg'], seasonal=False, stepwise=True, suppress_warnings=True, trace=True)
best_params = model.order

print(f"Best ARIMA parameters: {best_params}")

# Fit the ARIMA model with the selected order
model = ARIMA(train_data['biastg'], order=best_params)
results = model.fit()

# Summarize the model
print(results.summary())

# Forecast the held-out window
forecast = results.forecast(steps=forecast_steps)
print(forecast)

# Extract 'biastg' values from the test_data DataFrame
test_data_values = test_data['biastg'].values

# Positional view of the forecast so the metrics never align on index labels
forecast_values = np.asarray(forecast)

# Ensure the lengths of the arrays are equal (explicit raise: asserts are
# stripped under `python -O`).
if len(test_data_values) != len(forecast_values):
    raise ValueError("The lengths of test_data_values and forecast arrays do not match.")

# Calculate performance metrics
rmse = np.sqrt(mean_squared_error(test_data_values, forecast_values))
mae = mean_absolute_error(test_data_values, forecast_values)
mbe = mean_bias_error(test_data_values, forecast_values)
mape = mean_absolute_percentage_error(test_data_values, forecast_values)

print(f"RMSE: {rmse}")
print(f"MAE: {mae}")
print(f"MBE: {mbe}")
print(f"MAPE: {mape}")

# Plot the predicted vs actual values
# plt.figure(figsize=(10, 5))
# plt.plot(test_data.index, test_data['biastg'], label='Actual', marker='o')
# plt.plot(test_data.index, forecast, label='Predicted', marker='x', color='red')
# plt.xlabel('Date')
# plt.ylabel('biastg')
# plt.title('Predicted vs Actual Values')
# plt.legend()
# plt.show()


In [None]:
# Cross-validation loop
from sklearn.model_selection import TimeSeriesSplit

# NOTE(review): `tscv` is not created in any visible cell.  Fall back to
# scikit-learn's default 5-split expanding-window splitter only when no
# earlier cell has defined one — confirm the intended number of splits.
if 'tscv' not in globals():
    tscv = TimeSeriesSplit(n_splits=5)

# Start from empty score lists so re-running the cell does not accumulate
# results from a previous run (they were never initialised before).
rmse_scores = []
mae_scores = []
mbe_scores = []
mape_scores = []

# ARIMA needs a univariate series, so the previous ARIMA(DataFrame) call only
# worked when df had exactly one column; take that column explicitly.
# assumes the series to model is the first column of df — TODO confirm
series = df.iloc[:, 0]

for train_index, test_index in tscv.split(series):
    train_data, test_data = series.iloc[train_index], series.iloc[test_index]

    # Print train_index and test_index
    print(train_index)
    print(test_index)

    # Fit the ARIMA model (order chosen in an earlier cell)
    model = ARIMA(train_data, order=best_params)
    results = model.fit()

    # Forecast as many steps as there are test observations in this fold
    forecast_steps = len(test_data)
    forecast = results.forecast(steps=forecast_steps)

    # Compare plain arrays: passing a DataFrame and a Series made the
    # subtraction in the MBE/MAPE helpers align on labels and return NaN.
    actual = test_data.to_numpy()
    predicted = np.asarray(forecast)

    # Calculate performance metrics
    rmse = np.sqrt(mean_squared_error(actual, predicted))
    mae = mean_absolute_error(actual, predicted)
    mbe = mean_bias_error(actual, predicted)
    mape = mean_absolute_percentage_error(actual, predicted)

    # Append performance metrics
    rmse_scores.append(rmse)
    mae_scores.append(mae)
    mbe_scores.append(mbe)
    mape_scores.append(mape)

# Print average performance metrics
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Average MAE: {np.mean(mae_scores)}")
print(f"Average MBE: {np.mean(mbe_scores)}")
print(f"Average MAPE: {np.mean(mape_scores)}")


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.seasonal import STL
from sklearn.metrics import mean_squared_error, mean_absolute_error
from pmdarima import auto_arima

# Function to calculate Mean Bias Error (MBE)
def mean_bias_error(y_true, y_pred):
    """Return the mean signed error ``mean(y_true - y_pred)``.

    Inputs are converted to flat float NumPy arrays before subtracting.
    This cell passes a date-indexed Series as ``y_true``; subtracting another
    pandas object with a different index would align by label and give NaN,
    so the comparison is forced to be positional.

    Args:
        y_true: Observed values (array-like, Series, or one-column DataFrame).
        y_pred: Predicted values with the same number of elements.

    Returns:
        The mean of ``y_true - y_pred``; positive when the predictions are,
        on average, below the observations.
    """
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()
    return np.mean(actual - predicted)

# Function to calculate Mean Absolute Percentage Error (MAPE)
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, in percent.

    Inputs are converted to flat float NumPy arrays so the arithmetic is
    positional.  This cell passes a date-indexed Series as ``y_true``; label
    alignment against a differently indexed operand would yield NaN.

    Args:
        y_true: Observed values (array-like, Series, or one-column DataFrame).
        y_pred: Predicted values with the same number of elements.

    Returns:
        ``mean(|y_true - y_pred| / |y_true|) * 100``.  Entries where
        ``y_true`` is zero divide by zero and make the result inf or nan.
    """
    actual = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()
    return np.mean(np.abs((actual - predicted) / actual)) * 100

# STLForecast couples an STL decomposition with a forecasting model and
# extrapolates the seasonal component itself, so no future data is needed.
from statsmodels.tsa.forecasting.stl import STLForecast

# Read the CSV file.  A raw string keeps the Windows-style backslash literal
# instead of relying on '\C' being an unrecognised escape sequence.
file_name = r'SeaData\Cold_Tongue_Equatorial Current_s_dbias.csv'
df = pd.read_csv(file_name, parse_dates=['date'], index_col='date')

# Fill in missing data using forward filling
# (DataFrame.ffill replaces the deprecated fillna(method='ffill')).
df.ffill(inplace=True)

# Split the dataset into training and validation datasets BEFORE decomposing.
# Previously STL was fitted on the full series and the validation window's
# own trend/seasonal values were added back to the forecast, which leaked
# the answers into the prediction and made the metrics meaningless.
forecast_steps = 10
train_data = df.iloc[:-forecast_steps]
valid_data = df.iloc[-forecast_steps:]

# Decompose the training window only and remove its seasonal component
stl = STL(train_data['dbiastg'], seasonal=7)
res = stl.fit()
seasonally_adjusted = train_data['dbiastg'] - res.seasonal

# Select the ARIMA order on the seasonally adjusted training series
model = auto_arima(seasonally_adjusted, suppress_warnings=True, seasonal=False)
print(f"Best ARIMA parameters: {model.order}")

# Fit STL + ARIMA on the training window with the selected order
stl_forecaster = STLForecast(
    train_data['dbiastg'],
    ARIMA,
    model_kwargs={'order': model.order},
    seasonal=7,
)
results = stl_forecaster.fit()

# Summarize the model
print(results.summary())

# Forecast the validation window; the seasonal component is already included
# in the STLForecast output.  Converted to a plain array so later arithmetic
# is positional rather than aligned on index labels.
forecast = np.asarray(results.forecast(steps=forecast_steps))

# Plot the original data and the forecast
plt.figure(figsize=(10, 5))
plt.plot(df.index[-100:], df['dbiastg'].iloc[-100:], label='Original Data')
plt.plot(valid_data.index, forecast, label='Forecast', linestyle='--', color='red')
plt.xlabel('Date')
plt.ylabel('dbiastg')
plt.title('Time Series Data with ARIMA Forecast')
plt.legend()
plt.show()

# Calculate performance metrics on positional arrays
actual = valid_data['dbiastg'].to_numpy()
rmse = np.sqrt(mean_squared_error(actual, forecast))
mae = mean_absolute_error(actual, forecast)
mbe = mean_bias_error(actual, forecast)
mape = mean_absolute_percentage_error(actual, forecast)

print(f"RMSE: {rmse}")
print(f"MAE: {mae}")
print(f"MBE: {mbe}")
print(f"MAPE: {mape}")
