In [1]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

In [2]:
def load_and_process_data(train_path, test_path):
    """Read the train and test workbooks and return their rainfall columns as daily series.

    Each workbook is read with ``pd.read_excel`` and only its
    ``Curah_Hujan_Winsor`` column is kept. The dates are not taken from the
    file: every series gets a freshly generated daily ``DatetimeIndex`` with
    one entry per row, starting on 2020-01-01 for the training workbook and on
    2025-01-01 for the test workbook.

    Args:
        train_path: Path of the training workbook.
        test_path: Path of the test workbook.

    Returns:
        A ``(train_series, actual_jan_mar_2025)`` tuple of ``pd.Series``.
    """
    rainfall_column = "Curah_Hujan_Winsor"
    # (workbook path, first calendar day assigned to its first row)
    sources = ((train_path, '2020-01-01'), (test_path, '2025-01-01'))

    loaded = []
    for workbook, first_day in sources:
        frame = pd.read_excel(workbook)
        rainfall = frame[rainfall_column].values
        daily_index = pd.date_range(start=first_day, periods=len(frame), freq='D')
        loaded.append(pd.Series(rainfall, index=daily_index))

    train_series, actual_jan_mar_2025 = loaded
    return train_series, actual_jan_mar_2025

if __name__ == '__main__':
    # Load both workbooks from the working directory and report the size of
    # each resulting series as a quick sanity check.
    train_data, actual_2025_data = load_and_process_data(
        train_path='data_train_winsor.xlsx',
        test_path='data_test_winsor.xlsx',
    )
    print("Data loaded and processed successfully.")
    print(f"Train data shape: {train_data.shape}")
    print(f"Actual Jan-Mar 2025 data shape: {actual_2025_data.shape}")

Data loaded and processed successfully.
Train data shape: (1827,)
Actual Jan-Mar 2025 data shape: (90,)


In [3]:
def _attach_inferred_freq(series):
    """Return ``series`` with an explicit ``freq`` on its DatetimeIndex when one can be inferred.

    An index parsed from a spreadsheet carries no frequency. Passing such a
    series to statsmodels produces the repeated ``self._init_dates(dates, freq)``
    warning lines seen in the notebook output. Setting the inferred frequency
    up front removes that noise without touching the data. The input series is
    never modified; it is returned unchanged when the index is not a
    ``DatetimeIndex``, already has a frequency, or no frequency can be inferred.
    """
    index = series.index
    if not isinstance(index, pd.DatetimeIndex) or index.freq is not None:
        return series
    try:
        inferred = pd.infer_freq(index)
    except (TypeError, ValueError):
        # infer_freq rejects indexes that are too short to infer from.
        inferred = None
    if inferred is None:
        return series
    series = series.copy()
    series.index = pd.DatetimeIndex(index, freq=inferred)
    return series


def _fit_score_and_plot(label, train_series, test_series, order, color, plot_path, seasonal_order=None):
    """Fit one (S)ARIMA model, score it on the test window and save a comparison plot.

    Args:
        label: Model name used in log lines, the legend and the plot title.
        train_series: Series the model is fitted on.
        test_series: Held-out series; its first and last index values bound
            the prediction window.
        order: ``(p, d, q)`` passed to ``ARIMA``.
        color: Line colour of the prediction curve.
        plot_path: File the comparison figure is saved to.
        seasonal_order: Optional ``(P, D, Q, s)``; omitted from the ``ARIMA``
            call when ``None``.

    Returns:
        ``{'rmse': float, 'predictions': pd.Series}`` on success, or
        ``{'rmse': nan, 'predictions': None}`` when anything in the
        fit/score/plot sequence raises (the error is printed, not propagated).
    """
    start_prediction_date = test_series.index[0]
    end_prediction_date = test_series.index[-1]

    print(f"\nFitting {label} model...")
    try:
        model_kwargs = {'order': order}
        if seasonal_order is not None:
            model_kwargs['seasonal_order'] = seasonal_order
        model_fit = ARIMA(train_series, **model_kwargs).fit()

        predictions = model_fit.predict(start=start_prediction_date, end=end_prediction_date)

        # Score on matching dates rather than by position, so a missing day in
        # the test data does not turn into a length-mismatch error.
        aligned = predictions.reindex(test_series.index)
        rmse = np.sqrt(mean_squared_error(test_series, aligned))
        print(f"{label} - RMSE: {rmse:.3f}")

        fig = plt.figure(figsize=(12, 6))
        try:
            plt.plot(test_series.index, test_series, label='Actual Rainfall')
            plt.plot(predictions.index, predictions, color=color, label=f'{label} Predictions')
            plt.title(f'{label} Predictions vs Actual Rainfall (Jan-Mar 2025)')
            plt.xlabel('Date')
            plt.ylabel('Rainfall')
            plt.legend()
            plt.grid(True)
            plt.tight_layout()
            plt.savefig(plot_path)
        finally:
            # Always release the figure, even if drawing or saving failed.
            plt.close(fig)

        return {'rmse': rmse, 'predictions': predictions}

    except Exception as e:
        # Best effort: one failing configuration must not abort the whole sweep.
        print(f"Error fitting {label}: {e}")
        return {'rmse': np.nan, 'predictions': None}


def run_arima_sarima_models(train_data_path, test_data_path, p_values=(1, 2, 3, 5), d=1, q=1,
                            seasonal_order=(1, 1, 1, 12)):
    """Fit ARIMA and SARIMA models for several AR orders and compare their test RMSE.

    Both workbooks are read with the ``Tanggal`` column as a parsed date index
    and the ``Curah_Hujan_Winsor`` column as the series. For every ``p`` in
    ``p_values`` an ``ARIMA(p, d, q)`` model and a seasonal model with the same
    order plus ``seasonal_order`` are fitted on the training series, used to
    predict the span of the test index, scored with RMSE and plotted to a PNG
    in the working directory. A summary of all RMSE values is printed last.

    NOTE(review): the default seasonal period of 12 is kept from the original
    code. ``load_and_process_data`` indexes these same workbooks at daily
    frequency, so this appears to model a 12-day cycle -- confirm that is
    intended.

    Args:
        train_data_path: Path of the training workbook.
        test_data_path: Path of the test workbook.
        p_values: AR orders to try.
        d: Differencing order shared by all models.
        q: MA order shared by all models.
        seasonal_order: ``(P, D, Q, s)`` used for the seasonal models.

    Returns:
        ``(arima_results, sarima_results)``: two dicts keyed by ``(p, d, q)``
        whose values are ``{'rmse': ..., 'predictions': ...}``. Failed fits
        are recorded with ``rmse=nan`` and ``predictions=None``.
    """
    df_train = pd.read_excel(train_data_path, index_col='Tanggal', parse_dates=True)
    df_test = pd.read_excel(test_data_path, index_col='Tanggal', parse_dates=True)

    train_series = _attach_inferred_freq(df_train['Curah_Hujan_Winsor'])
    test_series = df_test['Curah_Hujan_Winsor']

    # Normalise to a tuple so labels and file names look the same for list input.
    seasonal_order = tuple(seasonal_order)
    seasonal_tag = '_'.join(str(term) for term in seasonal_order)

    # ARIMA
    arima_results = {}
    for p in p_values:
        order = (p, d, q)
        arima_results[order] = _fit_score_and_plot(
            label=f"ARIMA{order}",
            train_series=train_series,
            test_series=test_series,
            order=order,
            color='red',
            plot_path=f'arima_{p}_{d}_{q}_predictions.png',
        )

    # SARIMA
    sarima_results = {}
    for p in p_values:
        order = (p, d, q)
        sarima_results[order] = _fit_score_and_plot(
            label=f"SARIMA{order}{seasonal_order}",
            train_series=train_series,
            test_series=test_series,
            order=order,
            color='green',
            plot_path=f'sarima_{p}_{d}_{q}_{seasonal_tag}_predictions.png',
            seasonal_order=seasonal_order,
        )

    print("\n--- ARIMA Results ---")
    for order, metrics in arima_results.items():
        print(f"ARIMA{order}: RMSE={metrics['rmse']:.3f}")

    print("\n--- SARIMA Results ---")
    for order, metrics in sarima_results.items():
        print(f"SARIMA{order}: RMSE={metrics['rmse']:.3f}")

    return arima_results, sarima_results

if __name__ == '__main__':
    # Run the ARIMA/SARIMA comparison on the workbooks in the working directory.
    run_arima_sarima_models(
        train_data_path='data_train_winsor.xlsx',
        test_data_path='data_test_winsor.xlsx',
    )


Fitting ARIMA(1, 1, 1) model...


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


ARIMA(1, 1, 1) - RMSE: 17.294

Fitting ARIMA(2, 1, 1) model...


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


ARIMA(2, 1, 1) - RMSE: 17.262

Fitting ARIMA(3, 1, 1) model...


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


ARIMA(3, 1, 1) - RMSE: 17.261

Fitting ARIMA(5, 1, 1) model...


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


ARIMA(5, 1, 1) - RMSE: 17.266

Fitting SARIMA(1, 1, 1)(1, 1, 1, 12) model...


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


SARIMA(1, 1, 1)(1, 1, 1, 12) - RMSE: 17.213

Fitting SARIMA(2, 1, 1)(1, 1, 1, 12) model...


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


SARIMA(2, 1, 1)(1, 1, 1, 12) - RMSE: 17.154

Fitting SARIMA(3, 1, 1)(1, 1, 1, 12) model...


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


SARIMA(3, 1, 1)(1, 1, 1, 12) - RMSE: 17.147

Fitting SARIMA(5, 1, 1)(1, 1, 1, 12) model...


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


SARIMA(5, 1, 1)(1, 1, 1, 12) - RMSE: 17.157

--- ARIMA Results ---
ARIMA(1, 1, 1): RMSE=17.294
ARIMA(2, 1, 1): RMSE=17.262
ARIMA(3, 1, 1): RMSE=17.261
ARIMA(5, 1, 1): RMSE=17.266

--- SARIMA Results ---
SARIMA(1, 1, 1): RMSE=17.213
SARIMA(2, 1, 1): RMSE=17.154
SARIMA(3, 1, 1): RMSE=17.147
SARIMA(5, 1, 1): RMSE=17.157


In [4]:
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA

def create_combined_plot(train_data_path, test_data_path, arima_order=(5, 1, 1), sarima_order=(5, 1, 1),
                         seasonal_order=(1, 1, 1, 12)):
    """Plot ARIMA and SARIMA predictions against the actual data for 2025-01-01..2025-03-31.

    Both workbooks are read with the ``Tanggal`` column as a parsed date index
    and the ``Curah_Hujan_Winsor`` column as the series. One non-seasonal and
    one seasonal model are fitted on the training series, each predicts the
    fixed plot window, and the three curves are drawn on a single figure saved
    as ``combined_predictions_jan_mar_2025.png`` in the working directory.

    A model that fails to fit is reported on stdout and replaced by an all-NaN
    float series over the actual data's dates, so the figure is still produced.

    Args:
        train_data_path: Path of the training workbook.
        test_data_path: Path of the test workbook.
        arima_order: ``(p, d, q)`` of the non-seasonal model.
        sarima_order: ``(p, d, q)`` of the seasonal model.
        seasonal_order: ``(P, D, Q, s)`` of the seasonal model.

    Returns:
        ``(arima_predictions, sarima_predictions)`` as ``pd.Series``.
    """
    df_train = pd.read_excel(train_data_path, index_col='Tanggal', parse_dates=True)
    df_test = pd.read_excel(test_data_path, index_col='Tanggal', parse_dates=True)

    train_series = df_train['Curah_Hujan_Winsor']
    test_series = df_test['Curah_Hujan_Winsor']

    # An index parsed from a spreadsheet has no frequency, which is what makes
    # statsmodels emit the `self._init_dates(dates, freq)` warning lines.
    # Set the inferred frequency on a copy so the loaded frame is left untouched.
    train_index = train_series.index
    if isinstance(train_index, pd.DatetimeIndex) and train_index.freq is None:
        try:
            inferred_freq = pd.infer_freq(train_index)
        except (TypeError, ValueError):
            inferred_freq = None
        if inferred_freq is not None:
            train_series = train_series.copy()
            train_series.index = pd.DatetimeIndex(train_index, freq=inferred_freq)

    plot_start_date = pd.to_datetime('2025-01-01')
    plot_end_date = pd.to_datetime('2025-03-31')

    actual_data_plot = test_series.loc[plot_start_date:plot_end_date]

    def _forecast(label, **model_kwargs):
        """Fit one model and predict the plot window; fall back to NaNs on failure."""
        print(f"\nFitting {label} model...")
        try:
            model_fit = ARIMA(train_series, **model_kwargs).fit()
            return model_fit.predict(start=plot_start_date, end=plot_end_date)
        except Exception as e:
            print(f"Error fitting {label}: {e}")
            # Explicit float dtype: a dtype-less empty Series is object-typed,
            # which is not a numeric placeholder for plotting.
            return pd.Series(float('nan'), index=actual_data_plot.index, dtype='float64')

    # ARIMA
    arima_predictions = _forecast(f"ARIMA{arima_order}", order=arima_order)

    # SARIMA
    sarima_predictions = _forecast(
        f"SARIMA{sarima_order}{seasonal_order}",
        order=sarima_order,
        seasonal_order=seasonal_order,
    )

    # Combined
    fig = plt.figure(figsize=(15, 7))
    try:
        plt.plot(actual_data_plot.index, actual_data_plot, label='Data Aktual (Jan-Mar 2025)', color='blue')
        plt.plot(arima_predictions.index, arima_predictions, label='Prediksi ARIMA (Jan-Mar 2025)', color='red', linestyle='--')
        plt.plot(sarima_predictions.index, sarima_predictions, label='Prediksi SARIMA (Jan-Mar 2025)', color='orange', linestyle=':')

        plt.title('Perbandingan Prediksi ARIMA dan SARIMA vs Data Aktual (Jan-Mar 2025)')
        plt.xlabel('Tanggal')
        plt.ylabel('Curah Hujan (mm)')
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.savefig('combined_predictions_jan_mar_2025.png')
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)
    print('Grafik gabungan prediksi disimpan sebagai combined_predictions_jan_mar_2025.png')

    return arima_predictions, sarima_predictions

if __name__ == '__main__':
    # Use the same working-directory-relative workbook names as the earlier
    # cells instead of absolute '/content/...' paths, so the notebook reads one
    # consistent set of files and also runs outside an environment where that
    # directory exists.
    create_combined_plot('data_train_winsor.xlsx', 'data_test_winsor.xlsx')


Fitting ARIMA(5, 1, 1) model...


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)



Fitting SARIMA(5, 1, 1)(1, 1, 1, 12) model...


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Grafik gabungan prediksi disimpan sebagai combined_predictions_jan_mar_2025.png
