In [None]:
# Import required libraries
import pandas as pd
import numpy as np
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from datetime import datetime
import matplotlib.pyplot as plt
import warnings
from sklearn.metrics import mean_absolute_error
# double and triple exponential smoothing
from statsmodels.tsa.holtwinters import ExponentialSmoothing
# Seasonality decomposition
from statsmodels.tsa.seasonal import seasonal_decompose

# Suppress warnings for cleaner outputs
warnings.filterwarnings("ignore")

# Ensure plots are displayed inline
%matplotlib inline

from itertools import product

### Prepare the Dataset

In [None]:
# Read the palm-oil workbook, parse the 'Date' column, and use it as a
# chronologically sorted index.
df = (
    pd.read_excel('Dataset_palm_oil_forecasting.xlsx')
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
    .sort_index()
)

# Train/test boundaries. The test window starts one month after the last
# training date and its end date is 11 months after that start.
train_end_date = '2018-12-01'
test_start_ts = pd.to_datetime(train_end_date) + pd.DateOffset(months=1)
test_end_ts = test_start_ts + pd.DateOffset(months=11)
testing_start_date = test_start_ts.strftime('%Y-%m-%d')
testing_end_date = test_end_ts.strftime('%Y-%m-%d')

# Label-based slices on the datetime index (both endpoints inclusive).
train_data = df.loc[:train_end_date]
test_data = df.loc[testing_start_date:testing_end_date]

# Area sheet, indexed by its 'Date' column; used later for production totals.
area = pd.read_excel('Oil_Production_Raw_Data.xlsx', sheet_name='Area').set_index('Date')

In [109]:
# Additive decomposition of the full Palm_Oil series with a period of 12
# observations, followed by the decomposition's own plot.
palm_oil_series = df['Palm_Oil']
decomposition = seasonal_decompose(palm_oil_series, model='additive', period=12)

decomposition_fig = decomposition.plot()
plt.show()

In [None]:
# Grid-search the trend/seasonal component types of a Holt-Winters model.
# Each combination is fitted on the training series, forecast 12 steps ahead,
# and scored by MAPE against the test series; the lowest MAPE wins.
#
# NOTE(review): the configuration is selected on the same test window that is
# later used for reporting, so the reported error is likely optimistic.
# Consider a separate validation window.
trend_options = ['additive', 'multiplicative', None]
seasonal_options = ['additive', 'multiplicative', None]
best_mape = float('inf')
best_params = None
failed_configs = []  # (trend, seasonal, reason) for every combination that could not be scored

for trend, seasonal in product(trend_options, seasonal_options):
    try:
        model = ExponentialSmoothing(
            train_data['Palm_Oil'],
            trend=trend,
            seasonal=seasonal,
            seasonal_periods=12
        )
        hw_model = model.fit()
        forecast = hw_model.forecast(steps=12)
        # Calculate MAPE (the subtraction aligns forecast and actuals on their index)
        mape = np.mean(np.abs((test_data['Palm_Oil'] - forecast) / test_data['Palm_Oil'])) * 100
        # A NaN score means no forecast/actual pair could be compared (e.g. the
        # forecast index does not match the test index); record it rather than
        # letting the comparison below skip it silently.
        if np.isnan(mape):
            raise ValueError("MAPE is NaN - forecast and test data share no comparable values")
        if mape < best_mape:
            best_mape = mape
            best_params = (trend, seasonal)
    except Exception as exc:
        # Some combinations are expected to fail (e.g. multiplicative components
        # on non-positive data). Keep going, but remember why, and let
        # KeyboardInterrupt/SystemExit propagate.
        failed_configs.append((trend, seasonal, f"{type(exc).__name__}: {exc}"))

for failed_trend, failed_seasonal, reason in failed_configs:
    print(f"Skipped trend={failed_trend}, seasonal={failed_seasonal}: {reason}")

if best_params is None:
    print("No parameter combination produced a valid forecast.")
else:
    print(f"Best Parameters: {best_params}, Best MAPE: {best_mape:.2f}%")

In [111]:
# Holt-Winters fit on the training series with additive trend and additive
# seasonality over a 12-observation cycle.
model = ExponentialSmoothing(
    train_data['Palm_Oil'],
    trend='additive',
    seasonal='additive',
    seasonal_periods=12,
)
hw_model = model.fit()

# 12-step-ahead forecast from the end of the training series
forecast = hw_model.forecast(steps=12)

# Overlay the forecast on the actual test-period values
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(test_data.index, test_data['Palm_Oil'], label='Actual Testing Data', color='green')
ax.plot(forecast.index, forecast, label='Forecasted Data', color='black')

ax.set_title("Palm Oil Production: Forecast vs Actual for Testing Year")
ax.set_xlabel("Date")
ax.set_ylabel("Palm Oil Production")
ax.legend(loc='best')
ax.grid()
plt.show()

In [112]:
# Actual and forecast series; arithmetic between them aligns on the index.
actuals = test_data['Palm_Oil']
forecasts = forecast

# Mean absolute percentage error, expressed in percent
absolute_pct_errors = ((actuals - forecasts) / actuals).abs()
mape = np.mean(absolute_pct_errors) * 100

print(f"Mean Absolute Percentage Error (MAPE): {mape:.2f}%")

In [113]:
# Display the 12-step forecast returned by hw_model.forecast
forecast

In [114]:
actual_values = test_data['Palm_Oil']

# Scalars taken from the first row of the area sheet.
# NOTE(review): only row 0 of `area` is used for every forecast month - confirm
# that a single set of area figures is intended.
oer_cpo = area['OER CPO'].iloc[0]
effective_area = area['Mature area'].iloc[0] + 0.75 * area['Immature Area'].iloc[0]

# Forecast and actuals side by side, paired by position (not by index).
comparison_df = pd.DataFrame({
    'Date': forecast.index,
    'ForecastValue': forecast.values,
    'ActualValue': actual_values.values,
})

# Per-row absolute error and absolute percentage error (in percent)
comparison_df['AbsoluteError'] = (comparison_df['ForecastValue'] - comparison_df['ActualValue']).abs()
comparison_df['APE'] = (comparison_df['AbsoluteError'] / comparison_df['ActualValue']) * 100

# Scale by OER CPO, then by mature area plus 75% of immature area
comparison_df['CPO (T/ha) Forecast'] = comparison_df['ForecastValue'] * oer_cpo
comparison_df['CPO (T/ha) Actuals'] = comparison_df['ActualValue'] * oer_cpo
comparison_df['TotalProductionForecast'] = comparison_df['CPO (T/ha) Forecast'] * effective_area
comparison_df['TotalProductionActuals'] = comparison_df['CPO (T/ha) Actuals'] * effective_area

In [115]:
# Aggregate error metrics over all rows of comparison_df
forecast_errors = comparison_df['ForecastValue'] - comparison_df['ActualValue']
mse = np.mean(forecast_errors ** 2)      # mean squared error
rmse = np.sqrt(mse)                      # root of the MSE
mape = np.mean(comparison_df['APE'])     # APE is already in percent

print(f"MSE: {mse:.2f}, RMSE: {rmse:.2f}, MAPE: {mape:.2f}%")

In [116]:
# Run metadata attached to every result row
training_start_date = train_data.index[0].strftime('%Y-%m-%d')
training_end_date = train_end_date
creation_date = pd.Timestamp('today').strftime('%Y-%m-%d')

# Columns of comparison_df that are carried over row by row
row_columns = [
    'Date',
    'ForecastValue',
    'ActualValue',
    'AbsoluteError',
    'APE',
    'TotalProductionForecast',
    'TotalProductionActuals',
]

# One dictionary per forecast date: per-row values plus the shared metadata
# and the current values of rmse/mse.
# NOTE(review): 'MAPE' holds the row's own APE divided by 100 (a fraction),
# not an average over rows - confirm this is the intended meaning.
results = [
    {
        'Date': date.strftime('%Y-%m-%d'),
        'ForecastValue': forecast_value,
        'ActualValue': actual_value,
        'ModelName': 'Holt-Winters',
        'Variables': 'Palm_Oil',
        'ModelParameters': 'trend=additive, seasonal=additive, seasonal_periods=12',
        'TrainingStartDate': training_start_date,
        'TrainingEndDate': training_end_date,
        'TestingStartDate': testing_start_date,
        'TestingEndDate': testing_end_date,
        'RMSE': rmse,
        'MAPE': ape / 100,
        'MSE': mse,
        'AbsoluteError': absolute_error,
        'TotalProductionForecast': total_forecast,
        'TotalProductionActuals': total_actuals,
    }
    for date, forecast_value, actual_value, absolute_error, ape, total_forecast, total_actuals
    in zip(*(comparison_df[column] for column in row_columns))
]

results_df = pd.DataFrame(results)

In [117]:
# Display the per-date results table built from the list of result dictionaries
results_df

In [118]:
# Prepend this run's results to the accumulated results table.
# On a fresh kernel `all_results_df` does not exist yet, so start it from a copy
# of this run's results instead of raising NameError.
# NOTE(review): re-running this cell adds the same rows again, and rows already
# reformatted by the date-formatting cell will carry a different date format -
# run the notebook top to bottom.
if 'all_results_df' in globals():
    all_results_df = pd.concat([results_df, all_results_df], ignore_index=True)
else:
    all_results_df = results_df.copy()
all_results_df

In [121]:
# Render every date column as a 'DD.MM.YYYY' string for display/export.

# Stamp all rows with today's date (ISO string first, reformatted below)
all_results_df['Creation_Date'] = datetime.now().strftime('%Y-%m-%d')

date_columns = [
    'Date',
    'Creation_Date',
    'TrainingStartDate',
    'TrainingEndDate',
    'TestingStartDate',
    'TestingEndDate',
]
for date_column in date_columns:
    parsed_dates = pd.to_datetime(all_results_df[date_column])
    all_results_df[date_column] = parsed_dates.dt.strftime('%d.%m.%Y')

In [None]:
# Display the accumulated results table (the name `all_results` is never
# defined in this notebook; `all_results_df` is the table built above).
all_results_df

In [None]:
# Write the accumulated results table to an Excel workbook in the working directory
all_results_df.to_excel('Holt_Winters_results.xlsx')