In [1]:
# Import library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# import seaborn as sns
# from sklearn.model_selection import train_test_split
# from sklearn.linear_model import LinearRegression
# from sklearn.metrics import mean_squared_error

In [2]:
# Read the 'Rekapan' sheet of the Excel workbook located one directory above.
df = pd.read_excel(io='../data.xlsx', sheet_name='Rekapan')

In [None]:
# Preview the first five rows of the loaded data.
df.head(n=5)

In [None]:
# Preview the last five rows of the loaded data.
df.tail(n=5)

In [None]:
# Basic descriptive statistics.
# A notebook cell only renders its *last* expression automatically, so the
# intermediate results are passed to display() (the IPython built-in available
# in every Jupyter kernel) to make sure they are actually shown.
display(df.describe())

# Number of missing values per column.
display(df.isnull().sum())

# Histograms of the columns to inspect how the values are distributed.
df.hist(bins=50, figsize=(20, 15))
plt.show()

In [None]:
# Keep only the columns used in the analysis.
# .copy() makes data_filtered an independent DataFrame, so the column
# assignments below modify it directly rather than a selection of `df`
# (this is what otherwise triggers pandas' SettingWithCopyWarning).
data_filtered = df[['Tanggal', 'SO', 'TERKIRIM', 'Harga Komoditas Bijih Besi', 'Indeks Produksi Dalam Negeri', 'Data Inflasi', 'Kurs']].copy()

# Shorten the column names (same order as the selection above).
data_filtered.columns = ['Tanggal', 'SO', 'Terkirim', 'Harga Komoditas', 'Indeks Produksi', 'Data Inflasi', 'Kurs']

# Parse the date column into datetime values.
data_filtered['Tanggal'] = pd.to_datetime(data_filtered['Tanggal'])

# Force 'Indeks Produksi' to a numeric dtype; values that cannot be parsed become NaN.
data_filtered['Indeks Produksi'] = pd.to_numeric(data_filtered['Indeks Produksi'], errors='coerce')

# Monthly period (year-month) of every row.
data_filtered['bulan_tahun'] = data_filtered['Tanggal'].dt.to_period('M')

In [None]:
# Derive the calendar year of every row from the 'Tanggal' column.
data_filtered['Year'] = data_filtered['Tanggal'].dt.year

# Yearly recap: total SO per year.
# More columns can be added to this mapping when needed, for example
# 'Terkirim': 'sum', 'Harga Komoditas': 'mean', 'Indeks Produksi': 'mean',
# 'Data Inflasi': 'mean', 'Kurs': 'mean'.
yearly_aggregations = {'SO': 'sum'}
yearly_summary = (
    data_filtered
    .groupby('Year')
    .agg(yearly_aggregations)
    .reset_index()
)

# Show the yearly recap.
print(yearly_summary)

In [None]:
# Smoothing factor for simple exponential smoothing (SES).
alpha = 0.1

# Actual yearly SO as a float array; the forecast is built positionally so the
# computation does not depend on the DataFrame's index labels.
so_actual = yearly_summary['SO'].astype(float).to_numpy()
ses_forecast = np.full(len(so_actual), np.nan)

# SES recurrence: F[0] = A[0], F[i] = alpha * A[i-1] + (1 - alpha) * F[i-1].
# The guard keeps the cell from failing when the summary has no rows.
if len(so_actual) > 0:
    ses_forecast[0] = so_actual[0]
    for i in range(1, len(so_actual)):
        ses_forecast[i] = (alpha * so_actual[i - 1]) + ((1 - alpha) * ses_forecast[i - 1])

# Forecast error per period. The first period only seeds the forecast, so it
# carries no error (left as NaN and therefore ignored by later .mean() calls).
ses_error = so_actual - ses_forecast
ses_error[:1] = np.nan

# Per-period error terms. The columns are named after the metric they feed:
# averaging 'MAD' / 'MSE' / 'MAPE' over the rows yields the actual MAD, MSE
# and MAPE of the forecast.
yearly_summary['SES Forecast'] = ses_forecast
yearly_summary['Error'] = ses_error
yearly_summary['MAD'] = np.abs(ses_error)
yearly_summary['MSE'] = ses_error ** 2

# Absolute percentage error: divide by |actual| so the result is never
# negative; periods with an actual of 0 are undefined and stay NaN.
with np.errstate(divide='ignore', invalid='ignore'):
    abs_pct_error = np.abs(ses_error) / np.abs(so_actual) * 100
yearly_summary['MAPE'] = np.where(so_actual != 0, abs_pct_error, np.nan)

# Show the forecast together with the error terms.
print(yearly_summary)

In [None]:
# Plot the SES forecast against the actual yearly SO.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(yearly_summary['Year'], yearly_summary['SO'], label='Actual SO', marker='o')
ax.plot(yearly_summary['Year'], yearly_summary['SES Forecast'], label='SES Forecast', marker='x')
ax.set_xlabel('Year')
ax.set_ylabel('SO')
ax.tick_params(axis='x', labelrotation=45)
ax.set_title('Actual SO vs SES Forecast (Yearly)')
ax.legend()
fig.tight_layout()
plt.show()

In [None]:
def generate_future_forecasts_yearly(data, periods, alpha):
    """Extend a yearly SES forecast beyond the last observed year.

    Args:
        data: DataFrame with 'Year', 'SO' and 'SES Forecast' columns; only the
            last row is read.
        periods: Number of future years to forecast.
        alpha: SES smoothing factor.

    Returns:
        DataFrame with one row per future year and the columns 'Year',
        'SO' (NaN, since no actual value exists yet) and 'SES Forecast'.
    """
    prev_actual = data['SO'].iloc[-1]
    prev_forecast = data['SES Forecast'].iloc[-1]
    final_year = data['Year'].iloc[-1]

    projected = []
    for _ in range(periods):
        step_forecast = alpha * prev_actual + (1 - alpha) * prev_forecast
        projected.append(step_forecast)
        # No real observation exists for future years, so the forecast itself
        # stands in for the actual value in the next step.
        prev_actual = prev_forecast = step_forecast

    return pd.DataFrame({
        'Year': np.arange(final_year + 1, final_year + periods + 1),
        'SO': np.nan,
        'SES Forecast': projected,
    })

# Forecast the five years that follow the last observed year.
future_periods = 5
future_forecasts = generate_future_forecasts_yearly(
    data=yearly_summary,
    periods=future_periods,
    alpha=alpha,
)

# Append the future rows to the historical ones under a fresh 0..n-1 index.
combined_yearly_data = pd.concat([yearly_summary, future_forecasts], ignore_index=True)

# Plot history and future forecasts together; the dashed red line marks the
# last year that has an actual SO value.
fig, ax = plt.subplots(figsize=(15, 8))
ax.plot(combined_yearly_data['Year'], combined_yearly_data['SO'], label='Actual SO', marker='o')
ax.plot(combined_yearly_data['Year'], combined_yearly_data['SES Forecast'], label='SES Forecast', marker='x')
ax.axvline(x=yearly_summary['Year'].iloc[-1], color='r', linestyle='--', label='Forecast Start')
ax.set_xlabel('Year')
ax.set_ylabel('SO')
ax.set_title('Actual SO vs SES Forecast (Including Future Predictions)')
ax.legend()
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()

# Show the forecasts for the future years.
print("\nFuture Forecasts:")
print(future_forecasts)

# Average the per-period error columns (NaN rows are skipped by .mean()).
overall_mad, overall_mse, overall_mape = (
    yearly_summary[metric].mean() for metric in ('MAD', 'MSE', 'MAPE')
)

print(
    f"\nOverall MAD: {overall_mad:.2f}",
    f"Overall MSE: {overall_mse:.2f}",
    f"Overall MAPE: {overall_mape:.2f}%",
    sep="\n",
)