In [48]:
# Import library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# import seaborn as sns
# from sklearn.model_selection import train_test_split
# from sklearn.linear_model import LinearRegression
# from sklearn.metrics import mean_squared_error
# from statsmodels.tsa.holtwinters import SimpleExpSmoothing

In [49]:
# Read the 'Rekapan' worksheet of the Excel workbook located one directory up.
df = pd.read_excel(io='../data.xlsx', sheet_name='Rekapan')

In [None]:
# Preview the first five rows of the loaded sheet.
df.head(n=5)

In [None]:
# Preview the last five rows of the loaded sheet.
df.tail(n=5)

In [None]:
# Basic descriptive statistics.
# A notebook only auto-renders the LAST expression of a cell, so intermediate
# results must be passed to display() (a builtin inside Jupyter/IPython kernels)
# or they are computed and silently thrown away.
display(df.describe())

# Number of missing values per column.
display(df.isnull().sum())

# Histograms of the DataFrame's columns to inspect their distributions.
df.hist(bins=50, figsize=(20, 15))
plt.show()

In [None]:
# Columns used by the analysis, mapped from their names in the source sheet to
# the shorter names used in the rest of the notebook (order is preserved).
column_renames = {
    'Tanggal': 'Tanggal',
    'SO': 'SO',
    'TERKIRIM': 'Terkirim',
    'Harga Komoditas Bijih Besi': 'Harga Komoditas',
    'Indeks Produksi Dalam Negeri': 'Indeks Produksi',
    'Data Inflasi': 'Data Inflasi',
    'Kurs': 'Kurs',
}

# Select and rename the relevant columns. The explicit .copy() makes
# data_filtered an independent frame, so the column assignments below neither
# raise SettingWithCopyWarning nor risk touching (or failing to touch) `df`.
data_filtered = (
    df[list(column_renames)]
    .rename(columns=column_renames)
    .copy()
)

# Parse the date column into datetime64 values.
data_filtered['Tanggal'] = pd.to_datetime(data_filtered['Tanggal'])

# Force 'Indeks Produksi' to numeric; values that cannot be parsed become NaN.
data_filtered['Indeks Produksi'] = pd.to_numeric(data_filtered['Indeks Produksi'], errors='coerce')

# Monthly period (year-month) of each row, used as the grouping key later on.
data_filtered['bulan_tahun'] = data_filtered['Tanggal'].dt.to_period('M')

In [None]:
# Monthly recap: total SO per 'bulan_tahun' period, with the period kept as a
# regular column rather than as the index.
# More columns can be aggregated by extending the mapping, for example:
#   'Terkirim': 'sum', 'Harga Komoditas': 'mean', 'Indeks Produksi': 'mean',
#   'Data Inflasi': 'mean', 'Kurs': 'mean'
monthly_summary = data_filtered.groupby('bulan_tahun', as_index=False).agg({'SO': 'sum'})

# Display the monthly recap.
monthly_summary

In [None]:
# Smoothing constant for simple exponential smoothing (SES).
alpha = 0.1


def _ses_one_step_forecast(actual, alpha):
    """Return the one-step-ahead SES forecast for every position of ``actual``.

    The recursion is seeded with the first observation and then follows
    ``F[t] = alpha * A[t-1] + (1 - alpha) * F[t-1]``.

    Parameters
    ----------
    actual : array-like of numbers
        Observed values in chronological order.
    alpha : float
        Smoothing constant, expected to lie in the closed interval [0, 1].

    Returns
    -------
    numpy.ndarray
        Float array with the same length as ``actual`` (empty for empty input).

    Raises
    ------
    ValueError
        If ``alpha`` is outside [0, 1].
    """
    if not 0 <= alpha <= 1:
        raise ValueError(f"alpha must be between 0 and 1, got {alpha!r}")

    actual = np.asarray(actual, dtype=float)
    forecast = np.empty_like(actual)
    if actual.size:
        # Seed: the first "forecast" is simply the first observation.
        forecast[0] = actual[0]
        for t in range(1, actual.size):
            forecast[t] = alpha * actual[t - 1] + (1 - alpha) * forecast[t - 1]
    return forecast


so_actual = monthly_summary['SO'].astype(float)

# SES forecast, computed on a plain array instead of row-by-row .loc writes.
monthly_summary['SES Forecast'] = _ses_one_step_forecast(so_actual, alpha)

# Forecast error (actual - forecast). The first row only seeds the recursion,
# so it is not a real forecast and its error is left as NaN.
forecast_error = so_actual - monthly_summary['SES Forecast']
forecast_error.iloc[:1] = np.nan
monthly_summary['Error'] = forecast_error

# NOTE: the three columns below hold the PER-ROW terms of each metric (absolute
# error, squared error, absolute percentage error). Take the column mean to
# obtain the actual MAD / MSE / MAPE of the forecast.
monthly_summary['MAD'] = forecast_error.abs()
monthly_summary['MSE'] = forecast_error ** 2

# Percentage error relative to |SO| so it can never be negative; rows where
# SO == 0 get NaN because the percentage is undefined there.
monthly_summary['MAPE'] = forecast_error.abs() / so_actual.abs().where(so_actual != 0) * 100

# Show the result (last expression, so the notebook renders it as a table).
monthly_summary

In [None]:
# Optional: line chart comparing the monthly SO totals with their SES forecast.
month_labels = monthly_summary['bulan_tahun'].astype(str)

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(month_labels, monthly_summary['SO'], label='Actual SO', marker='o')
ax.plot(month_labels, monthly_summary['SES Forecast'], label='SES Forecast', marker='x')
ax.set_xlabel('Month-Year')
ax.set_ylabel('SO')
ax.set_title('Actual SO vs SES Forecast')
# Tilt the month labels so they do not overlap.
plt.xticks(rotation=45)
ax.legend()
fig.tight_layout()
plt.show()