In [1]:
import pandas as pd
import numpy as np
from dateutil.relativedelta import relativedelta

In [2]:
df = pd.read_csv("../preprocessed-data.csv")
df

Unnamed: 0,Date,Value
0,2010-01-01,388.91
1,2010-02-01,390.41
2,2010-03-01,391.37
3,2010-04-01,392.67
4,2010-05-01,393.21
...,...,...
179,2024-12-01,425.40
180,2025-01-01,426.65
181,2025-02-01,427.09
182,2025-03-01,428.15


In [3]:
def evaluateModel(df, predictedColumnName):
    validDf = df.dropna(subset=["Value", predictedColumnName])
    
    if len(validDf) == 0:
        return {
            "mse": np.nan,
            "rmse": np.nan,
            "mae": np.nan,
            "mape": np.nan
        }
    
    actual = validDf["Value"]
    predicted = validDf[predictedColumnName]
    
    mse = np.mean((actual - predicted) ** 2)
    
    rmse = np.sqrt(mse)
    
    mae = np.mean(np.abs(actual - predicted))
    
    nonZeroActual = actual[actual != 0]
    nonZeroPredicted = predicted[actual != 0]
    
    if len(nonZeroActual) > 0:
        mape = np.mean(np.abs((nonZeroActual - nonZeroPredicted) / nonZeroActual)) * 100
    else:
        mape = np.nan
    
    return {
        "mse": mse,
        "rmse": rmse,
        "mae": mae,
        "mape": mape
    }

In [4]:
def percentageAverageSeasonalIndex(df):
    df = df.copy()
    df['Month'] = pd.to_datetime(df['Date']).dt.month
    overall_avg = df['Value'].mean()
    df['Percentage'] = df['Value'] / overall_avg * 100
    monthly_index = df.groupby('Month')['Percentage'].mean()
    monthly_index = monthly_index / monthly_index.mean() * 100
    return monthly_index, overall_avg


In [5]:
def predictWithPercentageAverage(df, n_future=12):
    idx_seasonal, overall_avg = percentageAverageSeasonalIndex(df)
    lastDate = pd.to_datetime(df['Date'].iloc[-1])
    futureDates = []
    futureValues = []
    futurePreds = []
    for i in range(n_future):
        pred_month = (lastDate.month + i) % 12
        pred_month = 12 if pred_month == 0 else pred_month
        seasonal = idx_seasonal.loc[pred_month]
        pred = overall_avg * seasonal / 100
        futureDates.append((lastDate + relativedelta(months=i+1)).strftime('%Y-%m-%d'))
        futureValues.append(np.nan)
        futurePreds.append(pred)
    futureDf = pd.DataFrame({
        'Date': futureDates,
        'Value': futureValues,
        'PercAvg_Pred': futurePreds
    })
    df['PercAvg_Pred'] = np.nan
    for i in range(len(df)):
        month = pd.to_datetime(df.loc[i, 'Date']).month
        seasonal = idx_seasonal.loc[month]
        df.loc[i, 'PercAvg_Pred'] = overall_avg * seasonal / 100
    extendedDf = pd.concat([df, futureDf], ignore_index=True)
    return extendedDf, idx_seasonal, overall_avg

def execPercentageAverage(n_future=12):
    global df
    df_local = df.copy()
    resultDf, idx_seasonal, overall_avg = predictWithPercentageAverage(df_local, n_future)
    print('Indeks Musiman Bulanan (Percentage Average):')
    print(idx_seasonal)
    print(f'Rata-rata keseluruhan: {overall_avg:.2f}')
    print('\nEvaluasi (hanya data historis):')
    print(evaluateModel(resultDf, 'PercAvg_Pred'))
    print('\nPreview hasil:')
    print(resultDf.head(20))
    return resultDf


In [6]:
df = execPercentageAverage(n_future=12)
df

Indeks Musiman Bulanan (Percentage Average):
Month
1      99.994766
2     100.207281
3     100.392772
4     100.758839
5     100.615483
6     100.462996
7     100.016181
8      99.505324
9      99.176928
10     99.244573
11     99.634881
12     99.989975
Name: Percentage, dtype: float64
Rata-rata keseluruhan: 407.06

Evaluasi (hanya data historis):
{'mse': 119.84747240364968, 'rmse': 10.947487036011948, 'mae': 9.466821931143455, 'mape': 2.3271976322024654}

Preview hasil:
          Date   Value  PercAvg_Pred
0   2010-01-01  388.91    407.037443
1   2010-02-01  390.41    407.902507
2   2010-03-01  391.37    408.657562
3   2010-04-01  392.67    410.147672
4   2010-05-01  393.21    409.564129
5   2010-06-01  392.38    408.943416
6   2010-07-01  390.41    407.124616
7   2010-08-01  388.54    405.045130
8   2010-09-01  387.03    403.708365
9   2010-10-01  387.43    403.983718
10  2010-11-01  388.87    405.572502
11  2010-12-01  389.99    407.017941
12  2011-01-01  391.50    407.037443
13  2

Unnamed: 0,Date,Value,PercAvg_Pred
0,2010-01-01,388.91,407.037443
1,2010-02-01,390.41,407.902507
2,2010-03-01,391.37,408.657562
3,2010-04-01,392.67,410.147672
4,2010-05-01,393.21,409.564129
...,...,...,...
191,2025-12-01,,405.572502
192,2026-01-01,,407.017941
193,2026-02-01,,407.037443
194,2026-03-01,,407.902507


In [7]:
# Export it
outPath = "D:\KULIAH\KULIAH Y2\PCD\Program\statistika-final-proj/percentage-avg-data.csv"
df.to_csv(outPath, index=False)
df

  outPath = "D:\KULIAH\KULIAH Y2\PCD\Program\statistika-final-proj/percentage-avg-data.csv"


Unnamed: 0,Date,Value,PercAvg_Pred
0,2010-01-01,388.91,407.037443
1,2010-02-01,390.41,407.902507
2,2010-03-01,391.37,408.657562
3,2010-04-01,392.67,410.147672
4,2010-05-01,393.21,409.564129
...,...,...,...
191,2025-12-01,,405.572502
192,2026-01-01,,407.017941
193,2026-02-01,,407.037443
194,2026-03-01,,407.902507
