In [1]:
import pandas as pd
import numpy as np
from dateutil.relativedelta import relativedelta

In [2]:
df = pd.read_csv("../preprocessed-data.csv")
df

Unnamed: 0,Date,Value
0,2010-01-01,388.91
1,2010-02-01,390.41
2,2010-03-01,391.37
3,2010-04-01,392.67
4,2010-05-01,393.21
...,...,...
179,2024-12-01,425.40
180,2025-01-01,426.65
181,2025-02-01,427.09
182,2025-03-01,428.15


In [3]:
def evaluateModel(df, predictedColumnName):
    validDf = df.dropna(subset=["Value", predictedColumnName])
    
    if len(validDf) == 0:
        return {
            "mse": np.nan,
            "rmse": np.nan,
            "mae": np.nan,
            "mape": np.nan
        }
    
    actual = validDf["Value"]
    predicted = validDf[predictedColumnName]
    
    mse = np.mean((actual - predicted) ** 2)
    
    rmse = np.sqrt(mse)
    
    mae = np.mean(np.abs(actual - predicted))
    
    nonZeroActual = actual[actual != 0]
    nonZeroPredicted = predicted[actual != 0]
    
    if len(nonZeroActual) > 0:
        mape = np.mean(np.abs((nonZeroActual - nonZeroPredicted) / nonZeroActual)) * 100
    else:
        mape = np.nan
    
    return {
        "mse": mse,
        "rmse": rmse,
        "mae": mae,
        "mape": mape
    }

In [4]:
def movingAvg(data, windowSize):
    weights = np.ones(windowSize) / windowSize
    return np.convolve(data, weights, mode='valid')


def ratioToMovingAverage(data, windowSize):
    ma = movingAvg(data, windowSize)
    paddedMa = np.pad(ma, (windowSize-1, 0), 'constant', constant_values=np.nan)
    ratio = data / paddedMa * 100
    return ratio, paddedMa

def seasonalIndex(df, ratio_col):
    df = df.copy()
    df['Month'] = pd.to_datetime(df['Date']).dt.month
    monthly_index = df.groupby('Month')[ratio_col].mean()
    monthly_index = monthly_index / monthly_index.mean() * 100
    return monthly_index


In [5]:
def predictWithRMA(df, windowSize, n_future=12):
    data = df['Value'].values
    ratio, paddedMa = ratioToMovingAverage(data, windowSize)
    df['MovingAvg'] = paddedMa
    df['RMA'] = ratio
    idx_seasonal = seasonalIndex(df, 'RMA')
    lastDate = pd.to_datetime(df['Date'].iloc[-1])
    futureDates = []
    futureValues = []
    futurePreds = []
    for i in range(n_future):
        ma = np.mean(data[-windowSize:])
        pred_month = (lastDate.month + i) % 12
        pred_month = 12 if pred_month == 0 else pred_month
        seasonal = idx_seasonal.loc[pred_month]
        pred = ma * seasonal / 100
        futureDates.append((lastDate + relativedelta(months=i+1)).strftime('%Y-%m-%d'))
        futureValues.append(np.nan)
        futurePreds.append(pred)
        data = np.append(data, pred)
    futureDf = pd.DataFrame({
        'Date': futureDates,
        'Value': futureValues,
        'RMA_Pred': futurePreds
    })
    df['RMA_Pred'] = np.nan
    for i in range(windowSize-1, len(df)):
        month = pd.to_datetime(df.loc[i, 'Date']).month
        seasonal = idx_seasonal.loc[month]
        df.loc[i, 'RMA_Pred'] = df.loc[i, 'MovingAvg'] * seasonal / 100
    extendedDf = pd.concat([df, futureDf], ignore_index=True)
    return extendedDf, idx_seasonal

def execRMA(windowSize, n_future=12):
    global df
    df_local = df.copy()
    resultDf, idx_seasonal = predictWithRMA(df_local, windowSize, n_future)
    print('Indeks Musiman Bulanan (RMA):')
    print(idx_seasonal)
    print('\\nEvaluasi (hanya data historis):')
    print(evaluateModel(resultDf, 'RMA_Pred'))
    print('\\nPreview hasil:')
    print(resultDf.head(20))
    return resultDf


In [6]:
df = execRMA(12, n_future=12)
df

Indeks Musiman Bulanan (RMA):
Month
1     100.066000
2     100.216877
3     100.349023
4     100.666070
5     100.796778
6     100.595594
7     100.101288
8      99.537353
9      99.161473
10     99.176090
11     99.517067
12     99.816385
Name: RMA, dtype: float64
\nEvaluasi (hanya data historis):
{'mse': 1.4477875837222183, 'rmse': 1.2032404513322423, 'mae': 1.1409608998720822, 'mape': 0.27902916269936096}
\nPreview hasil:
          Date   Value   MovingAvg         RMA    RMA_Pred
0   2010-01-01  388.91         NaN         NaN         NaN
1   2010-02-01  390.41         NaN         NaN         NaN
2   2010-03-01  391.37         NaN         NaN         NaN
3   2010-04-01  392.67         NaN         NaN         NaN
4   2010-05-01  393.21         NaN         NaN         NaN
5   2010-06-01  392.38         NaN         NaN         NaN
6   2010-07-01  390.41         NaN         NaN         NaN
7   2010-08-01  388.54         NaN         NaN         NaN
8   2010-09-01  387.03         NaN      

Unnamed: 0,Date,Value,MovingAvg,RMA,RMA_Pred
0,2010-01-01,388.91,,,
1,2010-02-01,390.41,,,
2,2010-03-01,391.37,,,
3,2010-04-01,392.67,,,
4,2010-05-01,393.21,,,
...,...,...,...,...,...
191,2025-12-01,,,,424.650464
192,2026-01-01,,,,425.865341
193,2026-02-01,,,,426.864888
194,2026-03-01,,,,427.489706


In [7]:
# Export it
outPath = "D:\KULIAH\KULIAH Y2\PCD\Program\statistika-final-proj/ratio-moving-avg-data.csv"
df.to_csv(outPath, index=False)
df

  outPath = "D:\KULIAH\KULIAH Y2\PCD\Program\statistika-final-proj/ratio-moving-avg-data.csv"


Unnamed: 0,Date,Value,MovingAvg,RMA,RMA_Pred
0,2010-01-01,388.91,,,
1,2010-02-01,390.41,,,
2,2010-03-01,391.37,,,
3,2010-04-01,392.67,,,
4,2010-05-01,393.21,,,
...,...,...,...,...,...
191,2025-12-01,,,,424.650464
192,2026-01-01,,,,425.865341
193,2026-02-01,,,,426.864888
194,2026-03-01,,,,427.489706
