In [156]:
import pandas as pd
import numpy as np
from dateutil.relativedelta import relativedelta

In [157]:
# Load the working dataset into the notebook-level `df`; the functions and
# cells further down read this name. The path is relative to the directory
# the notebook is run from.
df = pd.read_csv("../processed-data.csv")
df

Unnamed: 0,Date,Value,MovingAvg,PercAvg_Pred,ExpSmooth
0,2010-01-01,388.91,,,388.91000
1,2010-02-01,390.41,,,389.36000
2,2010-03-01,391.37,,,389.96300
3,2010-04-01,392.67,,,390.77510
4,2010-05-01,393.21,,,391.50557
...,...,...,...,...,...
203,2026-12-01,,,,
204,2027-01-01,,,,
205,2027-02-01,,,,
206,2027-03-01,,,,


In [158]:
def evaluateModel(df, predictedColumnName):
    """Score a prediction column against the observed ``Value`` column.

    Rows in which either ``Value`` or the prediction is missing are ignored.

    Args:
        df: DataFrame containing a ``Value`` column and the prediction column.
        predictedColumnName: Name of the column holding the predictions.

    Returns:
        dict with the keys ``mse``, ``rmse``, ``mae`` and ``mape`` (in
        percent). All four are NaN when no row has both values; ``mape``
        alone is NaN when every remaining observation is zero.
    """
    paired = df.dropna(subset=["Value", predictedColumnName])
    if paired.empty:
        return dict.fromkeys(("mse", "rmse", "mae", "mape"), np.nan)

    observed = paired["Value"]
    forecast = paired[predictedColumnName]
    residual = observed - forecast

    meanSquared = np.mean(residual ** 2)
    meanAbsolute = np.mean(np.abs(residual))

    # MAPE divides by the observation, so zero observations are left out.
    usable = observed != 0
    if usable.any():
        relativeError = (observed[usable] - forecast[usable]) / observed[usable]
        meanAbsPct = np.mean(np.abs(relativeError)) * 100
    else:
        meanAbsPct = np.nan

    return {
        "mse": meanSquared,
        "rmse": np.sqrt(meanSquared),
        "mae": meanAbsolute,
        "mape": meanAbsPct,
    }

In [159]:
def movingAvg(data, windowSize):
    """Unweighted moving average over every complete window of ``data``.

    Args:
        data: 1-D sequence of numbers.
        windowSize: Number of consecutive points per window; must be >= 1.

    Returns:
        1-D float array with ``len(data) - windowSize + 1`` entries, one per
        complete window. Empty when ``windowSize`` exceeds ``len(data)``
        because no complete window exists.

    Raises:
        ValueError: if ``windowSize`` is smaller than 1 or ``data`` is empty.
    """
    values = np.asarray(data, dtype=float)
    if windowSize < 1:
        raise ValueError(f"windowSize must be at least 1, got {windowSize}")
    if values.size == 0:
        raise ValueError("data cannot be empty")
    if windowSize > values.size:
        # np.convolve swaps its operands when the kernel is the longer one and
        # would return windowSize - len(data) + 1 values that are not moving
        # averages of ``data``; report "no complete window" instead.
        return np.empty(0, dtype=float)

    weights = np.ones(windowSize) / windowSize
    return np.convolve(values, weights, mode='valid')


def ratioToMovingAverage(data, windowSize):
    """Express each observation as a percentage of its moving average.

    Args:
        data: 1-D array of observations (the caller in this notebook passes a
            NumPy array; element-wise division is applied to it directly).
        windowSize: Window length handed to ``movingAvg``.

    Returns:
        Tuple ``(ratio, paddedMa)``. ``paddedMa`` is the moving average with
        ``windowSize - 1`` leading NaNs so that it lines up with ``data``;
        ``ratio`` is ``data / paddedMa * 100`` and is NaN wherever the
        moving average is NaN.
    """
    windowMeans = movingAvg(data, windowSize)
    # The first windowSize - 1 observations have no complete window yet.
    leadingGap = np.full(windowSize - 1, np.nan)
    alignedMa = np.concatenate((leadingGap, windowMeans))
    return data / alignedMa * 100, alignedMa

def seasonalIndex(df, ratio_col):
    """Average a ratio column per calendar month and rescale to a mean of 100.

    Args:
        df: DataFrame with a ``Date`` column parseable by ``pd.to_datetime``
            and the column named by ``ratio_col``. It is not modified.
        ratio_col: Name of the column to average.

    Returns:
        Series indexed by month number (index name ``Month``) holding the
        monthly means divided by their own mean and multiplied by 100.
    """
    monthlyMean = (
        df.assign(Month=pd.to_datetime(df['Date']).dt.month)
        .groupby('Month')[ratio_col]
        .mean()
    )
    # Normalise so that the indices average to 100 across the months present.
    return monthlyMean / monthlyMean.mean() * 100


In [None]:
def predictWithRMA(df, windowSize, nFuture=12):
    """Forecast ``Value`` with the ratio-to-moving-average (RMA) method.

    The observed rows (non-missing ``Value``) are used to compute a trailing
    moving average, the ratio of each observation to it, and a per-month
    seasonal index. Future months are then predicted as the mean of the last
    ``windowSize`` values (observed or already predicted) times the seasonal
    index of the target month.

    Args:
        df: DataFrame with at least ``Date`` and ``Value`` columns. It is not
            modified; a copy is extended and returned.
        windowSize: Moving-average window length, in rows.
        nFuture: Number of months to predict after the last observed row.

    Returns:
        Tuple ``(extendedDf, idxSeasonal)``. ``extendedDf`` is a copy of
        ``df`` sorted by date with ``MovingAvg``, ``RMA`` and ``RMA_Pred``
        filled in, plus one appended row for every predicted month that was
        not already present; its ``Date`` column is formatted as
        ``%Y-%m-%d`` strings. ``idxSeasonal`` is the monthly seasonal index.

    Raises:
        ValueError: if ``windowSize`` is smaller than 1 or there are fewer
            observed values than ``windowSize``.
        KeyError: if a month to predict never occurs among the observed rows,
            so no seasonal index exists for it.

    Side effects:
        Prints the table of future predictions.
    """
    # work with valid data only for calculations
    validDf = df[df['Value'].notna()].copy()
    data = validDf['Value'].values

    if windowSize < 1:
        raise ValueError(f"windowSize must be at least 1, got {windowSize}")
    if len(data) < windowSize:
        raise ValueError(
            f"need at least windowSize={windowSize} observed values, got {len(data)}"
        )

    ratio, paddedMa = ratioToMovingAverage(data, windowSize)

    # create extended dataframe starting from original
    extendedDf = df.copy()
    extendedDf['Date'] = pd.to_datetime(extendedDf['Date'])

    # Always start from blank RMA columns. The input may already carry them
    # from an earlier run; keeping those values would leave stale predictions
    # (made with another window or horizon) in rows this run does not
    # overwrite, and they would then be scored as if they belonged to it.
    extendedDf['RMA'] = np.nan
    extendedDf['RMA_Pred'] = np.nan

    # populate RMA values for valid data rows
    validIndices = extendedDf[extendedDf['Value'].notna()].index
    extendedDf.loc[validIndices, 'RMA'] = ratio
    extendedDf.loc[validIndices, 'MovingAvg'] = paddedMa

    # calculate seasonal index from valid data
    idxSeasonal = seasonalIndex(validDf.assign(RMA=ratio), 'RMA')

    # date of the last observed row (in row order)
    lastDate = pd.to_datetime(validDf.iloc[-1]['Date'])

    # generate predictions for the nFuture months after the last valid data
    newRows = []
    for step in range(1, nFuture + 1):
        futureDate = lastDate + relativedelta(months=step)
        ma = np.mean(data[-windowSize:])
        seasonal = idxSeasonal.loc[futureDate.month]
        pred = ma * seasonal / 100

        dateMatch = extendedDf['Date'] == futureDate
        if dateMatch.any():
            # the month already has a row: fill in its prediction
            extendedDf.loc[dateMatch, 'RMA_Pred'] = pred
        else:
            # collect the row; all new rows are appended in one concat below
            newRows.append({
                'Date': futureDate,
                'Value': np.nan,
                'MovingAvg': np.nan,
                'RMA': np.nan,
                'RMA_Pred': pred,
            })

        # the prediction becomes part of the window for the next step
        data = np.append(data, pred)

    # add historical predictions (for evaluation)
    # NOTE(review): MovingAvg at a row is the mean of the window that ends at
    # (and includes) that row's own Value, whereas the future forecasts above
    # only use earlier values. The in-sample error is therefore not directly
    # comparable to out-of-sample accuracy - confirm this is intended.
    # Done before appending rows so validIndices still refers to the labels
    # it was computed from.
    histIdx = validIndices[windowSize - 1:]
    histMonths = extendedDf.loc[histIdx, 'Date'].dt.month.to_numpy()
    histSeasonal = idxSeasonal.loc[histMonths].to_numpy()
    histMa = extendedDf.loc[histIdx, 'MovingAvg'].to_numpy()
    extendedDf.loc[histIdx, 'RMA_Pred'] = histMa * histSeasonal / 100

    if newRows:
        extendedDf = pd.concat([extendedDf, pd.DataFrame(newRows)], ignore_index=True)

    # reset index
    extendedDf = extendedDf.sort_values('Date').reset_index(drop=True)

    # convert back to string format
    extendedDf['Date'] = extendedDf['Date'].dt.strftime('%Y-%m-%d')

    # display future predictions
    futurePredictions = extendedDf[extendedDf['RMA_Pred'].notna() & extendedDf['Value'].isna()]
    print("future predictions:")
    print(futurePredictions[['Date', 'RMA_Pred']].to_string())

    return extendedDf, idxSeasonal

def execRMA(windowSize, nFuture=12):
    """Run the RMA forecast on a copy of the notebook-level ``df`` and report it.

    Prints the monthly seasonal index, the error metrics of ``RMA_Pred``
    against ``Value`` and the first 20 rows of the result.

    Args:
        windowSize: Moving-average window length passed to ``predictWithRMA``.
        nFuture: Number of future months to predict.

    Returns:
        The extended DataFrame produced by ``predictWithRMA``.
    """
    # ``df`` is only read here, so no ``global`` declaration is required.
    forecastDf, monthlyIndex = predictWithRMA(df.copy(), windowSize, nFuture)

    print('indeks musiman bulanan (RMA):')
    print(monthlyIndex)

    print('\nevaluasi (hanya data historis):')
    print(evaluateModel(forecastDf, 'RMA_Pred'))

    print('\npreview hasil:')
    print(forecastDf.head(20))

    return forecastDf

In [161]:
# Run the RMA forecast with a window of 12 rows and a horizon of 12 months.
# NOTE: this rebinds the notebook-level `df` to the result, and execRMA reads
# that same name, so re-running this cell feeds the previous output
# (including its RMA / RMA_Pred columns) back in as input.
df = execRMA(12, nFuture=12)
df

future predictions:
           Date    RMA_Pred
184  2025-05-01  429.019647
185  2025-06-01  428.341040
186  2025-07-01  426.355635
187  2025-08-01  424.020525
188  2025-09-01  422.504466
189  2025-10-01  422.605955
190  2025-11-01  424.077656
191  2025-12-01  425.372094
192  2026-01-01  426.433512
193  2026-02-01  427.058400
194  2026-03-01  427.618876
195  2026-04-01  428.925359
indeks musiman bulanan (RMA):
Month
1     100.066000
2     100.216877
3     100.349023
4     100.666070
5     100.796778
6     100.595594
7     100.101288
8      99.537353
9      99.161473
10     99.176090
11     99.517067
12     99.816385
Name: RMA, dtype: float64

evaluasi (hanya data historis):
{'mse': np.float64(1.4477875837222052), 'rmse': np.float64(1.203240451332237), 'mae': np.float64(1.1409608998720753), 'mape': np.float64(0.27902916269935923)}

preview hasil:
          Date   Value   MovingAvg  PercAvg_Pred   ExpSmooth         RMA  \
0   2010-01-01  388.91         NaN           NaN  388.910000      

Unnamed: 0,Date,Value,MovingAvg,PercAvg_Pred,ExpSmooth,RMA,RMA_Pred
0,2010-01-01,388.91,,,388.91000,,
1,2010-02-01,390.41,,,389.36000,,
2,2010-03-01,391.37,,,389.96300,,
3,2010-04-01,392.67,,,390.77510,,
4,2010-05-01,393.21,,,391.50557,,
...,...,...,...,...,...,...,...
203,2026-12-01,,,,,,
204,2027-01-01,,,,,,
205,2027-02-01,,,,,,
206,2027-03-01,,,,,,


In [162]:
# Export it
# NOTE: outPath is the same file this notebook loads with pd.read_csv, so the
# export overwrites the input in place. index=False keeps the row index out
# of the CSV.
outPath = "../processed-data.csv"
df.to_csv(outPath, index=False)
df

Unnamed: 0,Date,Value,MovingAvg,PercAvg_Pred,ExpSmooth,RMA,RMA_Pred
0,2010-01-01,388.91,,,388.91000,,
1,2010-02-01,390.41,,,389.36000,,
2,2010-03-01,391.37,,,389.96300,,
3,2010-04-01,392.67,,,390.77510,,
4,2010-05-01,393.21,,,391.50557,,
...,...,...,...,...,...,...,...
203,2026-12-01,,,,,,
204,2027-01-01,,,,,,
205,2027-02-01,,,,,,
206,2027-03-01,,,,,,
