In [170]:
import pandas as pd
import numpy as np
from dateutil.relativedelta import relativedelta

In [171]:
# Load the preprocessed series into the notebook-global `df`; the path is
# resolved relative to the kernel's working directory. The rendered output
# below shows a `Date` column and a `Value` column.
df = pd.read_csv("../preprocessed-data.csv")
df

Unnamed: 0,Date,Value
0,2010-01-01,388.91
1,2010-02-01,390.41
2,2010-03-01,391.37
3,2010-04-01,392.67
4,2010-05-01,393.21
...,...,...
179,2024-12-01,425.40
180,2025-01-01,426.65
181,2025-02-01,427.09
182,2025-03-01,428.15


In [172]:
def evaluateModel(df, predictedColumnName):
    """Score a prediction column against the ``Value`` column of ``df``.

    Rows where either ``Value`` or the prediction is missing are ignored.

    Args:
        df: DataFrame holding a ``Value`` column and the prediction column.
        predictedColumnName: Name of the column containing the predictions.

    Returns:
        dict with the keys ``"mse"``, ``"rmse"``, ``"mae"`` and ``"mape"``
        (MAPE expressed in percent). Every metric is NaN when no row has both
        an actual and a predicted value; MAPE alone is NaN when all remaining
        actual values are zero.
    """
    metricNames = ("mse", "rmse", "mae", "mape")

    paired = df.dropna(subset=["Value", predictedColumnName])
    if not len(paired):
        return dict.fromkeys(metricNames, np.nan)

    observed = paired["Value"]
    residual = observed - paired[predictedColumnName]

    meanSquared = np.mean(residual ** 2)
    rootMeanSquared = np.sqrt(meanSquared)
    meanAbsolute = np.mean(np.abs(residual))

    # Percentage error is undefined where the actual value is zero, so those
    # rows are left out of the MAPE only.
    usable = observed != 0
    if usable.any():
        meanAbsPct = np.mean(np.abs(residual[usable] / observed[usable])) * 100
    else:
        meanAbsPct = np.nan

    scores = (meanSquared, rootMeanSquared, meanAbsolute, meanAbsPct)
    return dict(zip(metricNames, scores))

In [173]:
def movingAvg(data, windowSize):
    """Return the simple moving average of ``data`` over full windows only.

    Args:
        data: One-dimensional sequence of numbers.
        windowSize: Number of consecutive points averaged per output value.

    Returns:
        numpy array with ``len(data) - windowSize + 1`` entries, where entry
        ``i`` is the mean of ``data[i:i + windowSize]``. The array is empty
        when ``data`` is shorter than ``windowSize``.

    Raises:
        ValueError: If ``windowSize`` is smaller than 1.
    """
    if windowSize < 1:
        raise ValueError(f"windowSize must be a positive integer, got {windowSize}")

    series = np.asarray(data)

    # np.convolve(mode='valid') swaps its operands when the kernel is longer
    # than the data and would return meaningless values instead of "no full
    # window fits", so that case is handled explicitly.
    if windowSize > series.size:
        return np.empty(0, dtype=float)

    weights = np.ones(windowSize) / windowSize
    return np.convolve(series, weights, mode='valid')


def predictWithMovingAvg(data, windowSize, horizon=None):
    """Forecast future points by repeatedly averaging the most recent window.

    Each forecast is the mean of the last ``windowSize`` points of the series,
    where the series is the input followed by all forecasts produced so far.
    If fewer than ``windowSize`` points exist, all available points are
    averaged. ``data`` itself is never modified.

    Args:
        data: One-dimensional sequence of numbers (list, tuple, array, ...).
        windowSize: Number of trailing points averaged for each forecast.
        horizon: Number of steps to forecast. Defaults to ``windowSize``.

    Returns:
        list of ``horizon`` forecast values, oldest first. An empty list when
        the number of steps is zero or negative.

    Raises:
        ValueError: If steps are requested with ``windowSize`` smaller than 1.
    """
    steps = windowSize if horizon is None else horizon
    if steps <= 0:
        return []
    if windowSize < 1:
        raise ValueError(f"windowSize must be a positive integer, got {windowSize}")

    history = np.asarray(data, dtype=float)
    known = history.size

    # One preallocated buffer holds history followed by forecasts, so the
    # series is not re-copied on every step.
    buffer = np.empty(known + steps, dtype=float)
    buffer[:known] = history

    for position in range(known, known + steps):
        windowStart = max(0, position - windowSize)
        buffer[position] = buffer[windowStart:position].mean()

    return list(buffer[known:])


In [174]:
def execMovingAvg(windowSize, data=None, verbose=True):
    """Fit a trailing moving average and append a ``windowSize``-month forecast.

    The input frame is not modified; all work happens on a copy. Rows after
    the last non-missing ``Value`` (for example forecast rows appended by a
    previous call) are dropped first, so calling this again on its own output
    reproduces the same result.

    Note that the ``MovingAvg`` of a row averages the ``windowSize`` values
    ending at, and including, that row, so the printed in-sample metrics
    compare each value against an average that contains it.

    Args:
        windowSize: Moving-average window; also the number of months forecast.
        data: DataFrame with ``Date`` and ``Value`` columns. Defaults to the
            notebook-global ``df``.
        verbose: When true, print the evaluation metrics and the full
            extended frame.

    Returns:
        New DataFrame holding the observed rows (with a ``MovingAvg`` column)
        followed by ``windowSize`` forecast rows whose ``Value`` is NaN.

    Raises:
        ValueError: If there is no observed ``Value`` or ``windowSize`` is not
            between 1 and the number of observed rows.
    """
    source = df if data is None else data

    # Trim trailing rows that carry no observation; they would otherwise turn
    # every window they touch, and therefore the whole forecast, into NaN.
    observed = source["Value"].notna().to_numpy()
    if not observed.any():
        raise ValueError("no observed 'Value' rows to average")
    lastObserved = np.flatnonzero(observed)[-1]
    history = source.iloc[: lastObserved + 1].copy()

    if not 1 <= windowSize <= len(history):
        raise ValueError(
            f"windowSize must be between 1 and {len(history)}, got {windowSize}"
        )

    values = history["Value"].to_numpy()

    # The first windowSize - 1 rows have no complete window behind them.
    history["MovingAvg"] = np.pad(
        movingAvg(values, windowSize),
        (windowSize - 1, 0),
        'constant',
        constant_values=np.nan,
    )

    # Forecast rows: one per month after the last observed date.
    lastDate = pd.to_datetime(history["Date"].iloc[-1])
    predictions = predictWithMovingAvg(values, windowSize)
    futureDf = pd.DataFrame({
        'Date': [
            (lastDate + relativedelta(months=step)).strftime('%Y-%m-%d')
            for step in range(1, windowSize + 1)
        ],
        'Value': [np.nan] * windowSize,
        'MovingAvg': list(predictions[:windowSize]),
    })

    extendedDf = pd.concat([history, futureDf], ignore_index=True)

    if verbose:
        print(evaluateModel(history, "MovingAvg"), "\n")
        # Full dump requested by the notebook author.
        print(extendedDf.to_string())

    return extendedDf


In [175]:
# Run the moving average with a window of 12 rows and rebind `df` to the
# returned frame, which holds the original rows plus 12 forecast rows.
# NOTE(review): execMovingAvg reads the global `df`, so re-running this cell
# passes the already-extended frame (with NaN-valued forecast rows) back in;
# restart the kernel and run all cells to reproduce the output shown here.
df = execMovingAvg(12)
df

{'mse': np.float64(6.246450084296788), 'rmse': np.float64(2.4992899160155044), 'mae': np.float64(2.151112716763011), 'mape': np.float64(0.5261777694568053)} 

           Date   Value   MovingAvg
0    2010-01-01  388.91         NaN
1    2010-02-01  390.41         NaN
2    2010-03-01  391.37         NaN
3    2010-04-01  392.67         NaN
4    2010-05-01  393.21         NaN
5    2010-06-01  392.38         NaN
6    2010-07-01  390.41         NaN
7    2010-08-01  388.54         NaN
8    2010-09-01  387.03         NaN
9    2010-10-01  387.43         NaN
10   2010-11-01  388.87         NaN
11   2010-12-01  389.99  390.101667
12   2011-01-01  391.50  390.317500
13   2011-02-01  392.05  390.454167
14   2011-03-01  392.80  390.573333
15   2011-04-01  393.44  390.637500
16   2011-05-01  394.41  390.737500
17   2011-06-01  393.95  390.868333
18   2011-07-01  392.72  391.060833
19   2011-08-01  390.33  391.210000
20   2011-09-01  389.28  391.397500
21   2011-10-01  389.19  391.544167
22   2011-11-

Unnamed: 0,Date,Value,MovingAvg
0,2010-01-01,388.91,
1,2010-02-01,390.41,
2,2010-03-01,391.37,
3,2010-04-01,392.67,
4,2010-05-01,393.21,
...,...,...,...
191,2025-12-01,,426.379554
192,2026-01-01,,426.461183
193,2026-02-01,,426.445448
194,2026-03-01,,426.391736


In [176]:
# Export the extended frame (observed rows plus forecast rows) to CSV without
# writing the index column.
# NOTE(review): outPath is an absolute, machine-specific path; consider a path
# relative to the notebook so the export works on other machines.
outPath = "/home/alie/kuliah/Sm4/Statistika/finale-statistika/processed-data.csv"
df.to_csv(outPath, index=False)

df

Unnamed: 0,Date,Value,MovingAvg
0,2010-01-01,388.91,
1,2010-02-01,390.41,
2,2010-03-01,391.37,
3,2010-04-01,392.67,
4,2010-05-01,393.21,
...,...,...,...
191,2025-12-01,,426.379554
192,2026-01-01,,426.461183
193,2026-02-01,,426.445448
194,2026-03-01,,426.391736
