In [37]:
import pandas as pd
import numpy as np
from dateutil.relativedelta import relativedelta

In [38]:
# Read the preprocessed series from its relative CSV path and display it.
sourceCsv = "../preprocessed-data.csv"
df = pd.read_csv(sourceCsv)
df

Unnamed: 0,Date,Value
0,2010-01-01,388.91
1,2010-02-01,390.41
2,2010-03-01,391.37
3,2010-04-01,392.67
4,2010-05-01,393.21
...,...,...
179,2024-12-01,425.40
180,2025-01-01,426.65
181,2025-02-01,427.09
182,2025-03-01,428.15


In [39]:
def evaluateModel(df, predictedColumnName):
    """Score a prediction column against the ``Value`` column.

    Rows where either ``Value`` or the prediction is missing are left out of
    the comparison.

    Args:
        df: DataFrame holding a ``Value`` column and the prediction column.
        predictedColumnName: Name of the column with the predicted values.

    Returns:
        dict with the keys ``"mse"``, ``"rmse"``, ``"mae"`` and ``"mape"``
        (MAPE expressed in percent). Every metric is NaN when no row has both
        values present; ``"mape"`` alone is NaN when all remaining actual
        values are zero.
    """
    metricNames = ("mse", "rmse", "mae", "mape")
    scoredRows = df.dropna(subset=["Value", predictedColumnName])

    # Nothing left to compare: every metric is undefined.
    if not len(scoredRows):
        return {name: np.nan for name in metricNames}

    observed = scoredRows["Value"]
    forecast = scoredRows[predictedColumnName]
    residuals = observed - forecast

    meanSquaredError = np.mean(residuals ** 2)
    meanAbsoluteError = np.mean(np.abs(residuals))

    # The percentage error divides by the observation, so rows whose actual
    # value is zero are excluded from MAPE only.
    usable = observed != 0
    if usable.any():
        meanAbsPctError = np.mean(np.abs(residuals[usable] / observed[usable])) * 100
    else:
        meanAbsPctError = np.nan

    scores = (
        meanSquaredError,
        np.sqrt(meanSquaredError),
        meanAbsoluteError,
        meanAbsPctError,
    )
    return dict(zip(metricNames, scores))

In [40]:
def _simulateMeanEndpoint(rng, start_value, n_steps, n_sim, mean_change, std_change):
    """Return the mean end value of ``n_sim`` random walks of ``n_steps`` normal steps from ``start_value``."""
    # One vectorized draw per prediction instead of n_sim * n_steps scalar draws.
    increments = rng.normal(mean_change, std_change, size=(n_sim, n_steps))
    return np.mean(start_value + increments.sum(axis=1))


def predictWithMonteCarlo(df, n_future=12, n_sim=1000, window_size=12, seed=None):
    """Random-walk Monte Carlo forecast of the ``Value`` column.

    Month-to-month changes of the non-missing ``Value`` entries are treated as
    normally distributed; their mean and standard deviation are estimated
    from the whole series (so the in-sample predictions are scored with
    parameters that have seen the values being predicted).

    * In-sample: the prediction for the i-th observation (i >= window_size)
      is the average of ``n_sim`` simulated paths that start at the
      observation ``window_size`` positions earlier and take ``window_size``
      random steps. The first ``window_size`` observations get NaN.
    * Future: ``n_future`` monthly rows are appended after the last date in
      ``df``. Each starts from the observation ``window_size`` positions
      before the end and takes ``window_size + k`` steps for the k-th future
      row (k starting at 0).

    The evaluation metrics and the full extended frame are printed.

    Args:
        df: DataFrame with a ``Date`` column (parseable by ``pd.to_datetime``)
            and a numeric ``Value`` column. It is not modified.
        n_future: Number of future monthly rows to append.
        n_sim: Number of simulated paths averaged per prediction.
        window_size: Number of steps between the starting observation and
            the predicted one. Must be at least 1.
        seed: Optional seed for a private ``np.random.RandomState``. With the
            default ``None`` the global ``np.random`` state is used, so
            results vary between runs unless the caller seeded it.

    Returns:
        A new DataFrame: a copy of ``df`` with a ``MonteCarlo_Pred`` column,
        followed by the ``n_future`` forecast rows (``Value`` is NaN there).

    Raises:
        ValueError: If ``window_size`` is smaller than 1, or ``df`` has fewer
            than ``window_size`` non-missing values.
    """
    if window_size < 1:
        # values[-0] would silently pick the first observation as forecast base.
        raise ValueError("window_size must be at least 1")

    value_column = df['Value']
    observed_mask = value_column.notna().to_numpy()
    values = value_column.to_numpy()[observed_mask]
    if len(values) < window_size:
        raise ValueError(
            "need at least window_size (%d) non-missing values, got %d"
            % (window_size, len(values))
        )

    changes = np.diff(values)
    mean_change = changes.mean()
    std_change = changes.std()
    dates = pd.to_datetime(df['Date'])

    # Keep the historical behaviour (global NumPy random state) unless a seed is given.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Write each prediction onto the row of the observation it predicts, so
    # missing values in the middle of the series cannot shift the alignment.
    observed_rows = np.flatnonzero(observed_mask)
    in_sample = np.full(len(df), np.nan)
    for i in range(window_size, len(values)):
        in_sample[observed_rows[i]] = _simulateMeanEndpoint(
            rng, values[i - window_size], window_size, n_sim, mean_change, std_change
        )

    # Work on a copy so the caller's DataFrame is left untouched.
    scoredDf = df.copy()
    scoredDf['MonteCarlo_Pred'] = in_sample

    # Starting window_size observations before the end and walking
    # window_size + k steps lands on the k-th position after the last observation.
    base_value = values[-window_size]
    lastDate = dates.iloc[-1]
    futureDates = []
    futurePreds = []
    for k in range(n_future):
        futureDates.append((lastDate + pd.DateOffset(months=k + 1)).strftime('%Y-%m-%d'))
        futurePreds.append(
            _simulateMeanEndpoint(
                rng, base_value, window_size + k, n_sim, mean_change, std_change
            )
        )
    futureDf = pd.DataFrame({
        'Date': futureDates,
        'Value': [np.nan] * n_future,
        'MonteCarlo_Pred': futurePreds
    })

    extendedDf = pd.concat([scoredDf, futureDf], ignore_index=True)

    print(evaluateModel(scoredDf, "MonteCarlo_Pred"))
    print(extendedDf.to_string())

    return extendedDf

In [41]:
# Forecast 12 future rows using 1000 simulated paths per prediction.
# df is rebound to the returned frame, which includes the appended future rows.
simulationArgs = dict(n_future=12, n_sim=1000)
df = predictWithMonteCarlo(df, **simulationArgs)
df

{'mse': 0.4988487851924824, 'rmse': 0.7062922802866264, 'mae': 0.5656632710997563, 'mape': 0.13868299656229918}
           Date   Value  MonteCarlo_Pred
0    2010-01-01  388.91              NaN
1    2010-02-01  390.41              NaN
2    2010-03-01  391.37              NaN
3    2010-04-01  392.67              NaN
4    2010-05-01  393.21              NaN
5    2010-06-01  392.38              NaN
6    2010-07-01  390.41              NaN
7    2010-08-01  388.54              NaN
8    2010-09-01  387.03              NaN
9    2010-10-01  387.43              NaN
10   2010-11-01  388.87              NaN
11   2010-12-01  389.99              NaN
12   2011-01-01  391.50       391.836622
13   2011-02-01  392.05       393.054216
14   2011-03-01  392.80       394.015634
15   2011-04-01  393.44       395.105690
16   2011-05-01  394.41       395.949242
17   2011-06-01  393.95       395.159554
18   2011-07-01  392.72       392.950094
19   2011-08-01  390.33       391.234730
20   2011-09-01  389.28    

Unnamed: 0,Date,Value,MonteCarlo_Pred
0,2010-01-01,388.91,
1,2010-02-01,390.41,
2,2010-03-01,391.37,
3,2010-04-01,392.67,
4,2010-05-01,393.21,
...,...,...,...
191,2025-12-01,,430.693922
192,2026-01-01,,431.414411
193,2026-02-01,,431.292537
194,2026-03-01,,432.270020


In [42]:
# Export it
# Raw string literal: the Windows path contains backslashes that are not valid
# escape sequences (\K, \P, \s, \m). In a normal string they trigger an
# invalid-escape warning; the raw form yields the same path without it.
outPath = r"D:\KULIAH\KULIAH Y2\PCD\Program\statistika-final-proj\montecarlo-data.csv"
df.to_csv(outPath, index=False)
df

  outPath = "D:\KULIAH\KULIAH Y2\PCD\Program\statistika-final-proj\montecarlo-data.csv"


Unnamed: 0,Date,Value,MonteCarlo_Pred
0,2010-01-01,388.91,
1,2010-02-01,390.41,
2,2010-03-01,391.37,
3,2010-04-01,392.67,
4,2010-05-01,393.21,
...,...,...,...
191,2025-12-01,,430.693922
192,2026-01-01,,431.414411
193,2026-02-01,,431.292537
194,2026-03-01,,432.270020
