In [None]:
import sys, os

# Add the project root (one level up from notebooks/) to sys.path
sys.path.append(os.path.abspath('..'))

In [2]:
import scipy
import statsmodels.api as sm
print("scipy", scipy.__version__, "statsmodels", sm.__version__)

scipy 1.9.3 statsmodels 0.14.4


In [3]:
import pandas as pd
import numpy as np
from prophet import Prophet
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error

# (Re)učitaj i pripremi podatke
df = pd.read_csv(
    '../data/historical_consumption.csv',
    skipinitialspace=True,
    parse_dates=['Datum'],
    dayfirst=True
)
df.rename(columns={'potrosnja':'Potrošnja'}, inplace=True)

# 2) Force the exact format yyyy-mm-dd
df['Datum'] = pd.to_datetime(df['Datum'], format='%Y-%m-%d', exact=True)

# 3) Rename and set index
df.rename(columns={'potrosnja':'Potrošnja'}, inplace=True)
df.set_index('Datum', inplace=True)

# Feature engineering i split
df['day_of_week'] = df.index.dayofweek
df['month']       = df.index.month
df['day']         = df.index.day

n     = len(df)
split = int(n * 0.8)
train = df.iloc[:split].copy()
valid = df.iloc[split:].copy()

In [4]:
# Priprema za Prophet
prophet_train = train.reset_index()[['Datum','Potrošnja']].rename(columns={'Datum':'ds','Potrošnja':'y'})
prophet_valid = valid.reset_index()[['Datum','Potrošnja']].rename(columns={'Datum':'ds','Potrošnja':'y'})

In [5]:
m = Prophet(daily_seasonality=True, weekly_seasonality=True, yearly_seasonality=False)
m.fit(prophet_train)

13:29:51 - cmdstanpy - INFO - Chain [1] start processing
13:29:51 - cmdstanpy - INFO - Chain [1] done processing


<prophet.forecaster.Prophet at 0x190a49028d0>

In [6]:
future = m.make_future_dataframe(periods=len(valid), freq='D')
forecast = m.predict(future)
# Uzmeš samo zadnjih N predikcija
pred_prophet = forecast.set_index('ds')['yhat'].iloc[-len(valid):].values

In [7]:
rmse_prophet = np.sqrt(mean_squared_error(prophet_valid['y'], pred_prophet))
print(f"Prophet RMSE: {rmse_prophet:.2f}")

Prophet RMSE: 691.94


In [8]:
# primjer ARIMA(1,1,1)
arima_order = (1, 1, 1)
arima_model = sm.tsa.ARIMA(train['Potrošnja'], order=arima_order).fit()
print(arima_model.summary())

                               SARIMAX Results                                
Dep. Variable:              Potrošnja   No. Observations:                   24
Model:                 ARIMA(1, 1, 1)   Log Likelihood                -171.489
Date:                Fri, 04 Jul 2025   AIC                            348.978
Time:                        13:29:51   BIC                            352.384
Sample:                    01-01-2023   HQIC                           349.834
                         - 01-24-2023                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.6161      0.312      1.972      0.049       0.004       1.228
ma.L1         -0.9988     10.039     -0.099      0.921     -20.675      18.677
sigma2       1.62e+05   1.62e+06      0.100      0.9

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


In [9]:
# predikcija za vizualizaciju
pred_arima = arima_model.forecast(steps=len(valid))
rmse_arima = np.sqrt(mean_squared_error(valid['Potrošnja'], pred_arima))
print(f"ARIMA{arima_order} RMSE: {rmse_arima:.2f}")

ARIMA(1, 1, 1) RMSE: 281.64


In [10]:
results = pd.DataFrame({
    'model': ['Prophet', f'ARIMA{arima_order}'],
    'rmse': [rmse_prophet, rmse_arima]
})
display(results)

Unnamed: 0,model,rmse
0,Prophet,691.941349
1,"ARIMA(1, 1, 1)",281.638909


In [11]:
# 4.6 Testiranje iz src/model.py
from src.model import train_and_forecast_arima

# Koristimo već pripremljene train i valid DataFrame-ove
result = train_and_forecast_arima(train, order=(1,1,1), periods=len(valid))

# Ispiši AIC te prvih 5 predikcija
print("ARIMA AIC:", result['aic'])
print("\nPrvih 5 predikcija:")
display(result['forecast'].head())

# (Opcionalno) RMSE na valid setu, ako želiš potvrditi:
from sklearn.metrics import mean_squared_error
rmse = mean_squared_error(valid['Potrošnja'], result['forecast'], squared=False)
print(f"\nValid RMSE: {rmse:.2f}")

ModuleNotFoundError: No module named 'src'