<a href="https://colab.research.google.com/github/bmfmancini/Cacti-DB-Monitor-Template/blob/main/prophet_holt_POC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!apt install rrdtool librrd-dev
!pip install prophet
!pip install matplotlib
!pip install pandas
!pip install rrdtool
!pip install statsmodels


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  fonts-dejavu-core libdbi1 librrd8
Suggested packages:
  librrds-perl
The following NEW packages will be installed:
  fonts-dejavu-core libdbi1 librrd-dev librrd8 rrdtool
0 upgraded, 5 newly installed, 0 to remove and 38 not upgraded.
Need to get 1,814 kB of archives.
After this operation, 5,511 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/main amd64 fonts-dejavu-core all 2.37-2build1 [1,041 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/main amd64 libdbi1 amd64 0.9.0-6build2 [26.6 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/main amd64 librrd8 amd64 1.7.2-3ubuntu6 [171 kB]
Get:4 http://archive.ubuntu.com/ubuntu jammy/main amd64 librrd-dev amd64 1.7.2-3ubuntu6 [206 kB]
Get:5 http://archive.ubuntu.com/ubuntu jammy/main amd64 rrdtool amd64 1.7.2-3ubuntu6 [370 kB]
Fetched 1,814 kB in 0s (5,098

In [3]:


import rrdtool
import pandas as pd
from prophet import Prophet
import matplotlib.pyplot as plt
import datetime
import os
import math
import numpy as np
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from datetime import timezone







### Prophet Prediction Model

In [4]:
# ---------------------------
# Step 1: Read from RRD file
# ---------------------------
#rrd_file = "mancinisw_traffic_in_126.rrd"
rrd_file = "local_linux_machine_load_1min_2.rrd"

#ds_name = "traffic_in"
ds_name = "load_5min"


# fetch data (AVERAGE, but can also use MAX or MIN)
(start, end, step), names, data = rrdtool.fetch(rrd_file, "AVERAGE", "--start", "-30d")

# ---------------------------
# Step 2: Convert to DataFrame
# ---------------------------
timestamps = [datetime.datetime.fromtimestamp(start + i * step) for i in range(len(data))]
df = pd.DataFrame(data, columns=names, index=timestamps)

# Drop missing values (NaN)
df = df[[ds_name]].dropna().reset_index()
df.columns = ["ds", "y"]


# ---------------------------
# Step 3: Build & Train Prophet Model
# ---------------------------
model = Prophet(daily_seasonality=True, yearly_seasonality=False)
model.fit(df)

# ---------------------------
# Step 4: Forecast Future
# ---------------------------
future = model.make_future_dataframe(periods=2016, freq="5min")
#288 time points spaced 5 minutes apart
# e.g., 288  * 5min = 1 day forecast
forecast = model.predict(future)

# ---------------------------
# Step 5: Plot Results
# ---------------------------
fig1 = model.plot(forecast)
plt.title(f"Forecast for {ds_name}")
plt.show()

fig2 = model.plot_components(forecast)
plt.show()


#print(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail())

OperationalError: opening 'local_linux_machine_load_1min_2.rrd': No such file or directory

### Holt Winters Method

In [None]:

# ---- CONFIG ----
RRD_PATH = "local_linux_machine_load_1min_2.rrd"   # Updated to match Prophet example
DS_NAME  = "load_5min"             # Updated to match Prophet example
CF       = "AVERAGE"        # consolidation function
START    = "-30d"           # Updated to match Prophet example (was -7d)
END      = "now"            # end time
FORECAST_HOURS = 168        # 1 week forecast (7 * 24 = 168 hours)
ASSUME_STEP_SECONDS = 300   # your RRD step (5 min = 300s)
SEASONAL_PERIODS = int((24*3600) / ASSUME_STEP_SECONDS)  # daily seasonality

# ---- 1) FETCH FROM RRD ----
# rrdtool.fetch returns: (start, end, step), ds_names, data_rows
(time_info, ds_names, data) = rrdtool.fetch(
    RRD_PATH, CF, "--start", START, "--end", END
)

# Find index of DS_NAME
try:
    ds_index = ds_names.index(DS_NAME)
except ValueError:
    raise SystemExit(f"DS '{DS_NAME}' not found in {RRD_PATH}. Available: {ds_names}")

start_ts, end_ts, step = time_info

# Build timestamps and values
timestamps = pd.to_datetime(np.arange(start_ts, end_ts, step), unit="s", utc=True)
values = [row[ds_index] if row is not None else np.nan for row in data[:len(timestamps)]]

# Create a series
s = pd.Series(values, index=timestamps, name=DS_NAME)

# ---- 2) CLEAN/RESAMPLE ----
# If your RRD step is stable, resampling may be optional; we still ensure regularity.
s = s.asfreq(f"{step}s")

# Fill small gaps (NaNs) for model fit; keep a copy for plotting raw points if you prefer
s_filled = s.interpolate(limit_direction="both").fillna(method="bfill").fillna(method="ffill")

# ---- 3) FIT HOLT-WINTERS ----
# Choose additive or multiplicative depending on your data pattern
model = ExponentialSmoothing(
    s_filled,
    trend="add",
    seasonal="add",
    seasonal_periods=SEASONAL_PERIODS
)

fit = model.fit(optimized=True)

# ---- 4) FORECAST ----
forecast_steps = int((FORECAST_HOURS * 3600) / step)
forecast_index = pd.date_range(
    start=s_filled.index[-1] + pd.Timedelta(seconds=step),
    periods=forecast_steps,
    freq=f"{step}s",
    tz=timezone.utc
)
yhat = fit.forecast(forecast_steps)
yhat.index = forecast_index

# Simple prediction intervals (empirical residual std):
resid = s_filled - fit.fittedvalues.reindex_like(s_filled)
sigma = resid.dropna().std()
upper = yhat + 2 * sigma
lower = yhat - 2 * sigma

In [None]:
# ---- 5) PLOT ----
plt.figure(figsize=(12, 5))
plt.plot(s.index, s.values, label="Observed")
plt.plot(yhat.index, yhat.values, label="HW Forecast")
plt.fill_between(yhat.index, lower.values, upper.values, alpha=0.2, label="±2σ band")
plt.title(f"Holt-Winters Forecast for {os.path.basename(RRD_PATH)}:{DS_NAME} ({CF})")
plt.xlabel("Time (UTC)")
plt.ylabel("Load Average")
plt.legend()
plt.tight_layout()
plt.show()

# ---- OPTIONAL: BASIC ANOMALY FLAGGING ON HISTORY ----
# Compare the last day of observations to fitted band to flag outliers
window = int((24 * 3600) / step)
obs_tail = s_filled.tail(window)
fit_tail = fit.fittedvalues.reindex_like(s_filled).tail(window)
upper_tail = fit_tail + 2 * sigma
lower_tail = fit_tail - 2 * sigma
anomalies = obs_tail[(obs_tail > upper_tail) | (obs_tail < lower_tail)]
print(f"Anomalies in last 24h: {len(anomalies)}")
