In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Simulated data: hourly CPU-utilization samples ending at the current time.
SEED = 42          # fixed seed so Restart & Run All reproduces the same samples
N_SAMPLES = 100    # number of hourly observations to simulate

# Read the clock once so consecutive timestamps are exactly one hour apart
# (calling datetime.now() per row lets the rows drift by a few microseconds).
now = datetime.now()
dates = [now - timedelta(hours=h) for h in reversed(range(N_SAMPLES))]

# Draw from N(0.5, 0.1) with a seeded generator, then clamp to the [0, 1] range.
rng = np.random.default_rng(SEED)
cpu = np.clip(rng.normal(loc=0.5, scale=0.1, size=N_SAMPLES), 0, 1)

df = pd.DataFrame({"timestamp": dates, "cpu_utilization": cpu})
df.to_csv("cpu_utilization.csv", index=False)

In [2]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from joblib import dump

# Encode each timestamp as a fractional day ordinal: the proleptic Gregorian
# day number from datetime.toordinal() plus the elapsed fraction of that day.
# toordinal() by itself has whole-day resolution, which would collapse the
# hourly rows onto a handful of distinct feature values.
# Whole-day ordinals remain valid inputs on this scale (they denote midnight).
timestamps = pd.to_datetime(df['timestamp'])
day_fraction = (timestamps - timestamps.dt.normalize()) / pd.Timedelta(days=1)
df['timestamp_ordinal'] = timestamps.map(datetime.toordinal) + day_fraction
X = df[['timestamp_ordinal']]
y = df['cpu_utilization']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Seed the forest as well as the split so refitting yields the same model.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train)

dump(model, 'cpu_model.joblib')

['cpu_model.joblib']

In [3]:
from joblib import load
model = load('cpu_model.joblib')
# The model was fitted on a DataFrame, so query it with the same column name;
# a bare nested list makes scikit-learn warn about missing feature names.
query = pd.DataFrame({'timestamp_ordinal': [datetime.now().toordinal()]})
pred = model.predict(query)
print(f"Predicted CPU usage: {pred[0]}")

Predicted CPU usage: 0.5203251578679144


