In [1]:
!pip install optuna

Collecting optuna
  Downloading optuna-4.3.0-py3-none-any.whl.metadata (17 kB)
Collecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)
Downloading optuna-4.3.0-py3-none-any.whl (386 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m386.6/386.6 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading alembic-1.15.2-py3-none-any.whl (231 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m231.9/231.9 kB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)
Installing collected packages: colorlog, alembic, optuna
Successfully installed alembic-1.15.2 colorlog-6.9.0 optuna-4.3.0


# Service 1

In [3]:
import pandas as pd
import numpy as np
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error
import optuna
from optuna.samplers import TPESampler

# Load the Service 1 metrics table.
df = pd.read_csv("Service1.csv")

# Feature / target column selection.
# NOTE(review): both target columns ('cpu_usage_pct', 'memory_usage_pct') are
# also listed as features, so the model is given the values it is asked to
# predict. The reported errors may therefore be optimistic -- confirm whether
# the targets should be dropped from the features (or shifted in time).
feature_cols = ['latency_ms', 'cpu_usage_pct', 'memory_usage_pct', 'memory_allocated', 'cpu_allocated']
target_cols = ['cpu_usage_pct', 'memory_usage_pct']
X = df[feature_cols]
y = df[target_cols]

# Two-stage split: hold out 30%, then halve the hold-out into validation and
# test, giving 70% train / 15% val / 15% test.
X_train_full, X_temp, y_train_full, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

# Standardise inputs and targets; both scalers are fitted on the training
# split only and then applied to the other splits.
x_scaler = StandardScaler().fit(X_train_full)
y_scaler = StandardScaler().fit(y_train_full)

X_train_scaled = x_scaler.transform(X_train_full)
X_val_scaled = x_scaler.transform(X_val)
X_test_scaled = x_scaler.transform(X_test)

y_train_scaled = y_scaler.transform(y_train_full)
y_val_scaled = y_scaler.transform(y_val)
# Objective for Optuna
def objective(trial):
    """Score one hyperparameter candidate for Optuna.

    Samples ``C`` and ``epsilon`` on a log scale and ``gamma`` from
    {'scale', 'auto'}, fits one SVR per target column (via
    ``MultiOutputRegressor``) on the scaled training split, and returns the
    mean squared error on the scaled validation split.

    Reads the module-level arrays ``X_train_scaled``, ``y_train_scaled``,
    ``X_val_scaled`` and ``y_val_scaled`` at call time.

    Args:
        trial: the Optuna trial used to draw the hyperparameters.

    Returns:
        Validation MSE in scaled target space (lower is better).
    """
    svr_params = {
        'C': trial.suggest_float('C', 0.1, 100.0, log=True),
        'epsilon': trial.suggest_float('epsilon', 0.001, 1.0, log=True),
        'gamma': trial.suggest_categorical('gamma', ['scale', 'auto']),
    }

    candidate = MultiOutputRegressor(SVR(**svr_params)).fit(X_train_scaled, y_train_scaled)
    val_preds_scaled = candidate.predict(X_val_scaled)
    return mean_squared_error(y_val_scaled, val_preds_scaled)

# Run optimization.
# The TPE sampler is seeded so the search -- and therefore best_params and
# every metric printed below -- is reproducible on Restart & Run All. 42
# matches the random_state already used for the train/val/test split.
study = optuna.create_study(direction="minimize", sampler=TPESampler(seed=42))
study.optimize(objective, n_trials=30)

# Best model training
best_params = study.best_params
print("Best Parameters:", best_params)

# Refit on train + validation with the tuned hyperparameters. Both parts were
# scaled with the scalers fitted on the training split only.
final_svr = MultiOutputRegressor(SVR(**best_params))
final_svr.fit(
    np.vstack([X_train_scaled, X_val_scaled]),
    np.vstack([y_train_scaled, y_val_scaled]),
)

# Test set evaluation.
# y_test_scaled is not used by the metrics below (they are computed in the
# original units); it is kept in case other cells rely on it.
y_test_scaled = y_scaler.transform(y_test)
preds_scaled = final_svr.predict(X_test_scaled)
preds = y_scaler.inverse_transform(preds_scaled)  # back to original units

mse = mean_squared_error(y_test, preds)
rmse = np.sqrt(mse)
print(f"\nTest MSE: {mse:.4f}")
print(f"Test RMSE: {rmse:.4f}")

# Sample prediction for a single hand-written observation.
# NOTE(review): the sample supplies cpu_usage_pct / memory_usage_pct, which are
# also the predicted quantities, and memory_usage_pct=500 looks out of range
# if that column is a 0-100 percentage -- confirm the intended sample values.
sample = pd.DataFrame([[300, 0.25, 500, 40, 75]], columns=['latency_ms', 'cpu_usage_pct', 'memory_usage_pct', 'memory_allocated', 'cpu_allocated'])
sample_scaled = x_scaler.transform(sample)
predicted_scaled = final_svr.predict(sample_scaled)
predicted = y_scaler.inverse_transform(predicted_scaled)

print(f"\nSample Input: {sample.to_dict(orient='records')[0]}")
print(f"Predicted CPU, Memory Usage: CPU={predicted[0][0]:.3f}, Memory={predicted[0][1]:.2f}")


[I 2025-05-06 02:31:14,266] A new study created in memory with name: no-name-cb112c3c-5833-43bb-a11b-fdc145191c90
[I 2025-05-06 02:31:14,500] Trial 0 finished with value: 0.0002029186391792377 and parameters: {'C': 0.5108763282765465, 'epsilon': 0.023353867569914557, 'gamma': 'auto'}. Best is trial 0 with value: 0.0002029186391792377.
[I 2025-05-06 02:31:14,565] Trial 1 finished with value: 0.0014311850719127461 and parameters: {'C': 5.661161849590573, 'epsilon': 0.0766579530696944, 'gamma': 'auto'}. Best is trial 0 with value: 0.0002029186391792377.
[I 2025-05-06 02:31:14,810] Trial 2 finished with value: 0.00011395844676320813 and parameters: {'C': 35.220707224831195, 'epsilon': 0.018794008738479095, 'gamma': 'scale'}. Best is trial 2 with value: 0.00011395844676320813.
[I 2025-05-06 02:31:15,042] Trial 3 finished with value: 0.00019351977357871832 and parameters: {'C': 0.40526885973456567, 'epsilon': 0.022587501975572112, 'gamma': 'scale'}. Best is trial 2 with value: 0.000113958446

Best Parameters: {'C': 94.98769196969427, 'epsilon': 0.0010845917123136078, 'gamma': 'auto'}

Test MSE: 0.0004
Test RMSE: 0.0209

Sample Input: {'latency_ms': 300, 'cpu_usage_pct': 0.25, 'memory_usage_pct': 500, 'memory_allocated': 40, 'cpu_allocated': 75}
Predicted CPU, Memory Usage: CPU=63.791, Memory=29.73


In [4]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error on the held-out test split, in the targets' original
# units (preds has already been inverse-transformed by y_scaler).
mae = mean_absolute_error(y_true=y_test, y_pred=preds)
print(f"Test MAE: {mae:.4f}")


Test MAE: 0.0172


# Service 2

In [None]:
import pandas as pd
import numpy as np
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error
import optuna
from optuna.samplers import TPESampler

# Load the Service 2 metrics table.
df = pd.read_csv("Service2.csv")

# Feature / target column selection.
# NOTE(review): both target columns ('cpu_usage_pct', 'memory_usage_pct') are
# also listed as features, so the model is given the values it is asked to
# predict. The reported errors may therefore be optimistic -- confirm whether
# the targets should be dropped from the features (or shifted in time).
feature_cols = ['latency_ms', 'cpu_usage_pct', 'memory_usage_pct', 'memory_allocated', 'cpu_allocated']
target_cols = ['cpu_usage_pct', 'memory_usage_pct']
X = df[feature_cols]
y = df[target_cols]

# Two-stage split: hold out 30%, then halve the hold-out into validation and
# test, giving 70% train / 15% val / 15% test.
X_train_full, X_temp, y_train_full, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)

# Standardise inputs and targets; both scalers are fitted on the training
# split only and then applied to the other splits.
x_scaler = StandardScaler().fit(X_train_full)
y_scaler = StandardScaler().fit(y_train_full)

X_train_scaled = x_scaler.transform(X_train_full)
X_val_scaled = x_scaler.transform(X_val)
X_test_scaled = x_scaler.transform(X_test)

y_train_scaled = y_scaler.transform(y_train_full)
y_val_scaled = y_scaler.transform(y_val)

# Objective for Optuna
def objective(trial):
    """Score one hyperparameter candidate for Optuna (Service 2 cell).

    Same body as the ``objective`` defined in the Service 1 cell. It samples
    ``C`` and ``epsilon`` on a log scale and ``gamma`` from
    {'scale', 'auto'}, fits one SVR per target column (via
    ``MultiOutputRegressor``) on the scaled training split, and returns the
    mean squared error on the scaled validation split.

    Reads the module-level arrays ``X_train_scaled``, ``y_train_scaled``,
    ``X_val_scaled`` and ``y_val_scaled`` at call time, so it scores against
    whichever dataset those names currently hold.

    Args:
        trial: the Optuna trial used to draw the hyperparameters.

    Returns:
        Validation MSE in scaled target space (lower is better).
    """
    svr_params = {
        'C': trial.suggest_float('C', 0.1, 100.0, log=True),
        'epsilon': trial.suggest_float('epsilon', 0.001, 1.0, log=True),
        'gamma': trial.suggest_categorical('gamma', ['scale', 'auto']),
    }

    candidate = MultiOutputRegressor(SVR(**svr_params)).fit(X_train_scaled, y_train_scaled)
    val_preds_scaled = candidate.predict(X_val_scaled)
    return mean_squared_error(y_val_scaled, val_preds_scaled)

# Run optimization.
# The TPE sampler is seeded so the search -- and therefore best_params and
# every metric printed below -- is reproducible on Restart & Run All. 42
# matches the random_state already used for the train/val/test split.
study = optuna.create_study(direction="minimize", sampler=TPESampler(seed=42))
study.optimize(objective, n_trials=30)

# Best model training
best_params = study.best_params
print("Best Parameters:", best_params)

# Refit on train + validation with the tuned hyperparameters. Both parts were
# scaled with the scalers fitted on the training split only.
final_svr = MultiOutputRegressor(SVR(**best_params))
final_svr.fit(
    np.vstack([X_train_scaled, X_val_scaled]),
    np.vstack([y_train_scaled, y_val_scaled]),
)

# Test set evaluation.
# y_test_scaled is not used by the metrics below (they are computed in the
# original units); it is kept in case other cells rely on it.
y_test_scaled = y_scaler.transform(y_test)
preds_scaled = final_svr.predict(X_test_scaled)
preds = y_scaler.inverse_transform(preds_scaled)  # back to original units

mse = mean_squared_error(y_test, preds)
rmse = np.sqrt(mse)
print(f"\nTest MSE: {mse:.4f}")
print(f"Test RMSE: {rmse:.4f}")

# Sample prediction for a single hand-written observation.
# NOTE(review): the sample supplies cpu_usage_pct / memory_usage_pct, which are
# also the predicted quantities, and memory_usage_pct=500 looks out of range
# if that column is a 0-100 percentage -- confirm the intended sample values.
sample = pd.DataFrame([[300, 0.25, 500, 40, 75]], columns=['latency_ms', 'cpu_usage_pct', 'memory_usage_pct', 'memory_allocated', 'cpu_allocated'])
sample_scaled = x_scaler.transform(sample)
predicted_scaled = final_svr.predict(sample_scaled)
predicted = y_scaler.inverse_transform(predicted_scaled)

print(f"\nSample Input: {sample.to_dict(orient='records')[0]}")
print(f"Predicted CPU, Memory Usage: CPU={predicted[0][0]:.3f}, Memory={predicted[0][1]:.2f}")


[I 2025-05-06 02:34:05,195] A new study created in memory with name: no-name-435279eb-37d5-48aa-a48b-d16e328ad41c
[I 2025-05-06 02:34:05,522] Trial 0 finished with value: 8.490895338060277e-05 and parameters: {'C': 0.5791849122906593, 'epsilon': 0.015826441593055664, 'gamma': 'auto'}. Best is trial 0 with value: 8.490895338060277e-05.
[I 2025-05-06 02:34:06,999] Trial 1 finished with value: 5.272599469108338e-06 and parameters: {'C': 77.7838374435823, 'epsilon': 0.004119597625176744, 'gamma': 'auto'}. Best is trial 1 with value: 5.272599469108338e-06.
[I 2025-05-06 02:34:07,025] Trial 2 finished with value: 0.12721323805035234 and parameters: {'C': 0.6915953408930543, 'epsilon': 0.44274502629887885, 'gamma': 'scale'}. Best is trial 1 with value: 5.272599469108338e-06.
[I 2025-05-06 02:34:07,623] Trial 3 finished with value: 2.1663415831075603e-05 and parameters: {'C': 27.449714218141036, 'epsilon': 0.008369475408586494, 'gamma': 'auto'}. Best is trial 1 with value: 5.272599469108338e-0