# Preparing training data

In [5]:
# -- import packages -- 
from pathlib import Path
import numpy as np
import xgboost as xgb
import json
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [6]:
# Load the recorded samples from the 'training_data' directory that sits next
# to the current working directory (i.e. <cwd>/../training_data/).
training_data_file_name = 'training_data_14.json'
training_data_file_path = Path().absolute().parent / 'training_data' / training_data_file_name

with open(training_data_file_path, 'r', encoding='utf-8') as f:
    training_data = json.load(f)

# Both keys are required, so index them directly: a missing key now raises a
# KeyError naming the key, instead of `.get()` handing None to numpy and the
# problem only surfacing later as an unrelated error.
# Feature: 'request_number' as a single column. Target: 'real_processing_time'.
# (An alternative target is the total CPU time, i.e. the element-wise sum of
# 'user_cpu_time' and 'system_cpu_time'.)
X = np.array(training_data['request_number']).reshape(-1, 1)
y = np.array(training_data['real_processing_time'])

# First split: 70% train, 30% held out.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=43
)

# Second split: halve the held-out part into validation and test (15% each).
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=43
)

# XGBoost

In [7]:
# Wrap each split in an xgb.DMatrix. These objects are not referenced again in
# this cell; the estimator below is fitted on the plain arrays.
dtrain, dval, dtest = (
    xgb.DMatrix(features, label=target)
    for features, target in (
        (X_train, y_train),
        (X_val, y_val),
        (X_test, y_test),
    )
)

# Build the XGBoost regressor (scikit-learn style estimator).
model = XGBRegressor(
    objective="reg:squarederror",
    n_estimators=100,        # number of base learners (trees)
    max_depth=5,             # maximum depth of each tree
    colsample_bytree=0.7,    # fraction of features randomly sampled per tree
    random_state=42,         # keeps the result reproducible
)

# Train on the training split, then score on the held-out test split.
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# `mea` holds the mean absolute error (name kept as in the original cell).
mse, mea = (
    mean_squared_error(y_test, y_pred),
    mean_absolute_error(y_test, y_pred),
)

print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mea}")


# Side-by-side listing of every test target and its prediction.
print("    real    |    pred   \n")
for actual, predicted in zip(y_test, y_pred):
    print(f"    {actual}     |     {predicted}      ")

Mean Squared Error: 0.06381240236444247
Mean Absolute Error: 0.17923703908309108
    real    |    pred   

    0.22593555500498042     |     0.6044009923934937      
    0.09057151302113198     |     0.6044009923934937      
    5.859078565001255     |     5.870300769805908      
    1.5981379919976462     |     1.5633071660995483      
    10.591882100998191     |     10.424696922302246      
    1.3844525390013587     |     1.3407186269760132      
    2.201748966006562     |     1.9338699579238892      
    0.5876507259963546     |     0.6044009923934937      


# Save model

In [8]:
# Persist the trained model as JSON under ./modelsfile (relative to the current
# working directory). The directory is created first so that saving does not
# fail when it does not exist yet, e.g. on a fresh checkout.
model_file_path = Path() / 'modelsfile' / 'xgboost_newest_model_2.json'
model_file_path.parent.mkdir(parents=True, exist_ok=True)
model.save_model(model_file_path)