In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

def parse_rgb(rgb_str):
    """Parse an RGB string such as ``"(255, 0, 10)"`` into a list of ints.

    Surrounding whitespace and the enclosing parentheses are removed, then the
    remainder is split on commas. Whitespace around each component is
    tolerated, so ``"(1,2,3)"`` and ``"( 1 , 2 , 3 )"`` parse the same way as
    ``"(1, 2, 3)"``.

    Args:
        rgb_str: Comma-separated integer components, optionally wrapped in
            parentheses.

    Returns:
        The components as a list of ints, in the order they appear.

    Raises:
        ValueError: If any component is not a valid integer literal.
    """
    # Split on a bare comma rather than ", " so the spacing after the comma
    # does not matter; int() itself ignores whitespace around each component.
    inner = rgb_str.strip().strip("()")
    return [int(part) for part in inner.split(",")]

# Load the raw samples from disk.
data = pd.read_csv("data.csv")

# Columns holding RGB strings; each one is expanded into three numeric
# columns named <column>_R, <column>_G and <column>_B.
rgb_columns = ["black_box", "red_circle", "green_triangle", "blue_pentagon", "real_rgb"]
for col in rgb_columns:
    channel_names = [f"{col}_{channel}" for channel in "RGB"]
    data[channel_names] = [parse_rgb(value) for value in data[col]]

# Discard the original string columns and the columns that are not needed.
data = data.drop(columns=rgb_columns + ["color_code", "unique_id"])

# Targets are the three channels of real_rgb; every remaining column is a
# feature.
target_columns = ["real_rgb_R", "real_rgb_G", "real_rgb_B"]
y = data[target_columns]
X = data.drop(columns=target_columns)

# Hold out 30% of the rows as the test set (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Fit a random-forest regressor on the training split (three target columns).
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict the held-out rows.
y_pred = rf_model.predict(X_test)

# Report the mean squared error separately for each target column.
mse = mean_squared_error(y_test, y_pred, multioutput="raw_values")
print(f"MSE (R, G, B): {mse}")
