# In [4]:
import pandas as pd
import warnings
warnings.filterwarnings(action="ignore")

import tensorflow_decision_forests as tfdf
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# ----------------------------
# 1. Load the data
# ----------------------------

# The "Id" column is used as the DataFrame index so it is not treated as a feature.
df = pd.read_csv("train.csv", index_col="Id")

# ----------------------------
# 2. Preprocessing
# ----------------------------
# Name of the regression target column.
target = "SalePrice"

# Optionally drop identifier-like columns. They are only dropped when "PID" is
# present; errors="ignore" tolerates "Order" being absent in that case.
drop_cols = ["Order", "PID"] if "PID" in df.columns else []
df = df.drop(columns=drop_cols, errors="ignore")

# Fail fast if the target column is missing. An explicit raise is used instead
# of `assert` because assertions are stripped when Python runs with -O.
if target not in df.columns:
    raise ValueError(f"{target} not found in data")

# Drop rows whose target is missing; a row without a label cannot be used for
# training or evaluation. Missing feature values are left untouched.
# (Optional alternative: df.dropna() to drop every row containing any NA.)
df = df.dropna(subset=[target])

# ----------------------------
# 3. Train / test split
# ----------------------------
# Hold out 20% of the rows for evaluation; the seed is fixed for reproducibility.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# ----------------------------
# 4. Build & train the model
# ----------------------------
model = tfdf.keras.RandomForestModel(
    task=tfdf.keras.Task.REGRESSION,
    hyperparameter_template="benchmark_rank1",
)

# TF-DF Keras models are trained on tf.data.Dataset objects, not on pandas
# DataFrames, and `fit` has no `label` argument. The label column is declared
# when converting the DataFrame; the task must be given so the label is
# treated as a numerical regression target.
train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(
    train_df,
    label=target,
    task=tfdf.keras.Task.REGRESSION,
)
model.fit(train_ds)

# ----------------------------
# 5. Model evaluation
# ----------------------------
# Keras has no "rmse" string alias for metrics, so only MAE and MSE are
# requested here; RMSE is derived from MSE after evaluation.
model.compile(metrics=["mae", "mse"])

# Evaluation needs a tf.data.Dataset with the label split out from the
# features, so the held-out DataFrame is converted first.
test_ds = tfdf.keras.pd_dataframe_to_tf_dataset(
    test_df,
    label=target,
    task=tfdf.keras.Task.REGRESSION,
)
evaluation = model.evaluate(test_ds, return_dict=True)

# RMSE is the square root of the MSE reported for the evaluation set.
if "mse" in evaluation:
    evaluation["rmse"] = evaluation["mse"] ** 0.5

print("\n🔍 模型评估结果：")
for metric, value in evaluation.items():
    print(f"{metric}: {value:.2f}")

# ----------------------------
# 6. Feature importance analysis
# ----------------------------
inspector = model.make_inspector()

# variable_importances() returns a dict mapping an importance name to a list
# of (column_spec, score) tuples. Prefer "SUM_SCORE" and fall back to whatever
# importance the model exposes if that key is absent.
importances_by_metric = inspector.variable_importances()
preferred_metric = "SUM_SCORE"
if preferred_metric in importances_by_metric:
    importance_metric = preferred_metric
else:
    importance_metric = next(iter(importances_by_metric), None)

important_features = importances_by_metric.get(importance_metric, [])
top_features = important_features[:10]

print("\n📊 Top 10 特征（按重要性）:")
for feature, importance in top_features:
    print(f"{feature.name}: {importance:.2f}")

# Visualization: the inspector has no plotting helper, so draw a horizontal
# bar chart with matplotlib. Skipped when the model exposes no importances.
if top_features:
    feature_names = [feature.name for feature, _ in top_features]
    feature_scores = [importance for _, importance in top_features]
    # Reverse so the most important feature is drawn at the top of the chart.
    plt.barh(feature_names[::-1], feature_scores[::-1])
    plt.xlabel(importance_metric)
    plt.title("Top Feature Importances")
    plt.tight_layout()
    plt.show()

# ----------------------------
# 7. Save the model & run example predictions
# ----------------------------
model.save("saved_model/ames_tfdf")

# Prediction example: the model consumes tf.data.Dataset objects, so the
# feature-only DataFrame (label column removed) is converted without a label.
predict_ds = tfdf.keras.pd_dataframe_to_tf_dataset(
    test_df.drop(columns=[target]),
    task=tfdf.keras.Task.REGRESSION,
)
predictions = model.predict(predict_ds)
print("\n前5条预测结果（单位：房价）：")
print(predictions[:5])


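# Output: ModuleNotFoundError: No module named 'tensorflow_decision_forests'
# (the package is not installed in this environment; install it with
# `pip install tensorflow_decision_forests` before running this cell)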