In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.metrics import MeanSquaredError
import numpy as np




# 加载数据
# 请将 'your_data.csv' 替换为你实际的数据文件名
data = pd.read_csv('../002Data/preprocessing data.csv')

# 找出所有的分类特征列
categorical_columns = data.select_dtypes(include=['object']).columns

# 对所有分类特征进行独热编码
for col in categorical_columns:
    encoded_col = pd.get_dummies(data[col], prefix=col)
    data = pd.concat([data.drop(col, axis=1), encoded_col], axis=1)

# 定义特征
X = data.drop(['对数总价', '对数均价/平方米每元'], axis=1).values

# 总价目标变量
y_total = data['对数总价'].values
# 均价目标变量
y_avg = data['对数均价/平方米每元'].values

# 特征缩放
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# 划分训练集和测试集（总价预测）
X_train_total, X_test_total, y_train_total, y_test_total = train_test_split(
    X_scaled, y_total, test_size=0.2, random_state=42)

# 划分训练集和测试集（均价预测）
X_train_avg, X_test_avg, y_train_avg, y_test_avg = train_test_split(
    X_scaled, y_avg, test_size=0.2, random_state=42)

# 构建人工神经网络
def build_ann_model(input_shape):
    model = Sequential([
        Dense(32, activation='relu', input_shape=(input_shape,)),
        Dense(32, activation='relu'),
        Dense(1)  # 输出层
    ])
    model.compile(optimizer='adam', loss='mse', metrics=[MeanSquaredError()])
    return model

# 定义模型评估函数
def evaluate_model(model, X_train, y_train, X_test, y_test):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    return mae, mse, r2

# 人工神经网络模型（总价预测）
ann_total = build_ann_model(X_train_total.shape[1])
history_total = ann_total.fit(X_train_total, y_train_total, epochs=100, batch_size=32, validation_split=0.2, verbose=1)
y_pred_ann_total = ann_total.predict(X_test_total).flatten()
mae_ann_total = mean_absolute_error(y_test_total, y_pred_ann_total)
mse_ann_total = mean_squared_error(y_test_total, y_pred_ann_total)
r2_ann_total = r2_score(y_test_total, y_pred_ann_total)
print("人工神经网络模型（总价预测）:")
print(f"MAE: {mae_ann_total:.4f}, MSE: {mse_ann_total:.4f}, R²: {r2_ann_total:.4f}")

# 人工神经网络模型（均价预测）
ann_avg = build_ann_model(X_train_avg.shape[1])
history_avg = ann_avg.fit(X_train_avg, y_train_avg, epochs=100, batch_size=32, validation_split=0.2, verbose=1)
y_pred_ann_avg = ann_avg.predict(X_test_avg).flatten()
mae_ann_avg = mean_absolute_error(y_test_avg, y_pred_ann_avg)
mse_ann_avg = mean_squared_error(y_test_avg, y_pred_ann_avg)
r2_ann_avg = r2_score(y_test_avg, y_pred_ann_avg)
print("人工神经网络模型（均价预测）:")
print(f"MAE: {mae_ann_avg:.4f}, MSE: {mse_ann_avg:.4f}, R²: {r2_ann_avg:.4f}")

# 随机森林模型（总价预测）
rf_total = RandomForestRegressor(n_estimators=100, random_state=42)
mae_rf_total, mse_rf_total, r2_rf_total = evaluate_model(rf_total, X_train_total, y_train_total, X_test_total, y_test_total)
print("随机森林模型（总价预测）:")
print(f"MAE: {mae_rf_total:.4f}, MSE: {mse_rf_total:.4f}, R²: {r2_rf_total:.4f}")

# 随机森林模型（均价预测）
rf_avg = RandomForestRegressor(n_estimators=100, random_state=42)
mae_rf_avg, mse_rf_avg, r2_rf_avg = evaluate_model(rf_avg, X_train_avg, y_train_avg, X_test_avg, y_test_avg)
print("随机森林模型（均价预测）:")
print(f"MAE: {mae_rf_avg:.4f}, MSE: {mse_rf_avg:.4f}, R²: {r2_rf_avg:.4f}")

# 决策树模型（总价预测）
dt_total = DecisionTreeRegressor(random_state=42)
mae_dt_total, mse_dt_total, r2_dt_total = evaluate_model(dt_total, X_train_total, y_train_total, X_test_total, y_test_total)
print("决策树模型（总价预测）:")
print(f"MAE: {mae_dt_total:.4f}, MSE: {mse_dt_total:.4f}, R²: {r2_dt_total:.4f}")

# 决策树模型（均价预测）
dt_avg = DecisionTreeRegressor(random_state=42)
mae_dt_avg, mse_dt_avg, r2_dt_avg = evaluate_model(dt_avg, X_train_avg, y_train_avg, X_test_avg, y_test_avg)
print("决策树模型（均价预测）:")
print(f"MAE: {mae_dt_avg:.4f}, MSE: {mse_dt_avg:.4f}, R²: {r2_dt_avg:.4f}")


# 梯度提升树模型（总价预测）
gb_total = GradientBoostingRegressor(n_estimators=100, random_state=42)
mae_gb_total, mse_gb_total, r2_gb_total = evaluate_model(gb_total, X_train_total, y_train_total, X_test_total, y_test_total)
print("梯度提升树模型（总价预测）:")
print(f"MAE: {mae_gb_total:.4f}, MSE: {mse_gb_total:.4f}, R²: {r2_gb_total:.4f}")


# 梯度提升树模型（均价预测）
gb_avg = GradientBoostingRegressor(n_estimators=100, random_state=42)
mae_gb_avg, mse_gb_avg, r2_gb_avg = evaluate_model(gb_avg, X_train_avg, y_train_avg, X_test_avg, y_test_avg)
print("梯度提升树模型（均价预测）:")
print(f"MAE: {mae_gb_avg:.4f}, MSE: {mse_gb_avg:.4f}, R²: {r2_gb_avg:.4f}")






Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 28.3989 - mean_squared_error: 28.3989 - val_loss: 26.4216 - val_mean_squared_error: 26.4216
Epoch 2/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 24.9623 - mean_squared_error: 24.9623 - val_loss: 20.9133 - val_mean_squared_error: 20.9133
Epoch 3/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 18.1894 - mean_squared_error: 18.1894 - val_loss: 12.1229 - val_mean_squared_error: 12.1229
Epoch 4/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 8.6165 - mean_squared_error: 8.6165 - val_loss: 3.1014 - val_mean_squared_error: 3.1014
Epoch 5/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.7644 - mean_squared_error: 1.7644 - val_loss: 0.7092 - val_mean_squared_error: 0.7092
Epoch 6/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 96.6519 - mean_squared_error: 96.6519 - val_loss: 92.1622 - val_mean_squared_error: 92.1622
Epoch 2/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 89.7688 - mean_squared_error: 89.7688 - val_loss: 82.6981 - val_mean_squared_error: 82.6981
Epoch 3/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 79.4635 - mean_squared_error: 79.4635 - val_loss: 66.9329 - val_mean_squared_error: 66.9329
Epoch 4/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 60.8297 - mean_squared_error: 60.8297 - val_loss: 44.1046 - val_mean_squared_error: 44.1046
Epoch 5/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 37.5055 - mean_squared_error: 37.5055 - val_loss: 19.0827 - val_mean_squared_error: 19.0827
Epoch 6/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - 

In [2]:
# 定义一个函数来保存评价指标
def save_evaluation_results(model_name, mae, mse, r2, results):
    results.append({
        '模型名': model_name,
        'MAE': mae,
        'MSE': mse,
        'R²': r2
    })

# 初始化一个空列表来存储结果
evaluation_results = []
save_evaluation_results('人工神经网络模型（总价预测）', mae_ann_total, mse_ann_total, r2_ann_total, evaluation_results)
save_evaluation_results('人工神经网络模型（均价预测）', mae_ann_avg, mse_ann_avg, r2_ann_avg, evaluation_results)
save_evaluation_results('随机森林模型（总价预测）', mae_rf_total, mse_rf_total, r2_rf_total, evaluation_results)
save_evaluation_results('随机森林模型（均价预测）', mae_rf_avg, mse_rf_avg, r2_rf_avg, evaluation_results)
save_evaluation_results('决策树模型（总价预测）', mae_dt_total, mse_dt_total, r2_dt_total, evaluation_results)
save_evaluation_results('决策树模型（均价预测）', mae_dt_avg, mse_dt_avg, r2_dt_avg, evaluation_results)
save_evaluation_results('梯度提升树模型（总价预测）', mae_gb_total, mse_gb_total, r2_gb_total, evaluation_results)
save_evaluation_results('梯度提升树模型（均价预测）', mae_gb_avg, mse_gb_avg, r2_gb_avg, evaluation_results)




# 将结果保存到 CSV 文件中
results_df = pd.DataFrame(evaluation_results)
results_df.to_csv('../002Data/EstimatingIndex.csv', index=False, encoding='utf-8-sig')

print("评价指标已保存到 ../002Data/EstimatingIndex.csv 文件中。")

评价指标已保存到 ../002Data/EstimatingIndex.csv 文件中。


In [3]:
data = pd.read_csv('../002Data/EstimatingIndex.csv')
print(data)

              模型名       MAE       MSE        R²
0  人工神经网络模型（总价预测）  0.501535  0.458524  0.443657
1  人工神经网络模型（均价预测）  0.592525  0.694979 -0.583466
2    随机森林模型（总价预测）  0.013862  0.002406  0.997081
3    随机森林模型（均价预测）  0.004463  0.000373  0.999150
4     决策树模型（总价预测）  0.028120  0.016908  0.979485
5     决策树模型（均价预测）  0.003802  0.000307  0.999300
6   梯度提升树模型（总价预测）  0.012259  0.001202  0.998541
7   梯度提升树模型（均价预测）  0.002800  0.000228  0.999479


In [4]:
# 定义一个函数来保存预测结果
def save_predictions(model_name, true_values, predicted_values, results):
    for true, pred in zip(true_values, predicted_values):
        results.append({
            '模型名称': model_name,
            '真实值': true,
            '预测值': pred
        })

# 初始化一个空列表来存储结果
prediction_results = []




ann_total_predictions = ann_total.predict(X_test_total[:2])
print("人工神经网络模型（总价预测）:")
print(f"真实值: {np.exp(y_test_total[:2])}")
print(f"预测值: {np.exp(ann_total_predictions.flatten())}")
save_predictions('人工神经网络模型（总价预测）', np.exp(y_test_total[:2]), np.exp(ann_total_predictions), prediction_results)
ann_avg_predictions = ann_avg.predict(X_test_avg[:2])
print("人工神经网络模型（均价预测）:")
print(f"真实值: {np.exp(y_test_avg[:2])}")
print(f"预测值: {np.exp(ann_avg_predictions.flatten())}")
save_predictions('人工神经网络模型（均价预测）', np.exp(y_test_avg[:2]), np.exp(ann_avg_predictions), prediction_results)


rf_total_predictions = rf_total.predict(X_test_total[:2])
print("随机森林模型（总价预测）:")
print(f"真实值: {np.exp(y_test_total[:2])}")
print(f"预测值: {np.exp(rf_total_predictions)}")
save_predictions('随机森林模型（总价预测）', np.exp(y_test_total[:2]), np.exp(rf_total_predictions), prediction_results)
rf_avg_predictions = rf_avg.predict(X_test_avg[:2]) 
print("随机森林模型（均价预测）:")
print(f"真实值: {np.exp(y_test_avg[:2])}")
print(f"预测值: {np.exp(rf_avg_predictions)}")
save_predictions('随机森林模型（均价预测）', np.exp(y_test_avg[:2]), np.exp(rf_avg_predictions), prediction_results)


dt_total_predictions = dt_total.predict(X_test_total[:2])
print("决策树模型（总价预测）:")
print(f"真实值: {np.exp(y_test_total[:2])}")
print(f"预测值: {np.exp(dt_total_predictions)}")
save_predictions('决策树模型（总价预测）', np.exp(y_test_total[:2]), np.exp(dt_total_predictions), prediction_results)
dt_avg_predictions = dt_avg.predict(X_test_avg[:2]) 
print("决策树模型（均价预测）:")
print(f"真实值: {np.exp(y_test_avg[:2])}")
print(f"预测值: {np.exp(dt_avg_predictions)}")
save_predictions('决策树模型（均价预测）', np.exp(y_test_avg[:2]), np.exp(dt_avg_predictions), prediction_results)

gb_total_predictions = gb_total.predict(X_test_total[:2])
print("梯度提升树模型（总价预测）:")
print(f"真实值: {np.exp(y_test_total[:2])}")
print(f"预测值: {np.exp(gb_total_predictions)}")
save_predictions('梯度提升树模型（总价预测）', np.exp(y_test_total[:2]), np.exp(gb_total_predictions), prediction_results)
gb_avg_predictions = gb_avg.predict(X_test_avg[:2]) 
print("梯度提升树模型（均价预测）:")
print(f"真实值: {np.exp(y_test_avg[:2])}")
print(f"预测值: {np.exp(gb_avg_predictions)}")
save_predictions('梯度提升树模型（均价预测）', np.exp(y_test_avg[:2]), np.exp(gb_avg_predictions), prediction_results)



# 将结果保存到 CSV 文件中
predictions_df = pd.DataFrame(prediction_results)
predictions_df.to_csv('../002Data/EstimatingPrice.csv', index=False, encoding='utf-8-sig')

print("预测结果已保存到 ../002Data/EstimatingPrice.csv 文件中。")




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step
人工神经网络模型（总价预测）:
真实值: [1721.  238.]
预测值: [2507.5183   161.73134]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
人工神经网络模型（均价预测）:
真实值: [55000. 23800.]
预测值: [68774.67  10383.897]
随机森林模型（总价预测）:
真实值: [1721.  238.]
预测值: [1645.66662034  239.71912837]
随机森林模型（均价预测）:
真实值: [55000. 23800.]
预测值: [55400.5949499 24000.       ]
决策树模型（总价预测）:
真实值: [1721.  238.]
预测值: [1535.   238.5]
决策树模型（均价预测）:
真实值: [55000. 23800.]
预测值: [55000. 24000.]
梯度提升树模型（总价预测）:
真实值: [1721.  238.]
预测值: [1678.08584194  239.6204153 ]
梯度提升树模型（均价预测）:
真实值: [55000. 23800.]
预测值: [55007.54347336 23998.22633106]
预测结果已保存到 ../002Data/EstimatingPrice.csv 文件中。
