In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from collections import Counter
from scipy.stats import spearmanr
import statsmodels.api as sm
from statsmodels.formula.api import ols

import warnings
# 忽略所有警告
warnings.filterwarnings("ignore")

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.metrics import MeanSquaredError

In [3]:
filename = '../002Data/preprocessing data.csv'
data = pd.read_csv(filename, encoding='utf-8')
data['室厅数'] = data['室厅数'].str.replace('室', '')

In [4]:
# 2. 数据预处理
X = data[['对数均价/平方米每元','对数面积','室厅数','是否是车位充足','是否是绿化率高','是否是低总价','是否是低单价']].values
y = data['对数总价'].values
# 特征缩放
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# 3. 构建人工神经网络
model = Sequential([
    Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(32, activation='relu'),
    Dense(1)  # 输出层
])

model.compile(optimizer='adam', loss='mse', metrics=[MeanSquaredError()])

# 4. 训练模型
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, verbose=1)

# # 5. 测试模型
# test_loss, test_mae = model.evaluate(X_test, y_test, verbose=0)
# print(f"Test Loss: {test_loss}, Test MAE: {test_mae}")

# 5. 评估模型并计算 R²
y_pred = model.predict(X_test).flatten()

# R² 计算
ss_res = np.sum((y_test - y_pred) ** 2)
ss_tot = np.sum((y_test - np.mean(y_test)) ** 2)
r2_score = 1 - (ss_res / ss_tot)

print(f"R²: {r2_score:.4f}")

# 6. 使用模型进行预测
predictions = model.predict(X_test[:9])
print(f"真实值: {y_test[:9]}")
print(f"预测值: {predictions.flatten()}")

Epoch 1/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 31.0701 - mean_squared_error: 31.0701 - val_loss: 27.5536 - val_mean_squared_error: 27.5536
Epoch 2/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 26.0306 - mean_squared_error: 26.0306 - val_loss: 23.0892 - val_mean_squared_error: 23.0892
Epoch 3/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 22.0776 - mean_squared_error: 22.0776 - val_loss: 18.3698 - val_mean_squared_error: 18.3698
Epoch 4/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 17.2830 - mean_squared_error: 17.2830 - val_loss: 13.2221 - val_mean_squared_error: 13.2221
Epoch 5/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 11.7618 - mean_squared_error: 11.7618 - val_loss: 7.9600 - val_mean_squared_error: 7.9600
Epoch 6/100
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2

In [5]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score,mean_absolute_error,accuracy_score

In [6]:
# 设定自变量与因变量，并划分训练集与测试集
# 首先分析总价
X = data[['对数均价/平方米每元','对数面积','室厅数','是否是车位充足','是否是绿化率高','是否是低总价','是否是低单价']]
Y = data['对数总价']

X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.3,random_state=42)

# 训练模型
models = RandomForestRegressor(n_estimators=50,random_state=42)
models.fit(X_train,Y_train)

# 预测测试集结果
y_pred = models.predict(X_test)

# 计算准确率
mae = mean_absolute_error(Y_test,y_pred)
mse = mean_squared_error(Y_test,y_pred)
r2 = r2_score(Y_test,y_pred)

print('mae:',mae)
print('mse:',mse)
print('r2:',r2)

mae: 0.26327081036541294
mse: 0.1918052984102585
r2: 0.762807799596662


In [7]:
# 设定自变量与因变量，并划分训练集与测试集
# 分析均价
X = data[['对数总价','对数面积','室厅数','是否是品牌房企','是否是人车分流','是否是车位充足','是否是绿化率高','是否是低单价']]
y = data['对数均价/平方米每元']

X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.3,random_state=42)

# 训练模型
models = RandomForestRegressor(n_estimators=50,random_state=42)
models.fit(X_train,Y_train)

# 预测测试集结果
y_pred = models.predict(X_test)

# 计算准确率
mae = mean_absolute_error(Y_test,y_pred)
mse = mean_squared_error(Y_test,y_pred)
r2 = r2_score(Y_test,y_pred)

print('mae:',mae)
print('mse:',mse)
print('r2:',r2)

mae: 0.013742516063327291
mse: 0.0038240864129790546
r2: 0.9952710197354042


In [8]:
models = [model, models]

print(models)

[<Sequential name=sequential, built=True>, RandomForestRegressor(n_estimators=50, random_state=42)]


In [9]:
print(history)

<keras.src.callbacks.history.History object at 0x315d0c520>
