In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from joblib import dump
from dateutil import parser
# Load the dataset
df = pd.read_csv('simulated_heater_usage_data.csv')

# Preprocessing: parse the 'date' strings into datetime values
df['date'] = df['date'].apply(parser.parse)

df['weekday'] = df['date'].dt.weekday
df['hour'] = df['date'].dt.hour  # hour is taken directly from the 'date' column

# The target is expressed in hours, not minutes
df['heater_on_hour'] = df['hour']

# Feature matrix
X = df[['weekday',  'initial_temperature', 'temperature_outside', 'humidity']]
# Target vector
y = df['heater_on_hour']

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Define the model
heater_on_time_model = RandomForestRegressor(n_estimators=100, random_state=0)

# Train the model on the standardized features
heater_on_time_model.fit(X_scaled, y)

# Evaluate with 5-fold cross-validation (scores are negated MSE values)
cross_val_score_heater_on_time = cross_val_score(heater_on_time_model, X_scaled, y, cv=5, scoring='neg_mean_squared_error')

# RMSE reported as the square root of the mean fold MSE
average_rmse_heater_on_time = np.sqrt(-np.mean(cross_val_score_heater_on_time))
print("Average RMSE for Heater On Hour Prediction:", average_rmse_heater_on_time)

# Save the model
dump(heater_on_time_model, 'heater_on_time_prediction_model.joblib')
# The model was fitted on standardized inputs, so the fitted scaler has to be
# saved as well; without it the model cannot be applied to new raw feature rows.
dump(scaler, 'heater_on_time_scaler.joblib')

print("Heater On Time Prediction model saved successfully.")


Average RMSE for Heater On Hour Prediction: 7.408671754247792
Heater On Time Prediction model saved successfully.


In [34]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestRegressor

# Load the dataset
df = pd.read_csv('simulated_heater_usage_data.csv')


def _hour_of(time_text):
    """Return the integer before the first ':' of a time string."""
    return int(time_text.split(':')[0])


# Preprocessing: keep only the calendar date, then derive weekday and hour
parsed_dates = pd.to_datetime(df['date'])
df['date'] = parsed_dates.dt.date
df['weekday'] = parsed_dates.dt.weekday  # Monday=0, Sunday=6
df['hour'] = df['time'].apply(_hour_of)

# Features and target for the target-temperature model
feature_columns = ['weekday', 'hour', 'initial_temperature']
X = df[feature_columns]
y_temp = df['target_temperature']

# Random forest regressor with a fixed seed
random_forest_regressor = RandomForestRegressor(n_estimators=100, random_state=0)

# 5-fold cross-validation; scores are negated MSE values
cross_val_scores_temp = cross_val_score(
    random_forest_regressor,
    X,
    y_temp,
    cv=5,
    scoring='neg_mean_squared_error',
)

# RMSE reported as the square root of the mean fold MSE
mean_fold_mse = -np.mean(cross_val_scores_temp)
average_rmse = np.sqrt(mean_fold_mse)
print("Average RMSE:", average_rmse)

# Fit on the full data and save the fitted model
random_forest_regressor.fit(X, y_temp)
dump(random_forest_regressor, 'temperature_prediction_model.joblib')

Average RMSE: 2.2384017958846765


['temperature_prediction_model.joblib']

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import pandas as pd
import numpy as np

# `dump` is imported here so this cell can save the model by itself; the
# `joblib` module name is never imported in this notebook, so the former
# `joblib.dump(...)` call raised NameError.
from joblib import dump

# Load the dataset
df = pd.read_csv('simulated_heater_usage_data.csv')

# Drop rows that contain missing values
df = df.dropna()

# Features and target for the heating-time model
X = df[['initial_temperature', 'target_temperature', 'temperature_outside', 'humidity']]  # features
y = df['heating_time']  # target

# Split into training and test sets (20% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the linear regression model
model = LinearRegression()

# Train the model
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# RMSE on the test set
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print("RMSE on test set:", rmse)

# Save the fitted model; it is already trained on X_train above, so it is not
# fitted a second time before saving.
dump(model, 'heating_time_prediction_model.joblib')

RMSE on test set: 3.4487932698996873


In [None]:
from joblib import dump

# Inspect the label column before it is used as a training target
print("Unique values in 'heater_on':", df['heater_on'].unique())
print("Data type of 'heater_on':", df['heater_on'].dtype)

# Cast to integer only when the column is not already an integer dtype.
# Comparing the dtype with the string 'int' only matches the platform's default
# integer width, so an int32 column could be treated as "not integer".
if not pd.api.types.is_integer_dtype(df['heater_on']):
    df['heater_on'] = df['heater_on'].astype(int)

# Train and save the heater on/off model
# NOTE(review): `voting_classifier` is not defined in the visible part of this
# notebook, and `X_scaled` was built in the first cell from a separate load of
# the CSV. Presumably `df` here has gone through dropna(), so confirm that
# `X_scaled` and df['heater_on'] still have the same rows before relying on it.
voting_classifier.fit(X_scaled, df['heater_on'])
dump(voting_classifier, 'voting_classifier.joblib')

# Save the target-temperature model.
# It is already fitted in its own cell on ['weekday', 'hour', 'initial_temperature'].
# It must not be refitted on `X` here: by this point `X` has been rebound to the
# heating-time feature set, which contains 'target_temperature' itself, so a
# refit would leak the target and overwrite the correctly trained model.
dump(random_forest_regressor, 'temperature_prediction_model.joblib')

# Save the heating-time model (already fitted on X_train / y_train in its own cell)
dump(model, 'heating_time_prediction_model.joblib')

# Report that the models were saved
print("Models saved successfully.")


Unique values in 'heater_on': [1 0]
Data type of 'heater_on': int32
Models saved successfully.
