In [11]:
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.metrics import mean_squared_error

In [12]:
# 1. 读取数据
data = pd.read_csv('data/customer_power_data.csv')

In [13]:
# 2. 时间戳特征处理
data['timestamp'] = pd.to_datetime(data['timestamp'])
data['hour'] = data['timestamp'].dt.hour
data['day_of_week'] = data['timestamp'].dt.dayofweek
data['month'] = data['timestamp'].dt.month

In [14]:
# 3. 准备特征和目标值
X = data[['hour', 'day_of_week', 'month']]  # 输入特征为时间特征
y = data[['load_mg1_1', 'load_mg1_2', 'load_mg1_3', 'load_mg1_4', 'load_mg1_5',
          'load_mg1_6', 'load_mg1_7', 'load_mg1_8', 'load_mg1_9', 'load_mg1_10',
          'load_mg1_11', 'load_mg1_12', 'load_mg1_13', 'load_mg1_14', 'load_mg1_15',
          'load_mg1_16', 'load_mg1_17', 'load_mg1_18', 'load_mg1_19', 'load_mg1_20',
          'load_mg1_21', 'load_mg1_22', 'load_mg1_23', 'load_mg1_24', 'load_mg1_25',
          'load_mg1_26', 'load_mg1_27', 'load_mg1_28', 'load_mg1_29', 'load_mg1_30',
          'load_mg1_31']]  # 输出为每位用户的用电量

In [15]:
# 4. 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [16]:
# 5. 训练LightGBM多输出回归模型
model = MultiOutputRegressor(lgb.LGBMRegressor())
model.fit(X_train, y_train)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000172 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 44
[LightGBM] [Info] Number of data points in the train set: 13977, number of used features: 3
[LightGBM] [Info] Start training from score 0.625265
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000189 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 44
[LightGBM] [Info] Number of data points in the train set: 13977, number of used features: 3
[LightGBM] [Info] Start training from score 0.282908
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000197 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you

In [17]:
# 6. 预测用户用电量
y_pred = model.predict(X_test)

In [18]:
# 7. 计算预测值的总和（SUM）
y_pred_sum = y_pred.sum(axis=1)

In [19]:
# 8. 评估模型表现
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error (Users): {mse}')

Mean Squared Error (Users): 0.14638368336308763


In [20]:
# 9. 如果需要对SUM进行评估
actual_sum = y_test.sum(axis=1)
mse_sum = mean_squared_error(actual_sum, y_pred_sum)
print(f'Mean Squared Error (SUM): {mse_sum}')

Mean Squared Error (SUM): 11.41259601064296


In [21]:
# 10. 输出每位用户的预测值和总用电量
prediction_results = pd.DataFrame(y_pred, columns=y.columns)
prediction_results['Predicted_SUM'] = y_pred_sum
print(prediction_results.head())

   load_mg1_1  load_mg1_2  load_mg1_3  load_mg1_4  load_mg1_5  load_mg1_6  \
0    0.429703    0.220759    0.110334    0.435672    0.582843    0.230303   
1    0.380077    0.121004    0.111434    0.226308    0.236535    0.175256   
2    0.327673    0.291786    0.111834    0.332381    0.366020    0.259893   
3    0.455586    0.160683    0.100575    0.537404    0.306045    0.303376   
4    1.021142    0.566491    0.324921    0.215111    0.481798    0.412862   

   load_mg1_7  load_mg1_8  load_mg1_9  load_mg1_10  ...  load_mg1_23  \
0    1.224542    0.194913    0.391565     0.405276  ...     0.259587   
1    0.447927    0.415580    0.338973     0.395446  ...     0.151058   
2    0.429693    0.569835    0.536339     0.879503  ...     0.184424   
3    0.685344    0.218341    0.449694     0.796630  ...     0.128705   
4    0.934533    0.698736    0.549491     0.982551  ...     0.361945   

   load_mg1_24  load_mg1_25  load_mg1_26  load_mg1_27  load_mg1_28  \
0     0.401517     0.972866     0.

In [23]:
import tkinter as tk
from tkinter import ttk

# 0. 处理时间数据
time_data = data['timestamp'].tolist()  # 将时间转换为列表

# 1. 获取用户列名和总用电量列
user_columns = [col for col in data.columns if col.startswith('load_mg1_')]
sum_column = 'SUM'

# 2. 创建Tkinter窗口
class PowerUsageApp:
    def __init__(self, root):
        self.root = root
        self.root.title("用户用电量展示")
        self.root.geometry("600x400")

        # 时间选择滑动条
        self.time_label = tk.Label(root, text="选择时间点：")
        self.time_label.pack()

        # 创建一个显示滑动条的LabelFrame
        self.slider_frame = tk.LabelFrame(root, text="时间选择")
        self.slider_frame.pack(pady=10)

        # Slider: 拖动条显示时间点
        self.time_slider = tk.Scale(self.slider_frame, from_=0, to=len(time_data) - 1, orient=tk.HORIZONTAL, length=500, command=self.update_usage)
        self.time_slider.pack()

        # 用电量展示框
        self.output_frame = tk.LabelFrame(root, text="用电量信息")
        self.output_frame.pack(pady=20)

        self.usage_text = tk.Text(self.output_frame, height=10, width=70)
        self.usage_text.pack()

        # 初始化展示第一个时间点的数据
        self.update_usage(0)

    def update_usage(self, value):
        # 根据选择的时间点获取数据
        time_index = int(value)
        selected_time = time_data[time_index]

        # 获取对应时间的用户用电量和总用电量
        selected_row = data.iloc[time_index]

        # 清空显示区域
        self.usage_text.delete(1.0, tk.END)

        # 显示选择的时间
        self.usage_text.insert(tk.END, f"时间: {selected_time}\n\n")

        # 显示每个用户的用电量
        for col in user_columns:
            self.usage_text.insert(tk.END, f"{col}: {selected_row[col]:.3f} kWh\n")

        # 显示总用电量
        self.usage_text.insert(tk.END, f"\n总用电量(SUM): {selected_row[sum_column]:.3f} kWh")

# 3. 运行Tkinter主窗口
if __name__ == "__main__":
    root = tk.Tk()
    app = PowerUsageApp(root)
    root.mainloop()