In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Load the raw data.
file_path = './Sort/Building_energy_consumption/filtered_merged_data_Floor1.csv'
data = pd.read_csv(file_path)

# Parse the date column and order the rows chronologically.
# sort_values keeps the original row labels, so rows can still be matched
# against the CSV by index label later on.
data['Date'] = pd.to_datetime(data['Date'])
data = data.sort_values('Date')

# Forward-fill missing values as a first-pass treatment.
# DataFrame.ffill() replaces fillna(method='ffill'), whose `method` keyword is
# deprecated in recent pandas releases. Rows before the first valid observation
# of a column stay NaN, exactly as before.
data = data.ffill()

# Min-max scale every float64 column; the fitted scaler is kept so values can
# be mapped back to their original units later.
numerical_columns = data.select_dtypes(include=['float64']).columns
scaler = MinMaxScaler()
scaled_data = data.copy()
scaled_data[numerical_columns] = scaler.fit_transform(data[numerical_columns])

# Turn the series into supervised (window, next value) pairs for the LSTM.
def create_dataset(dataset, time_step=1):
    """Build sliding-window samples from the first column of ``dataset``.

    Sample ``i`` uses rows ``i .. i + time_step - 1`` of column 0 as input and
    row ``i + time_step`` of column 0 as the target.

    Args:
        dataset: 2-D array-like supporting ``dataset[a:b, 0]`` indexing
            (e.g. a NumPy array of shape ``(n_rows, n_columns)``).
        time_step: Number of consecutive rows in each input window.

    Returns:
        ``(dataX, dataY)`` where ``dataX`` has shape ``(n_samples, time_step)``
        and ``dataY`` has shape ``(n_samples,)``, with
        ``n_samples = max(len(dataset) - time_step, 0)``. When no window fits,
        empty arrays of shape ``(0, time_step)`` and ``(0,)`` are returned so
        callers can still read ``dataX.shape[1]``.
    """
    # The last usable window ends at row len-2 and targets row len-1, so there
    # are exactly len - time_step samples (the earlier "- 1" dropped the final one).
    n_samples = max(len(dataset) - time_step, 0)

    windows = [dataset[start:start + time_step, 0] for start in range(n_samples)]
    targets = [dataset[start + time_step, 0] for start in range(n_samples)]

    if not windows:
        return np.empty((0, time_step)), np.empty((0,))
    return np.array(windows), np.array(targets)

# Number of past observations that make up one input window.
time_step = 10

# From here on `data` is the scaled numeric matrix (NumPy array), not the DataFrame.
data = scaled_data[numerical_columns].to_numpy()
trainX, trainY = create_dataset(data, time_step=time_step)

# Append a trailing feature axis so the LSTM receives [samples, time steps, features].
n_samples, n_steps = trainX.shape[0], trainX.shape[1]
trainX = trainX.reshape(n_samples, n_steps, 1)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

In [None]:
# Convert the NumPy training arrays into float32 PyTorch tensors.
trainX_tensor = torch.tensor(trainX, dtype=torch.float32)
trainY_tensor = torch.tensor(trainY, dtype=torch.float32)

# Pair inputs with targets and serve them in shuffled mini-batches of 64.
train_dataset = TensorDataset(trainX_tensor, trainY_tensor)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=64)

# Define the LSTM model.
class LSTMModel(nn.Module):
    """Single-layer LSTM followed by a linear head applied to the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_layer_size: Size of the LSTM hidden/cell state.
        output_size: Number of values predicted per sequence.

    Attributes:
        hidden_cell: ``(h, c)`` tuple passed to the LSTM as its initial state
            and overwritten with the final state after every forward pass.
            Callers may reset it between calls. If its batch dimension does not
            match the incoming batch, ``forward`` starts from a fresh zero
            state instead of failing.
    """

    def __init__(self, input_size=1, hidden_layer_size=50, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.lstm = nn.LSTM(input_size, hidden_layer_size, batch_first=True)
        self.linear = nn.Linear(hidden_layer_size, output_size)
        self.hidden_cell = self._zero_state(1)

    def _zero_state(self, batch_size, like=None):
        """Return a zero ``(h, c)`` state for ``batch_size`` sequences.

        When ``like`` is given, the state is created with that tensor's dtype
        and device.
        """
        shape = (1, batch_size, self.hidden_layer_size)
        if like is None:
            return torch.zeros(shape), torch.zeros(shape)
        return like.new_zeros(shape), like.new_zeros(shape)

    def forward(self, input_seq):
        """Predict from a batch of sequences.

        Args:
            input_seq: Tensor of shape ``(batch, time steps, input_size)``.

        Returns:
            Tensor of shape ``(batch, output_size)`` computed from the LSTM
            output at the final time step.
        """
        batch_size = input_seq.size(0)
        state = self.hidden_cell
        # nn.LSTM requires the state's batch dimension to equal the input's.
        # A stored state built for another batch size (e.g. the size-1 default)
        # is replaced by zeros rather than raising a shape error.
        if state is None or state[0].size(1) != batch_size:
            state = self._zero_state(batch_size, like=input_seq)
        lstm_out, self.hidden_cell = self.lstm(input_seq, state)
        predictions = self.linear(lstm_out[:, -1])
        return predictions

# Initialise the model, loss function and optimiser.
model = LSTMModel()
loss_function = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Train the model.
epochs = 10
for i in range(epochs):
    model.train()
    total_loss = 0
    with tqdm(train_loader, unit="batch", desc=f"Epoch {i+1}") as tepoch:
        for batches_seen, (seq, labels) in enumerate(tepoch, start=1):
            optimizer.zero_grad()

            # Start every batch from a zero LSTM state. The state's batch
            # dimension must equal the number of sequences in this batch
            # (the last batch may be smaller than the others).
            batch_size = seq.size(0)
            model.hidden_cell = (torch.zeros(1, batch_size, model.hidden_layer_size),
                                 torch.zeros(1, batch_size, model.hidden_layer_size))

            y_pred = model(seq)

            # Targets arrive as (batch,) while predictions are (batch, 1).
            # Without matching shapes MSELoss broadcasts to (batch, batch) and
            # optimises the wrong quantity.
            single_loss = loss_function(y_pred, labels.reshape(y_pred.shape))
            single_loss.backward()
            optimizer.step()

            total_loss += single_loss.item()
            # Running mean over the batches processed so far in this epoch.
            tepoch.set_postfix(loss=total_loss / batches_seen)

    # Mean batch loss for the epoch; max() guards against an empty loader.
    print(f'Epoch {i+1} loss: {total_loss/max(len(train_loader), 1)}')

In [None]:
# Predict with the trained model.
model.eval()
scaled_batches = []
with torch.no_grad():
    # Score the windows in batches instead of one at a time. The LSTM state is
    # zeroed for every batch, sized to that batch, so each window is scored
    # independently of the others.
    for batch in torch.split(trainX_tensor, 256):
        model.hidden_cell = (torch.zeros(1, batch.size(0), model.hidden_layer_size),
                             torch.zeros(1, batch.size(0), model.hidden_layer_size))
        scaled_batches.append(model(batch).reshape(-1))
scaled_predictions = torch.cat(scaled_batches).numpy().astype(np.float64)

# Map predictions back to original units.
# The windows and targets are built from the first numeric column only, while
# the scaler was fitted on all numeric columns, so scaler.inverse_transform
# cannot be applied to a single prediction column. MinMaxScaler computes
# X_scaled = X * scale_ + min_ per column; invert that for column 0 directly.
target_column = numerical_columns[0]
predictions = ((scaled_predictions - scaler.min_[0]) / scaler.scale_[0]).reshape(-1, 1)

# Write the predictions back into the data.
# Start from the forward-filled data expressed in original units, so predicted
# and non-predicted values share the same scale.
filled_data = scaled_data.copy()
filled_data[numerical_columns] = scaler.inverse_transform(scaled_data[numerical_columns])

# Forward-filling already removed the NaNs from the in-memory frames, so the
# originally missing rows are recovered from the CSV itself.
# NOTE(review): assumes `file_path` still points at the unchanged CSV loaded
# earlier and that row labels are the default positional index, which sorting
# preserved.
originally_missing = pd.read_csv(file_path, usecols=[target_column])[target_column].isna()

# Window k predicts the row at sorted position k + time_step; earlier rows have
# no full window and keep their forward-filled value.
predicted_labels = scaled_data.index[time_step:time_step + len(predictions)]
needs_fill = originally_missing.reindex(predicted_labels, fill_value=False).to_numpy()
filled_data.loc[predicted_labels[needs_fill], target_column] = predictions[needs_fill, 0]

# Save the filled data.
filled_data.to_csv('./Sort/Building_energy_consumption/filled_filtered_merged_data_Floor1.csv', index=False)