In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import pandas as pd

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import json

In [4]:
# Read the dataset
file_path = "./input_for_gpt.json"
dataframe = pd.read_json(file_path)

type(dataframe.loc[0,'runner_matrix'][0][0][0]), type(dataframe.loc[0,'month_7']), type(dataframe.loc[0,'date'])

# # Step 1: Data Reading
# file_path = "./input_for_gpt.json"
# with open(file_path, "r") as f:
#     data = json.load(f)

(float, numpy.bool_, pandas._libs.tslibs.timestamps.Timestamp)

In [5]:

# Step 2: Feature Engineering
# Convert dates from milliseconds to a standard format if necessary
dates = dataframe["date"]


In [6]:
# Extract weather features and runner_matrix
weather_features = dataframe[["Wind Speed_int","Temperature_int","Humidity_int"]].to_numpy() # .transpose(1, 0)  # Transpose to have features in the correct shape

In [7]:
# runner_matrix for each day
runner_matrix = np.array(dataframe["runner_matrix"])

In [8]:
lst = []
for i in dataframe.index:
    lst.append(np.array(dataframe["runner_matrix"].loc[i]))
runner_matrix_all = np.array(lst)
runner_matrix_all.shape

(59, 31, 33, 1)

In [9]:
runner_matrix_all = runner_matrix_all.transpose(0, 3, 1, 2)  # 转置以得到 (59, 1, 31, 33)

In [10]:

# 输入数据是前七日的光栅矩阵和天气特征，目标是第八日的 runner_matrix
input_data = []
target_data = []

# 循环创建输入和目标
for i in range(len(runner_matrix_all) - 7):
    # 输入为前七日的光栅矩阵和天气特征
    past_runner_matrix = runner_matrix_all[i:i+7]  # (7, 31, 33, 1)
    past_weather_features = weather_features[i:i+7]  # (7, 3)

    # 目标是第八日的 runner_matrix
    target_runner_matrix = runner_matrix_all[i + 7]  # (31, 33, 1)

    input_data.append((past_runner_matrix, past_weather_features))
    target_data.append(target_runner_matrix)


In [11]:

# 转换为张量
input_runner_tensor = torch.tensor([x[0] for x in input_data], dtype=torch.float32)  # (批次, 7, 31, 33, 1)
input_weather_tensor = torch.tensor([x[1] for x in input_data], dtype=torch.float32)  # (批次, 7, 3)
target_runner_tensor = torch.tensor(target_data, dtype=torch.float32)  # (批次, 31, 33, 1)


  input_runner_tensor = torch.tensor([x[0] for x in input_data], dtype=torch.float32)  # (批次, 7, 31, 33, 1)


In [12]:
input_runner_tensor.shape, input_weather_tensor.shape, target_runner_tensor.shape

(torch.Size([52, 7, 1, 31, 33]),
 torch.Size([52, 7, 3]),
 torch.Size([52, 1, 31, 33]))

In [31]:
# 修复模型，确保每层的输入和输出正确
class RunnerWeatherModel(nn.Module):
    def __init__(self, cnn_out_channels=32, lstm_hidden_size=64, dense_hidden_size=32):
        super(RunnerWeatherModel, self).__init__()

        # CNN 部分
        self.cnn = nn.Sequential(
            nn.Conv2d(1, cnn_out_channels, kernel_size=(3, 3), padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(cnn_out_channels, cnn_out_channels * 2, kernel_size=(3, 3), padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

        # Dense 层处理天气特征
        self.weather_dense = nn.Sequential(
            nn.Linear(3 * 7, dense_hidden_size),
            nn.ReLU()
        )

        # LSTM 部分
        lstm_input_size = cnn_out_channels * 2 * 7 * 8 + dense_hidden_size  # 根据 CNN 输出计算
        self.lstm = nn.LSTM(lstm_input_size, lstm_hidden_size, batch_first=True)

        # 最终 Dense 层
        self.final_dense = nn.Sequential(
            nn.Linear(lstm_hidden_size, 31 * 33),  # 确保最终输出的大小
            nn.ReLU()
        )

    def forward(self, runner_input, weather_input):
        batch_size = runner_input.size(0)

        # CNN 部分
        cnn_outputs = []
        for i in range(7):
            cnn_output = self.cnn(runner_input[:, i, :, :, :])  # 应该返回 (batch_size, channels, height, width)
            cnn_output = cnn_output.view(batch_size, -1)  # 展平
            cnn_outputs.append(cnn_output)

        # Dense 处理天气特征
        weather_input_flat = weather_input.view(batch_size, -1)  # 展平天气数据
        weather_output = self.weather_dense(weather_input_flat)

        # 合并 CNN 和天气输出
        lstm_input = torch.cat(cnn_outputs, dim=1)  # 合并所有 CNN 输出
        lstm_input = torch.cat([lstm_input, weather_output], dim=1)  # 加入天气数据
        
        lstm_input = lstm_input.unsqueeze(1)  # 确保有时间维度
        
        # LSTM 部分
        lstm_output, _ = self.lstm(lstm_input)  # 确保 LSTM 输入正确
        lstm_output = lstm_output[:, -1, :]  # 获取 LSTM 的最后一个输出

        # 最终 Dense 层
        output = self.final_dense(lstm_output)

        return output.view(batch_size, 31, 33, 1)  # 确保与目标匹配

In [32]:
print(model)


RunnerWeatherModel(
  (cnn): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (weather_dense): Sequential(
    (0): Linear(in_features=21, out_features=32, bias=True)
    (1): ReLU()
  )
  (lstm): LSTM(3616, 64, batch_first=True)
  (final_dense): Sequential(
    (0): Linear(in_features=64, out_features=1023, bias=True)
    (1): ReLU()
  )
)


In [26]:
print(model)


RunnerWeatherModel(
  (cnn): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (weather_dense): Sequential(
    (0): Linear(in_features=21, out_features=32, bias=True)
    (1): ReLU()
  )
  (lstm): LSTM(3616, 64, batch_first=True)
  (final_dense): Sequential(
    (0): Linear(in_features=64, out_features=1023, bias=True)
    (1): ReLU()
  )
)


In [27]:
from torchsummary import summary

summary(model, input_size=[(7, 1, 31, 33), (7, 3)])  # 前七日的光栅矩阵和天气特征


RuntimeError: mat1 and mat2 shapes cannot be multiplied (2x25120 and 3616x256)

In [16]:
# Step 2: 构建数据集和数据加载器
dataset = TensorDataset(input_runner_tensor, input_weather_tensor, target_runner_tensor)
data_loader = DataLoader(dataset, batch_size=8, shuffle=True)

# Step 4: 训练循环
model = RunnerWeatherModel()
optimizer = optim.Adam(model.parameters(), lr=0.001)
loss_function = nn.MSELoss()

num_epochs = 10
for epoch in range(num_epochs):
    for runner_input, weather_input, target in data_loader:
        # 重置梯度
        optimizer.zero_grad()

        # 前向传播
        outputs = model(runner_input, weather_input)
        loss = loss_function(outputs, target)  # 确保输出与目标匹配

        # 反向传播和优化
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item()}")

print("Training complete")

RuntimeError: mat1 and mat2 shapes cannot be multiplied (8x25120 and 3616x256)

In [None]:

# Convert to tensors
input_data = torch.tensor(input_data, dtype=torch.float32)
target_data = torch.tensor(target_data, dtype=torch.float32)

# Step 3: Model Building
class WeatherRunnerPredictor(nn.Module):
    def __init__(self, input_channels=2, hidden_channels=64, lstm_hidden=128, output_size=1):
        super(WeatherRunnerPredictor, self).__init__()
        
        # CNN layers
        self.conv1 = nn.Conv2d(input_channels, hidden_channels, kernel_size=(3, 3), padding=1)
        self.conv2 = nn.Conv2d(hidden_channels, hidden_channels, kernel_size=(3, 3), padding=1)
        self.pool = nn.MaxPool2d((2, 2))
        
        # LSTM layers
        self.lstm = nn.LSTM(hidden_channels * 7 * 7, lstm_hidden, batch_first=True)
        
        # Dense output layer
        self.dense = nn.Linear(lstm_hidden, output_size)

    def forward(self, x):
        # CNN layers
        x = torch.relu(self.conv1(x))
        x = self.pool(x)
        x = torch.relu(self.conv2(x))
        x = self.pool(x)

        # Flatten for LSTM input
        batch_size, channels, height, width = x.size()
        x = x.view(batch_size, -1, channels * height * width)  # Reshape for LSTM

        # LSTM layers
        x, _ = self.lstm(x)
        
        # Use the last LSTM output for the dense layer
        x = self.dense(x[:, -1, :])

        return x

# Step 4: Training Loop
# Define the model, loss function, optimizer, and data loader
model = WeatherRunnerPredictor(input_channels=2, hidden_channels=64, lstm_hidden=128, output_size=1)
loss_function = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
data_loader = DataLoader(TensorDataset(input_data, target_data), batch_size=16, shuffle=True)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    for batch in data_loader:
        inputs, targets = batch

        # Reset gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = loss_function(outputs, targets)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {loss.item()}")

print("Training complete")
