In [35]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn

In [36]:

print(os.getcwd())
# 设置设备为GPU（如果可用），否则为CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'使用设备：{device}')


/Users/mikeshinoda/PycharmProjects/highSpeedEmergency/code
使用设备：cpu


In [37]:
from tools.csv import csv_files

csv_list = csv_files("./res")
all_series = ["Density", "Flow"]

In [38]:
# 准备数据集
def create_sequences(data, seq_length):
    xs = []
    ys = []
    for i in range(len(data) - seq_length):
        x = data[i:i + seq_length]
        y = data[i + seq_length]
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

In [39]:
import os
from PIL import Image
import matplotlib.pyplot as plt


def merge_images(image_paths, grid_size=(2, 2), output_path='merged_image.png'):
    images = [Image.open(img_path) for img_path in image_paths]

    # 图像的宽度和高度（假设所有图片大小相同）
    img_width, img_height = images[0].size

    # 创建画布大小，根据网格尺寸确定最终图片大小
    merged_width = grid_size[1] * img_width
    merged_height = grid_size[0] * img_height

    # 创建一个新画布
    merged_image = Image.new('RGB', (merged_width, merged_height))

    # 在画布上按网格位置粘贴图像
    for index, img in enumerate(images):
        row = index // grid_size[1]
        col = index % grid_size[1]
        merged_image.paste(img, (col * img_width, row * img_height))

    # 保存合并后的图像
    merged_image.save(output_path)
    print(f"Merged image saved as {output_path}")

    # 使用 matplotlib 显示合并后的图片
    plt.imshow(merged_image)
    plt.axis('off')  # 不显示坐标轴
    plt.show()



In [40]:
for csv_path in csv_list:
    for series_name in all_series:
        df = pd.read_csv(csv_path)
        # 设置'Frame'为索引
        df.set_index('Frame', inplace=True)
        # 选择要预测的特征
        series = df[series_name].values.astype(float)
        seq_length = 10  # 序列长度设置为10
        X, y = create_sequences(series, seq_length)
        # 检查是否有足够的数据进行训练
        if len(X) == 0:
            print("数据不足以创建指定长度的序列，请减少序列长度或增加数据量。")
        else:
            # 转换为张量并移动到指定设备
            X = torch.tensor(X, dtype=torch.float32).unsqueeze(-1).to(device)
            y = torch.tensor(y, dtype=torch.float32).to(device)

            # 定义LSTM模型
            class LSTMModel(nn.Module):
                def __init__(self, input_size=1, hidden_size=16, output_size=1):
                    super(LSTMModel, self).__init__()
                    self.hidden_size = hidden_size
                    self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
                    self.linear = nn.Linear(hidden_size, output_size)

                def forward(self, x):
                    out, _ = self.lstm(x)
                    out = self.linear(out[:, -1, :])
                    return out

            model = LSTMModel().to(device)  # 将模型移动到指定设备

            # 定义损失函数和优化器
            criterion = nn.MSELoss()
            optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

            # 记录损失值
            losses = []

            # 训练模型
            epochs = 10000
            for epoch in range(epochs):
                model.train()
                optimizer.zero_grad()
                output = model(X)
                loss = criterion(output.squeeze(), y)
                loss.backward()
                optimizer.step()

                # 保存损失值
                losses.append(loss.item())

                if (epoch + 1) % 20 == 0:
                    print(f'{csv_path}-{series_name}-第{epoch + 1}次迭代，损失值: {loss.item():.4f}')

            # 绘制损失分析图
            plt.figure()
            plt.plot(losses, label='Training Loss')
            plt.title('Loss Analysis')
            plt.xlabel('Epochs')
            plt.ylabel('Loss')
            plt.legend()
            img_dir = "./plots/q1_lstm/" + str('').join(csv_path.split("/")[-2])
            if not os.path.exists(img_dir):
                os.makedirs(img_dir, exist_ok=True)

            plt.savefig(f'{img_dir}/{csv_path.split("/")[-1]}_{series_name}_loss_plot.png')  # 保存损失图
            plt.close()

            # 进行预测
            model.eval()
            with torch.no_grad():
                # 使用训练集进行预测，并将结果移动到CPU
                predicted = model(X).squeeze().cpu().numpy()

                # 使用最近的seq_length个数据预测下一个数据点
                recent_sequence = torch.tensor(series[-seq_length:], dtype=torch.float32).unsqueeze(0).unsqueeze(-1).to(device)
                next_prediction = model(recent_sequence).cpu().item()
                print(f'下一个数据点的预测值：{next_prediction:.2f}')

            # 可视化结果并保存图片
            plt.figure()
            plt.plot(range(len(series)), series, label='raw data')
            plt.plot(range(seq_length, len(series)), predicted, label='The training set predicts the data')
            # 将下一个预测值添加到图中
            plt.xlabel('Timeframe')
            plt.ylabel(series_name)
            plt.legend()
            plt.savefig(f'{img_dir}/{csv_path.split("/")[-1]}_{series_name}_prediction_plot.png')  # 保存预测结果图
            plt.close()

./res/without-speed/32.31.250.105/20240501_20240501115227_20240501130415_115227.csv-Density-第20次迭代，损失值: 57.4749
./res/without-speed/32.31.250.105/20240501_20240501115227_20240501130415_115227.csv-Density-第40次迭代，损失值: 32.6228
./res/without-speed/32.31.250.105/20240501_20240501115227_20240501130415_115227.csv-Density-第60次迭代，损失值: 18.0630
./res/without-speed/32.31.250.105/20240501_20240501115227_20240501130415_115227.csv-Density-第80次迭代，损失值: 11.2943
./res/without-speed/32.31.250.105/20240501_20240501115227_20240501130415_115227.csv-Density-第100次迭代，损失值: 8.2651
./res/without-speed/32.31.250.105/20240501_20240501115227_20240501130415_115227.csv-Density-第120次迭代，损失值: 7.3935
./res/without-speed/32.31.250.105/20240501_20240501115227_20240501130415_115227.csv-Density-第140次迭代，损失值: 7.1851
./res/without-speed/32.31.250.105/20240501_20240501115227_20240501130415_115227.csv-Density-第160次迭代，损失值: 5.9506
./res/without-speed/32.31.250.105/20240501_20240501115227_20240501130415_115227.csv-Density-第180次迭代，损失值:

KeyboardInterrupt: 

In [42]:
all_dir = []
for csv_path in csv_list:
    all_dir.append("./plots/q1_lstm/" + str("").join(csv_path.split("/")[-2]))
print(list(set(all_dir)))

for folder_path in all_dir:
    image_files = [os.path.join(folder_path, img) for img in os.listdir(folder_path) if
                   img.endswith('.png') or img.endswith('.jpg')]
    merge_images(image_files, grid_size=(2, 2), output_path=f'{folder_path}_merged_image.png')


IndexError: list index out of range

In [49]:
import pandas as pd
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import os
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


for csv_path in csv_list:
    for series_name in all_series:
        df = pd.read_csv(csv_path)
        # 设置'Frame'为索引
        df.set_index('Frame', inplace=True)
        # 选择要预测的特征
        series = df[series_name].values.astype(float)
        seq_length = 10  # 序列长度设置为10
        X, y = create_sequences(series, seq_length)
        # 检查是否有足够的数据进行训练
        if len(X) == 0:
            print("数据不足以创建指定长度的序列，请减少序列长度或增加数据量。")
        else:
            # 转换为张量并移动到指定设备
            X = torch.tensor(X, dtype=torch.float32).unsqueeze(-1).to(device)
            y = torch.tensor(y, dtype=torch.float32).to(device)

            # 定义LSTM模型
            class LSTMModel(nn.Module):
                def __init__(self, input_size=1, hidden_size=16, output_size=1):
                    super(LSTMModel, self).__init__()
                    self.hidden_size = hidden_size
                    self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
                    self.linear = nn.Linear(hidden_size, output_size)

                def forward(self, x):
                    out, _ = self.lstm(x)
                    out = self.linear(out[:, -1, :])
                    return out

            model = LSTMModel().to(device)  # 将模型移动到指定设备

            # 定义损失函数和优化器
            criterion = nn.MSELoss()
            optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

            # 记录损失值
            losses = []

            # 训练模型
            epochs = 10000
            for epoch in range(epochs):
                model.train()
                optimizer.zero_grad()
                output = model(X)
                loss = criterion(output.squeeze(), y)
                loss.backward()
                optimizer.step()

                # 保存损失值
                losses.append(loss.item())

                # if (epoch + 1) % 20 == 0:
                    # print(f'{csv_path}-{series_name}-第{epoch + 1}次迭代，损失值: {loss.item():.4f}')

            # 绘制损失分析图
            plt.figure()
            plt.plot(losses, label='Training Loss')
            plt.title('Loss Analysis')
            plt.xlabel('Epochs')
            plt.ylabel('Loss')
            plt.legend()
            img_dir = "./plots/q1_lstm/" + str('').join(csv_path.split("/")[-2])
            if not os.path.exists(img_dir):
                os.makedirs(img_dir, exist_ok=True)

            plt.savefig(f'{img_dir}/{csv_path.split("/")[-1]}_{series_name}_loss_plot.png')  # 保存损失图
            plt.close()

            # 进行预测
            model.eval()
            with torch.no_grad():
                # 使用训练集进行预测，并将结果移动到CPU
                predicted = model(X).squeeze().cpu().numpy()
                true_values = y.cpu().numpy()  # 获取真实值

                # 使用最近的seq_length个数据预测下一个数据点
                recent_sequence = torch.tensor(series[-seq_length:], dtype=torch.float32).unsqueeze(0).unsqueeze(-1).to(device)
                next_prediction = model(recent_sequence).cpu().item()
                print(f'下一个数据点的预测值：{next_prediction:.2f}')

                # 计算评估指标
                mse = mean_squared_error(true_values, predicted)
                rmse = mean_squared_error(true_values, predicted, squared=False)  # RMSE
                mae = mean_absolute_error(true_values, predicted)
                r2 = r2_score(true_values, predicted)

                # 输出评估指标
                print(f'评估指标 - {series_name}:')
                print(f'MSE: {mse:.4f}')
                print(f'RMSE: {rmse:.4f}')
                print(f'MAE: {mae:.4f}')
                print(f'R²: {r2:.4f}')

            # 可视化结果并保存图片
            plt.figure()
            plt.plot(range(len(series)), series, label='raw data')
            plt.plot(range(seq_length, len(series)), predicted, label='The training set predicts the data')
            # 将下一个预测值添加到图中
            plt.xlabel('Timeframe')
            plt.ylabel(series_name)
            plt.legend()
            plt.savefig(f'{img_dir}/{csv_path.split("/")[-1]}_{series_name}_prediction_plot.png')  # 保存预测结果图
            plt.close()

下一个数据点的预测值：4.62
评估指标 - Density:
MSE: 0.7653
RMSE: 0.8748
MAE: 0.5904
R²: 0.8968




下一个数据点的预测值：0.17
评估指标 - Flow:
MSE: 0.0136
RMSE: 0.1166
MAE: 0.0320
R²: 0.9760




下一个数据点的预测值：9.72
评估指标 - Density:
MSE: 0.5850
RMSE: 0.7648
MAE: 0.3271
R²: 0.9555




下一个数据点的预测值：0.82
评估指标 - Flow:
MSE: 0.0479
RMSE: 0.2190
MAE: 0.0834
R²: 0.9529




下一个数据点的预测值：6.09
评估指标 - Density:
MSE: 0.1159
RMSE: 0.3404
MAE: 0.1585
R²: 0.9731




下一个数据点的预测值：-0.22
评估指标 - Flow:
MSE: 0.0923
RMSE: 0.3037
MAE: 0.1849
R²: 0.4609




下一个数据点的预测值：3.74
评估指标 - Density:
MSE: 0.1246
RMSE: 0.3529
MAE: 0.2598
R²: 0.9790




下一个数据点的预测值：0.50
评估指标 - Flow:
MSE: 0.0775
RMSE: 0.2785
MAE: 0.1558
R²: 0.6195




下一个数据点的预测值：9.86
评估指标 - Density:
MSE: 2.8392
RMSE: 1.6850
MAE: 1.2555
R²: 0.6667




下一个数据点的预测值：1.86
评估指标 - Flow:
MSE: 0.0044
RMSE: 0.0662
MAE: 0.0436
R²: 0.9949




KeyboardInterrupt: 