In [3]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn

# 构建数据集
timesteps = 100
x = np.linspace(0, np.pi*10, timesteps)
y = np.sin(x)

# 划分训练集和测试集
train_size = int(len(y) * 0.8)
train_data, test_data = y[:train_size], y[train_size:]

# 数据集处理
def create_dataset(data, timesteps):
    dataX, dataY = [], []
    for i in range(len(data) - timesteps):
        dataX.append(data[i:i+timesteps])
        dataY.append(data[i+timesteps])
    return np.array(dataX), np.array(dataY)

trainX, trainY = create_dataset(train_data, timesteps)
testX, testY = create_dataset(test_data, timesteps)

# 创建LSTM模型
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out


In [5]:
input_dim = 1
hidden_dim = 50
output_dim = 1
model = LSTMModel(input_dim, hidden_dim, output_dim)

# 模型训练
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [6]:
train_loss = []
for epoch in range(100):
    inputs = torch.from_numpy(trainX).float().unsqueeze(2)
    labels = torch.from_numpy(trainY).float().unsqueeze(1)
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    train_loss.append(loss.item())


IndexError: Dimension out of range (expected to be in range of [-2, 1], but got 2)

In [None]:
# 预测结果
inputs = torch.from_numpy(trainX).float().unsqueeze(2)
train_predict = model(inputs).detach().numpy()

inputs = torch.from_numpy(testX).float().unsqueeze(2)
test_predict = model(inputs).detach().numpy()

# 可视化结果
plt.plot(y, label='original')
plt.plot([None for i in range(timesteps)]+list(train_predict), label='train predict')
plt.plot([None for i in range(timesteps)]+list(test_predict), label='test predict')
plt.legend()
plt.show()

In [8]:
import numpy as np
import torch
import matplotlib.pyplot as plt

# 构建数据集
timesteps = 100
x = np.linspace(0, np.pi * 10, timesteps)
y = np.sin(x)

# 划分训练集和测试集
train_size = int(len(y) * 0.8)
train_data, test_data = y[:train_size], y[train_size:]

# 数据集处理
def create_dataset(data, timesteps):
    dataX, dataY = [], []
    for i in range(len(data) - timesteps):
        dataX.append(data[i:i + timesteps])
        dataY.append(data[i + timesteps])
    return np.array(dataX), np.array(dataY)

trainX, trainY = create_dataset(train_data, timesteps)
testX, testY = create_dataset(test_data, timesteps)

# 转换为张量
trainX = torch.from_numpy(trainX).float().unsqueeze(-1)
trainY = torch.from_numpy(trainY).float().unsqueeze(-1)
testX = torch.from_numpy(testX).float().unsqueeze(-1)

# 创建LSTM模型
model = torch.nn.Sequential(
    torch.nn.LSTM(input_size=1, hidden_size=50, batch_first=True),
    torch.nn.Linear(in_features=50, out_features=1)
)

# 定义损失函数和优化器
loss_fn = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 训练模型
train_loss = []
for epoch in range(100):
    optimizer.zero_grad()
    outputs, _ = model(trainX)
    loss = loss_fn(outputs[:, -1], trainY)
    loss.backward()
    optimizer.step()
    train_loss.append(loss.item())

# 预测结果
with torch.no_grad():
    train_predict, _ = model(trainX)
    test_predict, _ = model(testX)

# 可视化结果
plt.plot(y, label='original')
plt.plot([None for i in range(timesteps)]+list(train_predict[:, -1]), label='train predict')
plt.plot([None for i in range(train_size)]+[None for i in range(timesteps - 1)]+list(test_predict[:, -1]), label='test predict')
plt.legend()
plt.show()


TypeError: linear(): argument 'input' (position 1) must be Tensor, not tuple

In [24]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data

path = 'D:/学习资料/DataScience/TB2/Mini-project/model/daily_data.csv'
df = pd.read_csv(path)
df = df[['Year','Month','Day','Hourly Flow']]


In [25]:
# 假设数据集为 df
# 将年月日合并成时间序列的 index
df.index = pd.to_datetime(df[['Year', 'Month', 'Day']])

# 选择 Hourly Flow 列作为实践序列的值
df = df[['Hourly Flow']]
timeseries = df["Hourly Flow"].values.astype('float32')
timeseries

array([316.1965 , 316.1965 , 316.1965 , ..., 380.13638, 380.4245 ,
       380.4245 ], dtype=float32)

In [21]:
df

Unnamed: 0,Hourly Flow
2019-01-01,316.196500
2019-01-01,316.196500
2019-01-01,316.196500
2019-01-01,316.196500
2019-01-01,316.196500
...,...
2022-12-31,380.424492
2022-12-31,380.424492
2022-12-31,380.136398
2022-12-31,380.424492


In [27]:
# train-test split for time series
train_size = int(len(timeseries) * 0.67)
test_size = len(timeseries) - train_size
train, test = timeseries[:train_size], timeseries[train_size:]
 
def create_dataset(dataset, lookback):
    """Transform a time series into a prediction dataset
    
    Args:
        dataset: A numpy array of time series, first dimension is the time steps
        lookback: Size of window for prediction
    """
    X, y = [], []
    for i in range(len(dataset)-lookback):
        feature = dataset[i:i+lookback]
        target = dataset[i+1:i+lookback+1]
        X.append(feature)
        y.append(target)
    return torch.tensor(X), torch.tensor(y)
 
lookback = 4
X_train, y_train = create_dataset(train, lookback=lookback)
X_test, y_test = create_dataset(test, lookback=lookback)
 
class AirModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=1, hidden_size=50, num_layers=1, batch_first=True)
        self.linear = nn.Linear(50, 1)
    def forward(self, x):
        x, _ = self.lstm(x)
        x = self.linear(x)
        return x
 
model = AirModel()
optimizer = optim.Adam(model.parameters())
loss_fn = nn.MSELoss()
loader = data.DataLoader(data.TensorDataset(X_train, y_train), shuffle=True, batch_size=8)
 


In [28]:
n_epochs = 2000
for epoch in range(n_epochs):
    model.train()
    for X_batch, y_batch in loader:
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Validation
    if epoch % 100 != 0:
        continue
    model.eval()
    with torch.no_grad():
        y_pred = model(X_train)
        train_rmse = np.sqrt(loss_fn(y_pred, y_train))
        y_pred = model(X_test)
        test_rmse = np.sqrt(loss_fn(y_pred, y_test))
    print("Epoch %d: train RMSE %.4f, test RMSE %.4f" % (epoch, train_rmse, test_rmse))
 


RuntimeError: input.size(-1) must be equal to input_size. Expected 1, got 4

In [None]:
with torch.no_grad():
    # shift train predictions for plotting
    train_plot = np.ones_like(timeseries) * np.nan
    y_pred = model(X_train)
    y_pred = y_pred[:, -1, :]
    train_plot[lookback:train_size] = model(X_train)[:, -1, :]
    # shift test predictions for plotting
    test_plot = np.ones_like(timeseries) * np.nan
    test_plot[train_size+lookback:len(timeseries)] = model(X_test)[:, -1, :]
# plot
plt.plot(timeseries)
plt.plot(train_plot, c='r')
plt.plot(test_plot, c='g')
plt.show()