In [42]:
import torch

import torch.optim as optim
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt

In [7]:
# 读取excel文件
data = pd.read_excel('data/pre_20200901.xlsx')

#打乱数据所在的行
data = data.sample(frac=1).reset_index(drop=True)

# 查看前5行数据
print(data.head(5))

data.shape


   站名    区站号       纬度        经度    测站高度     年  月  日   时  温度/气温  露点温度  相对湿度  \
0  上杭  58918  25.0500  116.4167   198.0  2020  9  1   0   26.4  23.9    86   
1  株洲  57780  27.8689  113.1736    74.6  2020  9  1  16   28.1  20.4    63   
2  崇阳  57586  29.5400  114.0386    78.5  2020  9  1   4   32.1  20.4    50   
3  马关  56995  23.0186  104.3992  1332.9  2020  9  1  14   22.1  19.8    87   
4  普宁  59314  23.3983  116.1964    69.0  2020  9  1  13   28.3  26.4    90   

   降水量  瞬时风向  瞬时风速  地面温度  
0  0.0   312   1.6  30.9  
1  0.0   318   2.4  25.9  
2  0.0    25   5.1  54.3  
3  0.0    18   0.6  21.1  
4  0.0    24   2.1  28.1  


(18024, 16)

In [8]:
# 删除前两列无关信息
data=data.drop(data.columns[0], axis=1)
data.shape


(18024, 15)

In [9]:
print(data.head(5))

     区站号       纬度        经度    测站高度     年  月  日   时  温度/气温  露点温度  相对湿度  降水量  \
0  58918  25.0500  116.4167   198.0  2020  9  1   0   26.4  23.9    86  0.0   
1  57780  27.8689  113.1736    74.6  2020  9  1  16   28.1  20.4    63  0.0   
2  57586  29.5400  114.0386    78.5  2020  9  1   4   32.1  20.4    50  0.0   
3  56995  23.0186  104.3992  1332.9  2020  9  1  14   22.1  19.8    87  0.0   
4  59314  23.3983  116.1964    69.0  2020  9  1  13   28.3  26.4    90  0.0   

   瞬时风向  瞬时风速  地面温度  
0   312   1.6  30.9  
1   318   2.4  25.9  
2    25   5.1  54.3  
3    18   0.6  21.1  
4    24   2.1  28.1  


In [10]:
# 定义神经网络模型
class MyModel(nn.Module):
    def __init__(self):
        super(MyModel, self).__init__()
        self.fc1 = nn.Linear(14, 64)  # 输入层到隐藏层
        #self.fc2 = nn.Linear(256*2, 128*2)  # 隐藏层到隐藏层
        self.fc3 = nn.Linear(64 , 1)   # 隐藏层到输出层
        self.relu = nn.ReLU()          # 激活函数

    def forward(self, x):
        x = self.relu(self.fc1(x))
        #x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [11]:
#查看网络结构
model = MyModel()
print(model)

MyModel(
  (fc1): Linear(in_features=14, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=1, bias=True)
  (relu): ReLU()
)


In [44]:
inputs= data.drop(columns='降水量').values
outputs =   data.iloc[:,11].values

In [45]:
print(inputs.shape,outputs.shape)

(18024, 14) (18024,)


In [46]:
# 转换为PyTorch张量
inputs = torch.Tensor(inputs)
outputs = torch.Tensor(outputs)

In [47]:
# 划分训练集和测试集（示例中为90%训练集，10%测试集）
train_size = int(0.8 * len(data))
train_inputs, train_outputs = inputs[:train_size], outputs[:train_size]
test_inputs, test_outputs = inputs[train_size:], outputs[train_size:]

In [50]:
from sklearn.model_selection import KFold
k = 5
kf = KFold(n_splits=k)

for fold, (train_indices, val_indices) in enumerate(kf.split(train_inputs)):
    train_inputs_fold = train_inputs[train_indices]
    train_outputs_fold = train_outputs[train_indices]
    val_inputs_fold = train_inputs[val_indices]
    val_outputs_fold = train_outputs[val_indices]
    
    num_epochs = 500
    batch_size = 32
    model = MyModel()
    criterion = nn.L1Loss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs, eta_min=0)

    for epoch in range(num_epochs):
        permutation = torch.randperm(train_inputs_fold.size()[0])
        for i in range(0, train_inputs_fold.size()[0], batch_size):
            indices = permutation[i:i+batch_size]
            batch_inputs, batch_outputs = train_inputs_fold[indices], train_outputs_fold[indices]

            optimizer.zero_grad()
            outputs = model(batch_inputs)
            loss = criterion(outputs, batch_outputs)
            loss.backward()
            optimizer.step()

        if (epoch+1) % 100 == 0:
            print(f"Fold {fold+1}/{k}, Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}")          
        scheduler.step()
    # 在验证集上评估模型
    model.eval()
    with torch.no_grad():
        val_outputs_predicted = model(val_inputs_fold)
        val_loss = criterion(val_outputs_predicted, val_outputs_fold)
        print(f"Fold {fold+1}/{k}, Validation Loss: {val_loss.item()}")

# 在测试集上评估模型
model.eval()
with torch.no_grad():
    test_outputs_predicted = model(test_inputs)
    test_loss = criterion(test_outputs_predicted, test_outputs)
    print(f"Test Loss: {test_loss.item()}")

# 保存模型
torch.save(model.state_dict(), "model1.pth")

Fold 1/5, Epoch 100/500, Loss: 27.88800811767578
Fold 1/5, Epoch 200/500, Loss: 4.784046173095703
Fold 1/5, Epoch 300/500, Loss: 3.163186550140381
Fold 1/5, Epoch 400/500, Loss: 0.03557095304131508
Fold 1/5, Epoch 500/500, Loss: 0.02216390334069729
Fold 1/5, Validation Loss: 4.762749671936035
Fold 2/5, Epoch 100/500, Loss: 20.27385139465332
Fold 2/5, Epoch 200/500, Loss: 2.080676676996518e-05
Fold 2/5, Epoch 300/500, Loss: 2.9551065381383523e-05
Fold 2/5, Epoch 400/500, Loss: 0.026668667793273926
Fold 2/5, Epoch 500/500, Loss: 2.405750487710634e-09
Fold 2/5, Validation Loss: 0.06281737238168716
Fold 3/5, Epoch 100/500, Loss: 29.618173599243164
Fold 3/5, Epoch 200/500, Loss: 11.792037963867188
Fold 3/5, Epoch 300/500, Loss: 0.12805581092834473
Fold 3/5, Epoch 400/500, Loss: 9.19033118407242e-06
Fold 3/5, Epoch 500/500, Loss: 6.308046307523796e-10
Fold 3/5, Validation Loss: 0.10906264185905457
Fold 4/5, Epoch 100/500, Loss: 1.2844860553741455
Fold 4/5, Epoch 200/500, Loss: 1.408704161643

  return F.l1_loss(input, target, reduction=self.reduction)


Fold 5/5, Epoch 100/500, Loss: 10.513443946838379
Fold 5/5, Epoch 200/500, Loss: 0.8783717155456543
Fold 5/5, Epoch 300/500, Loss: 0.8941454887390137
Fold 5/5, Epoch 400/500, Loss: 0.02343321032822132
Fold 5/5, Epoch 500/500, Loss: 0.013977526687085629
Fold 5/5, Validation Loss: 0.0921921357512474
Test Loss: 0.09238553792238235


  return F.l1_loss(input, target, reduction=self.reduction)
  return F.l1_loss(input, target, reduction=self.reduction)
