In [1]:
import random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from data import MyData, collate_fn
from model import BaseLSTM
from torch.utils.data import DataLoader

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
def setup_seed(seed):
     torch.manual_seed(seed)
     torch.cuda.manual_seed_all(seed)
     np.random.seed(seed)
     random.seed(seed)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
N = 50000
batch_size = 1
len_int = [1,6,11,16,21,100]

for begin_i in range(len(len_int)):
    for end_i in range(begin_i+1, len(len_int)):
        setup_seed(10086)
        traindata = MyData(data_path='./data/TrainData.json',min_l=len_int[begin_i],max_l=len_int[end_i],frac=1,N=N)
        valdata = MyData(data_path='./data/TestData.json',min_l=len_int[begin_i],max_l=len_int[end_i])

        train_dataloader = DataLoader(traindata, batch_size=batch_size, shuffle=True)
        val_dataloader = DataLoader(valdata, batch_size=batch_size, shuffle=False)

        model = BaseLSTM(name=f'{len_int[begin_i]}-{len_int[end_i]}').to(device)
        criterion = nn.MSELoss().to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

        # 进行模型训练
        epochs = 100
        best_rmse = 1
        for epoch in range(epochs):
            model.train()
            print(f'==========Epoch {epoch}==========')
            for idx, batch in enumerate(train_dataloader):
                X, y, last, _ = batch
                X = X.to(device)
                y = y.to(device)

                # 前向传播和计算损失
                optimizer.zero_grad()
                outputs = model(X)
                loss = criterion(outputs, y)
                # print(f'{idx}, Loss: {loss.item()}')

                # 反向传播和参数更新
                loss.backward()
                optimizer.step()

            for idx, batch in enumerate(val_dataloader):
                X, y, last, _ = batch
                X = X.to(device)
                y = y.to(device)
                outputs = model(X)
                loss = criterion(outputs, y)
                RMSE = torch.sqrt(torch.mean(torch.sum(torch.square(outputs[:,:2]-y[:,:2]),dim=1)))

                if best_rmse >  RMSE:
                    best_rmse = RMSE
                    print('Test Best RMSE',RMSE.item())
                    model.save(f'best.pth')

加载数据，总共有 436 个台风， 12839 条台风数据
总共生成 50000 个序列
加载数据，总共有 152 个台风， 4539 条台风数据
总共生成 2041 个序列


KeyboardInterrupt: 

In [49]:
testdata = MyData(data_path='./data/TestData.json',l=5,frac=1)
batch_size = 1
len_int = [1,6,11,16,21,100]
RMSE = {}
for begin_i in range(len(len_int)):
    for end_i in range(begin_i+1, len(len_int)):
        GT_Predict = pd.DataFrame(np.full([len(testdata), 5],np.nan),columns=['True_lat','True_lon','Predict_lat','Predict_lon','SE'])

        model = MyLSTM().to(device)
        model.load_state_dict(torch.load(f'checkpoints/{len_int[begin_i]}-{len_int[end_i]}/best.pth'))
        test_dataloader = DataLoader(testdata, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

        for i ,batch in enumerate(test_dataloader):
            X, y, last, _ = batch

            predict_y = model(X.to(device), last).cpu().detach().numpy()
            true_y = y.numpy()
        GT_Predict.iloc[:, :2] = true_y[:,:2]

        GT_Predict.iloc[:, 2:4] = predict_y[:,:2]
        GT_Predict.iloc[:, 4] = np.sum(np.square(GT_Predict.iloc[:, :2].values - GT_Predict.iloc[:, 2:4].values), axis=1)

        GT_Predict.to_csv(f'checkpoints/{len_int[begin_i]}-{len_int[end_i]}/TestPredict.csv')
        RMSE[f'{len_int[begin_i]}-{len_int[end_i]}'] = np.sqrt(np.mean(GT_Predict['SE'].values))
RMSE

加载数据，总共有 152 个台风， 4539 条台风数据
总共生成 3779 个序列


{'1-6': 0.20817875589626733,
 '1-11': 0.21141195687010442,
 '1-16': 0.21975755760393634,
 '1-21': 0.22138154272733093,
 '1-100': 0.21078112926780895,
 '6-11': 0.21067379858850538,
 '6-16': 0.195371927443585,
 '6-21': 0.20073864451274281,
 '6-100': 0.20793596168469075,
 '11-16': 0.20418009200643986,
 '11-21': 0.22919615432858367,
 '11-100': 0.20012317544736885,
 '16-21': 0.20075688699918615,
 '16-100': 0.20183737381000857,
 '21-100': 0.21374130207922973}

In [51]:
N = 5e4
batch_size = 4096
len_int = [1,2,3,4,5,6,7,8,9,10]

for begin_i in range(len(len_int)-1):
    setup_seed(10086)
    end_i = begin_i+1
    traindata = MyData(data_path='./data/TrainData.json',min_l=len_int[begin_i],max_l=len_int[end_i],frac=1,N=N)
    valdata = MyData(data_path='./data/TestData.json',min_l=len_int[begin_i],max_l=len_int[end_i])

    train_dataloader = DataLoader(traindata, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
    val_dataloader = DataLoader(valdata, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

    model = MyLSTM(name=f'{len_int[begin_i]}-{len_int[end_i]}').to(device)
    criterion = nn.MSELoss().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # 进行模型训练
    epochs = 500
    best_rmse = 1
    for epoch in range(epochs):
        model.train()
        print(f'==========Epoch {epoch}==========')
        for idx, batch in enumerate(train_dataloader):
            X, y, last, _ = batch
            X = X.to(device)
            y = y.to(device)

            # 前向传播和计算损失
            optimizer.zero_grad()
            outputs = model(X, last)
            loss = criterion(outputs, y)
            # print(f'{idx}, Loss: {loss.item()}')

            # 反向传播和参数更新
            loss.backward()
            optimizer.step()

        for idx, batch in enumerate(val_dataloader):
            X, y, last, _ = batch
            X = X.to(device)
            y = y.to(device)
            outputs = model(X, last)
            loss = criterion(outputs, y)
            RMSE = torch.sqrt(torch.mean(torch.sum(torch.square(outputs[:,:2]-y[:,:2]),dim=1)))

            if best_rmse >  RMSE:
                best_rmse = RMSE
                print('Test Best RMSE',RMSE.item())
                model.save(f'best.pth')

加载数据，总共有 436 个台风， 12839 条台风数据
总共生成 12403 个序列
加载数据，总共有 152 个台风， 4539 条台风数据
总共生成 438 个序列
Test Best RMSE 0.43673866987228394
Test Best RMSE 0.41207289695739746
Test Best RMSE 0.3854142427444458
Test Best RMSE 0.35621944069862366
Test Best RMSE 0.3245150148868561
Test Best RMSE 0.2913462519645691
Test Best RMSE 0.2593698799610138
Test Best RMSE 0.23271732032299042
Test Best RMSE 0.21651095151901245
Test Best RMSE 0.21365898847579956
Test Best RMSE 0.2123001515865326
Test Best RMSE 0.20956473052501678
Test Best RMSE 0.20795853435993195
Test Best RMSE 0.20695321261882782
Test Best RMSE 0.20631666481494904
Test Best RMSE 0.206010103225708
Test Best RMSE 0.2059261053800583
Test Best RMSE 0.20567935705184937
Test Best RMSE 0.20511479675769806
Test Best RMSE 0.20439943671226501
Test Best RMSE 0.20381560921669006
Test Best RMSE 0.20302359759807587
Test Best RMSE 0.20200082659721375
Test Best RMSE 0.2008131444454193
Test Best RMSE 0.19946087896823883
Test Best RMSE 0.1980973333120346
Test Best RMS

In [54]:
# testdata = MyData(data_path='./data/TestData.json',l=5,frac=1)
batch_size = 10000
len_int = [1,2,3,4,5,6,7,8,9,10]
singleRMSE = {}
for begin_i in range(len(len_int)-1):
    end_i = begin_i+1
    testdata = MyData(data_path='./data/TestData.json',min_l=len_int[begin_i],max_l=len_int[end_i],frac=1)
    GT_Predict = pd.DataFrame(np.full([len(testdata), 5],np.nan),columns=['True_lat','True_lon','Predict_lat','Predict_lon','SE'])

    model = MyLSTM().to(device)
    model.load_state_dict(torch.load(f'checkpoints/{len_int[begin_i]}-{len_int[end_i]}/best.pth'))
    test_dataloader = DataLoader(testdata, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

    for i ,batch in enumerate(test_dataloader):
        X, y, last, _ = batch

        predict_y = model(X.to(device), last).cpu().detach().numpy()
        true_y = y.numpy()
    GT_Predict.iloc[:, :2] = true_y[:,:2]

    GT_Predict.iloc[:, 2:4] = predict_y[:,:2]
    GT_Predict.iloc[:, 4] = np.sum(np.square(GT_Predict.iloc[:, :2].values - GT_Predict.iloc[:, 2:4].values), axis=1)

    GT_Predict.to_csv(f'checkpoints/{len_int[begin_i]}-{len_int[end_i]}/TestPredict.csv')
    singleRMSE[f'{len_int[begin_i]}-{len_int[end_i]}'] = np.sqrt(np.mean(GT_Predict['SE'].values))
singleRMSE

加载数据，总共有 152 个台风， 4539 条台风数据
总共生成 4387 个序列
加载数据，总共有 152 个台风， 4539 条台风数据
总共生成 4235 个序列
加载数据，总共有 152 个台风， 4539 条台风数据
总共生成 4083 个序列
加载数据，总共有 152 个台风， 4539 条台风数据
总共生成 3931 个序列
加载数据，总共有 152 个台风， 4539 条台风数据
总共生成 3779 个序列
加载数据，总共有 152 个台风， 4539 条台风数据
总共生成 3627 个序列
加载数据，总共有 152 个台风， 4539 条台风数据
总共生成 3475 个序列
加载数据，总共有 152 个台风， 4539 条台风数据
总共生成 3324 个序列
加载数据，总共有 152 个台风， 4539 条台风数据
总共生成 3174 个序列


{'1-2': 0.012702885973638027,
 '2-3': 0.01744671195387434,
 '3-4': 0.02053181573958394,
 '4-5': 0.02028508749234727,
 '5-6': 0.017425853710707193,
 '6-7': 0.01736432472416012,
 '7-8': 0.017525421337545498,
 '8-9': 0.017384596429911345,
 '9-10': 0.017025971737486996}

加载数据，总共有 152 个台风， 4539 条台风数据
总共生成 3779 个序列


{'1-6': 0.210917954219712,
 '1-11': 0.21851589006213346,
 '1-16': 0.21827782265797735,
 '1-21': 0.22146391763678086,
 '1-100': 0.2323536924420252,
 '6-11': 0.2106737985885054,
 '6-16': 0.195371927443585,
 '6-21': 0.20073864451274281,
 '6-100': 0.20793596168469075,
 '11-16': 0.20418009200643983,
 '11-21': 0.22919615432858365,
 '11-100': 0.20012317544736888,
 '16-21': 0.20075688699918615,
 '16-100': 0.20183737381000857,
 '21-100': 0.21374130207922973}

加载数据，总共有 152 个台风， 4539 条台风数据
总共生成 3779 个序列


{'1-2': 0.38129860565450224,
 '2-3': 1.2746461374090112,
 '3-4': 0.3091092062719583,
 '4-5': 0.15773254807262052,
 '5-6': 0.11484982496166668,
 '6-7': 0.12534150336342256,
 '7-8': 0.13433597877353642,
 '8-9': 0.14610827088055997,
 '9-10': 0.15087116359981886}

{'1-2': 0.38129860565450224,
 '2-3': 1.2746461374090112,
 '3-4': 0.3091092062719583,
 '4-5': 0.15773254807262052,
 '5-6': 0.11484982496166668,
 '6-7': 0.12534150336342256,
 '7-8': 0.13433597877353642,
 '8-9': 0.14610827088056,
 '9-10': 0.15087116359981886}