In [1]:
'''
This file is to train convlstm with only fuel density as input"
'''

import sys
sys.path.append('../')

from __future__ import unicode_literals, print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from torch.utils import data
import itertools
import re
import random
import datetime
import time
from models.convlstm_single import EncoderRNN, ConvLSTM
from torch.autograd import Variable
# from penalty import DivergenceLoss
from utils_convlstm_single import train_epoch, eval_epoch, test_epoch
from data.dataset import IdealizedGrasslands
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
import warnings
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# log to the file
import logging
file_name = 'convlstm'
logging.basicConfig(filename=f'training_{file_name}_{datetime.datetime.now().strftime("%Y_%m_%d_%H%M")}.log',
                    filemode='a',
                    format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
                    datefmt='%H:%M:%S',
                    level=logging.INFO)

In [3]:
train_direc = "/data/train/"
test_direc = "/data/test/"

min_mse=10
output_length=40
input_length=8
learning_rate=0.001
dropout_rate=0
kernel_size=3
batch_size=1

In [4]:
train_indices=list(range(0,800))
valid_indices = list(range(800, 900))
test_indices = list(range(900, 1000))

convcell =  ConvLSTM(input_shape=(75,75), input_dim=64, hidden_dims=[128,128,64], n_layers=3, kernel_size=(3,3), device=device)
model= EncoderRNN(convcell,device=device).to(device)

train_set = IdealizedGrasslands(train_indices, input_length , 10, output_length, train_direc)
valid_set = IdealizedGrasslands(valid_indices, input_length , 10, output_length, test_direc)

train_loader = data.DataLoader(train_set, batch_size = batch_size, shuffle = True, num_workers = 8)
valid_loader = data.DataLoader(valid_set, batch_size = batch_size, shuffle = False, num_workers = 8)
loss_fun = torch.nn.L1Loss()

layer  0 input dim  64  hidden dim  128
layer  1 input dim  128  hidden dim  128
layer  2 input dim  128  hidden dim  64
replacing ignite.dat with b661ec09-0c4c-4769-9ff4-3068334de345
replacing ignite.dat with 9c5fe558-748c-40f9-815d-314a3ebf2578
replacing ignite.dat with 671f8a65-f3c3-48ba-83c0-1d66c32f9436
replacing ignite.dat with 66a0e10a-57a1-46ed-bd52-996e4c59547e
replacing ignite.dat with 77700f03-04c7-4352-8888-c4a6a3ed7d3c
Save ensemble to replace ignition file contents with new ids.
replacing ignite.dat with 9bcaa110-bf9f-4059-8bc0-0a755eb1e276
replacing ignite.dat with 08da48fd-79ef-41c5-b824-9b120c439c47
replacing ignite.dat with 9a0b5202-dabf-482f-a7b0-d6c222a8e1c6
replacing ignite.dat with d760e634-1efa-4bb7-a78c-7708c40418a3
replacing ignite.dat with cf20277a-1059-49ea-b84a-542f8a463297
Save ensemble to replace ignition file contents with new ids.


In [5]:
optimizer = torch.optim.Adam(model.parameters(), learning_rate, betas = (0.9, 0.999), weight_decay = 4e-4)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size = 1, gamma = 0.9)

In [7]:
train_mse = []
valid_mse = []
test_mse = []
for i in range(50):
    start = time.time()
    logging.info(f'Start training epoch: {i}')
    torch.cuda.empty_cache()
    scheduler.step()
    model.train()
    teacher_force_ratio=np.maximum(0, 1 - i * 0.03)
    train_mse.append(train_epoch(train_loader, model, optimizer, loss_fun,teacher_force_ratio))
    model.eval()
    mse, preds, trues = eval_epoch(valid_loader, model, loss_fun)
    valid_mse.append(mse)
    if valid_mse[-1] < min_mse:
        min_mse = valid_mse[-1]
        best_model = model
        logging.info(f'Saving training epoch: {i}')
        torch.save(best_model, "save_model/convlstm_model_single5.pth")
    end = time.time()
    logging.info(f'End training epoch: {i}')
    if (len(train_mse) > 50 and np.mean(valid_mse[-5:]) >= np.mean(valid_mse[-10:-5])):
            break
    print(train_mse[-1], valid_mse[-1], round((end-start)/60,5))
    logging.info(f'Train MSE: {train_mse[-1]}, Val MSE :{valid_mse[-1]}, Train time:{round((end-start)/60,5)}')

0.01208 0.05265137 7.7681
0.00189 0.04943408 7.72448
0.00197 0.04879171 7.78048
0.00249 0.04895181 7.86729
0.00342 0.04839975 7.80603
0.00349 0.04841968 7.67049
0.00349 0.04476195 7.62898
0.00474 0.04810971 7.64626
0.00592 0.04802874 7.7186
0.00548 0.04051183 7.73074
0.00584 0.04640217 7.82048
0.00624 0.04286183 7.82547
0.00636 0.03955579 7.83275
0.00687 0.04046823 7.82742
0.00665 0.03644212 7.84235
0.00654 0.03362858 7.8186
0.00678 0.03092144 7.88467
0.00682 0.02861 7.83825
0.00671 0.02776322 7.96012
0.00651 0.02585985 7.93148
0.00674 0.02506025 7.97265
0.00683 0.02373843 8.05226
0.00678 0.02377281 7.81293
0.00689 0.02301344 7.97625
0.00734 0.02428049 7.89866
0.00728 0.02151641 7.82364
0.00747 0.0229181 9.03346
0.0075 0.02058036 9.32566
0.00734 0.01979316 9.2015
0.00766 0.02001058 9.72328
0.00769 0.01855109 9.71497
0.00765 0.01743261 9.50988
0.00772 0.01828089 9.3817
0.00779 0.01787104 9.47292
0.0077 0.01754435 9.30148
0.00759 0.01721784 9.4599
0.00749 0.0156439 9.18827
0.00734 0.0176