## Trains an LSTM model

#### inputs:
- df_train01.csv
- df_test01.csv
- df_chunk_train01.csv
- df_chunk_test01.csv

#### outputs:
- normalizer.pkl
- batcher_train_lite_model03.pkl
- model03_epoch199.pkl

In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import pickle
import sys
from importlib import reload
import logging

%matplotlib inline

In [2]:
# Put the parent directory on the module search path; the `lib` package
# imported in later cells is presumably located there — TODO confirm.
sys.path.append('..')

#### Load data

In [3]:
# Training set: the time-series table (its 'Datetime' column is parsed as
# dates on load) and the companion chunk table.
df_train = pd.read_csv(
    '../data/df_train01.csv',
    parse_dates=['Datetime'],
)
df_chunk_train = pd.read_csv('../data/df_chunk_train01.csv')


In [4]:
# Test set: same two tables as for training ('Datetime' parsed as dates).
df_test = pd.read_csv(
    '../data/df_test01.csv',
    parse_dates=['Datetime'],
)
df_chunk_test = pd.read_csv('../data/df_chunk_test01.csv')

#### Create a normalizer object

In [5]:
from lib import normalizing as nr

In [6]:
# Columns handed to the normalizer's `meanstd` option
# (presumably scaled by mean / standard deviation — TODO confirm in lib.normalizing).
meanstd_cols = [
    'temperature',
    'apparent_temperature',
    'dew_point',
    'wind_speed',
    'wind_gust',
]

# Columns handed to the normalizer's `minmax` option
# (presumably scaled to their min / max range — TODO confirm in lib.normalizing).
minmax_cols = [
    'visibility',
    'precip_intensity',
    'Moisture',
    'Day',
    'Hour',
    'Month',
    'Weekday',
]

normalizer = nr.Normalizer(meanstd=meanstd_cols, minmax=minmax_cols)

In [8]:
# Replace the raw training frame with its normalized version.
# `init=True` presumably makes the normalizer derive its scaling statistics
# from this data — TODO confirm in lib.normalizing.
df_train = normalizer.normalize(df_train, init=True)

In [None]:
# Persist the normalizer to disk.
# NOTE(review): this writes under './data/', whereas the CSV inputs and the
# trained model in this notebook use '../data/' — confirm the intended directory.
normalizer.save('./data/normalizer.pkl')

#### Create batcher object

In [9]:
from lib import batching as bt

In [10]:
# Re-import lib.batching so that edits to its source are picked up
# without restarting the kernel.
bt = reload(bt)

In [11]:
# Prev-feature column names (passed to the batcher as `col_feats_prev`).
col_feats_prev = [
    'humidity',
    'precip_probability',
    'precip_intensity',
    'temperature',
    'Moisture',
]

# Current-feature column names (passed to the batcher as `col_feats_current`).
col_feats_curr = [
    'precip_probability',
    'humidity',
    'cloud_cover',
    'apparent_temperature',
    'dew_point',
    'wind_speed',
    'wind_gust',
    'visibility',
    'precip_intensity',
    'Day',
    'Hour',
    'Month',
    'Weekday',
]

In [13]:
# Keyword settings for the training batcher, collected in one place.
batcher_options = dict(
    col_dt='Datetime',                 # Datetime column name
    col_val='Moisture',                # time-series column to be predicted
    col_gr='ChunkId',
    col_feats_prev=col_feats_prev,     # prev-feature column names
    col_feats_current=col_feats_curr,  # current feature column names
    minlen=25,                         # chunk lengths are randomly sampled; minimum chunk length
    maxlen=240,
)

# The two positional integers (40, 36) are forwarded as-is; their meaning is
# defined by BatcherTrain's signature in lib.batching.
batcher_train = bt.BatcherTrain(
    df_train,
    df_chunk_train,
    40,
    36,
    **batcher_options,
)

Save the batcher without its data, so that its parameters can be passed to the tester in v07.test01.ipynb.

In [None]:
# Save a "lite" copy of the batcher: its `df_ts` attribute is detached for the
# duration of the save so the written file does not carry the data.
# NOTE(review): this writes under './data/', whereas the CSV inputs and the
# trained model in this notebook use '../data/' — confirm the intended directory.
df_ts = batcher_train.df_ts
batcher_train.df_ts = None
try:
    batcher_train.save('./data/batcher_train_lite_model03.pkl')
finally:
    # Re-attach the data even if the save fails; otherwise the batcher would
    # be left with df_ts = None for the training cell further down.
    batcher_train.df_ts = df_ts

#### Create LSTM model

In [14]:
from lib import lstm

In [44]:
# Re-import lib.lstm so that edits to its source are picked up
# without restarting the kernel.
lstm = reload(lstm)

In [45]:
# Sizes passed as `hiddens_after`; the last entry is the batcher's out_time_len.
layers_after = [800, batcher_train.out_time_len]

# First positional argument is the batcher's feature count (n_feat); the
# second (200) is forwarded as-is — see lstm.LSTM for its meaning.
model = lstm.LSTM(
    batcher_train.n_feat,
    200,
    hiddens_before=[800],
    hiddens_after=layers_after,
    lr_init=0.001,
    use_gpu=True,
)

In [46]:
# NOTE(review): the model was constructed with use_gpu=True, yet move_cpu()
# (presumably a transfer to the CPU) is called before training — confirm
# which device training is meant to run on.
model.move_cpu()



#### Train the model

In [None]:
# Run training on the prepared batcher; no save interval or save folder is
# given here (both are None).
model.train_model(
    batcher_train,
    max_epochs=200,
    batchsize=150,
    save_every=None,
    save_folder=None,
)

In [15]:
import torch

In [None]:
# Serialize the entire `model` object (not just its parameters) to disk.
# Loading this file back will presumably require the lib.lstm class
# definitions to be importable — verify in the notebook that loads it.
torch.save(model, '../data/model03_epoch199.pkl')