## Trains an LSTM model

#### inputs:
- df_train01.csv
- df_test01.csv
- df_chunk_train01.csv
- df_chunk_test01.csv

#### outputs:
- normalizer.pkl
- batcher_train_lite_model03.pkl
- model03_epoch199.pkl

In [11]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import pickle
import sys
from importlib import reload
import logging

%matplotlib inline

In [12]:
# Make the parent directory importable so the `lib` package used below can be
# found (assumes `lib` lives one level above this notebook).
# Guarded so that re-running the cell does not pile up duplicate entries.
if '..' not in sys.path:
    sys.path.append('..')

#### Load data

In [13]:
# Training time series; the 'Datetime' column is parsed into timestamps on load.
df_train = pd.read_csv(
    '../data/df_train01.csv',
    parse_dates=['Datetime'],
)

# Companion chunk table for the training set (later handed to the batcher).
df_chunk_train = pd.read_csv('../data/df_chunk_train01.csv')


In [14]:
# Test time series, loaded the same way as the training frame.
df_test = pd.read_csv(
    '../data/df_test01.csv',
    parse_dates=['Datetime'],
)

# Companion chunk table for the test set.
df_chunk_test = pd.read_csv('../data/df_chunk_test01.csv')

In [15]:
# Preview the first five rows of the training frame before it is normalized.
df_train.head(n=5)

Unnamed: 0.1,Unnamed: 0,LocId,Datetime,Moisture,Latitude,Longitude,ChunkBegin,ChunkId,precip_intensity,precip_probability,...,humidity,wind_speed,wind_gust,cloud_cover,visibility,IsTmpNull,Day,Hour,Month,Weekday
0,647,6,2018-11-26 14:00:00,25.465,47.594,19.362,True,2,1.1862,0.81,...,0.82,5.04,10.95,1.0,1.865,False,26,14,11,0
1,648,6,2018-11-26 15:00:00,26.5375,47.594,19.362,False,2,1.2085,0.79,...,0.81,5.03,11.21,1.0,3.107,False,26,15,11,0
2,649,6,2018-11-26 16:00:00,26.81625,47.594,19.362,False,2,1.015,0.76,...,0.79,5.11,11.38,1.0,3.861,False,26,16,11,0
3,650,6,2018-11-26 17:00:00,26.248749,47.594,19.362,False,2,0.7393,0.72,...,0.77,5.19,11.48,1.0,4.739,False,26,17,11,0
4,651,6,2018-11-26 18:00:00,25.555,47.594,19.362,False,2,0.5413,0.63,...,0.78,4.15,8.25,1.0,14.807,False,26,18,11,0


#### Create a normalizer object

In [16]:
from lib import normalizing as nr

In [17]:
# Columns handed to the normalizer's `meanstd` group
# (presumably mean/std standardization — confirm in lib/normalizing).
cols_meanstd = [
    'temperature',
    'apparent_temperature',
    'dew_point',
    'wind_speed',
    'wind_gust',
]

# Columns handed to the normalizer's `minmax` group
# (presumably min/max scaling — confirm in lib/normalizing).
cols_minmax = [
    'visibility',
    'precip_intensity',
    'Moisture',
    'Day',
    'Hour',
    'Month',
    'Weekday',
]

normalizer = nr.Normalizer(meanstd=cols_meanstd, minmax=cols_minmax)

In [18]:
# Replace the raw training frame with its normalized version.
# init=True presumably makes the normalizer compute its statistics from this
# frame — TODO confirm in lib/normalizing.
# NOTE(review): df_train is overwritten with the result, so running this cell
# a second time feeds already-normalized data back into normalize().
df_train = normalizer.normalize(df_train, init=True)

In [20]:
# Persist the normalizer to disk; normalizer.pkl is one of this notebook's
# listed outputs.
normalizer.save('../data/normalizer.pkl')

#### Create batcher object

In [21]:
from lib import batching as bt

In [22]:
# Development aid: re-import the batching module so that edits made to it
# are picked up without restarting the kernel.
bt = reload(bt)

In [23]:
# Column names passed to the batcher as its "previous" features
# (the `col_feats_prev` argument).
col_feats_prev = [
    'humidity',
    'precip_probability',
    'precip_intensity',
    'temperature',
    'Moisture',
]

# Column names passed to the batcher as its "current" features
# (the `col_feats_current` argument).
col_feats_curr = [
    'precip_probability',
    'humidity',
    'cloud_cover',
    'apparent_temperature',
    'dew_point',
    'wind_speed',
    'wind_gust',
    'visibility',
    'precip_intensity',
    'Day',
    'Hour',
    'Month',
    'Weekday',
]

In [24]:
# Build the training batcher from the normalized series and its chunk table.
# NOTE(review): the positional 40 and 36 are not named here — presumably
# window/horizon lengths; confirm against bt.BatcherTrain's signature.
batcher_train = bt.BatcherTrain(
    df_train,
    df_chunk_train,
    40,
    36,
    col_dt='Datetime',                 # Datetime column name
    col_val='Moisture',                # time-series column to be predicted
    col_gr='ChunkId',                  # presumably the chunk/group id column — confirm
    col_feats_prev=col_feats_prev,     # prev-feature column names
    col_feats_current=col_feats_curr,  # current-feature column names
    minlen=25,                         # chunk lengths are randomly sampled; minimum chunk length
    maxlen=240,                        # presumably the maximum sampled chunk length — confirm
)

Save the batcher without its data (`df_ts`) so that its parameters can be passed to the tester in `v07.test01.ipynb`.

In [25]:
# Save a "lite" copy of the batcher: temporarily detach the time-series data
# so that only the batcher's remaining state is written to disk.
df_ts = batcher_train.df_ts
batcher_train.df_ts = None
try:
    batcher_train.save('../data/batcher_train_lite_model03.pkl')
finally:
    # Always re-attach the data, even if saving fails; otherwise the
    # in-memory batcher would be left with df_ts = None for the training
    # cells further down.
    batcher_train.df_ts = df_ts

#### Create LSTM model

In [14]:
from lib import lstm

In [44]:
# Development aid: re-import the lstm module so that edits made to it are
# picked up without restarting the kernel.
lstm = reload(lstm)

In [45]:
# Instantiate the LSTM model; its input and output sizes are read from the batcher.
model = lstm.LSTM(
    batcher_train.n_feat,  # feature count reported by the batcher
    200,                   # presumably the LSTM hidden size — confirm in lib/lstm
    hiddens_before=[800],
    hiddens_after=[800, batcher_train.out_time_len],  # last entry is the batcher's out_time_len
    lr_init=0.001,
    use_gpu=True,
)

In [46]:
# NOTE(review): the model is constructed with use_gpu=True and then moved to
# the CPU here — presumably training is meant to run on CPU; confirm intent.
model.move_cpu()



#### Train the model

In [None]:
# Fit the model on the training batcher.
# save_every/save_folder are None — presumably this disables intermediate
# checkpoints; confirm in lib/lstm. The model is saved explicitly afterwards.
model.train_model(
    batcher_train,
    max_epochs=200,
    batchsize=150,
    save_every=None,
    save_folder=None,
)

In [15]:
import torch

In [None]:
# Serialize the whole model object (not just a state dict) to the output file
# listed at the top of the notebook.
# NOTE(review): the "epoch199" in the file name is hard-coded; it matches
# max_epochs=200 in the training cell only if epochs are counted from 0.
torch.save(model, '../data/model03_epoch199.pkl')