# Modeling with tflearn

In [1]:
from IPython.core.display import HTML
# Read both stylesheets with context managers so the file handles are closed
# deterministically instead of being left for the garbage collector.
css_parts = []
for css_filename in ('style-table.css', 'style-notebook.css'):
    with open(css_filename) as css_file:
        css_parts.append(css_file.read())
css = ''.join(css_parts)

# Last expression of the cell: rendering this HTML object injects the styles
# into the notebook page.
HTML('<style>{}</style>'.format(css))

In [2]:
import h5py
# import dask
# from dask.multiprocessing import get
import numpy as np
from dask import array as da
from dask import delayed
import pandas as pd
import pathlib2 as pl

In [3]:
# pandas display configuration for this notebook, applied in one pass.
display_options = (
    ('display.max_colwidth', 100),
    ('display.max_rows', 200),
    ('display.max_columns', 6),
    ('display.width', 1000),
)
for option_name, option_value in display_options:
    pd.set_option(option_name, option_value)

# dask.set_options(get=get);

#### Loading data into dask (playground)

In [None]:
# Single HDF5 file used by the exploratory (playground) cells of this section.
# NOTE(review): absolute, machine-specific path -- adjust to wherever the
# data lives before running.
fpath = '/Volumes/CompanionEx/Data/dfs/PP_TS_2016-01-11-06_2016-01-11-13.hdf'

In [None]:
# Open the sample file read-only and take a handle to its
# '/features_weather' dataset.
f = h5py.File(fpath, 'r')
d = f['/features_weather']

In [None]:
# Show the dimensions of the dataset handle `d`.
d.shape

In [None]:
# Wrap the HDF5 dataset in a dask array made of 100000-row chunks that span
# every column. The column count is read from the dataset itself instead of
# being hard-coded, so the cell keeps working if the feature width changes.
x = da.from_array(d, chunks=(100000, d.shape[1]))

In [None]:
# Evaluate only the first ten rows of the dask array and display them.
x[:10].compute()

In [None]:
# Close the sample file opened for the playground cells.
# NOTE(review): `d` and `x` are backed by this file, so they presumably
# cannot be read after this point -- confirm before reusing them.
f.close()

#### Loading datasets (for real)

In [4]:
# Directory that is searched for '*.hdf' files when the datasets are loaded.
# NOTE(review): absolute, machine-specific path -- adjust before running
# on another machine.
DF_DIR = pl.Path('/Volumes/CompanionEx/Data/dfs/')
# Number of rows per dask chunk. Written as an integer literal because it is
# a row count passed as a chunk size; `1e5` is a float.
CHUNK_SIZE = 100000

In [6]:
# Glob the directory once and sort the result, so that features and targets
# are read from the same files in the same, reproducible order (their rows
# must stay aligned after concatenation).
hdf_paths = sorted(DF_DIR.glob('*.hdf'))
if not hdf_paths:
    raise IOError('No .hdf files found in {}'.format(DF_DIR))

# Open every file exactly once (read-only) and keep the handles: the dask
# arrays below read from them lazily, and they can be released when the work
# is done with `for hdf_file in hdf_files: hdf_file.close()`.
hdf_files = [h5py.File(str(fpath), 'r') for fpath in hdf_paths]


def load_dask_arrays(dataset_key):
    """Collect one dataset from every open HDF5 file as lazy dask arrays.

    Parameters
    ----------
    dataset_key : str
        Path of the dataset inside each file, e.g. ``'/features_weather'``.

    Returns
    -------
    datasets : list
        The h5py dataset handles, one per file in ``hdf_files``.
    arrays : list
        One dask array per dataset, chunked into blocks of ``CHUNK_SIZE``
        rows that span all columns of the dataset.
    """
    datasets = [hdf_file[dataset_key] for hdf_file in hdf_files]
    rows_per_chunk = int(CHUNK_SIZE)  # chunk sizes are whole row counts
    arrays = [
        da.from_array(dset, chunks=(rows_per_chunk, dset.shape[1]))
        for dset in datasets
    ]
    return datasets, arrays


features_ds, features_arrays = load_dask_arrays('/features_weather')
target_flow_ds, target_flow_arrays = load_dask_arrays('/target_flow')
target_speed_ds, target_speed_arrays = load_dask_arrays('/target_speed')

# Stack the per-file arrays row-wise into one array per quantity.
features = da.concatenate(features_arrays, axis=0)
target_flow = da.concatenate(target_flow_arrays, axis=0)
target_speed = da.concatenate(target_speed_arrays, axis=0)

To implement models, use the concatenated datasets: `features`, `target_flow`, `target_speed`. The data is lazily loaded and split into a number of `partitions`. In this case we have:

In [7]:
# Number of partitions the concatenated `features` array is split into.
features.npartitions

1155

In [9]:
# Evaluate only the first ten rows (all columns) of `features` and display them.
features[:10,:].compute()

array([[  3.50612936e+18,   1.45404342e+09,   0.00000000e+00,
          8.50000000e+00,   1.00000000e+01],
       [  3.50612936e+18,   1.45404348e+09,   0.00000000e+00,
          8.50000000e+00,   1.00000000e+01],
       [  3.50612936e+18,   1.45404354e+09,   0.00000000e+00,
          8.50000000e+00,   1.00000000e+01],
       [  3.50612936e+18,   1.45404360e+09,   0.00000000e+00,
          8.50000000e+00,   1.00000000e+01],
       [  3.50612936e+18,   1.45404366e+09,   0.00000000e+00,
          8.50000000e+00,   1.00000000e+01],
       [  3.50612936e+18,   1.45404372e+09,   0.00000000e+00,
          8.50000000e+00,   1.00000000e+01],
       [  3.50612936e+18,   1.45404378e+09,   0.00000000e+00,
          8.50000000e+00,   1.00000000e+01],
       [  3.50612936e+18,   1.45404384e+09,   0.00000000e+00,
          8.50000000e+00,   1.00000000e+01],
       [  3.50612936e+18,   1.45404390e+09,   0.00000000e+00,
          8.50000000e+00,   1.00000000e+01],
       [  3.50612936e+18,   1.4540439

#### Import tflearn and create model

In [None]:
import tflearn
# from tflearn.layers.core import *
# from tflearn.layers.conv import *
# from tflearn.data_utils import *
# from tflearn.layers.estimator import *

In [None]:
# Build network
# WORK IN PROGRESS!!
#
# All layer constructors are called through the `tflearn` namespace. The
# star-imports that would provide the bare names (`input_data`, `conv_2d`,
# ...) are commented out in this notebook, so unqualified calls raise
# NameError on a fresh kernel.

# Network 1: convolutional classifier; this is the graph stored in `network`.
# NOTE(review): the [None, 32, 32, 3] input and 10-way softmax output do not
# match the 5-column `features` array loaded earlier -- presumably a
# placeholder architecture; confirm before training on the real data.
network = tflearn.input_data(shape=[None, 32, 32, 3])
network = tflearn.conv_2d(network, 32, 3, activation='relu')
network = tflearn.max_pool_2d(network, 2)
network = tflearn.dropout(network, 0.75)
network = tflearn.conv_2d(network, 64, 3, activation='relu')
network = tflearn.conv_2d(network, 64, 3, activation='relu')
network = tflearn.max_pool_2d(network, 2)
network = tflearn.dropout(network, 0.5)
network = tflearn.fully_connected(network, 512, activation='relu')
network = tflearn.dropout(network, 0.5)
network = tflearn.fully_connected(network, 10, activation='softmax')
network = tflearn.regression(network, optimizer='adam',
                             loss='categorical_crossentropy',
                             learning_rate=0.001)

# Network 2: embedding + LSTM classifier, stored in `net`.
# NOTE(review): `net` is built here but not referenced by any other cell
# visible in this notebook -- presumably a second placeholder; confirm.
net = tflearn.input_data(shape=[None, 200])
net = tflearn.embedding(net, input_dim=20000, output_dim=128)
net = tflearn.lstm(net, 128, dropout=0.8)
net = tflearn.dropout(net, 0.5)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net, optimizer='adam',
                         loss='categorical_crossentropy')


In [None]:
# Training
# Wrap the `network` graph in a tflearn DNN model and fit it for 50 epochs
# with shuffling, a batch size of 96 and metric reporting enabled.
# NOTE(review): X, Y, X_test and Y_test are not defined in any cell visible
# in this notebook -- they must be derived from `features` / `target_flow` /
# `target_speed` (or defined elsewhere) before this cell can run.
# NOTE(review): the run_id 'cifar10_cnn' looks inherited from an example;
# confirm it is the intended run label.
model = tflearn.DNN(network, tensorboard_verbose=0)
model.fit(X, Y, n_epoch=50, shuffle=True, validation_set=(X_test, Y_test),
          show_metric=True, batch_size=96, run_id='cifar10_cnn')