# Chapter 3 - Deep Neural Networks for Time Series Forecasting the Easy Way
_pg. 31-50_

## Getting the data from the internet  

This example uses data from the internet, which we need to download and clean first.

In [29]:
import numpy as np
import pandas as pd
import urllib

url = "http://ww2.amstat.org/publications/jse/datasets/COE.xls"
loc = "/home/dpindk/irlab/big-data-summer-school-2017/COE.xls"
urllib.urlretrieve(url, loc)

('/home/dpindk/irlab/big-data-summer-school-2017/COE.xls',
 <httplib.HTTPMessage instance at 0x7fecfd4ddc68>)

## Cleaning Up Downloaded Spreadsheet Files

In [30]:
Excel_file = pd.ExcelFile(loc)

## Worksheet Names

In [31]:
print Excel_file.sheet_names

[u'COE data']


In [32]:
spreadsheet = Excel_file.parse('COE data')
print spreadsheet.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 265 entries, 0 to 264
Data columns (total 6 columns):
DATE      265 non-null datetime64[ns]
COE$      265 non-null float64
COE$_1    265 non-null float64
#Bids     265 non-null int64
Quota     265 non-null int64
Open?     265 non-null int64
dtypes: datetime64[ns](1), float64(2), int64(3)
memory usage: 12.5 KB
None


In [33]:
data = spreadsheet['COE$']

## View `data` Values

In [34]:
print data.head()

0     7400.0
1    11100.0
2     5002.0
3     3170.0
4     3410.0
Name: COE$, dtype: float64


## Adjusting Data

In [35]:
print spreadsheet['DATE'][193:204]

193   2004-02-01
194   2002-02-15
195   2004-03-01
196   2004-03-15
197   2004-04-01
198   2002-04-15
199   2004-05-01
200   2004-05-15
201   2004-06-01
202   2002-06-15
203   2004-07-01
Name: DATE, dtype: datetime64[ns]


Use the following to fix the year errors...

In [36]:
spreadsheet.set_value(194, 'DATE', '2004-02-15')
spreadsheet.set_value(198, 'DATE', '2004-04-15')
spreadsheet.set_value(202, 'DATE', '2004-06-15')
print spreadsheet['DATE'][193:204]

193   2004-02-01
194   2004-02-15
195   2004-03-01
196   2004-03-15
197   2004-04-01
198   2004-04-15
199   2004-05-01
200   2004-05-15
201   2004-06-01
202   2004-06-15
203   2004-07-01
Name: DATE, dtype: datetime64[ns]


## Saving the Data

As shown in the book, we can save the data for later use with the following code as a comma separated values `.csv` file.

In [37]:
loc = "/home/dpindk/irlab/big-data-summer-school-2017/COE.csv"
spreadsheet.to_csv(loc)

## How to Scale the Input Attributes

In [38]:
from sklearn import preprocessing
x = data
scaler = preprocessing.MinMaxScaler(feature_range=(0,1))

In [39]:
print scaler

MinMaxScaler(copy=True, feature_range=(0, 1))


In [40]:
print type(x)

<class 'pandas.core.series.Series'>


In [41]:
x = np.array(x).reshape((len(x), ))

In [42]:
print type(x)

<type 'numpy.ndarray'>


## Log Transform

In [43]:
x = np.log(x)

## A Note on Data Shape 

In [44]:
print x.shape

(265,)


In [45]:
x = x.reshape(-1,1)
print x.shape

(265, 1)


## Scale `x`

In [46]:
x = scaler.fit_transform(x)

In [47]:
x = x.reshape(-1)
print x.shape

(265,)


In [48]:
print(round(x.min(), 2))
print np.min(x)

0.0
0.0


In [49]:
print(round(x.max(), 2))
print np.max(x)

1.0
1.0


## Working with `statsmodels` Library

In [50]:
from statsmodels.tsa.stattools import pacf

In [51]:
x_pacf = pacf(x, nlags=5, method='ols')

In [52]:
print x_pacf

[ 1.          0.95969034 -0.27093837  0.22144024 -0.04729577  0.07360662]


## Import `nnet_ts`

In [53]:
from nnet_ts import *
count = 0
ahead = 12
pred = list()

## The `while` Loop

In [54]:
while count < ahead:
    end = len(x) - ahead + count
    np.random.seed(2016)
    
    fit1 = TimeSeriesNnet(hidden_layers=[7,3], activation_functions=["tanh", "tanh"])
    fit1.fit(x[0:end], lag=1, epochs=100)
    out = fit1.predict_ahead(n_ahead=1)
    
    print "Obs {:02d}: x={:0.4f}  y={:0.4f} ".format(count+1, round(x[count], 4), round(pd.Series(out), 4))
    
    pred.append(out)
    
    count += 1

Obs 01: x=0.4303  y=0.5208 
Obs 02: x=0.5174  y=0.5226 
Obs 03: x=0.3462  y=0.5425 
Obs 04: x=0.2482  y=0.5619 
Obs 05: x=0.2639  y=0.5825 
Obs 06: x=0.1979  y=0.5637 
Obs 07: x=0.0000  y=0.5434 
Obs 08: x=0.1064  y=0.5522 
Obs 09: x=0.3875  y=0.5487 
Obs 10: x=0.4726  y=0.5768 
Obs 11: x=0.5188  y=0.5845 
Obs 12: x=0.5406  y=0.5532 


## Realized and Predicted Values

In [55]:
pred1 = scaler.inverse_transform(pred)
pred1 = np.exp(pred1)
print np.round(pred1, 1)

[[ 11275.8]
 [ 11372. ]
 [ 12478.2]
 [ 13655.2]
 [ 15031.4]
 [ 13773.9]
 [ 12530.3]
 [ 13055.6]
 [ 12842.4]
 [ 14633.9]
 [ 15169. ]
 [ 13112.4]]
