In [1]:
# LSTM for international airline passengers problem with window regression framing
import numpy
import keras
import matplotlib.pyplot as plt
from pandas import read_csv
import math
from keras.models import Sequential
from keras.layers import Dense,Dropout
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import OneHotEncoder
from sklearn.cross_validation import train_test_split


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
	dataX, dataY = [], []
	for i in range(len(dataset)-look_back-1):
		a = dataset[i:(i+look_back), 0]
		dataX.append(a)
		dataY.append(dataset[i + look_back, 0])
	return numpy.array(dataX), numpy.array(dataY)

In [4]:
# fix random seed for reproducibility
numpy.random.seed(7)
# load the dataset
dataframe = read_csv('/home/junjie/w_d_v.csv', usecols=[7], engine='python', skipfooter=3)
all_data = read_csv('/home/junjie/all_data.csv', usecols=[7], engine='python', skipfooter=3)
dataset = dataframe.values
allData=all_data.values

In [7]:
dataset[:10]

array([[  0],
       [ 30],
       [ 24],
       [ 88],
       [ 42],
       [143],
       [143],
       [ 59],
       [ 59],
       [187]])

In [5]:
look_back = 3
trainX, trainY = create_dataset(dataset, look_back)
AllX, AllY = create_dataset(allData, look_back)
trainY=numpy.reshape(trainY,(trainY.shape[0],-1))
AllY=numpy.reshape(AllY,(AllY.shape[0],-1))

In [6]:
encX = OneHotEncoder()
encX.fit(AllX)
print ("enc.n_values_ is:",encX.n_values_)
print ("enc.feature_indices_ is:",encX.feature_indices_)

enc.n_values_ is: [200 200 200]
enc.feature_indices_ is: [  0 200 400 600]


In [7]:
encY = OneHotEncoder()
encY.fit(AllY)
print ("enc.n_values_ is:",encY.n_values_)
print ("enc.feature_indices_ is:",encY.feature_indices_)

enc.n_values_ is: [200]
enc.feature_indices_ is: [  0 200]


In [8]:
trainX_one=encX.transform(trainX).toarray()

In [9]:
train_X=numpy.reshape(trainX_one,(trainX_one.shape[0],look_back,-1))

In [10]:
train_Y=encY.transform(trainY).toarray()

In [11]:
a_train, a_test, b_train, b_test = train_test_split(train_X, train_Y, test_size=0.1, random_state=42)

In [12]:
print(a_train.shape)

(1773, 3, 200)


In [40]:
# create and fit the LSTM network
model = Sequential()
# model.add(Embedding(max_features, output_dim=256))

model.add(LSTM(512,return_sequences=True,
               input_shape=(3, a_train.shape[2])))  # returns a sequence of vectors of dimension 32

model.add(LSTM(256))  # return a single vector of dimension 32
model.add(Dense(a_train.shape[2]))

# model.compile(loss='mean_squared_logarithmic_error', optimizer='rmsprop',metrics=['accuracy'])
# model.compile(loss='categorical_hinge', optimizer='rmsprop',metrics=['accuracy'])
# model.compile(loss='logcosh', optimizer='rmsprop',metrics=['accuracy'])
model.compile(loss='cosine_proximity', optimizer='rmsprop',metrics=['accuracy'])

#batchsize批尺寸
model.fit(a_train, b_train, epochs=100, batch_size=64, verbose=2, validation_data=(a_test, b_test))

Train on 1773 samples, validate on 197 samples
Epoch 1/100
 - 6s - loss: -1.5444e-01 - acc: 0.0998 - val_loss: -1.7751e-01 - val_acc: 0.1117
Epoch 2/100
 - 2s - loss: -2.3973e-01 - acc: 0.1788 - val_loss: -1.9026e-01 - val_acc: 0.1320
Epoch 3/100
 - 2s - loss: -2.7681e-01 - acc: 0.2149 - val_loss: -1.9808e-01 - val_acc: 0.1472
Epoch 4/100
 - 2s - loss: -2.9881e-01 - acc: 0.2386 - val_loss: -1.9921e-01 - val_acc: 0.1320
Epoch 5/100
 - 2s - loss: -3.1456e-01 - acc: 0.2583 - val_loss: -2.0543e-01 - val_acc: 0.1574
Epoch 6/100
 - 2s - loss: -3.2705e-01 - acc: 0.2735 - val_loss: -2.0264e-01 - val_acc: 0.1472
Epoch 7/100
 - 2s - loss: -3.3764e-01 - acc: 0.2854 - val_loss: -2.1109e-01 - val_acc: 0.1421
Epoch 8/100
 - 2s - loss: -3.4667e-01 - acc: 0.2995 - val_loss: -2.1063e-01 - val_acc: 0.1472
Epoch 9/100
 - 2s - loss: -3.5542e-01 - acc: 0.3198 - val_loss: -2.0382e-01 - val_acc: 0.1675
Epoch 10/100
 - 2s - loss: -3.6343e-01 - acc: 0.3283 - val_loss: -2.0916e-01 - val_acc: 0.1523
Epoch 11/100

Epoch 87/100
 - 2s - loss: -8.5798e-01 - acc: 0.8940 - val_loss: -1.6057e-01 - val_acc: 0.1421
Epoch 88/100
 - 2s - loss: -8.6184e-01 - acc: 0.8985 - val_loss: -1.6308e-01 - val_acc: 0.1472
Epoch 89/100
 - 2s - loss: -8.6242e-01 - acc: 0.8996 - val_loss: -1.6432e-01 - val_acc: 0.1421
Epoch 90/100
 - 2s - loss: -8.6576e-01 - acc: 0.9013 - val_loss: -1.5742e-01 - val_acc: 0.1421
Epoch 91/100
 - 2s - loss: -8.6667e-01 - acc: 0.8985 - val_loss: -1.6198e-01 - val_acc: 0.1523
Epoch 92/100
 - 2s - loss: -8.6837e-01 - acc: 0.8979 - val_loss: -1.5978e-01 - val_acc: 0.1269
Epoch 93/100
 - 2s - loss: -8.7009e-01 - acc: 0.8968 - val_loss: -1.5964e-01 - val_acc: 0.1523
Epoch 94/100
 - 2s - loss: -8.7272e-01 - acc: 0.9036 - val_loss: -1.6266e-01 - val_acc: 0.1523
Epoch 95/100
 - 2s - loss: -8.7428e-01 - acc: 0.9007 - val_loss: -1.6224e-01 - val_acc: 0.1421
Epoch 96/100
 - 2s - loss: -8.7612e-01 - acc: 0.9013 - val_loss: -1.6753e-01 - val_acc: 0.1320
Epoch 97/100
 - 2s - loss: -8.7778e-01 - acc: 0.90

<keras.callbacks.History at 0x7f4d6ab8df28>

In [41]:
model = Sequential()
# model.add(Embedding(max_features, output_dim=256))

model.add(LSTM(512,return_sequences=True,
               input_shape=(3, a_train.shape[2])))  # returns a sequence of vectors of dimension 32

model.add(LSTM(256))  # return a single vector of dimension 32
model.add(Dense(a_train.shape[2]))

# model.compile(loss='mean_squared_logarithmic_error', optimizer='rmsprop',metrics=['accuracy'])
# model.compile(loss='categorical_hinge', optimizer='rmsprop',metrics=['accuracy'])
model.compile(loss='logcosh', optimizer='rmsprop',metrics=['accuracy'])
# model.compile(loss='cosine_proximity', optimizer='rmsprop',metrics=['accuracy'])

#batchsize批尺寸
model.fit(a_train, b_train, epochs=100, batch_size=64, verbose=2, validation_data=(a_test, b_test))

Train on 1773 samples, validate on 197 samples
Epoch 1/100
 - 6s - loss: 0.0021 - acc: 0.1043 - val_loss: 0.0021 - val_acc: 0.1066
Epoch 2/100
 - 2s - loss: 0.0021 - acc: 0.1624 - val_loss: 0.0021 - val_acc: 0.1168
Epoch 3/100
 - 2s - loss: 0.0021 - acc: 0.1839 - val_loss: 0.0021 - val_acc: 0.1269
Epoch 4/100
 - 2s - loss: 0.0020 - acc: 0.1997 - val_loss: 0.0021 - val_acc: 0.1269
Epoch 5/100
 - 2s - loss: 0.0020 - acc: 0.2132 - val_loss: 0.0021 - val_acc: 0.1218
Epoch 6/100
 - 2s - loss: 0.0020 - acc: 0.2211 - val_loss: 0.0021 - val_acc: 0.1218
Epoch 7/100
 - 2s - loss: 0.0020 - acc: 0.2335 - val_loss: 0.0021 - val_acc: 0.1320
Epoch 8/100
 - 2s - loss: 0.0020 - acc: 0.2346 - val_loss: 0.0021 - val_acc: 0.1421
Epoch 9/100
 - 2s - loss: 0.0020 - acc: 0.2431 - val_loss: 0.0021 - val_acc: 0.1574
Epoch 10/100
 - 2s - loss: 0.0020 - acc: 0.2459 - val_loss: 0.0021 - val_acc: 0.1371
Epoch 11/100
 - 2s - loss: 0.0020 - acc: 0.2538 - val_loss: 0.0021 - val_acc: 0.1421
Epoch 12/100
 - 2s - loss: 

Epoch 97/100
 - 2s - loss: 0.0017 - acc: 0.4224 - val_loss: 0.0023 - val_acc: 0.1980
Epoch 98/100
 - 2s - loss: 0.0017 - acc: 0.4292 - val_loss: 0.0023 - val_acc: 0.1929
Epoch 99/100
 - 2s - loss: 0.0017 - acc: 0.4309 - val_loss: 0.0023 - val_acc: 0.1929
Epoch 100/100
 - 2s - loss: 0.0017 - acc: 0.4236 - val_loss: 0.0023 - val_acc: 0.1980


<keras.callbacks.History at 0x7f4d6373b8d0>

In [13]:
model = Sequential()
# model.add(Embedding(max_features, output_dim=256))

model.add(LSTM(512,return_sequences=True,
               input_shape=(3, a_train.shape[2])))  # returns a sequence of vectors of dimension 32

model.add(LSTM(256))  # return a single vector of dimension 32
model.add(Dense(a_train.shape[2]))

# model.compile(loss='mean_squared_logarithmic_error', optimizer='rmsprop',metrics=['accuracy'])
model.compile(loss='categorical_hinge', optimizer='rmsprop',metrics=['accuracy'])
# model.compile(loss='logcosh', optimizer='rmsprop',metrics=['accuracy'])
# model.compile(loss='cosine_proximity', optimizer='rmsprop',metrics=['accuracy'])

#batchsize批尺寸
model.fit(a_train, b_train, epochs=100, batch_size=64, verbose=2, validation_data=(a_test, b_test))

Train on 1773 samples, validate on 197 samples
Epoch 1/100
 - 3s - loss: 1.0125 - acc: 0.0767 - val_loss: 1.0097 - val_acc: 0.0558
Epoch 2/100
 - 2s - loss: 1.0065 - acc: 0.1376 - val_loss: 1.0091 - val_acc: 0.1066
Epoch 3/100
 - 2s - loss: 1.0045 - acc: 0.1602 - val_loss: 1.0089 - val_acc: 0.0914
Epoch 4/100
 - 2s - loss: 1.0014 - acc: 0.1861 - val_loss: 1.0073 - val_acc: 0.1218
Epoch 5/100
 - 2s - loss: 0.9998 - acc: 0.2076 - val_loss: 1.0088 - val_acc: 0.0761
Epoch 6/100
 - 2s - loss: 0.9980 - acc: 0.2290 - val_loss: 1.0101 - val_acc: 0.0914
Epoch 7/100
 - 2s - loss: 0.9971 - acc: 0.2363 - val_loss: 1.0074 - val_acc: 0.1269
Epoch 8/100
 - 2s - loss: 0.9953 - acc: 0.2516 - val_loss: 1.0077 - val_acc: 0.1168
Epoch 9/100
 - 2s - loss: 0.9936 - acc: 0.2764 - val_loss: 1.0087 - val_acc: 0.0914
Epoch 10/100
 - 2s - loss: 0.9926 - acc: 0.2561 - val_loss: 1.0105 - val_acc: 0.1269
Epoch 11/100
 - 2s - loss: 0.9905 - acc: 0.2809 - val_loss: 1.0117 - val_acc: 0.1472
Epoch 12/100
 - 2s - loss: 

Epoch 97/100
 - 2s - loss: 0.4081 - acc: 0.7400 - val_loss: 2.8443 - val_acc: 0.1269
Epoch 98/100
 - 2s - loss: 0.3977 - acc: 0.7530 - val_loss: 2.8866 - val_acc: 0.1371
Epoch 99/100
 - 2s - loss: 0.3986 - acc: 0.7501 - val_loss: 2.7891 - val_acc: 0.1421
Epoch 100/100
 - 2s - loss: 0.3916 - acc: 0.7445 - val_loss: 2.9374 - val_acc: 0.1371


<keras.callbacks.History at 0x7ff3af259be0>

In [14]:
model = Sequential()
# model.add(Embedding(max_features, output_dim=256))

model.add(LSTM(512,return_sequences=True,
               input_shape=(3, a_train.shape[2])))  # returns a sequence of vectors of dimension 32

model.add(LSTM(256))  # return a single vector of dimension 32
model.add(Dense(a_train.shape[2]))

model.compile(loss='mean_squared_logarithmic_error', optimizer='rmsprop',metrics=['accuracy'])
# model.compile(loss='categorical_hinge', optimizer='rmsprop',metrics=['accuracy'])
# model.compile(loss='logcosh', optimizer='rmsprop',metrics=['accuracy'])
# model.compile(loss='cosine_proximity', optimizer='rmsprop',metrics=['accuracy'])

#batchsize批尺寸
model.fit(a_train, b_train, epochs=100, batch_size=64, verbose=2, validation_data=(a_test, b_test))

Train on 1773 samples, validate on 197 samples
Epoch 1/100
 - 3s - loss: 0.0023 - acc: 0.0948 - val_loss: 0.0023 - val_acc: 0.1066
Epoch 2/100
 - 2s - loss: 0.0023 - acc: 0.1506 - val_loss: 0.0023 - val_acc: 0.1168
Epoch 3/100
 - 2s - loss: 0.0023 - acc: 0.1686 - val_loss: 0.0023 - val_acc: 0.1015
Epoch 4/100
 - 2s - loss: 0.0022 - acc: 0.1878 - val_loss: 0.0023 - val_acc: 0.1117
Epoch 5/100
 - 2s - loss: 0.0022 - acc: 0.1963 - val_loss: 0.0023 - val_acc: 0.1218
Epoch 6/100
 - 2s - loss: 0.0022 - acc: 0.2053 - val_loss: 0.0023 - val_acc: 0.1218
Epoch 7/100
 - 2s - loss: 0.0022 - acc: 0.2126 - val_loss: 0.0023 - val_acc: 0.1269
Epoch 8/100
 - 2s - loss: 0.0022 - acc: 0.2166 - val_loss: 0.0023 - val_acc: 0.1371
Epoch 9/100
 - 2s - loss: 0.0022 - acc: 0.2290 - val_loss: 0.0023 - val_acc: 0.1218
Epoch 10/100
 - 2s - loss: 0.0022 - acc: 0.2324 - val_loss: 0.0023 - val_acc: 0.1320
Epoch 11/100
 - 2s - loss: 0.0021 - acc: 0.2442 - val_loss: 0.0023 - val_acc: 0.1320
Epoch 12/100
 - 2s - loss: 

Epoch 97/100
 - 2s - loss: 0.0012 - acc: 0.5460 - val_loss: 0.0030 - val_acc: 0.1269
Epoch 98/100
 - 2s - loss: 0.0012 - acc: 0.5460 - val_loss: 0.0030 - val_acc: 0.1168
Epoch 99/100
 - 2s - loss: 0.0012 - acc: 0.5499 - val_loss: 0.0029 - val_acc: 0.1015
Epoch 100/100
 - 2s - loss: 0.0012 - acc: 0.5510 - val_loss: 0.0030 - val_acc: 0.1218


<keras.callbacks.History at 0x7ff3ad1cf358>

In [22]:
#pip3 install h5py 
import h5py

In [18]:
# define LSTM
from keras.layers import TimeDistributed
from keras.layers import Bidirectional
model = Sequential()
model.add(Bidirectional(LSTM(512, return_sequences=True), input_shape=(3, a_train.shape[2])))
# model.add(TimeDistributed(Dense(1, activation='sigmoid')))

model.add(LSTM(256))  # return a single vector of dimension 32
model.add(Dense(a_train.shape[2]))
model.compile(loss='cosine_proximity', optimizer='rmsprop',metrics=['accuracy'])

model.fit(a_train, b_train, epochs=100, batch_size=64, verbose=2, validation_data=(a_test, b_test))

Train on 1773 samples, validate on 197 samples
Epoch 1/100
 - 5s - loss: -1.6753e-01 - acc: 0.1359 - val_loss: -1.9868e-01 - val_acc: 0.1269
Epoch 2/100
 - 4s - loss: -2.7132e-01 - acc: 0.2245 - val_loss: -2.1690e-01 - val_acc: 0.1675
Epoch 3/100
 - 4s - loss: -3.0682e-01 - acc: 0.2578 - val_loss: -2.1477e-01 - val_acc: 0.1472
Epoch 4/100
 - 4s - loss: -3.2844e-01 - acc: 0.2803 - val_loss: -2.1599e-01 - val_acc: 0.1726
Epoch 5/100
 - 4s - loss: -3.4212e-01 - acc: 0.2961 - val_loss: -2.1895e-01 - val_acc: 0.1827
Epoch 6/100
 - 4s - loss: -3.5470e-01 - acc: 0.3125 - val_loss: -2.2254e-01 - val_acc: 0.1624
Epoch 7/100
 - 4s - loss: -3.6380e-01 - acc: 0.3192 - val_loss: -2.1836e-01 - val_acc: 0.1827
Epoch 8/100
 - 4s - loss: -3.7142e-01 - acc: 0.3305 - val_loss: -2.1550e-01 - val_acc: 0.1675
Epoch 9/100
 - 4s - loss: -3.7821e-01 - acc: 0.3305 - val_loss: -2.1419e-01 - val_acc: 0.1624
Epoch 10/100
 - 4s - loss: -3.8583e-01 - acc: 0.3412 - val_loss: -2.1536e-01 - val_acc: 0.1726
Epoch 11/100

Epoch 87/100
 - 4s - loss: -8.8099e-01 - acc: 0.8973 - val_loss: -1.5712e-01 - val_acc: 0.1472
Epoch 88/100
 - 4s - loss: -8.8177e-01 - acc: 0.9024 - val_loss: -1.5418e-01 - val_acc: 0.1472
Epoch 89/100
 - 4s - loss: -8.8283e-01 - acc: 0.9030 - val_loss: -1.6430e-01 - val_acc: 0.1675
Epoch 90/100
 - 4s - loss: -8.8433e-01 - acc: 0.8996 - val_loss: -1.5513e-01 - val_acc: 0.1827
Epoch 91/100
 - 4s - loss: -8.8543e-01 - acc: 0.8996 - val_loss: -1.5499e-01 - val_acc: 0.1574
Epoch 92/100
 - 4s - loss: -8.8691e-01 - acc: 0.9047 - val_loss: -1.5055e-01 - val_acc: 0.1523
Epoch 93/100
 - 4s - loss: -8.8771e-01 - acc: 0.9013 - val_loss: -1.5408e-01 - val_acc: 0.1574
Epoch 94/100
 - 4s - loss: -8.8912e-01 - acc: 0.8985 - val_loss: -1.5372e-01 - val_acc: 0.1472
Epoch 95/100
 - 4s - loss: -8.9022e-01 - acc: 0.9007 - val_loss: -1.5114e-01 - val_acc: 0.1574
Epoch 96/100
 - 4s - loss: -8.9113e-01 - acc: 0.9002 - val_loss: -1.5974e-01 - val_acc: 0.1523
Epoch 97/100
 - 4s - loss: -8.9276e-01 - acc: 0.90

<keras.callbacks.History at 0x7ff338a8ac50>

In [14]:
from keras.models import *

In [15]:
model.save('wdv.h5')   # HDF5 file, you have to pip3 install h5py if don't have it

In [16]:
print(os.path.abspath('.'))

/Users/xiongjunjie/Documents/code_doc/python


In [17]:
from keras.models import load_model

model = load_model('wdv.h5')#调整好，保存为每个用户的名字

In [53]:
testdata = read_csv('weekend_day_test.csv', usecols=[7], engine='python', skipfooter=3)
Tdataset = testdata.values
look_back = 3
TX, TY = create_dataset(Tdataset, look_back)

In [54]:
encX = OneHotEncoder()
encX.fit(AllX)
TX=encX.transform(TX).toarray()
TestX=numpy.reshape(TX,(TX.shape[0],look_back,-1))

In [55]:
TY=numpy.reshape(TY,(TY.shape[0],-1))

encY = OneHotEncoder()
encY.fit(AllY)
TestY=encY.transform(TY).toarray()


In [56]:
model.evaluate(TestX, TestY, batch_size=64, verbose=2, sample_weight=None)

[-0.07590291933040273, 0.07372175980975029]

In [None]:
trainPredict = model.predict(a_train)

In [None]:
print(trainPredict[0])
print(b_train[0]*trainPredict[0])