In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from keras.models import Model
from keras.layers import Input, Dense, Dropout
from keras.regularizers import l2, l1
from keras.optimizers import RMSprop, Adam
from tensorflow import set_random_seed
# Fix the TensorFlow and NumPy random seeds before any model is built.
# NOTE(review): the two seed values differ (23333 vs 233333) - confirm this is intentional.
set_random_seed(23333)
np.random.seed(233333)

import time
import sys
import gc
import pickle
# Last expression of the cell, so the interpreter version is shown as the cell output.
sys.version_info

Using TensorFlow backend.


sys.version_info(major=3, minor=5, micro=6, releaselevel='final', serial=0)

# Model : Neural Network

# Load Data

In [2]:
# Columns kept for the neural network: the month index, the target, and the
# engineered lag / rolling-statistic / calendar features.
# ID-like columns are deliberately left out:
#   shop_id, item_id, city_code, item_category_id, type_code, subtype_code
selected_columns = [
    'date_block_num',
    'item_cnt_month',
    # lags of the target
    'item_cnt_month_lag_1',
    'item_cnt_month_lag_2',
    'item_cnt_month_lag_3',
    'item_cnt_month_lag_6',
    'item_cnt_month_lag_12',
    # rolling mean / std of sales
    'item_avg_sale_last_6',
    'item_std_sale_last_6',
    'item_avg_sale_last_12',
    'item_std_sale_last_12',
    'shop_avg_sale_last_6',
    'shop_std_sale_last_6',
    'shop_avg_sale_last_12',
    'shop_std_sale_last_12',
    'category_avg_sale_last_12',
    'category_std_sale_last_12',
    'city_avg_sale_last_12',
    'city_std_sale_last_12',
    'type_avg_sale_last_12',
    'type_std_sale_last_12',
    'subtype_avg_sale_last_12',
    'subtype_std_sale_last_12',
    # lagged group averages
    'date_avg_item_cnt_lag_1',
    'date_item_avg_item_cnt_lag_1',
    'date_item_avg_item_cnt_lag_2',
    'date_item_avg_item_cnt_lag_3',
    'date_item_avg_item_cnt_lag_6',
    'date_item_avg_item_cnt_lag_12',
    'date_shop_avg_item_cnt_lag_1',
    'date_shop_avg_item_cnt_lag_2',
    'date_shop_avg_item_cnt_lag_3',
    'date_shop_avg_item_cnt_lag_6',
    'date_shop_avg_item_cnt_lag_12',
    'date_cat_avg_item_cnt_lag_1',
    'date_shop_cat_avg_item_cnt_lag_1',
    'date_city_avg_item_cnt_lag_1',
    'date_item_city_avg_item_cnt_lag_1',
    # price, calendar and first/last-sale features
    'delta_price_lag',
    'month',
    'year',
    'item_shop_last_sale',
    'item_last_sale',
    'item_shop_first_sale',
    'item_first_sale',
]

# NOTE: unpickling can execute arbitrary code - only load a data.pkl you produced yourself.
data = pd.read_pickle('data.pkl')
data = data[selected_columns]

## 產生訓練集、驗證集與測試集

In [3]:
# Time-based split on the month index:
#   date_block_num < 33  -> training rows
#   date_block_num == 33 -> validation rows
#   date_block_num == 34 -> test rows (features only, no target is extracted)
month_index = data['date_block_num']
train_mask = month_index < 33
valid_mask = month_index == 33
test_mask = month_index == 34

X_train = data.loc[train_mask].drop(['item_cnt_month'], axis=1)
Y_train = data.loc[train_mask, 'item_cnt_month']

X_valid = data.loc[valid_mask].drop(['item_cnt_month'], axis=1)
Y_valid = data.loc[valid_mask, 'item_cnt_month']

X_test = data.loc[test_mask].drop(['item_cnt_month'], axis=1)

# Drop the full frame (and the helpers that reference it) so memory can be reclaimed.
del month_index, train_mask, valid_mask, test_mask
del data
gc.collect();

# 建立NN模型，調整參數
## try1

* 建立四層layer(神經元個數分別為128,64,32,1)
* batch size = 1000
* epochs = 20
* learning rate = 0.0005

In [8]:
# define model
def Sales_prediction_model(input_shape, hidden_units=(128, 64, 32), l2_weight=0.02):
    """Build the (uncompiled) fully connected sales-regression network.

    Every layer, including the output, is a ReLU ``Dense`` layer with
    'RandomUniform' kernel initialisation and an L2 kernel penalty.

    Parameters
    ----------
    input_shape : sequence of int
        Shape of a single sample, forwarded to ``Input``.
    hidden_units : sequence of int, optional
        Width of each hidden layer, in order. The default (128, 64, 32)
        reproduces the original "try1" architecture.
    l2_weight : float, optional
        L2 regularisation factor applied to every layer's kernel.

    Returns
    -------
    Model
        A Keras model named 'Sales_prediction_model' with one output unit.
    """
    def relu_dense(units):
        # A fresh regulariser instance per layer, as in the hand-written version.
        return Dense(units, kernel_initializer='RandomUniform',
                     kernel_regularizer=l2(l2_weight), activation="relu")

    in_layer = Input(input_shape)
    x = in_layer
    for units in hidden_units:
        x = relu_dense(units)(x)

    # Single ReLU output unit, so predictions are never negative.
    # NOTE(review): a ReLU output has zero gradient while its pre-activation is
    # negative - consider a linear output if predictions collapse to 0.
    x = relu_dense(1)(x)

    model = Model(inputs=in_layer, outputs=x, name='Sales_prediction_model')
    return model

# The network cannot consume NaN, so missing feature values are replaced by 0 (in place).
for features in (X_train, X_valid, X_test):
    features.fillna(0, inplace=True)

# No additional feature scaling is applied here.
# Per the author's note, features such as 'item_avg_sale_last_6' were already
# scaled during feature engineering.

input_shape = [X_train.shape[1]]  # one input per feature column
model = Sales_prediction_model(input_shape)
model.compile(optimizer=Adam(lr=0.0005), loss=["mse"], metrics=['mse'])
model.fit(
    X_train, Y_train,
    validation_data=(X_valid, Y_valid),
    batch_size=1000,
    epochs=20
)

Train on 6186922 samples, validate on 238172 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x122625828>

## try2

* 建立三層layer(神經元個數分別為16,8,1)
* batch size = 10000
* epochs = 5
* learning rate = 0.0005

In [12]:
# define model
def Sales_prediction_model(input_shape, hidden_units=(16, 8), l2_weight=0.02):
    """Build the (uncompiled) fully connected sales-regression network.

    Every layer, including the output, is a ReLU ``Dense`` layer with
    'RandomUniform' kernel initialisation and an L2 kernel penalty.

    Parameters
    ----------
    input_shape : sequence of int
        Shape of a single sample, forwarded to ``Input``.
    hidden_units : sequence of int, optional
        Width of each hidden layer, in order. The default (16, 8)
        reproduces the original "try2" architecture.
    l2_weight : float, optional
        L2 regularisation factor applied to every layer's kernel.

    Returns
    -------
    Model
        A Keras model named 'Sales_prediction_model' with one output unit.
    """
    def relu_dense(units):
        # A fresh regulariser instance per layer, as in the hand-written version.
        return Dense(units, kernel_initializer='RandomUniform',
                     kernel_regularizer=l2(l2_weight), activation="relu")

    in_layer = Input(input_shape)
    x = in_layer
    for units in hidden_units:
        x = relu_dense(units)(x)

    # Single ReLU output unit, so predictions are never negative.
    x = relu_dense(1)(x)

    model = Model(inputs=in_layer, outputs=x, name='Sales_prediction_model')
    return model

# The network cannot consume NaN, so missing feature values are replaced by 0 (in place).
for features in (X_train, X_valid, X_test):
    features.fillna(0, inplace=True)

# No additional feature scaling is applied here.
# Per the author's note, features such as 'item_avg_sale_last_6' were already
# scaled during feature engineering.

input_shape = [X_train.shape[1]]  # one input per feature column
model = Sales_prediction_model(input_shape)
model.compile(optimizer=Adam(lr=0.0005), loss=["mse"], metrics=['mse'])
model.fit(
    X_train, Y_train,
    validation_data=(X_valid, Y_valid),
    batch_size=10000,
    epochs=5
)

Train on 6186922 samples, validate on 238172 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x11f8eb5f8>

## try3

* 建立三層layer(神經元個數分別為8,2,1)
* batch size = 10000
* epochs = 5
* learning rate = 0.0005

In [14]:
# define model
def Sales_prediction_model(input_shape, hidden_units=(8, 2), l2_weight=0.02):
    """Build the (uncompiled) fully connected sales-regression network.

    Every layer, including the output, is a ReLU ``Dense`` layer with
    'RandomUniform' kernel initialisation and an L2 kernel penalty.

    Parameters
    ----------
    input_shape : sequence of int
        Shape of a single sample, forwarded to ``Input``.
    hidden_units : sequence of int, optional
        Width of each hidden layer, in order. The default (8, 2)
        reproduces the original "try3" architecture.
    l2_weight : float, optional
        L2 regularisation factor applied to every layer's kernel.

    Returns
    -------
    Model
        A Keras model named 'Sales_prediction_model' with one output unit.
    """
    def relu_dense(units):
        # A fresh regulariser instance per layer, as in the hand-written version.
        return Dense(units, kernel_initializer='RandomUniform',
                     kernel_regularizer=l2(l2_weight), activation="relu")

    in_layer = Input(input_shape)
    x = in_layer
    for units in hidden_units:
        x = relu_dense(units)(x)

    # Single ReLU output unit, so predictions are never negative.
    x = relu_dense(1)(x)

    model = Model(inputs=in_layer, outputs=x, name='Sales_prediction_model')
    return model

# The network cannot consume NaN, so missing feature values are replaced by 0 (in place).
for features in (X_train, X_valid, X_test):
    features.fillna(0, inplace=True)

# No additional feature scaling is applied here.
# Per the author's note, features such as 'item_avg_sale_last_6' were already
# scaled during feature engineering.

input_shape = [X_train.shape[1]]  # one input per feature column
model = Sales_prediction_model(input_shape)
model.compile(optimizer=Adam(lr=0.0005), loss=["mse"], metrics=['mse'])
model.fit(
    X_train, Y_train,
    validation_data=(X_valid, Y_valid),
    batch_size=10000,
    epochs=5
)

Train on 6186922 samples, validate on 238172 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x11ebaf438>

## try4

* 建立三層layer(神經元個數分別為8,2,1)
* batch size = 10000
* epochs = 10
* learning rate = 0.0005

In [15]:
# define model
def Sales_prediction_model(input_shape, hidden_units=(8, 2), l2_weight=0.02):
    """Build the (uncompiled) fully connected sales-regression network.

    Every layer, including the output, is a ReLU ``Dense`` layer with
    'RandomUniform' kernel initialisation and an L2 kernel penalty.

    Parameters
    ----------
    input_shape : sequence of int
        Shape of a single sample, forwarded to ``Input``.
    hidden_units : sequence of int, optional
        Width of each hidden layer, in order. The default (8, 2)
        reproduces the original "try4" architecture.
    l2_weight : float, optional
        L2 regularisation factor applied to every layer's kernel.

    Returns
    -------
    Model
        A Keras model named 'Sales_prediction_model' with one output unit.
    """
    def relu_dense(units):
        # A fresh regulariser instance per layer, as in the hand-written version.
        return Dense(units, kernel_initializer='RandomUniform',
                     kernel_regularizer=l2(l2_weight), activation="relu")

    in_layer = Input(input_shape)
    x = in_layer
    for units in hidden_units:
        x = relu_dense(units)(x)

    # Single ReLU output unit, so predictions are never negative.
    x = relu_dense(1)(x)

    model = Model(inputs=in_layer, outputs=x, name='Sales_prediction_model')
    return model

# The network cannot consume NaN, so missing feature values are replaced by 0 (in place).
for features in (X_train, X_valid, X_test):
    features.fillna(0, inplace=True)

# No additional feature scaling is applied here.
# Per the author's note, features such as 'item_avg_sale_last_6' were already
# scaled during feature engineering.

input_shape = [X_train.shape[1]]  # one input per feature column
model = Sales_prediction_model(input_shape)
model.compile(optimizer=Adam(lr=0.0005), loss=["mse"], metrics=['mse'])
model.fit(
    X_train, Y_train,
    validation_data=(X_valid, Y_valid),
    batch_size=10000,
    epochs=10
)

Train on 6186922 samples, validate on 238172 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x11f0253c8>

### 結論：try2 的結果最好，LB score 為 0.91 多。