In [None]:
import warnings
warnings.filterwarnings("ignore")

#######################################################################################
import matplotlib
import matplotlib.pyplot as plt
#matplotlib內建不支援中文，解決辦法：每次預先指定字體
matplotlib.rcParams.update({'font.size': 36})
matplotlib.use('qt4agg')
myfont = matplotlib.font_manager.FontProperties(fname='C:\\Windows\\Fonts\\msjh.ttc')
#指定字體
matplotlib.rcParams['axes.unicode_minus']=False

from matplotlib.ticker import FormatStrFormatter
majorFormatter = FormatStrFormatter('%0.f') #設定圖表浮點數的格式
%matplotlib inline 
#繪圖完就直接顯示該圖，省略每次繪圖完都要輸入plt.show指令的動作
#######################################################################################
import seaborn as sns

import operator
import numpy as np
# 設定array的float格式
float_formatter = lambda x: "%.2f" % x
np.set_printoptions(formatter={'float_kind':float_formatter})
import pandas as pd
pd.set_option('display.max_columns', 100)  #設定可顯示欄位的上限
pd.options.display.float_format = '{:,.6f}'.format  #設定浮點數的格式

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error


In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, GRU
from keras.wrappers.scikit_learn import KerasClassifier

# 資料讀取

In [None]:
# Load the raw table from the workbook.
# `encoding` is not a read_excel parameter: recent pandas versions reject it with a
# TypeError, and it never influenced how an .xlsx workbook is decoded, so it is dropped.
data = pd.read_excel('data.xlsx')

In [None]:
# Replace every missing value in `data` with 0, then print the column / dtype summary
data = data.fillna(0)
data.info()

In [None]:
# Preview the first 10 rows of `data`
data.head(10)

# 建立訓練樣本

In [None]:
# Training features (X): whether the account logged in on each of the past 30 days.
# Selected by position: the 30 columns that sit immediately before the last 7 columns of `data`.
train_df = data.iloc[:,-37:-7].copy()
train_df.head()

In [None]:
# Prediction target (Y): whether the account logs in on each day of the following week.
# Selected by position: the last 7 columns of `data`.
train_y = data.iloc[:, -7:].copy()
train_y.head()

In [None]:
# GRU (like LSTM) requires 3-D input, so a trailing feature axis of length 1 is appended
# to the 2-D feature matrix.
train_value = np.expand_dims(train_df.values, axis=-1)
print(train_value.shape)  # (samples, timesteps, features)

## 調整模型參數

In [None]:
# node: number of units in each GRU layer; dropout_rate: fraction dropped; opt: optimizer

def get_model(node=128, dropout_rate=0, opt='nadam'):
    """Build and compile the two-layer GRU network.

    The input shape (timesteps, features) is read from the module-level
    ``train_value`` array, so that array must exist before this is called.

    Parameters
    ----------
    node : int
        Number of units used by both GRU layers.
    dropout_rate : float
        Rate passed to the Dropout layer that follows each GRU layer.
    opt : str
        Optimizer handed to ``model.compile``.

    Returns
    -------
    A compiled Keras ``Sequential`` model with 7 sigmoid outputs
    (login probability for each of the next 7 days).
    """
    n_timesteps, n_features = train_value.shape[1], train_value.shape[2]

    layers = [
        # First GRU returns the full sequence so the second GRU can consume it
        GRU(units=node, input_shape=(n_timesteps, n_features), return_sequences=True),
        # Dropout disconnects a share of the units to reduce complexity and curb overfitting
        Dropout(dropout_rate),
        GRU(units=node),
        Dropout(dropout_rate),
        # One sigmoid output per day of the coming week
        Dense(7, activation='sigmoid'),
    ]
    model = Sequential(layers)
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])

    return model

In [None]:
# KerasClassifier wraps the Keras model as a scikit-learn classifier, which makes it usable with scikit-learn's automatic hyper-parameter search, GridSearchCV

# build_fn takes a function; that function must return a Keras model
neural_network = KerasClassifier(build_fn=get_model, batch_size=1024,epochs=1,verbose=0)

In [None]:
# Search space: number of GRU units, dropout rate and optimizer

nodes = [64, 256]
dropout_rates = [0.1, 0.2, 0.3, 0.4]
opts = ['nadam', 'adam', 'rmsprop', 'sgd']

# The grid must be a dict; its keys are the argument names of get_model
hyperparameters = {
    'node': nodes,
    'dropout_rate': dropout_rates,
    'opt': opts,
}

In [None]:
# Build the GridSearchCV object
# estimator: the classifier to tune
# param_grid: the parameter ranges defined beforehand

grid = GridSearchCV(estimator=neural_network, param_grid=hyperparameters)

In [None]:
# Run the grid search
grid_result = grid.fit(train_value, train_y, verbose=1) # verbose is set to 1 so the detailed training progress is shown

In [None]:
# Show the best parameter combination and its cross-validation score
print('Best: %f using %s' %(grid_result.best_score_, grid_result.best_params_))

In [None]:
# Build a fresh model with the parameters GridSearchCV selected,
# then train it on train_value / train_y for 60 epochs
model = get_model(**grid_result.best_params_)
model.fit(train_value, train_y , epochs=60,batch_size=512, verbose=1)

In [None]:
# Predict on the same samples the model was trained on.
# NOTE(review): the variable name says "lstm", but `model` is built from GRU layers.
train_y_pred_lstm = model.predict(train_value)

In [None]:

# train_y_pred_lstm is a NumPy array; wrap it in a pandas DataFrame
pred_df = pd.DataFrame(train_y_pred_lstm)

# Round each of the 7 per-day outputs to the nearest integer
day_cols = list(range(7))
pred_df[day_cols] = np.round(pred_df[day_cols])

# Row-wise total of the 7 rounded columns (skipna=False keeps NaN propagation identical to `+`)
pred_df['sum'] = pred_df[day_cols].sum(axis=1, skipna=False)
pred_df.head()


In [None]:

# Actual number of login days in the week: add up the 7 target columns one by one
actual_logins = train_y.iloc[:, 0]
for day in range(1, 7):
    actual_logins = actual_logins + train_y.iloc[:, day]

# Comparison table (account, actual count, predicted count), aligned on train_y's index
result = pd.DataFrame(
    {
        '遊戲帳號': data.account,
        '登入次數實際值': actual_logins,
        '下週登入次數預測': pred_df['sum'],
    },
    index=train_y.index,
)
result.head(20)


In [None]:

y_true = result['登入次數實際值']
y_pred = result['下週登入次數預測']

# RMSE measures how the model performs on the training samples
mse = mean_squared_error(y_true, y_pred)
score = np.sqrt(mse)
score


In [None]:
# Prediction accuracy, computed by comparing y_pred with y_true.
# NOTE: this rebinds `score`, replacing the RMSE value assigned to it earlier.
score = accuracy_score(y_true, y_pred)
score