In [41]:
import numpy as np
import tensorflow as tf
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Raise pandas' display limits: show up to 500 columns, 1000 characters per line.
pd.options.display.max_columns = 500
pd.options.display.width = 1000

# for tensorboard
import os
from tensorflow.python.eager import context

from time import time

from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from sklearn.model_selection import train_test_split

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, BatchNormalization, PReLU, Flatten
from keras.callbacks import LearningRateScheduler, EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from keras.preprocessing.image import ImageDataGenerator
from keras.models import model_from_json
from keras import backend as K

# CSV locations, relative to the notebook's working directory.
train_path, test_path = './data/train_V2.csv', './data/test_V2.csv'

In [42]:
class get_data:
    '''
    Load a CSV and turn it into model-ready features.

    path     : anything `pd.read_csv` accepts.
    is_train : when True the 'winPlacePerc' column is split off as `self.label`
               and a 90/10 train/validation split is stored in
               `self.ts_f`, `self.ts_l`, `self.vs_f`, `self.vs_l`.
               When False only `self.feature` is produced.
    '''

    # Fixed (alphabetical) category list so the one-hot columns are the same
    # for every file, even one in which a match type never occurs.
    MATCH_TYPES = ['duo', 'solo', 'squad']

    def __init__(self, path, is_train=True):
        data = pd.read_csv(path)

        if is_train:
            self.feature = data.drop('winPlacePerc', axis=1)
            self.label = data['winPlacePerc']
        else:
            self.feature = data
        del data

        self.deal_feature()
        if is_train:
            self.ts_f, self.ts_l, self.vs_f, self.vs_l = self.split_t_v()

    def deal_feature(self):
        '''Run the preprocessing steps in order: clean, one-hot encode, drop ids.'''
        self.fillInf(0)
        self.ohencode()
        self.drop_id()

    def fillInf(self, val):
        '''
        Replace +/-inf with NaN, then fill NaN in every numeric column
        (except 'winPlacePerc', if present) with `val`.
        '''
        # np.inf / np.nan are the spellings that exist in every NumPy release;
        # the np.Inf / np.NINF / np.NaN aliases were removed in NumPy 2.0.
        self.feature = self.feature.replace([np.inf, -np.inf], np.nan)

        numcols = self.feature.select_dtypes(include='number').columns
        cols = [c for c in numcols if c != 'winPlacePerc']
        if cols:
            # Assign the result back instead of `frame[c].fillna(..., inplace=True)`:
            # the chained in-place form acts on a temporary and is a no-op
            # under pandas copy-on-write.
            self.feature[cols] = self.feature[cols].fillna(val)

    def ohencode(self):
        '''
        Collapse 'matchType' into three groups and append their one-hot columns.

        solo  <-- solo,solo-fpp,normal-solo,normal-solo-fpp
        duo   <-- duo,duo-fpp,normal-duo,normal-duo-fpp,crashfpp,crashtpp
        squad <-- squad,squad-fpp,normal-squad,normal-squad-fpp,flarefpp,flaretpp
        '''
        def mapper(x):
            if 'solo' in x:
                return 'solo'
            if ('duo' in x) or ('crash' in x):
                return 'duo'
            return 'squad'

        self.feature['matchType'] = self.feature['matchType'].apply(mapper)

        # Encoding a categorical with pinned categories always yields
        # matchType_duo / matchType_solo / matchType_squad, so train and test
        # frames end up with identical columns.
        grouped = self.feature['matchType'].astype('category').cat.set_categories(self.MATCH_TYPES)
        dummies = pd.get_dummies(grouped, prefix='matchType')
        self.feature = pd.concat([self.feature, dummies], axis=1)

    def drop_id(self):
        '''Remove the identifier columns and the already-encoded 'matchType'.'''
        self.feature = self.feature.drop(['Id', 'groupId', 'matchId', 'matchType'], axis=1)

    def split_t_v(self):
        '''
        Split features/labels 90/10 with a fixed seed.

        Returns (train_features, train_labels, val_features, val_labels).
        '''
        # train_test_split returns (X_train, X_test, y_train, y_test);
        # the return statement reorders that into feature/label pairs.
        ts_f, vs_f, ts_l, vs_l = \
        train_test_split(
            self.feature,
            self.label,
            test_size=0.1,
            random_state=2
        )
        return ts_f, ts_l, vs_f, vs_l

In [43]:
class TrainValTensorBoard(TensorBoard):
    '''
    TensorBoard callback that writes training and validation metrics to two
    sibling run directories (`<log_dir>/training` and `<log_dir>/validation`)
    under the same tag names, so both curves can be overlaid on one chart.
    '''

    def __init__(self, log_dir='./logs', **kwargs):
        # Make the original `TensorBoard` log to a subdirectory 'training'
        training_log_dir = os.path.join(log_dir, 'training')
        super(TrainValTensorBoard, self).__init__(training_log_dir, **kwargs)

        # Log the validation metrics to a separate subdirectory
        self.val_log_dir = os.path.join(log_dir, 'validation')

    def set_model(self, model):
        # Setup writer for validation metrics
        self.val_writer = tf.summary.FileWriter(self.val_log_dir)
        super(TrainValTensorBoard, self).set_model(model)

    def on_epoch_end(self, epoch, logs=None):
        '''Write `val_*` entries via the validation writer; forward the rest to the parent.'''
        logs = logs or {}

        # Strip the 'val_' prefix so a validation metric shares its tag with
        # the corresponding training metric.
        val_logs = {k.replace('val_', ''): v for k, v in logs.items() if k.startswith('val_')}
        for name, value in val_logs.items():
            summary = tf.Summary()
            summary_value = summary.value.add()
            # float() accepts NumPy scalars as well as plain Python floats;
            # `.item()` only exists on the former.
            summary_value.simple_value = float(value)
            summary_value.tag = name
            self.val_writer.add_summary(summary, epoch)
        self.val_writer.flush()

        # Pass the remaining logs to `TensorBoard.on_epoch_end`
        train_logs = {k: v for k, v in logs.items() if not k.startswith('val_')}
        super(TrainValTensorBoard, self).on_epoch_end(epoch, train_logs)

    def on_train_end(self, logs=None):
        '''Finish the parent callback, then close the validation writer.'''
        super(TrainValTensorBoard, self).on_train_end(logs)
        self.val_writer.close()

In [44]:
class create_model:
    '''
    Build (or load), compile, train and query a dense Keras network with a
    single sigmoid output, trained with MSE loss and an MAE metric.

    input_shape     : number of input features (passed to the first layer as `input_dim`).
    batch_size      : mini-batch size used by `train`.
    epochs          : maximum number of epochs used by `train`.
    save_model      : when True, `train` calls `save()` after evaluation.
    load_model      : when True, the architecture and weights are read from
                      './model/<load_model_name>.json' / '.h5' instead of being built.
    save_model_name : base file name (no extension) used by `save()`.
    load_model_name : base file name (no extension) used by `load()`.
    tensorboard     : when True, a `TrainValTensorBoard` callback logging to
                      './tensorboard/<timestamp>' is attached during training.
    '''

    def __init__(
        self,
        input_shape,
        batch_size=42,
        epochs=10,
        save_model=False,
        load_model=False,
        save_model_name='default',
        load_model_name='default',
        tensorboard=True,
    ):
        self.input_shape = input_shape
        self.batch_size = batch_size
        self.epochs = epochs

        self.save_model = save_model
        self.load_model = load_model
        self.save_model_json_path = './model/' + save_model_name + '.json'
        self.save_model_HDF5_path = './model/' + save_model_name + '.h5'
        self.load_model_json_path = './model/' + load_model_name + '.json'
        self.load_model_HDF5_path = './model/' + load_model_name + '.h5'
        self.has_tb = tensorboard
        # tensorboard
        if self.has_tb:
            log_dir = './tensorboard/{}'.format(time())
            self.tensorboard = TrainValTensorBoard(log_dir=log_dir, write_graph=False)
        # loading model | create model
        if load_model:
            self.load()
        else:
            self.model = Sequential()
            self.buildDNN()
        # optimizer & model compile
        self.create_optimizer()
        self.model_compile()
        print('create model.')

    def save(self):
        '''Write the architecture as JSON and the weights as HDF5 under the save paths.'''
        # Create the target directory first so a fresh checkout does not fail
        # at the very end of a long training run.
        model_dir = os.path.dirname(self.save_model_json_path)
        if model_dir:
            os.makedirs(model_dir, exist_ok=True)

        with open(self.save_model_json_path, 'w') as json_file:
            json_file.write(self.model.to_json())
        self.model.save_weights(self.save_model_HDF5_path)
        print("Saving the model...")

    def load(self):
        '''
        Restore architecture and weights from the load paths into `self.model`.

        Any failure is reported and re-raised: continuing without `self.model`
        would only fail later with an unrelated AttributeError.
        '''
        try:
            with open(self.load_model_json_path, 'r') as json_file:
                loaded_model_json = json_file.read()

            self.model = model_from_json(loaded_model_json)
            # load weights into new model
            self.model.load_weights(self.load_model_HDF5_path)
            print("Loaded model...")
        except Exception:
            print("Loading error!")
            raise

    def buildDNN(self):
        '''Append the 512 -> 256 -> 128 -> 1 dense stack to `self.model`.'''
        self.model.add(Dense(512, kernel_initializer='he_normal', input_dim=self.input_shape, activation='relu'))
        self.model.add(BatchNormalization())
        self.model.add(Dropout(0.2))

        self.model.add(Dense(256, kernel_initializer='he_normal'))
        self.model.add(PReLU(alpha_initializer='zeros', alpha_regularizer=None, alpha_constraint=None, shared_axes=None))
        self.model.add(BatchNormalization())
        self.model.add(Dropout(0.2))

        self.model.add(Dense(128, kernel_initializer='he_normal'))
        self.model.add(PReLU(alpha_initializer='zeros', alpha_regularizer=None, alpha_constraint=None, shared_axes=None))
        self.model.add(BatchNormalization())
        self.model.add(Dropout(0.1))

        # Sigmoid keeps the single output inside (0, 1).
        self.model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))

    def model_compile(self):
        '''Compile `self.model` with the optimizer from `create_optimizer`, MSE loss and MAE metric.'''
        self.model.compile(
            optimizer=self.adam,
            loss='mse',
            metrics=['mae']
        )

    def create_optimizer(self):
        '''Create the Adam optimizer used by `model_compile`.'''
        # NOTE(review): the LearningRateScheduler built in `create_callbacks`
        # sets the rate each epoch (starting at 0.001), so this 0.005 looks
        # like it is superseded during `train` -- confirm which is intended.
        self.adam = keras.optimizers.Adam(lr=0.005)

    def train(self, ts_f, ts_l, vs_f, vs_l):
        '''
        Fit on (ts_f, ts_l), then evaluate on (vs_f, vs_l).

        During fitting the last 20% of (ts_f, ts_l) is held out by Keras for
        validation (`validation_split=0.2`); (vs_f, vs_l) is only used for the
        final `evaluate` call. Stores `self.history` and `self.result`,
        optionally saves the model, and shows the training curves.
        '''
        self.create_callbacks()

        cbs = [self.lr_sched, self.early_stopping]
        if self.has_tb:
            cbs.append(self.tensorboard)

        print('start training...')
        self.history = self.model.fit(
            ts_f,
            ts_l,
            epochs=self.epochs,
            batch_size=self.batch_size,
            validation_split=0.2,
            callbacks=cbs,
        )

        self.result = self.model.evaluate(x=vs_f, y=vs_l)
        # `evaluate` returns [loss, mae] for this compile configuration; the
        # second entry is an error (lower is better), not an accuracy.
        print('Mean absolute error:', self.result[1])
        if self.save_model:
            self.save()
        self.print_plot()

    def create_callbacks(self):
        '''Create the learning-rate schedule and early-stopping callbacks used by `train`.'''
        self.lr_sched = self.step_decay_schedule(initial_lr=0.001, decay_factor=0.97, step_size=1, verbose=1)
        self.early_stopping = EarlyStopping(monitor='val_mean_absolute_error', mode='min', patience=10, verbose=1)

    def step_decay_schedule(self, initial_lr=1e-3, decay_factor=0.75, step_size=10, verbose=0):
        ''' Wrapper function to create a LearningRateScheduler with step decay schedule. '''
        def schedule(epoch):
            # Multiply by `decay_factor` once every `step_size` epochs.
            return initial_lr * (decay_factor ** np.floor(epoch/step_size))

        return LearningRateScheduler(schedule, verbose)

    def print_plot(self):
        '''Show the loss curve followed by the MAE curve.'''
        self.print_loss()
        self.print_acc()

    def print_loss(self):
        '''Plot training loss and, when recorded, validation loss per epoch.'''
        hist = self.history.history
        plt.plot(hist['loss'])
        labels = ['Train']
        # The legend used to promise a second curve that was never drawn.
        if 'val_loss' in hist:
            plt.plot(hist['val_loss'])
            labels.append('Test')
        plt.title("Model Loss")
        plt.xlabel('Epochs')
        plt.ylabel('Loss')
        plt.legend(labels)
        plt.show()

    def print_acc(self):
        '''Plot training and validation mean absolute error per epoch.'''
        plt.plot(self.history.history['mean_absolute_error'])
        plt.plot(self.history.history['val_mean_absolute_error'])
        plt.title('Mean Absolute Error')
        plt.xlabel('Epochs')
        plt.ylabel('Mean absolute error')
        plt.legend(['Train','Test'])
        plt.show()

    def predict(self, test):
        '''
        Run the model on `test` and store the result in `self.answer`, a
        DataFrame with a 1-based 'ImageId' column and a 'Label' column.

        For a single-output model 'Label' holds the predicted value itself;
        for a multi-column output it holds the arg-max column index.
        '''
        raw = np.asarray(self.model.predict(test))
        if raw.ndim == 2 and raw.shape[1] > 1:
            labels = np.argmax(raw, axis=1)
        else:
            # arg-max over a single column is always 0, which would discard
            # every prediction of the one-unit regression head.
            labels = raw.reshape(-1)

        self.answer = pd.DataFrame({
            'ImageId': np.arange(1, len(labels) + 1),
            'Label': labels
        })

    def save_pred(self, path):
        '''Write the predictions stored by `predict` to `path` as CSV without the index.'''
        self.answer.to_csv(path, index=False)

In [45]:
# Load and preprocess the training CSV, then report the feature count and split shapes.
pubg = get_data(train_path)
n_features = pubg.ts_f.shape[1]
print(n_features)
for split in (pubg.ts_f, pubg.ts_l, pubg.vs_f, pubg.vs_l):
    print(split.shape)

# Build a fresh network sized to the feature count. Other keyword options keep
# their defaults: batch_size, epochs, save_model, load_model, save_model_name,
# load_model_name, tensorboard.
model = create_model(input_shape=n_features)

# Error when checking target: expected dense_12 to have shape (1,) but got array with shape (28,)

27
(4002269, 27)
(4002269,)
(444697, 27)
(444697,)
create model.


In [46]:
# Fit on the training split, then evaluate on the held-out validation split.
model.train(pubg.ts_f, pubg.ts_l, pubg.vs_f, pubg.vs_l)


start training...
Train on 3201815 samples, validate on 800454 samples
Epoch 1/10

Epoch 00001: LearningRateScheduler setting learning rate to 0.001.
 245280/3201815 [=>............................] - ETA: 7:50 - loss: 0.0176 - mean_absolute_error: 0.0986

KeyboardInterrupt: 