In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from time import time
import os

from keras import Sequential
from keras.layers import Dense, Dropout, Input
from keras.callbacks import TensorBoard

from tqdm import tqdm
# Suppress every Python warning for the remainder of the session.
warnings.filterwarnings('ignore')

# Print the local devices TensorFlow reports, as a quick environment check.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

# Relative locations of the training and test CSV files used by later cells.
train_path = './data/train_V2.csv'
test_path = './data/test_V2.csv'

# NOTE(review): `_get_available_gpus` is a private helper reached through
# `K.tensorflow_backend`; confirm it exists in the installed Keras version.
from keras import backend as K
K.tensorflow_backend._get_available_gpus()

In [None]:
class get_data:
    """Read a CSV file and turn it into a numeric feature table.

    After construction the instance exposes:

    * ``feature`` / ``x_train`` -- the engineered feature DataFrame
      (identifier columns and a few raw columns removed).
    * ``label`` / ``y_train``   -- the ``winPlacePerc`` column; only set
      when ``is_train`` is true.

    Parameters
    ----------
    path : str
        Location of the CSV file passed to ``pandas.read_csv``.
    is_train : bool, default True
        When true the file must contain a ``winPlacePerc`` column, which is
        split off as the label. When false the whole file is treated as
        features.
    """

    def __init__(self, path, is_train=True):
        self.is_train = is_train
        data = pd.read_csv(path)

        if is_train:
            self.feature = data.drop('winPlacePerc', axis=1)
            self.label = data['winPlacePerc']
        else:
            self.feature = data
        del data

        self.deal_feature()
        self.x_train = self.feature
        if is_train:
            self.y_train = self.label

    def deal_feature(self):
        """Run the feature pipeline: derive columns, clean, drop unused ones.

        ``featuring`` and ``ohencode`` are intentionally not part of this
        pipeline; they are only available for manual use.
        """
        self.add_cols()
        self.fillna()
        self.drop_cols()
        self.drop_ids()

    def add_cols(self):
        """Add derived per-row columns to ``self.feature``.

        Ratios whose denominator is zero produce NaN or +/-inf here; those
        values are neutralised afterwards by ``fillna``.
        """
        f = self.feature

        f['teamPlayers'] = f['groupId'].map(f['groupId'].value_counts())
        f['gamePlayers'] = f['matchId'].map(f['matchId'].value_counts())
        f['enemyPlayers'] = f['gamePlayers'] - f['teamPlayers']
        f['totalDistance'] = f['rideDistance'] + f['swimDistance'] + f['walkDistance']
        f['enemyDamage'] = f['assists'] + f['kills']

        # Sum of kills over each (match, group) pair, broadcast back to every
        # row of the group. `transform` avoids a Python-level lambda per group
        # and the extra join the aggregate-then-join form needs.
        f['squadKills'] = f.groupby(['matchId', 'groupId'])['kills'].transform('sum')

        f['medicKits'] = f['heals'] + f['boosts']
        f['medicPerKill'] = f['medicKits'] / f['enemyDamage']
        f['distancePerHeals'] = f['totalDistance'] / f['heals']
        # NOTE(review): 'headShotKillRatio' and 'headshotKillRate' hold the same
        # values; both are kept so the feature count (model input width) is
        # unchanged.
        f['headShotKillRatio'] = f['headshotKills'] / f['kills']
        f['headshotKillRate'] = f['headshotKills'] / f['kills']
        f['killPlaceOverMaxPlace'] = f['killPlace'] / f['maxPlace']
        f['kills/walkDistance'] = f['kills'] / f['walkDistance']
        f['avgKills'] = f['squadKills'] / f['teamPlayers']
        f['damageRatio'] = f['damageDealt'] / f['enemyDamage']
        f['distTravelledPerGame'] = f['totalDistance'] / f['matchDuration']
        f['killPlacePerc'] = f['killPlace'] / f['gamePlayers']
        f['playerSkill'] = f['headshotKills'] + f['roadKills'] + f['assists'] - (5 * f['teamKills'])
        # NOTE(review): identical to 'killPlaceOverMaxPlace'; kept for the same
        # reason as the duplicated head-shot ratio above.
        f['gamePlacePerc'] = f['killPlace'] / f['maxPlace']

        self.feature = f

    def fillna(self):
        """Replace NaN, +inf and -inf with 0 in the features (and the label).

        Both infinities are handled: a negative numerator divided by zero
        yields -inf, which would otherwise reach the model unchanged.
        """
        non_finite = [np.inf, -np.inf]
        self.feature = self.feature.fillna(0).replace(non_finite, 0)
        if self.is_train:
            self.label = self.label.fillna(0).replace(non_finite, 0)

    def ohencode(self):
        '''
        Collapse ``matchType`` into three classes and append one-hot columns.

        solo  <-- solo,solo-fpp,normal-solo,normal-solo-fpp
        duo   <-- duo,duo-fpp,normal-duo,normal-duo-fpp,crashfpp,crashtpp
        squad <-- squad,squad-fpp,normal-squad,normal-squad-fpp,flarefpp,flaretpp

        The dummy columns are prefixed with ``matchType``; the collapsed
        ``matchType`` column itself is left in place.
        '''
        def mapper(x):
            if 'solo' in x:
                return 'solo'
            if ('duo' in x) or ('crash' in x):
                return 'duo'
            return 'squad'

        self.feature['matchType'] = self.feature['matchType'].apply(mapper)

        dummies = pd.get_dummies(self.feature['matchType'], prefix='matchType')
        self.feature = pd.concat([self.feature, dummies], axis=1)

    def drop_cols(self):
        """Remove raw columns that are not used as model inputs."""
        drop_cols = ['killPoints', 'rankPoints', 'winPoints', 'maxPlace']
        self.feature = self.feature.drop(columns=drop_cols)

    def drop_ids(self):
        """Remove identifier columns and the raw ``matchType`` column."""
        self.feature = self.feature.drop(['Id', 'groupId', 'matchId', 'matchType'], axis=1)

    def featuring(self):
        """Build group-level aggregate features (one row per match/group).

        For every numeric feature the group mean, max and min are computed,
        together with the percentile rank of each of those within the match,
        plus the per-match mean.

        Must be called while ``Id``, ``matchId``, ``groupId`` and
        ``matchType`` are still present in ``self.feature``.

        Returns
        -------
        (df_out, df_id) or (df_out, y, df_id)
            ``df_out`` holds the aggregate features, ``df_id`` the matching
            ``matchId``/``groupId`` pairs. ``y`` (group-mean ``winPlacePerc``
            as a DataFrame) is only returned when that column is present in
            ``self.feature``.
        """
        # Imported locally: the notebook's import cell does not provide `gc`.
        import gc

        keys = ['matchId', 'groupId']
        features = list(self.feature.columns)
        for id_col in ("Id", "matchId", "groupId", "matchType"):
            features.remove(id_col)

        has_label = 'winPlacePerc' in self.feature.columns
        if has_label:
            y = np.array(self.feature.groupby(keys)['winPlacePerc'].agg('mean'), dtype=np.float64)
            features.remove("winPlacePerc")

        grouped = self.feature.groupby(keys)[features]
        df_out = None
        for stat in ('mean', 'max', 'min'):
            # Group-level statistic and its percentile rank inside the match.
            agg = grouped.agg(stat)
            agg_rank = agg.groupby('matchId')[features].rank(pct=True).reset_index()
            if df_out is None:
                df_out = agg.reset_index()[keys]
            # At this point df_out only has suffixed columns, so the raw
            # statistic columns never collide; the next merge then renames
            # raw -> "_<stat>" and rank -> "_<stat>_rank".
            df_out = df_out.merge(agg.reset_index(), suffixes=["", ""], how='left', on=keys)
            df_out = df_out.merge(agg_rank, suffixes=["_" + stat, "_" + stat + "_rank"], how='left', on=keys)

        # Per-match mean of every feature.
        agg = self.feature.groupby(['matchId'])[features].agg('mean').reset_index()
        df_out = df_out.merge(agg, suffixes=["", "_match_mean"], how='left', on=['matchId'])
        df_id = df_out[keys].copy()
        df_out = df_out.drop(keys, axis=1)

        del agg, agg_rank
        gc.collect()
        if has_label:
            return df_out, pd.DataFrame(y), df_id
        return df_out, df_id

    def split_t_v(self):
        """Split features/labels 90/10 into train and validation parts.

        Returns
        -------
        (train_features, train_labels, val_features, val_labels)
        """
        ts_f, vs_f, ts_l, vs_l = train_test_split(
            self.feature,
            self.label,
            test_size=0.1,
            random_state=2
        )
        return ts_f, ts_l, vs_f, vs_l

In [None]:
class TrainValTensorBoard(TensorBoard):
    """TensorBoard callback that writes training and validation metrics to
    sibling run directories so they can be overlaid on the same chart.

    Training metrics go to ``<log_dir>/training`` through the parent
    ``TensorBoard`` callback; metrics whose log key starts with ``val_`` are
    written, with that prefix removed, to ``<log_dir>/validation``.

    Parameters
    ----------
    log_dir : str, default './logs'
        Root directory for both runs.
    **kwargs
        Forwarded unchanged to ``TensorBoard.__init__``.
    """

    VAL_PREFIX = 'val_'

    def __init__(self, log_dir='./logs', **kwargs):
        # Make the original `TensorBoard` log to a subdirectory 'training'
        training_log_dir = os.path.join(log_dir, 'training')
        super(TrainValTensorBoard, self).__init__(training_log_dir, **kwargs)

        # Log the validation metrics to a separate subdirectory
        self.val_log_dir = os.path.join(log_dir, 'validation')
        # Created in `set_model`; kept as None until then so `on_train_end`
        # can be called safely.
        self.val_writer = None

    def set_model(self, model):
        """Create the validation writer, then let the parent set itself up."""
        self.val_writer = tf.summary.FileWriter(self.val_log_dir)
        super(TrainValTensorBoard, self).set_model(model)

    def on_epoch_end(self, epoch, logs=None):
        """Write ``val_*`` metrics to the validation run and pass the rest on.

        The ``val_`` prefix is stripped so that a validation metric gets the
        same tag as its training counterpart.
        """
        logs = logs or {}
        prefix = self.VAL_PREFIX

        # Strip only the leading prefix; `str.replace` would also remove any
        # later 'val_' inside the metric name.
        val_logs = {k[len(prefix):]: v for k, v in logs.items() if k.startswith(prefix)}
        for name, value in val_logs.items():
            summary = tf.Summary()
            summary_value = summary.value.add()
            # `float()` accepts numpy scalars as well as plain Python floats,
            # whereas `.item()` only exists on the former.
            summary_value.simple_value = float(value)
            summary_value.tag = name
            self.val_writer.add_summary(summary, epoch)
        self.val_writer.flush()

        # Pass the remaining logs to `TensorBoard.on_epoch_end`
        train_logs = {k: v for k, v in logs.items() if not k.startswith(prefix)}
        super(TrainValTensorBoard, self).on_epoch_end(epoch, train_logs)

    def on_train_end(self, logs=None):
        """Finish the parent callback and close the validation writer."""
        super(TrainValTensorBoard, self).on_train_end(logs)
        if self.val_writer is not None:
            self.val_writer.close()

In [None]:
class create_model:
    """Build, compile, train and optionally save a dense regression network.

    Parameters
    ----------
    shape : int
        Number of input features (``input_dim`` of the first layer).
    epochs : int, default 100
        Epochs passed to ``model.fit``.
    batch_size : int, default 100000
        Batch size passed to ``model.fit``.
    save_model : bool, default False
        When true, ``train`` calls ``save`` after fitting.
    load_model : bool, default False
        Stored on the instance; this class does not act on it.
    save_model_name, load_model_name : str, default 'test'
        Base file names (without extension) under ``./model/`` used to build
        the ``.json`` / ``.h5`` paths.
    tensorboard : bool, default False
        When true, a ``TrainValTensorBoard`` callback logging to
        ``./tensorboard/<timestamp>`` is created and used during ``train``.
    """

    def __init__(
        self,
        shape,
        epochs=100,
        batch_size=100000,
        save_model=False,
        load_model=False,
        save_model_name='test',
        load_model_name='test',
        tensorboard=False,
    ):
        self.shape = shape
        self.epochs = epochs
        self.batch_size = batch_size

        self.save_model = save_model
        self.load_model = load_model
        self.save_model_json_path = './model/' + save_model_name + '.json'
        self.save_model_HDF5_path = './model/' + save_model_name + '.h5'
        self.load_model_json_path = './model/' + load_model_name + '.json'
        self.load_model_HDF5_path = './model/' + load_model_name + '.h5'
        self.has_tb = tensorboard

        self.model = Sequential()
        self.build_NN()
        self.compile_model()

        # Always define the attribute so `train` can test it safely.
        self.tensorboard = None
        if self.has_tb:
            log_dir = './tensorboard/{}'.format(time())
            self.tensorboard = TrainValTensorBoard(log_dir=log_dir, write_graph=False)

    def build_NN(self):
        """Stack the dense layers: widening to 320 units, then narrowing to a
        single sigmoid output."""
        self.model.add(Dense(80, input_dim=self.shape, activation='selu'))
        self.model.add(Dense(160, activation='selu'))
        self.model.add(Dense(320, activation='selu'))
        self.model.add(Dropout(0.1))
        self.model.add(Dense(160, activation='selu'))
        self.model.add(Dense(80, activation='selu'))
        self.model.add(Dense(40, activation='selu'))
        self.model.add(Dense(20, activation='selu'))
        self.model.add(Dense(1, activation='sigmoid'))

    def compile_model(self):
        """Compile with Adam, mean-squared-error loss and an MAE metric."""
        self.model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    def train(self, x_train, y_train):
        """Fit the model and store the result of ``fit`` in ``self.history``.

        The TensorBoard callback is attached only when the instance was
        created with ``tensorboard=True``. If ``save_model`` is set, the
        model is written to disk afterwards.
        """
        callbacks = [self.tensorboard] if self.has_tb else None
        self.history = self.model.fit(
            x_train,
            y_train,
            epochs=self.epochs,
            batch_size=self.batch_size,
            callbacks=callbacks,
        )
        if self.save_model:
            self.save()

    def save(self):
        """Write the architecture as JSON and the weights as HDF5.

        Target directories are created when missing so that a fresh checkout
        without a ``./model`` folder does not fail after training.
        """
        for path in (self.save_model_json_path, self.save_model_HDF5_path):
            folder = os.path.dirname(path)
            if folder:
                os.makedirs(folder, exist_ok=True)

        model_json = self.model.to_json()
        with open(self.save_model_json_path, 'w') as json_file:
            json_file.write(model_json)
        self.model.save_weights(self.save_model_HDF5_path)
        print("Saving the model...")

In [None]:
# Load and feature-engineer the training CSV, then build a network whose
# input width equals the number of engineered feature columns.
pg_data = get_data(train_path)
pg_model = create_model(shape=pg_data.x_train.shape[1])

In [None]:
# Fit the network on the whole training set; no validation data is passed.
pg_model.train(pg_data.x_train, pg_data.y_train)
# pg_data.x_train.head()

In [None]:
# Run the test CSV through the same feature pipeline (is_train=False, so no
# label column is split off) and predict with the trained network.
pg_test = get_data(test_path, is_train=False)
prediction = pg_model.model.predict(pg_test.x_train)

In [None]:
# Flatten the predictions to 1-D.
# presumably `prediction` is (n_rows, 1) from the single-unit output layer -- TODO confirm
prediction_ravel = prediction.ravel()

In [None]:
# Wrap the flat predictions in a Series named after the target column.
prediction_ser = pd.Series(prediction_ravel, name='winPlacePerc')

In [None]:
# Re-read the raw test CSV: the engineered test features no longer contain
# the 'Id' column needed for the submission file.
submit = pd.read_csv(test_path)

In [None]:
# Attach the predictions; the assignment aligns on index, and both sides use
# the default integer index here.
submit['winPlacePerc'] = prediction_ser

In [None]:
# Keep only the two columns written to the submission file.
act_sub = submit[['Id', 'winPlacePerc']]

In [None]:
# Write the submission to the working directory without the index column.
act_sub.to_csv('sample_submission.csv', index=False)