In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn import metrics
from lightgbm import LGBMRegressor

In [None]:
# Read the training CSV from the relative input directory into a DataFrame
# and preview its first rows.
train = pd.read_csv('../input/train_V2.csv')
train.head()

In [None]:
# Read the test CSV from the relative input directory into a DataFrame
# and preview its first rows.
test1 = pd.read_csv('../input/test_V2.csv')
test1.head()

In [None]:
# Show the column labels of the training frame as loaded.
train.columns

In [None]:
# Head-count features, broadcast back onto every row with transform():
#   team_size      - number of rows sharing the row's groupId
#   max_team_size  - largest team_size found within the row's matchId
#   match_size     - number of distinct Id values within the row's matchId
train['team_size'] = train.groupby('groupId')['groupId'].transform('count')
train['max_team_size'] = train.groupby('matchId')['team_size'].transform('max')
train['match_size'] = train.groupby('matchId')['Id'].transform('nunique')

In [None]:
# Engineered features for the training frame.
#
# Three families of columns are added:
#   * per-row totals and ratios (distance, items, kills per distance, ...)
#   * per-team aggregates, grouped by groupId
#   * per-match aggregates, grouped by matchId
#
# Every grouped aggregate uses transform() so the result has one value per
# row and aligns with the frame's own index.
#
# The ratio columns divide by quantities that can be zero. 0/0 produces NaN,
# which the final fillna(0) replaces. A non-zero numerator over zero produces
# +/-inf, which fillna does NOT touch and is therefore left in the features.
train['opponents'] = train.match_size - train.team_size
train['total_dist'] = train.rideDistance + train.swimDistance + train.walkDistance
train['total_items'] = train.boosts + train.heals + train.weaponsAcquired

# Per-row ratios.
train['items_per_dist'] = train.total_items / train.total_dist
train['kills_per_dist'] = train.kills / train.total_dist
train['knocked_per_dist'] = train.DBNOs / train.total_dist
train['damage_per_dist'] = train.damageDealt / train.total_dist
train['headshot_kill_pct'] = train.headshotKills / train.kills

# Per-team aggregates.
train['max_kills_by_team'] = train.groupby('groupId').kills.transform('max')
train['damageDealt_team'] = train.groupby('groupId').damageDealt.transform('sum')
train['kills_team'] = train.groupby('groupId').kills.transform('sum')
train['items_team'] = train.groupby('groupId').total_items.transform('sum')

# Shares relative to the number of opponents in the match.
train['pct_killed'] = train.kills / train.opponents
train['pct_knocked'] = train.DBNOs / train.opponents
train['pct_team_kills'] = train.kills_team / train.opponents

train['team_kill_pts'] = train.groupby('groupId').killPoints.transform('sum')
train['team_kill_rank'] = train.groupby('groupId').killPlace.transform('mean')

# Per-match aggregates.
train['max_kills_match'] = train.groupby('matchId').kills.transform('max')
train['total_kills_match'] = train.groupby('matchId').kills.transform('sum')
# transform('sum') rather than .sum(): a plain .sum() returns a Series indexed
# by matchId, which cannot align with the row index, so the column would be
# entirely NaN and then zero-filled below.
train['total_dist_match'] = train.groupby('matchId').total_dist.transform('sum')

# Replace every NaN (from 0/0 ratios and any missing values in the source
# data) with 0.
train.fillna(0, inplace=True)

In [None]:
# Show the training frame's column labels again, now that the engineered
# feature columns have been added.
train.columns

In [None]:
# Engineered features for the test frame. These must mirror the columns
# built for the training frame so the fitted models see the same schema.
#
# Every grouped aggregate uses transform() so the result has one value per
# row and aligns with the frame's own index.
#
# The ratio columns divide by quantities that can be zero. 0/0 produces NaN,
# which the final fillna(0) replaces. A non-zero numerator over zero produces
# +/-inf, which fillna does NOT touch and is therefore left in the features.

# Head counts: rows per groupId, largest team in the match, distinct Ids per match.
test1 = test1.assign(team_size=test1.groupby('groupId').groupId.transform('count'))
test1 = test1.assign(max_team_size=test1.groupby('matchId').team_size.transform('max'))
test1 = test1.assign(match_size=test1.groupby('matchId').Id.transform('nunique'))

test1['opponents'] = test1.match_size - test1.team_size
test1['total_dist'] = test1.rideDistance + test1.swimDistance + test1.walkDistance
test1['total_items'] = test1.boosts + test1.heals + test1.weaponsAcquired

# Per-row ratios.
test1['items_per_dist'] = test1.total_items / test1.total_dist
test1['kills_per_dist'] = test1.kills / test1.total_dist
test1['knocked_per_dist'] = test1.DBNOs / test1.total_dist
test1['damage_per_dist'] = test1.damageDealt / test1.total_dist
test1['headshot_kill_pct'] = test1.headshotKills / test1.kills

# Per-team aggregates.
test1['max_kills_by_team'] = test1.groupby('groupId').kills.transform('max')
test1['damageDealt_team'] = test1.groupby('groupId').damageDealt.transform('sum')
test1['kills_team'] = test1.groupby('groupId').kills.transform('sum')
test1['items_team'] = test1.groupby('groupId').total_items.transform('sum')

# Shares relative to the number of opponents in the match.
test1['pct_killed'] = test1.kills / test1.opponents
test1['pct_knocked'] = test1.DBNOs / test1.opponents
test1['pct_team_kills'] = test1.kills_team / test1.opponents

test1['team_kill_pts'] = test1.groupby('groupId').killPoints.transform('sum')
test1['team_kill_rank'] = test1.groupby('groupId').killPlace.transform('mean')

# Per-match aggregates.
test1['max_kills_match'] = test1.groupby('matchId').kills.transform('max')
test1['total_kills_match'] = test1.groupby('matchId').kills.transform('sum')
# transform('sum') rather than .sum(): a plain .sum() returns a Series indexed
# by matchId, which cannot align with the row index, so the column would be
# entirely NaN and then zero-filled below.
test1['total_dist_match'] = test1.groupby('matchId').total_dist.transform('sum')

# Replace every NaN (from 0/0 ratios and any missing values in the source
# data) with 0.
test1.fillna(0, inplace=True)

In [None]:
# Count how many training rows carry each matchType value (most frequent first).
train['matchType'].value_counts()

In [None]:
# Distinct matchType values ordered from most to least frequent in the
# training frame; the position in this list is used later to pick a model
# objective per match type.
match_type_counts = train['matchType'].value_counts()
matchTypes = list(match_type_counts.index)
matchTypes

In [None]:
import warnings
# Blanket-silence all warnings from here on.
warnings.filterwarnings('ignore')

# Train one LightGBM regressor per matchType, report its hold-out R^2, and
# predict winPlacePerc for the test rows of the same matchType. The per-type
# test frames (with their predictions attached) are collected and stitched
# together into `test2` after the loop.

# Positions in `matchTypes` (i.e. frequency rank) whose model uses the 'mae'
# objective; every position >= 13 uses it as well. All remaining positions
# use 'poisson'. NOTE(review): these are hand-picked indices that depend on
# the value_counts ordering of the training data.
mae_positions = {0, 3, 9, 10, 11}

# Identifier / categorical columns that are not fed to the model.
non_feature_cols = ['Id', 'groupId', 'matchId', 'matchType']

test_parts = []

for i, match_type in enumerate(matchTypes):
    train_matchType = train[train['matchType'] == match_type]
    train_MT_x = train_matchType.drop(non_feature_cols + ['winPlacePerc'], axis=1)
    train_MT_y = train_matchType['winPlacePerc']

    # Fixed-seed 67/33 split; the held-out third doubles as the early-stopping
    # evaluation set and the R^2 report set.
    X_train, X_test, y_train, y_test = train_test_split(
        train_MT_x, train_MT_y, test_size=0.33, random_state=57)

    objective = 'mae' if (i in mae_positions or i >= 13) else 'poisson'
    lgbm = LGBMRegressor(objective=objective, n_estimators=300,
                         learning_rate=0.1, num_leaves=350,
                         n_jobs=-1, random_state=40, verbose=0)
    # NOTE(review): `early_stopping_rounds` / `verbose` as fit() keywords are
    # the older LightGBM API; newer releases expect callbacks instead. Left
    # as-is to match whatever version this notebook was written against.
    lgbm.fit(X_train, y_train, eval_set=[(X_test, y_test)],
             eval_metric='mae', early_stopping_rounds=10, verbose=0)
    predictions = lgbm.predict(X_test, num_iteration=lgbm.best_iteration_)

    # Predictions are clipped to [0, 1] before scoring.
    clip_preds = predictions.clip(0, 1)
    r2_score = metrics.r2_score(y_test, clip_preds)
    print(r2_score)

    # .copy() so the prediction column is written to an independent frame
    # rather than to a filtered view of test1 (chained assignment).
    test_matchType = test1[test1['matchType'] == match_type].copy()
    if test_matchType.empty:
        # Nothing to predict for this match type in the test set.
        continue
    test_MT_x = test_matchType.drop(non_feature_cols, axis=1)

    test_predictions = lgbm.predict(test_MT_x, num_iteration=lgbm.best_iteration_)

    test_matchType['winPlacePerc'] = test_predictions.clip(0, 1)
    print(test_matchType.head())

    test_parts.append(test_matchType)

# Single concat instead of DataFrame.append inside the loop: append re-copies
# the accumulated frame on every iteration and no longer exists in pandas 2.x.
# Original row index labels are preserved.
test2 = pd.concat(test_parts)

In [None]:
# Preview the combined per-match-type test predictions.
test2.head()

In [None]:
# test2 was assembled one matchType at a time, so its rows are grouped by
# match type; sorting by index label reorders them by their index labels.
test3 = test2.sort_index()
test3.head()

In [None]:
# Override the model for rows with 0, 1 or 2 opponents: their predicted
# winPlacePerc is forced to 0.
few_opponents = test3.opponents.isin([0, 1, 2])
test3.loc[few_opponents, 'winPlacePerc'] = 0

# Sanity check: show the distribution of winPlacePerc for the rows that
# have exactly two opponents.
test3dum = test3.loc[test3.opponents == 2]
test3dum.winPlacePerc.value_counts()

In [None]:
# Build the two-column submission frame. The Id column comes from test1 and
# the prediction column from test3; the two Series are matched up by index
# label. The result is written to submission.csv without the index.
submission_data = {
    'Id': test1['Id'],
    'winPlacePerc': test3['winPlacePerc'],
}
submission = pd.DataFrame(submission_data, columns=['Id', 'winPlacePerc'])
submission.to_csv('submission.csv', index=False)