# Training Machine Learning Models

In [33]:
from pycaret.classification import *
from sklearn.model_selection import train_test_split
import pandas as pd
import json

In [34]:
df = pd.read_parquet('datasets/teams/battlelog_train.parquet')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 129190 entries, 0 to 129189
Columns: 270 entries, battleTime to battle_power_diff
dtypes: Float64(19), category(32), datetime64[ns](7), float32(94), float64(36), int16(1), int64(25), int8(5), object(51)
memory usage: 189.5+ MB


In [35]:
# subset columns by feature importance

df = df[[
	'event_mode',
	'event_map',
	'battle_team1_player1_brawler_name',
	'battle_team1_player2_brawler_name',
	'battle_team1_player3_brawler_name',
	'battle_team2_player1_brawler_name',
	'battle_team2_player2_brawler_name',
	'battle_team2_player3_brawler_name',
	'avg_brawler_trophies_diff',
	# 'avg_highestTrophies_diff',
	# 'avg_trophies_diff',
	# 'avg_team_victories_diff',
	# 'avg_expPoints_diff',
	'max_brawler_trophies_diff',
	# 'max_highestTrophies_diff',
	# 'max_trophies_diff',
	# 'max_team_victories_diff',
	# 'max_expPoints_diff',
	'min_brawler_trophies_diff',
	# 'min_highestTrophies_diff',
	# 'min_trophies_diff',
	# 'min_team_victories_diff',
	# 'min_expPoints_diff',
	'battle_power_diff',
	'winner_team',                        
]]

In [36]:
# subset by gamemode
event_mode = 'gemGrab'
df = df[df['event_mode'] == event_mode]
df = df.drop(columns=['event_mode'])

In [37]:
def split_data(data, test_size, random_state):
	"""Funcion para dividir el dataset en train y test"""
	train, test = train_test_split(
			data,
			test_size=test_size,
			random_state=random_state
			)
		
	print('train: ', train.shape)
	print('test: ', test.shape)

	return train, test

seed=14697

train, test = split_data(df, test_size = 0.25, random_state=seed)

train:  (7656, 12)
test:  (2553, 12)


In [38]:
# setup model
session_1 = setup(
	data = train,
	target = 'winner_team',
	# fix_imbalance = True,
    # feature_selection= True,
	# remove_outliers=True,
	log_experiment = True,
    use_gpu=False,
    max_encoding_ohe=500,
    )

Unnamed: 0,Description,Value
0,Session id,1471
1,Target,winner_team
2,Target type,Binary
3,Target mapping,"1: 0, 2: 1"
4,Original data shape,"(7656, 12)"
5,Transformed data shape,"(7656, 419)"
6,Transformed train set shape,"(5359, 419)"
7,Transformed test set shape,"(2297, 419)"
8,Numeric features,4
9,Categorical features,7


In [39]:
#comparación de modelos
# model = compare_models()

In [40]:
model = create_model('lightgbm')

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.681,0.7438,0.7117,0.6794,0.6952,0.3609,0.3614
1,0.6772,0.7471,0.6934,0.681,0.6872,0.3539,0.3539
2,0.6437,0.713,0.6545,0.6522,0.6534,0.2868,0.2868
3,0.6586,0.7112,0.6764,0.6643,0.6703,0.3164,0.3164
4,0.6586,0.7236,0.6509,0.6729,0.6617,0.3173,0.3175
5,0.6474,0.7158,0.6982,0.6443,0.6702,0.2927,0.2938
6,0.6604,0.7283,0.6655,0.6703,0.6679,0.3206,0.3206
7,0.6343,0.71,0.64,0.6447,0.6423,0.2683,0.2683
8,0.6511,0.712,0.6727,0.656,0.6643,0.3013,0.3014
9,0.6579,0.7223,0.6861,0.6596,0.6726,0.3148,0.3151


In [41]:
# tunear el mejor modelo
model_tuned = tune_model(model, optimize = 'F1', choose_better=True)

Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.6698,0.7303,0.8285,0.6359,0.7195,0.3346,0.3522
1,0.625,0.7092,0.7847,0.6022,0.6815,0.2444,0.2572
2,0.6325,0.6947,0.7818,0.6108,0.6858,0.2589,0.2705
3,0.6213,0.6897,0.7927,0.5989,0.6823,0.2354,0.2498
4,0.6679,0.7168,0.7745,0.6474,0.7053,0.3318,0.3389
5,0.6791,0.721,0.8545,0.6403,0.7321,0.352,0.3752
6,0.6119,0.6854,0.7418,0.5982,0.6623,0.2183,0.2254
7,0.6381,0.7141,0.8218,0.6092,0.6997,0.2688,0.2883
8,0.6381,0.6925,0.7818,0.616,0.6891,0.2704,0.2815
9,0.643,0.7228,0.792,0.6182,0.6944,0.2805,0.2931


Processing:   0%|          | 0/7 [00:00<?, ?it/s]

Fitting 10 folds for each of 10 candidates, totalling 100 fits


In [42]:
# finalizar modelo
model_finalized = finalize_model(model_tuned)

In [43]:
save_model(model_finalized, 'models/bs_predictor_' + event_mode)

Transformation Pipeline and Model Successfully Saved


(Pipeline(memory=FastMemory(location=C:\Users\alniquia\AppData\Local\Temp\joblib),
          steps=[('label_encoding',
                  TransformerWrapperWithInverse(exclude=None, include=None,
                                                transformer=LabelEncoder())),
                 ('numerical_imputer',
                  TransformerWrapper(exclude=None,
                                     include=['avg_brawler_trophies_diff',
                                              'max_brawler_trophies_diff',
                                              'min_brawler_trophies_diff',
                                              'battle_power...
                                 colsample_bytree=1.0, feature_fraction=0.6,
                                 importance_type='split', learning_rate=0.001,
                                 max_depth=-1, min_child_samples=51,
                                 min_child_weight=0.001, min_split_gain=0.4,
                                 n_estimators=2

In [44]:
predictions = predict_model(model_finalized, data = test)
predictions['winner_team'] = predictions['winner_team'] + 1
predictions.head()

Unnamed: 0,event_map,battle_team1_player1_brawler_name,battle_team1_player2_brawler_name,battle_team1_player3_brawler_name,battle_team2_player1_brawler_name,battle_team2_player2_brawler_name,battle_team2_player3_brawler_name,avg_brawler_trophies_diff,max_brawler_trophies_diff,min_brawler_trophies_diff,battle_power_diff,winner_team,prediction_label,prediction_score
113532,Gem Fort,EL PRIMO,OTIS,JACKY,BEA,LEON,BIBI,0.005745,0.007362,0.004938,0.0,1,2,0.5497
111206,Gem Fort,LEON,RUFFS,ROSA,MORTIS,SHELLY,FRANK,-0.016939,-0.025171,-0.003953,0.0,1,1,0.5343
105191,Crystal Arcade,ROSA,MEG,GALE,FANG,MAX,SHELLY,-0.001103,-0.003306,0.0,0.0,1,1,0.5066
107511,Gem Fort,PAM,MORTIS,BELLE,EL PRIMO,DARRYL,FRANK,-0.018458,-0.035298,-0.007952,0.0,1,1,0.5346
106070,Gem Fort,EL PRIMO,SHELLY,MAISIE,SPIKE,8-BIT,MORTIS,0.004124,0.00738,0.002484,0.0,1,2,0.5326


In [45]:
def metrics_capturing(df):
	"""Captures the metrics of a classification model"""
	from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix

	accuracy = accuracy_score(df['winner_team'], df['prediction_label'])
	f1 = f1_score(df['winner_team'], df['prediction_label'])
	precision = precision_score(df['winner_team'], df['prediction_label'])
	recall = recall_score(df['winner_team'], df['prediction_label'])
	# report = classification_report(df['winner_team'], df['prediction_label'])
	# matrix = confusion_matrix(df['winner_team'], df['prediction_label'])

	# return accuracy, f1, precision, recall, report, matrix
	return accuracy, f1, precision, recall

accuracy, f1, precision, recall = metrics_capturing(predictions)

# save metrics
metrics = {
		'accuracy': accuracy,
		'f1': f1,
		'precision': precision,
		'recall': recall,
}

print(metrics)

with open('resources/bs_metrics.json', 'r') as f:
	data = json.load(f)
	data[event_mode] = metrics

with open('resources/bs_metrics.json', 'w') as f:
	json.dump(data, f, indent=4)

{'accuracy': 0.654524089306698, 'f1': 0.6027027027027027, 'precision': 0.7248104008667389, 'recall': 0.5158057054741712}


In [46]:
# get feature importance
feature_importance = pd.DataFrame({
		'feature': model_finalized.booster_.feature_name(),
		'importance': model_finalized.feature_importances_
		})

feature_importance.sort_values(by='importance', ascending=False, inplace=True)

feature_importance.to_json('resources/bs_feature_importance.json', orient='records')

feature_importance

Unnamed: 0,feature,importance
415,max_brawler_trophies_diff,1693
414,avg_brawler_trophies_diff,1298
416,min_brawler_trophies_diff,1259
361,battle_team2_player3_brawler_name_SHELLY,296
24,battle_team1_player1_brawler_name_SHELLY,260
...,...,...
151,battle_team1_player3_brawler_name_FANG,0
149,battle_team1_player3_brawler_name_GRIFF,0
148,battle_team1_player3_brawler_name_LEON,0
147,battle_team1_player3_brawler_name_GENE,0
