In [None]:
import pandas as pd
import numpy as np

from ydata_profiling import ProfileReport

from sklearn.model_selection import train_test_split

from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OrdinalEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer

from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import GridSearchCV

from sklearn.metrics import classification_report

data = pd.read_csv("csgo.csv")
data

# print(data.info())
# profile = ProfileReport(data, title="Diabetes Report", explorative=True)
# profile.to_file("report.html")

Unnamed: 0,map,day,month,year,date,wait_time_s,match_time_s,team_a_rounds,team_b_rounds,ping,kills,assists,deaths,mvps,hs_percent,points,result
0,Mirage,3,8,2018,03/08/2018,327,2906,16,13,215,17,2,21,2,5,45,Win
1,Mirage,2,8,2018,02/08/2018,336,2592,16,11,199,13,4,24,2,0,40,Lost
2,Mirage,31,7,2018,31/07/2018,414,2731,16,14,85,15,3,18,3,26,37,Win
3,Mirage,31,7,2018,31/07/2018,317,2379,11,16,93,12,2,15,2,16,30,Lost
4,Mirage,30,7,2018,30/07/2018,340,3467,15,15,94,33,5,20,5,30,83,Tie
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1128,Dust II,23,7,2015,23/07/2015,2,1573,16,4,46,12,2,20,0,16,26,Lost
1129,Dust II,23,7,2015,23/07/2015,29,2126,16,8,41,19,6,21,2,31,51,Lost
1130,Dust II,23,7,2015,23/07/2015,10,2555,11,16,17,9,3,20,2,55,30,Lost
1131,Dust II,23,7,2015,23/07/2015,9,2293,8,16,20,11,4,20,1,27,31,Lost


In [15]:
# 1) Drop team_a_rounds, team_b_rounds columns
data = data.drop(columns=['team_a_rounds', 'team_b_rounds'])

# 2) Convert result column into numerical labels
data['result'] = data['result'].map({'Win': 1, 'Lost': 0, 'Tie': 2})

# 3) Convert date column into timestamp
data['date'] = pd.to_datetime(data['date'], format='%d/%m/%Y').astype(np.int64) // 10 ** 9
data

Unnamed: 0,map,day,month,year,date,wait_time_s,match_time_s,ping,kills,assists,deaths,mvps,hs_percent,points,result
0,Mirage,3,8,2018,1533254400,327,2906,215,17,2,21,2,5,45,1
1,Mirage,2,8,2018,1533168000,336,2592,199,13,4,24,2,0,40,0
2,Mirage,31,7,2018,1532995200,414,2731,85,15,3,18,3,26,37,1
3,Mirage,31,7,2018,1532995200,317,2379,93,12,2,15,2,16,30,0
4,Mirage,30,7,2018,1532908800,340,3467,94,33,5,20,5,30,83,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1128,Dust II,23,7,2015,1437609600,2,1573,46,12,2,20,0,16,26,0
1129,Dust II,23,7,2015,1437609600,29,2126,41,19,6,21,2,31,51,0
1130,Dust II,23,7,2015,1437609600,10,2555,17,9,3,20,2,55,30,0
1131,Dust II,23,7,2015,1437609600,9,2293,20,11,4,20,1,27,31,0


In [None]:
# 4) Separate features and target columns
target = 'result'
x = data.drop(labels=target, axis=1)
y = data[target]

# 5) Split the data
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
print(x_train.shape, x_test.shape)

In [None]:
# 6) Data preprocessing
pre_processor = ColumnTransformer(transformers=[
    ('num_features', StandardScaler(), ['date', 'wait_time_s', 'match_time_s', 'ping', 'kills', 'assists', 'deaths', 'mvps', 'hs_percent', 'points']),
    ('nom_features', OneHotEncoder(handle_unknown='ignore'), ['map']),
    ('ord_features', OrdinalEncoder(), ['day', 'month', 'year'])
])

In [None]:
# 7) Initialize model
model = Pipeline(steps=[
    ('pre_processor', pre_processor),
    ('classifier', RandomForestClassifier(random_state=100))
])

# 7) Train model
# model.fit(x_train, y_train)

param_grid = {
    "classifier__n_estimators": [50, 100, 200],
    "classifier__criterion": ["gini", "entropy", "log_loss"],
    "classifier__max_depth": [None, 2, 5],
}
model_gr = GridSearchCV(estimator=model, param_grid=param_grid, scoring="f1_micro", cv=6, verbose=2, n_jobs=-1)
model_gr.fit(x_train, y_train)

print("Best score: {}".format(model_gr.best_score_))
print("Best params: {}".format(model_gr.best_params_))

In [None]:
# 8) Test model
# y_predict = model.predict(x_test)
y_predict = model_gr.predict(x_test)
for i, j in zip(y_predict, y_test):
    print('Predicted value: {}. Actual value: {}'.format(i, j))

print(classification_report(y_test, y_predict))

Fitting 6 folds for each of 3 candidates, totalling 18 fits
Best score: 0.7439293598233996
Best params: {'classifier__n_estimators': 200}
Predicted value: 1. Actual value: 0
Predicted value: 1. Actual value: 1
Predicted value: 1. Actual value: 1
Predicted value: 0. Actual value: 0
Predicted value: 0. Actual value: 2
Predicted value: 0. Actual value: 0
Predicted value: 0. Actual value: 1
Predicted value: 1. Actual value: 1
Predicted value: 0. Actual value: 0
Predicted value: 1. Actual value: 1
Predicted value: 0. Actual value: 0
Predicted value: 1. Actual value: 1
Predicted value: 1. Actual value: 1
Predicted value: 0. Actual value: 2
Predicted value: 1. Actual value: 1
Predicted value: 0. Actual value: 0
Predicted value: 0. Actual value: 0
Predicted value: 0. Actual value: 2
Predicted value: 1. Actual value: 1
Predicted value: 2. Actual value: 0
Predicted value: 1. Actual value: 0
Predicted value: 0. Actual value: 2
Predicted value: 0. Actual value: 2
Predicted value: 1. Actual value: 