In [37]:
import pandas as pd
from ydata_profiling import ProfileReport
import numpy as np


In [38]:
# Read the match log from the Excel workbook, then preview the first rows.
df = pd.read_excel(io="csgo.xlsx")
df.head()

Unnamed: 0,map,day,month,year,date,wait_time_s,match_time_s,team_a_rounds,team_b_rounds,ping,kills,assists,deaths,mvps,hs_percent,points,result
0,Mirage,3,8,2018,3/8/2018,327,2906,16,13,215,17,2,21,2,5,45,Win
1,Mirage,2,8,2018,2/8/2018,336,2592,16,11,199,13,4,24,2,0,40,Lost
2,Mirage,31,7,2018,31/7/2018,414,2731,16,14,85,15,3,18,3,26,37,Win
3,Mirage,31,7,2018,31/7/2018,317,2379,11,16,93,12,2,15,2,16,30,Lost
4,Mirage,30,7,2018,30/7/2018,340,3467,15,15,94,33,5,20,5,30,83,Tie


In [39]:
# Optional (currently disabled): build an exploratory profiling report of df and save it as HTML.
# profile = ProfileReport(df, title = "Csgo_classification", explorative= True)
# profile.to_file('CSGO.html')


In [40]:
# Features: everything except the target and the date / timing / round-count columns.
x = df.drop(
    columns=[
        'day', 'month', 'year', 'date',
        'match_time_s', 'team_a_rounds', 'team_b_rounds', 'wait_time_s',
        'result',
    ]
)
# Target: the match outcome label.
y = df['result']
print(f"X: \n{x.head()}")
print(f"Y:\n {y.head()}")


X: 
      map  ping  kills  assists  deaths  mvps  hs_percent  points
0  Mirage   215     17        2      21     2           5      45
1  Mirage   199     13        4      24     2           0      40
2  Mirage    85     15        3      18     3          26      37
3  Mirage    93     12        2      15     2          16      30
4  Mirage    94     33        5      20     5          30      83
Y:
 0     Win
1    Lost
2     Win
3    Lost
4     Tie
Name: result, dtype: object


In [41]:
from sklearn.model_selection import train_test_split
# SimpleImputer: fills in null (missing) values
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Hold out 20% of the rows as a test set; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)


In [42]:
from lazypredict.Supervised import LazyClassifier

# Quick benchmark: fit the LazyClassifier model zoo on the train split and
# score it on the test split, then display the resulting comparison table.
clf = LazyClassifier(
    verbose=0,
    ignore_warnings=True,
    custom_metric=None,
)
models, predictions = clf.fit(x_train, x_test, y_train, y_test)
models

In [43]:
# Pipeline for numeric columns: median-impute missing values, then standardize.
from sklearn.pipeline import Pipeline
num_transform = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ]
)
# Pipeline for nominal (categorical) columns: fill missing values with the most
# frequent category, then one-hot encode into a dense array.
nominal_transform = Pipeline(steps = [
    ('imputer', SimpleImputer(strategy= 'most_frequent')),
    # handle_unknown="ignore": a category that was absent from the data the encoder
    # was fitted on (e.g. a rarely played map that only lands in the test split or
    # in one CV fold) is encoded as all zeros instead of raising at transform time.
    ('encoder', OneHotEncoder(handle_unknown= 'ignore', sparse_output= False) )
])

In [44]:
# ColumnTransformer: route each column group through its own preprocessing pipeline.
from sklearn.compose import ColumnTransformer
preprocessor = ColumnTransformer(
    transformers=[
        # Numeric match statistics -> impute + scale.
        (
            "nums_feature",
            num_transform,
            ["ping", "kills", "assists", "deaths", "mvps", "hs_percent", "points"],
        ),
        # Map name -> impute + one-hot encode.
        ("nom_feature", nominal_transform, ["map"]),
    ]
)

In [45]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

# End-to-end model: preprocessing followed by a seeded random forest classifier.
# The name `reg` is kept because later cells refer to it.
reg = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        ("model", RandomForestClassifier(random_state=42)),
    ]
)
reg.fit(x_train, y_train)

In [46]:
# Predict on the held-out rows and print each prediction next to its true label.
y_predict = reg.predict(x_test)
for predicted, actual in zip(y_predict, y_test):
    print(f"Predict Value: {predicted} ---> Actual Value: {actual}")

Predict Value: Win ---> Actual Value: Lost
Predict Value: Win ---> Actual Value: Win
Predict Value: Win ---> Actual Value: Win
Predict Value: Lost ---> Actual Value: Lost
Predict Value: Lost ---> Actual Value: Tie
Predict Value: Lost ---> Actual Value: Lost
Predict Value: Lost ---> Actual Value: Win
Predict Value: Win ---> Actual Value: Win
Predict Value: Lost ---> Actual Value: Lost
Predict Value: Win ---> Actual Value: Win
Predict Value: Lost ---> Actual Value: Lost
Predict Value: Win ---> Actual Value: Win
Predict Value: Win ---> Actual Value: Win
Predict Value: Lost ---> Actual Value: Tie
Predict Value: Win ---> Actual Value: Win
Predict Value: Lost ---> Actual Value: Lost
Predict Value: Lost ---> Actual Value: Lost
Predict Value: Lost ---> Actual Value: Tie
Predict Value: Win ---> Actual Value: Win
Predict Value: Tie ---> Actual Value: Lost
Predict Value: Win ---> Actual Value: Lost
Predict Value: Lost ---> Actual Value: Tie
Predict Value: Lost ---> Actual Value: Tie
Predict Value

In [47]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 of the baseline pipeline on the test split.
print(classification_report(y_true=y_test, y_pred=y_predict))

              precision    recall  f1-score   support

        Lost       0.67      0.77      0.72       105
         Tie       0.40      0.10      0.15        21
         Win       0.76      0.76      0.76       101

    accuracy                           0.70       227
   macro avg       0.61      0.54      0.54       227
weighted avg       0.69      0.70      0.69       227



In [48]:
# Grid search over the numeric imputation strategy and the random-forest
# hyper-parameters of the `reg` pipeline.
from sklearn.model_selection import GridSearchCV
params = {
    # Keys follow the <step>__<sub-step>__<param> naming of the nested pipeline.
    "preprocessor__nums_feature__imputer__strategy" : ["mean", "median"],
    "model__n_estimators" : [100,200,300],
    "model__criterion" : ["gini", "entropy", "log_loss"]
}
# The target has three classes (Win / Lost / Tie). The plain "precision" scorer is
# binary-only: on a multiclass target scoring fails for every candidate, all CV
# scores become NaN, and the search silently falls back to the first grid entry.
# "precision_macro" averages per-class precision, so candidates are actually ranked.
grid_search = GridSearchCV(estimator=reg, param_grid=params, cv=2, scoring= "precision_macro", verbose=2)
grid_search.fit(x_train, y_train)


Fitting 2 folds for each of 18 candidates, totalling 36 fits
[CV] END model__criterion=gini, model__n_estimators=100, preprocessor__nums_feature__imputer__strategy=mean; total time=   0.0s
[CV] END model__criterion=gini, model__n_estimators=100, preprocessor__nums_feature__imputer__strategy=mean; total time=   0.0s
[CV] END model__criterion=gini, model__n_estimators=100, preprocessor__nums_feature__imputer__strategy=median; total time=   0.0s
[CV] END model__criterion=gini, model__n_estimators=100, preprocessor__nums_feature__imputer__strategy=median; total time=   0.1s
[CV] END model__criterion=gini, model__n_estimators=200, preprocessor__nums_feature__imputer__strategy=mean; total time=   0.2s
[CV] END model__criterion=gini, model__n_estimators=200, preprocessor__nums_feature__imputer__strategy=mean; total time=   0.2s
[CV] END model__criterion=gini, model__n_estimators=200, preprocessor__nums_feature__imputer__strategy=median; total time=   0.2s
[CV] END model__criterion=gini, model

In [49]:
# Summarize the search: winning pipeline, its cross-validated score, and its parameters.
for summary_item in (
    grid_search.best_estimator_,
    grid_search.best_score_,
    grid_search.best_params_,
):
    print(summary_item)

# Predict with the refitted best pipeline and list predictions against true labels.
y_predict2 = grid_search.predict(x_test)
for predicted, actual in zip(y_predict2, y_test.values):
    print(f"Predict value : {predicted}---> Actual Value: {actual}")

Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('nums_feature',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer()),
                                                                  ('scaler',
                                                                   StandardScaler())]),
                                                  ['ping', 'kills', 'assists',
                                                   'deaths', 'mvps',
                                                   'hs_percent', 'points']),
                                                 ('nom_feature',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer(strategy='most_frequent')),
                                                                  ('encoder',
                  

In [50]:
# Tuned model report first, then the baseline report, separated by three blank lines.
tuned_report = classification_report(y_test, y_predict2)
baseline_report = classification_report(y_test, y_predict)
print(tuned_report)
print("\n\n")  # two embedded newlines + print's own newline = three blank lines
print(baseline_report)


              precision    recall  f1-score   support

        Lost       0.67      0.77      0.72       105
         Tie       0.40      0.10      0.15        21
         Win       0.76      0.76      0.76       101

    accuracy                           0.70       227
   macro avg       0.61      0.54      0.54       227
weighted avg       0.69      0.70      0.69       227




              precision    recall  f1-score   support

        Lost       0.67      0.77      0.72       105
         Tie       0.40      0.10      0.15        21
         Win       0.76      0.76      0.76       101

    accuracy                           0.70       227
   macro avg       0.61      0.54      0.54       227
weighted avg       0.69      0.70      0.69       227



In [51]:
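# Disabled experiment: LogisticRegression on the same preprocessor, for comparison.
# Note: if re-enabled, this overwrites y_predict2 from the grid-search cell.
# from sklearn.linear_model import LogisticRegression
# reg2 = Pipeline(steps =[
#     ("preprocessor", preprocessor),
#     ("model", LogisticRegression())
# ])
# reg2.fit(x_train, y_train)
# y_predict2 = reg2.predict(x_test)
# from sklearn.metrics import classification_report
# print(classification_report(y_test, y_predict2))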