In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier


# import ydata_profiling  as ProfileReport


In [2]:
# Load the dataset from the Datasets directory (path is relative to the
# notebook's working directory). A forward slash is used as the separator:
# the previous backslash form depended on an unrecognised escape sequence
# being passed through literally and only resolved on Windows, whereas
# a forward slash is accepted on Windows, Linux and macOS alike.
df = pd.read_csv('Datasets/csgo.csv')
# Optional profiling report (disabled):
# profile = ProfileReport.ProfileReport(df, title='Pandas Profiling Report', explorative=True)
# profile.to_file("csgo.html")

In [3]:
# Columns that are not used as model features.
unused_columns = ['date', 'month', 'year', 'wait_time_s']

# Remove them; `drop` returns a new frame, which is rebound to `df`.
# NOTE: re-running this cell on the already-reduced frame raises KeyError.
df = df.drop(labels=unused_columns, axis='columns')

In [4]:
# Index of all object-dtype (string-like) column names in the frame.
categorical_columns = df.select_dtypes(include='object').columns

# Show the distinct values of the two categorical columns of interest.
for col in ('map', 'result'):
    distinct_values = df[col].unique()
    print(col, distinct_values)

map ['Mirage' 'Dust II' 'Cache' 'Overpass' 'Cobblestone' 'Inferno' 'Austria'
 'Canals' 'Nuke' 'Italy']
result ['Win' 'Lost' 'Tie']


In [5]:
# Alternative numeric encoding of the target, currently disabled:
# df['result'] = df['result'].map({'Win': 1, 'Tie': 0, 'Lost': -1})

# One-hot encode `map`: the original column is replaced by one indicator
# column per map name (map_<name>).
one_hot_columns = ['map']
df = pd.get_dummies(data=df, columns=one_hot_columns)


In [6]:
# plt.figure(figsize=(15,15))
# sns.heatmap(df.corr(), annot=True, fmt=".2f")

In [7]:
## Inspect the prepared frame (column names, non-null counts, dtypes).
## This is the full dataset: the train/test split happens in a later cell.

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1133 entries, 0 to 1132
Data columns (total 22 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   day              1133 non-null   float64
 1   match_time_s     1133 non-null   float64
 2   team_a_rounds    1133 non-null   float64
 3   team_b_rounds    1133 non-null   float64
 4   ping             1133 non-null   float64
 5   kills            1133 non-null   float64
 6   assists          1133 non-null   float64
 7   deaths           1133 non-null   float64
 8   mvps             1133 non-null   float64
 9   hs_percent       1133 non-null   float64
 10  points           1133 non-null   float64
 11  result           1133 non-null   object 
 12  map_Austria      1133 non-null   bool   
 13  map_Cache        1133 non-null   bool   
 14  map_Canals       1133 non-null   bool   
 15  map_Cobblestone  1133 non-null   bool   
 16  map_Dust II      1133 non-null   bool   
 17  map_Inferno   

In [8]:
# Separate the label column from the feature matrix.
target = 'result'
y = df[target]                   # labels
X = df.drop(columns=[target])    # every remaining column is a feature

In [9]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
# Sanity check: print the shapes of the four split parts on one line.
print(*(part.shape for part in (x_train, x_test, y_train, y_test)))

(906, 21) (227, 21) (906,) (227,)


In [11]:
# Standardise features. The scaler learns its statistics from the training
# split only and is then applied unchanged to both splits.
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# Choose model

## Logistic Regression

In [12]:
# params = {
#     "penalty": ['l1', 'l2'],
#     "C": [0.1, 0.5, 1, 5, 10],
#     "solver": ['liblinear'],
#     "max_iter": [100, 200, 300],
#     "class_weight": ['balanced', None],
#     "random_state": [100]
# }
# model = GridSearchCV(LogisticRegression(random_state=100),param_grid=params, scoring = 'accuracy', cv=6, verbose=2, n_jobs= -1)
# model.fit(x_train, y_train)

# y_pred = model.predict(x_test)

# print(classification_report(y_test, y_pred))

## SVM

In [13]:
# model = SVC()
# model.fit(x_train, y_train)
# y_pred = model.predict(x_test)

# print(classification_report(y_test, y_pred))

## Random Forest

In [14]:
# Hyper-parameter grid for the random forest: 6 x 2 x 5 x 1 = 60 candidates.
params = dict(
    n_estimators=[50, 100, 200, 300, 400, 500],
    criterion=['gini', 'entropy'],
    max_depth=[10, 20, 30, 40, 50],
    random_state=[100],  # single value: fixes the forest's seed for every candidate
)

# Exhaustive search with 6-fold cross-validation, scored by accuracy, using
# all available cores. `fit` returns the fitted search object itself.
model = GridSearchCV(
    estimator=RandomForestClassifier(),
    param_grid=params,
    scoring='accuracy',
    cv=6,
    verbose=2,
    n_jobs=-1,
).fit(x_train, y_train)

# Predict on the held-out split and report per-class metrics.
y_pred = model.predict(x_test)
print(classification_report(y_test, y_pred))

Fitting 6 folds for each of 60 candidates, totalling 360 fits


              precision    recall  f1-score   support

        Lost       0.80      0.82      0.81       105
         Tie       1.00      1.00      1.00        21
         Win       0.81      0.78      0.79       101

    accuracy                           0.82       227
   macro avg       0.87      0.87      0.87       227
weighted avg       0.82      0.82      0.82       227



In [15]:
# Best parameter combination found by the search and its cross-validated score.
best_params, best_score = model.best_params_, model.best_score_
print(best_params, best_score)

{'criterion': 'entropy', 'max_depth': 30, 'n_estimators': 200, 'random_state': 100} 0.8123620309050773


In [16]:
# Confusion matrix on the test split: rows are true labels, columns are
# predicted labels.
test_confusion = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(test_confusion)

[[86  0 19]
 [ 0 21  0]
 [22  0 79]]
