In [105]:
import pickle
import pathlib

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import root_mean_squared_error

In [106]:
DATA_DIR = pathlib.Path.cwd().parent / 'data'
print(DATA_DIR)

/home/fernando/6/machinelearning/machine_learning/CSRoundPrediction/data


In [107]:
clean_data_path = DATA_DIR / 'processed' / 'csgo_clean.pkl'

In [108]:
with open(clean_data_path, 'rb') as file:
    data = pickle.load(file)

In [109]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 122410 entries, 0 to 122409
Data columns (total 70 columns):
 #   Column                        Non-Null Count   Dtype   
---  ------                        --------------   -----   
 0   time_left                     122410 non-null  float64 
 1   ct_score                      122410 non-null  float64 
 2   t_score                       122410 non-null  float64 
 3   map                           122410 non-null  category
 4   bomb_planted                  122410 non-null  bool    
 5   ct_health                     122410 non-null  float64 
 6   t_health                      122410 non-null  float64 
 7   ct_armor                      122410 non-null  float64 
 8   t_armor                       122410 non-null  float64 
 9   ct_money                      122410 non-null  float64 
 10  t_money                       122410 non-null  float64 
 11  ct_helmets                    122410 non-null  float64 
 12  t_helmets                     

In [110]:
model_data = data.copy()

In [111]:
categorical_columns = ['map']

## Encoding Categorical Variable

In [112]:
model_data['map'].value_counts()

map
de_inferno     23811
de_dust2       22144
de_nuke        19025
de_mirage      18576
de_overpass    14081
de_train       13491
de_vertigo     11137
de_cache         145
Name: count, dtype: int64

In [113]:
original_data = model_data['map']
encoded_data = pd.get_dummies(original_data, drop_first=True)

aux_dataframe = encoded_data
aux_dataframe['map'] = original_data.copy()

aux_dataframe.head().transpose()

Unnamed: 0,0,1,2,3,4
de_dust2,True,True,True,True,True
de_inferno,False,False,False,False,False
de_mirage,False,False,False,False,False
de_nuke,False,False,False,False,False
de_overpass,False,False,False,False,False
de_train,False,False,False,False,False
de_vertigo,False,False,False,False,False
map,de_dust2,de_dust2,de_dust2,de_dust2,de_dust2


In [114]:
model_data = pd.get_dummies(model_data, drop_first=True)

In [116]:
model_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 122410 entries, 0 to 122409
Data columns (total 76 columns):
 #   Column                        Non-Null Count   Dtype  
---  ------                        --------------   -----  
 0   time_left                     122410 non-null  float64
 1   ct_score                      122410 non-null  float64
 2   t_score                       122410 non-null  float64
 3   bomb_planted                  122410 non-null  bool   
 4   ct_health                     122410 non-null  float64
 5   t_health                      122410 non-null  float64
 6   ct_armor                      122410 non-null  float64
 7   t_armor                       122410 non-null  float64
 8   ct_money                      122410 non-null  float64
 9   t_money                       122410 non-null  float64
 10  ct_helmets                    122410 non-null  float64
 11  t_helmets                     122410 non-null  float64
 12  ct_defuse_kits                122410 non-nul

## Train test split

In [169]:
X = model_data.drop(columns=['round_winner']).copy()
y = model_data['round_winner'].copy()

In [170]:
X.values, y.values

(array([[175.0, 0.0, 0.0, ..., False, False, False],
        [156.03, 0.0, 0.0, ..., False, False, False],
        [96.03, 0.0, 0.0, ..., False, False, False],
        ...,
        [114.93, 11.0, 15.0, ..., False, True, False],
        [94.93, 11.0, 15.0, ..., False, True, False],
        [74.93, 11.0, 15.0, ..., False, True, False]], dtype=object),
 array([1, 1, 1, ..., 0, 0, 0]))

In [184]:
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=69,
)


In [185]:
X.shape, Xtrain.shape, Xtest.shape

((122410, 75), (91807, 75), (30603, 75))

In [186]:
y.shape, ytrain.shape, ytest.shape

((122410,), (91807,), (30603,))

In [187]:
model = LinearRegression()

model.fit(Xtrain, ytrain)

In [179]:
# Random forest
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(
    n_estimators=100,
    max_depth=5,
    random_state=69,
)

model.fit(Xtrain, ytrain)

In [132]:
# Gradient descent
from sklearn.linear_model import SGDClassifier

model = SGDClassifier()

model.fit(Xtrain, ytrain)

In [137]:
# Gradient boosting
from sklearn.ensemble import GradientBoostingClassifier

model = GradientBoostingClassifier()

model.fit(Xtrain, ytrain)

In [142]:
from sklearn.neural_network import MLPClassifier

model = MLPClassifier()

model.fit(Xtrain, ytrain)

In [147]:
from sklearn import tree

model = tree.DecisionTreeClassifier()

model.fit(Xtrain, ytrain)

In [152]:
from sklearn import svm

model = svm.SVC()

model.fit(Xtrain, ytrain)

In [188]:
ypred = model.predict(Xtest)

In [189]:
RMSE = root_mean_squared_error(ytest, ypred)

In [190]:
RMSE

np.float64(0.4038709867244479)

In [191]:
error_percent = 100 * (10**RMSE - 1)
print(f'Average error is {error_percent:.2f}%')

Average error is 153.44%
