In [4]:
import pickle
import pathlib

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, f1_score

In [5]:
DATA_DIR = pathlib.Path.cwd().parent / 'data'
print(DATA_DIR)

c:\Users\luis\Documents\6\CSRoundPrediction\data


In [6]:
clean_data_path = DATA_DIR / 'processed' / 'csgo_clean.pkl'

In [7]:
with open(clean_data_path, 'rb') as file:
    data = pickle.load(file)

Checking if the data was properly read.

In [8]:
model_data = data.copy()
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 122410 entries, 0 to 122409
Data columns (total 76 columns):
 #   Column                        Non-Null Count   Dtype  
---  ------                        --------------   -----  
 0   time_left                     122410 non-null  float64
 1   ct_score                      122410 non-null  float64
 2   t_score                       122410 non-null  float64
 3   bomb_planted                  122410 non-null  bool   
 4   ct_health                     122410 non-null  float64
 5   t_health                      122410 non-null  float64
 6   ct_armor                      122410 non-null  float64
 7   t_armor                       122410 non-null  float64
 8   ct_money                      122410 non-null  float64
 9   t_money                       122410 non-null  float64
 10  ct_helmets                    122410 non-null  float64
 11  t_helmets                     122410 non-null  float64
 12  ct_defuse_kits                122410 non-nul

-------------------------------

# Splitting the data

The data will be split into be split into 2 sets: training and testing. The training set will be used to train the model and the testing set will be used to evaluate the model.

In [9]:
X = model_data.drop(columns=['round_winner']).copy()
y = model_data['round_winner'].copy()

Creating the test set with 20% of the data.

In [10]:
x_train_val, x_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

Creating the train and validation sets with 80% of the data.

In [11]:
x_train, x_val, y_train, y_val = train_test_split(x_train_val, y_train_val, test_size=0.2, random_state=42)

In [12]:
x_train.shape, x_val.shape, x_test.shape

((78342, 75), (19586, 75), (24482, 75))

--------------------------------

# Choosing the model

In [15]:
def calculate_model_quality(model,x_val, y_val):
    y_predicted = model.predict(x_val)
    accuracy = accuracy_score(y_val, y_predicted)
    f1 = f1_score(y_val, y_predicted)
    
    print(f'Accuracy: {accuracy:.2f}')
    print(f'F1: {f1:.2f}')
    

### LogisticRegression

In [None]:
# 0.75 But it doesn't converge
from sklearn.linear_model import LogisticRegression
model = LogisticRegression(max_iter=1750
                           )
model.fit(x_train, y_train)
calculate_model_quality(model, x_val, y_val)

### RandomForestClassifier

In [26]:
# 0.87 
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(
    n_estimators=100,
    max_depth=30,
    random_state=42,
    min_samples_split=2,
)

model.fit(x_train, y_train)
calculate_model_quality(model, x_val, y_val)

Accuracy: 0.87
F1: 0.87


### DecisionTreeClassifier

In [None]:
# 0.81
from sklearn import tree

model = tree.DecisionTreeClassifier(
    random_state=42,
    max_depth=60,
)

model.fit(x_train, y_train)
calculate_model_quality(model, x_val, y_val)

### GradientBoostingClassifier

In [None]:
# 0.85
from sklearn.ensemble import GradientBoostingClassifier

model = GradientBoostingClassifier(
    n_estimators=75, 
    learning_rate=0.8, 
    max_depth=30,
    random_state=42
)
model.fit(x_train, y_train)
calculate_model_quality(model, x_val, y_val)

### KNeighborsClassifier

In [None]:
# 0.75
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier(n_neighbors=5)
model.fit(x_train, y_train)
calculate_model_quality(model, x_val, y_val)

### Mini Models!

This model was sugested by the professor, and it's idea is to split the data into several parts. Then, going through chunks of three parts, three models are trained and the majority vote is taken as the final prediction.

The cell bellow is a showcase of the model. Using just 1 split, the data will not be split into chunks, and the result should be the same as the 
[RandomForestClassifier](#randomforestclassifier)

In [25]:
import MiniModels
from sklearn.ensemble import RandomForestClassifier

Splitmodel = MiniModels.MiniModels()
model_used = RandomForestClassifier(
    n_estimators=100,
    max_depth=30,
    random_state=42,
    min_samples_split=2,
)

Splitmodel.fit(x_train, y_train, model_used, n_splits=1)
calculate_model_quality(Splitmodel, x_val, y_val)

Self.parts:
Part 0 size: (78342, 76), time_left: 175.0 - 0.03
Accuracy: 0.87
F1: 0.87


In [None]:
import MiniModels
from sklearn.ensemble import RandomForestClassifier

Splitmodel = MiniModels.MiniModels()
# model_used = RandomForestClassifier(random_state=42, max_depth=25, n_estimators=100)
model_used = RandomForestClassifier(
    n_estimators=100,
    max_depth=30,
    random_state=42,
)

Splitmodel.fit(x_train, y_train, model_used, 5)
calculate_model_quality(Splitmodel, x_val, y_val)

In [None]:
import MiniModels
from sklearn import tree

Splitmodel = MiniModels.MiniModels()
model_used = tree.DecisionTreeClassifier(random_state=42, max_depth=20)

Splitmodel.fit(x_train, y_train, model_used, 5)
calculate_model_quality(Splitmodel, x_val, y_val)

In [None]:
import MiniModels
from sklearn.ensemble import GradientBoostingClassifier

Splitmodel = MiniModels.MiniModels()
model_used = GradientBoostingClassifier(
    n_estimators=75, 
    learning_rate=0.8, 
    max_depth=30,
    random_state=42
)

Splitmodel.fit(x_train, y_train, model_used, 5)
calculate_model_quality(Splitmodel, x_val, y_val)

--------------------------------

In [None]:
# 0.87 
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(
    n_estimators=100,
    max_depth=30,
    random_state=42,
    min_samples_split=2,
)

model.fit(x_train, y_train)
calculate_model_quality(model, x_val, y_val)

-----

# Fine tuning with GridSearchCV

In [13]:
from sklearn.ensemble import RandomForestClassifier

from sklearn.model_selection import GridSearchCV

model = RandomForestClassifier(random_state=42)

param_grid = {
    'n_estimators': [50, 100, 200, 300],
    'max_depth': [10, 20, 30, 50],
    'min_samples_split': [2, 5, 10],
}

grid_search = GridSearchCV(model, param_grid, cv=5, n_jobs=-1, verbose=2)
grid_search.fit(x_train_val, y_train_val)

print(grid_search.best_params_)
print(grid_search.best_score_)

Fitting 5 folds for each of 48 candidates, totalling 240 fits
{'max_depth': 50, 'min_samples_split': 2, 'n_estimators': 300}
0.8706396765874642


# Certification

Training and evaluating the best model with the test set.

In [16]:
# 0.87 
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(
    n_estimators=300,
    max_depth=50,
    random_state=42,
    min_samples_split=2,
)

model.fit(x_train_val, y_train_val)
calculate_model_quality(model, x_test, y_test)

Accuracy: 0.88
F1: 0.88


Evualating model quality using a DummyClassifier.

In [17]:
# Most frequent class
most_frequent_class = y_train_val.value_counts()
print(most_frequent_class) # Print the Distribution of the classes

# Get the most frequent class
most_frequent_class = most_frequent_class.idxmax()

# Create an array with the same shape as y_test and fill it with the most frequent class
y_predicted = np.full_like(y_test, fill_value=most_frequent_class)

# Calculate the accuracy
accuracy = accuracy_score(y_test, y_predicted)
print()
print(f'Accuracy: {accuracy:.2f}')

round_winner
0    49928
1    48000
Name: count, dtype: int64

Accuracy: 0.51


Model was certified with 0.88 accuracy and beat the DummyClassifier.

----

# Preparing the Deployment

Using the elected model, the data will be trained again, but now with the full dataset.

In [None]:
model = RandomForestClassifier(
    n_estimators=100,
    max_depth=30,
    random_state=42,
    min_samples_split=2,
)

model.fit(X, y) # Train the model with the full dataset

In [None]:
# Save the model
model_path = DATA_DIR / 'models' / 'csgo_model.pkl'

with open(model_path, 'wb') as file:
    pickle.dump(model, file)

------

# Deployment Example

If you want to test just the deployment, run the cell bellow to import the necessary libraries and functions.

In [1]:
import pickle
import pathlib
import pandas as pd
DATA_DIR = pathlib.Path.cwd().parent / 'data'
model_path = DATA_DIR / 'models' / 'csgo_model.pkl'

The cell bellow is necessary to create auxiliar functions and classes that will be used to organize the data before making the prediction.

In [2]:
class MatchData:
    def __init__(self, time_left, ct_score, t_score, map, bomb_planted, ct_defuse_kits, ct_health, t_health, ct_armor, t_armor, ct_helmets, t_helmets, ct_money, t_money, ct_players_alive, t_players_alive, ct_weapons, t_weapons):
        self.time_left = time_left
        self.ct_score = ct_score
        self.t_score = t_score
        self.map = map
        self.bomb_planted = bomb_planted
        self.ct_defuse_kits = ct_defuse_kits
        self.ct_health = ct_health
        self.t_health = t_health
        self.ct_armor = ct_armor
        self.t_armor = t_armor
        self.ct_helmets = ct_helmets
        self.t_helmets = t_helmets
        self.ct_money = ct_money
        self.t_money = t_money
        self.ct_players_alive = ct_players_alive
        self.t_players_alive = t_players_alive
        self.ct_weapons = ct_weapons
        self.t_weapons = t_weapons

# List of all possible features
ALL_FEATURES = [
    "time_left", "ct_score", "t_score", "bomb_planted", "ct_health", "t_health", "ct_armor", "t_armor", "ct_money", "t_money", "ct_helmets", "t_helmets",
    "ct_defuse_kits", "ct_players_alive", "t_players_alive", "ct_weapon_ak47", "t_weapon_ak47", "ct_weapon_aug", "t_weapon_aug", "ct_weapon_awp", "t_weapon_awp", "ct_weapon_cz75auto", "t_weapon_cz75auto", "ct_weapon_famas",
    "t_weapon_famas", "ct_weapon_galilar", "t_weapon_galilar", "ct_weapon_glock", "t_weapon_glock", "ct_weapon_m4a1s", "ct_weapon_m4a4", "t_weapon_m4a4", "ct_weapon_mac10", "t_weapon_mac10", "ct_weapon_mag7", "ct_weapon_mp9",
    "t_weapon_mp9", "ct_weapon_sg553", "t_weapon_sg553", "ct_weapon_ssg08", "t_weapon_ssg08", "ct_weapon_ump45", "t_weapon_ump45", "ct_weapon_xm1014", "ct_weapon_deagle", "t_weapon_deagle", "ct_weapon_fiveseven", "t_weapon_fiveseven", "ct_weapon_usps", "t_weapon_usps", "ct_weapon_p250",
    "t_weapon_p250", "ct_weapon_p2000", "t_weapon_p2000", "ct_weapon_tec9", "t_weapon_tec9", "ct_grenade_hegrenade", "t_grenade_hegrenade", "ct_grenade_flashbang", "t_grenade_flashbang", "ct_grenade_smokegrenade",
    "t_grenade_smokegrenade", "ct_grenade_incendiarygrenade", "t_grenade_incendiarygrenade", "ct_grenade_molotovgrenade", "t_grenade_molotovgrenade", "ct_grenade_decoygrenade", "t_grenade_decoygrenade", "de_dust2", "de_inferno", "de_mirage",
    "de_nuke", "de_overpass", "de_train", "de_vertigo"
]

def match_data_to_dataframe(features: MatchData) -> pd.DataFrame:
    # Create a DataFrame using the ALL_FEATURES list as columns. Fill the DataFrame with zeros
    data = pd.DataFrame(0, index=[0], columns=ALL_FEATURES)

    # Set the values of the features that we know
    data["time_left"] = features.time_left
    data["ct_score"] = features.ct_score
    data["t_score"] = features.t_score
    data["bomb_planted"] = features.bomb_planted
    data["ct_defuse_kits"] = features.ct_defuse_kits
    data["ct_health"] = features.ct_health
    data["t_health"] = features.t_health
    data["ct_armor"] = features.ct_armor
    data["t_armor"] = features.t_armor
    data["ct_helmets"] = features.ct_helmets
    data["t_helmets"] = features.t_helmets
    data["ct_money"] = features.ct_money
    data["t_money"] = features.t_money
    data["ct_players_alive"] = features.ct_players_alive
    data["t_players_alive"] = features.t_players_alive

    # Iterate through the weapons and add 1 to the corresponding column
    for weapon in features.ct_weapons:
        if weapon in data.columns:
            data[weapon] += 1

    for weapon in features.t_weapons:
        if weapon in data.columns:
            data[weapon] += 1

    return data

In [3]:
# Load the pickel model
with open(model_path, 'rb') as file:
    model = pickle.load(file)

For the example, we will use the Grand Final Match of the 2020 Counter Strike Global Offensive Major. 
Which can be found [here](https://www.youtube.com/watch?v=NOuvxSHu74o)

Let's predict the winner of one round of the match. We will calculate the winner on 3 moments of the round, and check how the model performs.

For the first Scene we have the following situation:

<img src="../imgs/firstScene.png" alt="First Scene" style="max-width: 60%; height: auto;">


This scene was captured at the begginning of the round, when the players are still in their spawn areas.

In [4]:
features = MatchData(
    time_left=90,
    ct_score=8,
    t_score=0,
    map="de_nuke",
    bomb_planted=False,
    ct_defuse_kits=5,
    ct_health=500, # 5 players with 100 health
    t_health=500, # 5 players with 100 health
    ct_armor=500, # 5 players with 100 armor
    t_armor=500,
    ct_helmets=5, # All players have helmets 
    t_helmets=5,
    ct_money=29950,
    t_money=5100,
    ct_players_alive=5,
    t_players_alive=5,
    ct_weapons=["ct_weapon_m4a4","ct_weapon_m4a4","ct_weapon_ak47", "ct_weapon_m4a4", "ct_weapon_awp"], # 3 M4A4, 1 AK47, 1 AWP 
    t_weapons=["t_weapon_ak47","t_weapon_awp","t_weapon_ak47", "t_weapon_ak47", "t_weapon_sg553"]  # 3 AK47, 1 AWP, 1 SG553  
                                                                                                    # These are the main weapons that the players are using in this moment of the round
)

df = match_data_to_dataframe(features)

# Predict the outcome of the round
prediction = model.predict(df)
print("Terrrorists win" if prediction[0] == 0 else "Counter-Teorrist win")


Terrrorists win


Now, let's predict the winner of the same round, but in a different moment. At this moment the players are already in the bombsite, and the bomb is being planted. There was already losses on both sides.

For the second Scene we have the following situation:

<img src="../imgs/secondScene.png" alt="Second Scene" style="max-width: 60%; height: auto;">

In [7]:
features2 = MatchData(
    time_left=40, # 40 seconds left, bomb was just planted 
    ct_score=8,
    t_score=0,
    map="de_nuke",
    bomb_planted=True,
    ct_defuse_kits=3,
    ct_health=300, # 3 players with 100 health
    t_health=11+39+100+88, # Players already took some damage, thus have different health values
    ct_armor=300, # 3 players with 100 armor
    t_armor=400, # 4 players with 100 armor
    ct_helmets=3, # 3 players left with helmets 
    t_helmets=5,
    ct_money=29950,
    t_money=5100,
    ct_players_alive=4,
    t_players_alive=3,
    ct_weapons=["ct_weapon_m4a4","ct_weapon_ak47","ct_weapon_m4a4"], 
    t_weapons=["t_weapon_ak47","t_weapon_awp", "t_weapon_ak47", "t_weapon_sg553"]

)

df = match_data_to_dataframe(features2)

# Predict the outcome of the round
prediction = model.predict(df)
print("Terrrorists win" if prediction[0] == 0 else "Counter-Teorrist win")


Terrrorists win


In the cenario above, the model again predicted the winner of the round as the T side.

Now for the third and final scene, we have the following situation:

<img src="../imgs/thirdScene.png" alt="Third Scene" style="max-width: 60%; height: auto;">

In [8]:
features3 = MatchData(
    time_left=20, # 
    ct_score=8,
    t_score=0,
    map="de_nuke",
    bomb_planted=True,
    ct_defuse_kits=3,
    ct_health=8+100+100, # 3 players with 100 health
    t_health=11+39+88, # Players already took some damage, thus have different health values
    ct_armor=300, # 3 players with 100 armor
    t_armor=300, # 3 players with 100 armor
    ct_helmets=3, # 3 players left with helmets 
    t_helmets=3,
    ct_money=29950,
    t_money=5100,
    ct_players_alive=3,
    t_players_alive=3,
    ct_weapons=["ct_weapon_m4a4","ct_weapon_ak47","ct_weapon_m4a4"], 
    t_weapons=["t_weapon_ak47","t_weapon_awp", "t_weapon_ak47", "t_weapon_sg553"]

)

df = match_data_to_dataframe(features3)

# Predict the outcome of the round
prediction = model.predict(df)
print("Terrrorists win" if prediction[0] == 0 else "Counter-Teorrist win")


Terrrorists win


The result of the round was the same as the model predicted. The T side won the round.

<img src="../imgs/result.png" alt="Third Scene" style="max-width: 60%; height: auto;">

The result was consistent with the previous predictions, and the model predicted the T side ( Ilustrated as the orange side in the image) as the winner of the round. The last two scenes make clear the advantage of the T side at the moment of the prediction. So the model was able to predict the winner of the round with a good accuracy.

For a final analyse of the model, let`s check another map of the same match (The finals are decided on a best of 5 maps). The second map of the match was played on the map dust 2. The image below ilustrates the situation of the round at the moment of the prediction.

<img src="../imgs/otherRound.png" alt="Inferno" style="max-width: 60%; height: auto;">

In [None]:
features4 = MatchData(
    time_left=50, # 
    ct_score=5,
    t_score=2,
    map="de_dust2",
    bomb_planted=False,
    ct_defuse_kits=0,
    ct_health=100+100+100, # 3 players with 100 health
    t_health=100+100, # 2 players with 100 health
    ct_armor=300, # 3 players with 100 armor
    t_armor=200, # 2 players with 100 armor
    ct_helmets=3, # 3 players left with helmets 
    t_helmets=2,
    ct_money=400+450+200,
    t_money=0+1400,
    ct_players_alive=3,
    t_players_alive=2,
    ct_weapons=["ct_weapon_m4a4","ct_weapon_m4a4","ct_weapon_awp"], # These are the main weapons that the players are using in this moment of the round
    t_weapons=["t_weapon_galilar","t_weapon_awp"] # These are the main weapons that the players are using in this moment of the round

)

df = match_data_to_dataframe(features4)

# Predict the outcome of the round
prediction = model.predict(df)
print("Terrrorists win" if prediction[0] == 0 else "Counter-Teorrist win")


Counter-Teorrist win


The prediction of Counter Terrorist side as the winner of the round was correct. The result of the round was the same as the model predicted and it's ilustrated on the image below (Showing the Counter-Terrorist as the blue side).

<img src="../imgs/End.png" alt="Inferno" style="max-width: 60%; height: auto;">

For more prediction use this [website](https://prediction.fernandoa.dev/), which shows a dashboard allows the user to make a prediction of a Round.

# Final Consideration:

The research conducted by the students represents a comprehensive investigation into the dynamics of Counter Strike: Global Offensive (CS:GO). The students successfully identified and eliminated irrelevant features and were able to find and train a model to predict the winner of a Round.

The trained model got satisfactory results, suggesting that it can effectively predict specific outcomes based on game behaviors. This indicates that the features chosen and the model configuration were aligned with the underlying patterns of the game, enabling a reasonable level of accuracy in its predictions.

While the model’s performance is promising, it should be noted that the dynamic nature of human interaction in games like CS:GO means that real-world results may vary from the model's predictions.
Counter Strike, being a human-played game, involves complexities and unpredictable behaviors that may not always be captured accurately by a machine learning model. Human players often introduce elements of randomness, which can differ from the scenarios the model was trained on.